libidn/idna.c

   1 /* idna.c       Convert to or from IDN strings.
   2  * Copyright (C) 2002, 2003, 2004, 2011  Simon Josefsson
   3  *
   4  * This file is part of GNU Libidn.
   5  *
   6  * GNU Libidn is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * GNU Libidn is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with GNU Libidn; if not, see <http://www.gnu.org/licenses/>.
  18  */
  19
  20 #if HAVE_CONFIG_H
  21 # include "config.h"
  22 #endif
  23
  24 #include <stdlib.h>
  25 #include <string.h>
  26 #include <stringprep.h>
  27 #include <punycode.h>
  28 #include <stdint.h>
  29
  30 #include "idna.h"
  31
  32 #define DOTP(c) ((c) == 0x002E || (c) == 0x3002 ||      \
  33                  (c) == 0xFF0E || (c) == 0xFF61)
  34
  35 /* Core functions */
  36
  37 /**
  38  * idna_to_ascii_4i
  39  * @in: input array with unicode code points.
  40  * @inlen: length of input array with unicode code points.
  41  * @out: output zero terminated string that must have room for at
  42  *       least 63 characters plus the terminating zero.
  43  * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
  44  *
  45  * The ToASCII operation takes a sequence of Unicode code points that make
  46  * up one label and transforms it into a sequence of code points in the
  47  * ASCII range (0..7F). If ToASCII succeeds, the original sequence and the
  48  * resulting sequence are equivalent labels.
  49  *
  50  * It is important to note that the ToASCII operation can fail. ToASCII
  51  * fails if any step of it fails. If any step of the ToASCII operation
  52  * fails on any label in a domain name, that domain name MUST NOT be used
  53  * as an internationalized domain name. The method for deadling with this
  54  * failure is application-specific.
  55  *
  56  * The inputs to ToASCII are a sequence of code points, the AllowUnassigned
  57  * flag, and the UseSTD3ASCIIRules flag. The output of ToASCII is either a
  58  * sequence of ASCII code points or a failure condition.
  59  *
  60  * ToASCII never alters a sequence of code points that are all in the ASCII
  61  * range to begin with (although it could fail). Applying the ToASCII
  62  * operation multiple times has exactly the same effect as applying it just
  63  * once.
  64  *
  65  * Return value: Returns 0 on success, or an error code.
  66  */
  67 int
  68 idna_to_ascii_4i (const uint32_t * in, size_t inlen, char *out, int flags)
  69 {
  70   size_t len, outlen;
  71   uint32_t *src;                /* XXX don't need to copy data? */
  72   int rc;
  73
  74   /*
  75    * ToASCII consists of the following steps:
  76    *
  77    * 1. If all code points in the sequence are in the ASCII range (0..7F)
  78    * then skip to step 3.
  79    */
  80
  81   {
  82     size_t i;
  83     int inasciirange;
  84
  85     inasciirange = 1;
  86     for (i = 0; i < inlen; i++)
  87       if (in[i] > 0x7F)
  88         inasciirange = 0;
  89     if (inasciirange)
  90       {
  91         src = malloc (sizeof (in[0]) * (inlen + 1));
  92         if (src == NULL)
  93           return IDNA_MALLOC_ERROR;
  94
  95         memcpy (src, in, sizeof (in[0]) * inlen);
  96         src[inlen] = 0;
  97
  98         goto step3;
  99       }
 100   }
 101
 102   /*
 103    * 2. Perform the steps specified in [NAMEPREP] and fail if there is
 104    * an error. The AllowUnassigned flag is used in [NAMEPREP].
 105    */
 106
 107   {
 108     char *p;
 109
 110     p = stringprep_ucs4_to_utf8 (in, inlen, NULL, NULL);
 111     if (p == NULL)
 112       return IDNA_MALLOC_ERROR;
 113
 114     len = strlen (p);
 115     do
 116       {
 117         char *newp;
 118
 119         len = 2 * len + 10;     /* XXX better guess? */
 120         newp = realloc (p, len);
 121         if (newp == NULL)
 122           {
 123             free (p);
 124             return IDNA_MALLOC_ERROR;
 125           }
 126         p = newp;
 127
 128         if (flags & IDNA_ALLOW_UNASSIGNED)
 129           rc = stringprep_nameprep (p, len);
 130         else
 131           rc = stringprep_nameprep_no_unassigned (p, len);
 132       }
 133     while (rc == STRINGPREP_TOO_SMALL_BUFFER);
 134
 135     if (rc != STRINGPREP_OK)
 136       {
 137         free (p);
 138         return IDNA_STRINGPREP_ERROR;
 139       }
 140
 141     src = stringprep_utf8_to_ucs4 (p, -1, NULL);
 142
 143     free (p);
 144   }
 145
 146 step3:
 147   /*
 148    * 3. If the UseSTD3ASCIIRules flag is set, then perform these checks:
 149    *
 150    * (a) Verify the absence of non-LDH ASCII code points; that is,
 151    * the absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
 152    *
 153    * (b) Verify the absence of leading and trailing hyphen-minus;
 154    * that is, the absence of U+002D at the beginning and end of
 155    * the sequence.
 156    */
 157
 158   if (flags & IDNA_USE_STD3_ASCII_RULES)
 159     {
 160       size_t i;
 161
 162       for (i = 0; src[i]; i++)
 163         if (src[i] <= 0x2C || src[i] == 0x2E || src[i] == 0x2F ||
 164             (src[i] >= 0x3A && src[i] <= 0x40) ||
 165             (src[i] >= 0x5B && src[i] <= 0x60) ||
 166             (src[i] >= 0x7B && src[i] <= 0x7F))
 167           {
 168             free (src);
 169             return IDNA_CONTAINS_NON_LDH;
 170           }
 171
 172       if (src[0] == 0x002D || (i > 0 && src[i - 1] == 0x002D))
 173         {
 174           free (src);
 175           return IDNA_CONTAINS_MINUS;
 176         }
 177     }
 178
 179   /*
 180    * 4. If all code points in the sequence are in the ASCII range
 181    * (0..7F), then skip to step 8.
 182    */
 183
 184   {
 185     size_t i;
 186     int inasciirange;
 187
 188     inasciirange = 1;
 189     for (i = 0; src[i]; i++)
 190       {
 191         if (src[i] > 0x7F)
 192           inasciirange = 0;
 193         /* copy string to output buffer if we are about to skip to step8 */
 194         if (i < 64)
 195           out[i] = src[i];
 196       }
 197     if (i < 64)
 198       out[i] = '\0';
 199     if (inasciirange)
 200       goto step8;
 201   }
 202
 203   /*
 204    * 5. Verify that the sequence does NOT begin with the ACE prefix.
 205    *
 206    */
 207
 208   {
 209     size_t i;
 210     int match;
 211
 212     match = 1;
 213     for (i = 0; match && i < strlen (IDNA_ACE_PREFIX); i++)
 214       if (((uint32_t) IDNA_ACE_PREFIX[i] & 0xFF) != src[i])
 215         match = 0;
 216     if (match)
 217       {
 218         free (src);
 219         return IDNA_CONTAINS_ACE_PREFIX;
 220       }
 221   }
 222
 223   /*
 224    * 6. Encode the sequence using the encoding algorithm in [PUNYCODE]
 225    * and fail if there is an error.
 226    */
 227   for (len = 0; src[len]; len++)
 228     ;
 229   src[len] = '\0';
 230   outlen = 63 - strlen (IDNA_ACE_PREFIX);
 231   rc = punycode_encode (len, src, NULL,
 232                         &outlen, &out[strlen (IDNA_ACE_PREFIX)]);
 233   if (rc != PUNYCODE_SUCCESS)
 234     {
 235       free (src);
 236       return IDNA_PUNYCODE_ERROR;
 237     }
 238   out[strlen (IDNA_ACE_PREFIX) + outlen] = '\0';
 239
 240   /*
 241    * 7. Prepend the ACE prefix.
 242    */
 243
 244   memcpy (out, IDNA_ACE_PREFIX, strlen (IDNA_ACE_PREFIX));
 245
 246   /*
 247    * 8. Verify that the number of code points is in the range 1 to 63
 248    * inclusive (0 is excluded).
 249    */
 250
 251 step8:
 252   free (src);
 253   if (strlen (out) < 1 || strlen (out) > 63)
 254     return IDNA_INVALID_LENGTH;
 255
 256   return IDNA_SUCCESS;
 257 }
 258
 259 /* ToUnicode().  May realloc() utf8in. */
 260 static int
 261 idna_to_unicode_internal (char *utf8in,
 262                           uint32_t * out, size_t * outlen, int flags)
 263 {
 264   int rc;
 265   char tmpout[64];
 266   size_t utf8len = strlen (utf8in) + 1;
 267   size_t addlen = 0;
 268
 269   /*
 270    * ToUnicode consists of the following steps:
 271    *
 272    * 1. If the sequence contains any code points outside the ASCII range
 273    * (0..7F) then proceed to step 2, otherwise skip to step 3.
 274    */
 275
 276   {
 277     size_t i;
 278     int inasciirange;
 279
 280     inasciirange = 1;
 281     for (i = 0; utf8in[i]; i++)
 282       if (utf8in[i] & ~0x7F)
 283         inasciirange = 0;
 284     if (inasciirange)
 285       goto step3;
 286   }
 287
 288   /*
 289    * 2. Perform the steps specified in [NAMEPREP] and fail if there is an
 290    * error. (If step 3 of ToASCII is also performed here, it will not
 291    * affect the overall behavior of ToUnicode, but it is not
 292    * necessary.) The AllowUnassigned flag is used in [NAMEPREP].
 293    */
 294   do
 295     {
 296       char *newp = realloc (utf8in, utf8len + addlen);
 297       if (newp == NULL)
 298         {
 299           free (utf8in);
 300           return IDNA_MALLOC_ERROR;
 301         }
 302       utf8in = newp;
 303       if (flags & IDNA_ALLOW_UNASSIGNED)
 304         rc = stringprep_nameprep (utf8in, utf8len + addlen);
 305       else
 306         rc = stringprep_nameprep_no_unassigned (utf8in, utf8len + addlen);
 307       addlen += 1;
 308     }
 309   while (rc == STRINGPREP_TOO_SMALL_BUFFER);
 310
 311   if (rc != STRINGPREP_OK)
 312     {
 313       free (utf8in);
 314       return IDNA_STRINGPREP_ERROR;
 315     }
 316
 317   /* 3. Verify that the sequence begins with the ACE prefix, and save a
 318    * copy of the sequence.
 319    */
 320
 321 step3:
 322   if (memcmp (IDNA_ACE_PREFIX, utf8in, strlen (IDNA_ACE_PREFIX)) != 0)
 323     {
 324       free (utf8in);
 325       return IDNA_NO_ACE_PREFIX;
 326     }
 327
 328   /* 4. Remove the ACE prefix.
 329    */
 330
 331   memmove (utf8in, &utf8in[strlen (IDNA_ACE_PREFIX)],
 332            strlen (utf8in) - strlen (IDNA_ACE_PREFIX) + 1);
 333
 334   /* 5. Decode the sequence using the decoding algorithm in [PUNYCODE]
 335    * and fail if there is an error. Save a copy of the result of
 336    * this step.
 337    */
 338
 339   (*outlen)--;                  /* reserve one for the zero */
 340
 341   rc = punycode_decode (strlen (utf8in), utf8in, outlen, out, NULL);
 342   if (rc != PUNYCODE_SUCCESS)
 343     {
 344       free (utf8in);
 345       return IDNA_PUNYCODE_ERROR;
 346     }
 347
 348   out[*outlen] = 0;             /* add zero */
 349
 350   /* 6. Apply ToASCII.
 351    */
 352
 353   rc = idna_to_ascii_4i (out, *outlen, tmpout, flags);
 354   if (rc != IDNA_SUCCESS)
 355     {
 356       free (utf8in);
 357       return rc;
 358     }
 359
 360   /* 7. Verify that the result of step 6 matches the saved copy from
 361    * step 3, using a case-insensitive ASCII comparison.
 362    */
 363
 364   if (strcasecmp (utf8in, tmpout + strlen (IDNA_ACE_PREFIX)) != 0)
 365     {
 366       free (utf8in);
 367       return IDNA_ROUNDTRIP_VERIFY_ERROR;
 368     }
 369
 370   /* 8. Return the saved copy from step 5.
 371    */
 372
 373   free (utf8in);
 374   return IDNA_SUCCESS;
 375 }
 376
 377 /**
 378  * idna_to_unicode_44i
 379  * @in: input array with unicode code points.
 380  * @inlen: length of input array with unicode code points.
 381  * @out: output array with unicode code points.
 382  * @outlen: on input, maximum size of output array with unicode code points,
 383  *          on exit, actual size of output array with unicode code points.
 384  * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 385  *
 386  * The ToUnicode operation takes a sequence of Unicode code points
 387  * that make up one label and returns a sequence of Unicode code
 388  * points. If the input sequence is a label in ACE form, then the
 389  * result is an equivalent internationalized label that is not in ACE
 390  * form, otherwise the original sequence is returned unaltered.
 391  *
 392  * ToUnicode never fails. If any step fails, then the original input
 393  * sequence is returned immediately in that step.
 394  *
 395  * The Punycode decoder can never output more code points than it
 396  * inputs, but Nameprep can, and therefore ToUnicode can.  Note that
 397  * the number of octets needed to represent a sequence of code points
 398  * depends on the particular character encoding used.
 399  *
 400  * The inputs to ToUnicode are a sequence of code points, the
 401  * AllowUnassigned flag, and the UseSTD3ASCIIRules flag. The output of
 402  * ToUnicode is always a sequence of Unicode code points.
 403  *
 404  * Return value: Returns error condition, but it must only be used for
 405  *               debugging purposes.  The output buffer is always
 406  *               guaranteed to contain the correct data according to
 407  *               the specification (sans malloc induced errors).  NB!
 408  *               This means that you normally ignore the return code
 409  *               from this function, as checking it means breaking the
 410  *               standard.
 411  */
 412 int
 413 idna_to_unicode_44i (const uint32_t * in, size_t inlen,
 414                      uint32_t * out, size_t * outlen, int flags)
 415 {
 416   int rc;
 417   size_t outlensave = *outlen;
 418   char *p;
 419
 420   p = stringprep_ucs4_to_utf8 (in, inlen, NULL, NULL);
 421   if (p == NULL)
 422     return IDNA_MALLOC_ERROR;
 423
 424   rc = idna_to_unicode_internal (p, out, outlen, flags);
 425   if (rc != IDNA_SUCCESS)
 426     {
 427       memcpy (out, in, sizeof (in[0]) * (inlen < outlensave ?
 428                                          inlen : outlensave));
 429       *outlen = inlen;
 430     }
 431
 432   /* p is freed in idna_to_unicode_internal.  */
 433
 434   return rc;
 435 }
 436
 437 /* Wrappers that handle several labels */
 438
 439 /**
 440  * idna_to_ascii_4z:
 441  * @input: zero terminated input Unicode string.
 442  * @output: pointer to newly allocated output string.
 443  * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 444  *
 445  * Convert UCS-4 domain name to ASCII string.  The domain name may
 446  * contain several labels, separated by dots.  The output buffer must
 447  * be deallocated by the caller.
 448  *
 449  * Return value: Returns IDNA_SUCCESS on success, or error code.
 450  **/
 451 int
 452 idna_to_ascii_4z (const uint32_t * input, char **output, int flags)
 453 {
 454   const uint32_t *start = input;
 455   const uint32_t *end = input;
 456   char buf[64];
 457   char *out = NULL;
 458   int rc;
 459
 460   /* 1) Whenever dots are used as label separators, the following
 461      characters MUST be recognized as dots: U+002E (full stop),
 462      U+3002 (ideographic full stop), U+FF0E (fullwidth full stop),
 463      U+FF61 (halfwidth ideographic full stop). */
 464
 465   if (input[0] == 0)
 466     {
 467       /* Handle implicit zero-length root label. */
 468       *output = malloc (1);
 469       if (!*output)
 470         return IDNA_MALLOC_ERROR;
 471       strcpy (*output, "");
 472       return IDNA_SUCCESS;
 473     }
 474
 475   if (DOTP (input[0]) && input[1] == 0)
 476     {
 477       /* Handle explicit zero-length root label. */
 478       *output = malloc (2);
 479       if (!*output)
 480         return IDNA_MALLOC_ERROR;
 481       strcpy (*output, ".");
 482       return IDNA_SUCCESS;
 483     }
 484
 485   *output = NULL;
 486   do
 487     {
 488       end = start;
 489
 490       for (; *end && !DOTP (*end); end++)
 491         ;
 492
 493       if (*end == '\0' && start == end)
 494         {
 495           /* Handle explicit zero-length root label. */
 496           buf[0] = '\0';
 497         }
 498       else
 499         {
 500           rc = idna_to_ascii_4i (start, end - start, buf, flags);
 501           if (rc != IDNA_SUCCESS)
 502             return rc;
 503         }
 504
 505       if (out)
 506         {
 507           char *newp = realloc (out, strlen (out) + 1 + strlen (buf) + 1);
 508           if (!newp)
 509             {
 510               free (out);
 511               return IDNA_MALLOC_ERROR;
 512             }
 513           out = newp;
 514           strcat (out, ".");
 515           strcat (out, buf);
 516         }
 517       else
 518         {
 519           out = (char *) malloc (strlen (buf) + 1);
 520           if (!out)
 521             return IDNA_MALLOC_ERROR;
 522           strcpy (out, buf);
 523         }
 524
 525       start = end + 1;
 526     }
 527   while (*end);
 528
 529   *output = out;
 530
 531   return IDNA_SUCCESS;
 532 }
 533
 534 /**
 535  * idna_to_ascii_8z:
 536  * @input: zero terminated input UTF-8 string.
 537  * @output: pointer to newly allocated output string.
 538  * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 539  *
 540  * Convert UTF-8 domain name to ASCII string.  The domain name may
 541  * contain several labels, separated by dots.  The output buffer must
 542  * be deallocated by the caller.
 543  *
 544  * Return value: Returns IDNA_SUCCESS on success, or error code.
 545  **/
 546 int
 547 idna_to_ascii_8z (const char *input, char **output, int flags)
 548 {
 549   uint32_t *ucs4;
 550   size_t ucs4len;
 551   int rc;
 552
 553   ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
 554   if (!ucs4)
 555     return IDNA_ICONV_ERROR;
 556
 557   rc = idna_to_ascii_4z (ucs4, output, flags);
 558
 559   free (ucs4);
 560
 561   return rc;
 562
 563 }
 564
 565 /**
 566  * idna_to_ascii_lz:
 567  * @input: zero terminated input UTF-8 string.
 568  * @output: pointer to newly allocated output string.
 569  * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 570  *
 571  * Convert domain name in the locale's encoding to ASCII string.  The
 572  * domain name may contain several labels, separated by dots.  The
 573  * output buffer must be deallocated by the caller.
 574  *
 575  * Return value: Returns IDNA_SUCCESS on success, or error code.
 576  **/
 577 int
 578 idna_to_ascii_lz (const char *input, char **output, int flags)
 579 {
 580   char *utf8;
 581   int rc;
 582
 583   utf8 = stringprep_locale_to_utf8 (input);
 584   if (!utf8)
 585     return IDNA_ICONV_ERROR;
 586
 587   rc = idna_to_ascii_8z (utf8, output, flags);
 588
 589   free (utf8);
 590
 591   return rc;
 592 }
 593
 594 /**
 595  * idna_to_unicode_4z4z:
 596  * @input: zero-terminated Unicode string.
 597  * @output: pointer to newly allocated output Unicode string.
 598  * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 599  *
 600  * Convert possibly ACE encoded domain name in UCS-4 format into a
 601  * UCS-4 string.  The domain name may contain several labels,
 602  * separated by dots.  The output buffer must be deallocated by the
 603  * caller.
 604  *
 605  * Return value: Returns IDNA_SUCCESS on success, or error code.
 606  **/
 607 int
 608 idna_to_unicode_4z4z (const uint32_t * input, uint32_t ** output, int flags)
 609 {
 610   const uint32_t *start = input;
 611   const uint32_t *end = input;
 612   uint32_t *buf;
 613   size_t buflen;
 614   uint32_t *out = NULL;
 615   size_t outlen = 0;
 616
 617   *output = NULL;
 618
 619   do
 620     {
 621       end = start;
 622
 623       for (; *end && !DOTP (*end); end++)
 624         ;
 625
 626       buflen = end - start;
 627       buf = malloc (sizeof (buf[0]) * (buflen + 1));
 628       if (!buf)
 629         return IDNA_MALLOC_ERROR;
 630
 631       idna_to_unicode_44i (start, end - start, buf, &buflen, flags);
 632       /* don't check return value as per specification! */
 633
 634       if (out)
 635         {
 636           uint32_t *newp = realloc (out,
 637                                     sizeof (out[0])
 638                                     * (outlen + 1 + buflen + 1));
 639           if (!newp)
 640             {
 641               free (buf);
 642               free (out);
 643               return IDNA_MALLOC_ERROR;
 644             }
 645           out = newp;
 646           out[outlen++] = 0x002E;       /* '.' (full stop) */
 647           memcpy (out + outlen, buf, sizeof (buf[0]) * buflen);
 648           outlen += buflen;
 649           out[outlen] = 0x0;
 650           free (buf);
 651         }
 652       else
 653         {
 654           out = buf;
 655           outlen = buflen;
 656           out[outlen] = 0x0;
 657         }
 658
 659       start = end + 1;
 660     }
 661   while (*end);
 662
 663   *output = out;
 664
 665   return IDNA_SUCCESS;
 666 }
 667
 668 /**
 669  * idna_to_unicode_8z4z:
 670  * @input: zero-terminated UTF-8 string.
 671  * @output: pointer to newly allocated output Unicode string.
 672  * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 673  *
 674  * Convert possibly ACE encoded domain name in UTF-8 format into a
 675  * UCS-4 string.  The domain name may contain several labels,
 676  * separated by dots.  The output buffer must be deallocated by the
 677  * caller.
 678  *
 679  * Return value: Returns IDNA_SUCCESS on success, or error code.
 680  **/
 681 int
 682 idna_to_unicode_8z4z (const char *input, uint32_t ** output, int flags)
 683 {
 684   uint32_t *ucs4;
 685   size_t ucs4len;
 686   int rc;
 687
 688   ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
 689   if (!ucs4)
 690     return IDNA_ICONV_ERROR;
 691
 692   rc = idna_to_unicode_4z4z (ucs4, output, flags);
 693   free (ucs4);
 694
 695   return rc;
 696 }
 697
 698 /**
 699  * idna_to_unicode_8z8z:
 700  * @input: zero-terminated UTF-8 string.
 701  * @output: pointer to newly allocated output UTF-8 string.
 702  * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 703  *
 704  * Convert possibly ACE encoded domain name in UTF-8 format into a
 705  * UTF-8 string.  The domain name may contain several labels,
 706  * separated by dots.  The output buffer must be deallocated by the
 707  * caller.
 708  *
 709  * Return value: Returns IDNA_SUCCESS on success, or error code.
 710  **/
 711 int
 712 idna_to_unicode_8z8z (const char *input, char **output, int flags)
 713 {
 714   uint32_t *ucs4;
 715   int rc;
 716
 717   rc = idna_to_unicode_8z4z (input, &ucs4, flags);
 718   *output = stringprep_ucs4_to_utf8 (ucs4, -1, NULL, NULL);
 719   free (ucs4);
 720
 721   if (!*output)
 722     return IDNA_ICONV_ERROR;
 723
 724   return rc;
 725 }
 726
 727 /**
 728  * idna_to_unicode_8zlz:
 729  * @input: zero-terminated UTF-8 string.
 730  * @output: pointer to newly allocated output string encoded in the
 731  *   current locale's character set.
 732  * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 733  *
 734  * Convert possibly ACE encoded domain name in UTF-8 format into a
 735  * string encoded in the current locale's character set.  The domain
 736  * name may contain several labels, separated by dots.  The output
 737  * buffer must be deallocated by the caller.
 738  *
 739  * Return value: Returns IDNA_SUCCESS on success, or error code.
 740  **/
 741 int
 742 idna_to_unicode_8zlz (const char *input, char **output, int flags)
 743 {
 744   char *utf8;
 745   int rc;
 746
 747   rc = idna_to_unicode_8z8z (input, &utf8, flags);
 748   *output = stringprep_utf8_to_locale (utf8);
 749   free (utf8);
 750
 751   if (!*output)
 752     return IDNA_ICONV_ERROR;
 753
 754   return rc;
 755 }
 756
 757 /**
 758  * idna_to_unicode_lzlz:
 759  * @input: zero-terminated string encoded in the current locale's
 760  *   character set.
 761  * @output: pointer to newly allocated output string encoded in the
 762  *   current locale's character set.
 763  * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 764  *
 765  * Convert possibly ACE encoded domain name in the locale's character
 766  * set into a string encoded in the current locale's character set.
 767  * The domain name may contain several labels, separated by dots.  The
 768  * output buffer must be deallocated by the caller.
 769  *
 770  * Return value: Returns IDNA_SUCCESS on success, or error code.
 771  **/
 772 int
 773 idna_to_unicode_lzlz (const char *input, char **output, int flags)
 774 {
 775   char *utf8;
 776   int rc;
 777
 778   utf8 = stringprep_locale_to_utf8 (input);
 779   if (!utf8)
 780     return IDNA_ICONV_ERROR;
 781
 782   rc = idna_to_unicode_8zlz (utf8, output, flags);
 783   free (utf8);
 784
 785   return rc;
 786 }
 787
 788 /**
 789  * IDNA_ACE_PREFIX
 790  *
 791  * The IANA allocated prefix to use for IDNA. "xn--"
 792  */
 793
 794 /**
 795  * Idna_rc:
 796  * @IDNA_SUCCESS: Successful operation.  This value is guaranteed to
 797  *   always be zero, the remaining ones are only guaranteed to hold
 798  *   non-zero values, for logical comparison purposes.
 799  * @IDNA_STRINGPREP_ERROR:  Error during string preparation.
 800  * @IDNA_PUNYCODE_ERROR: Error during punycode operation.
 801  * @IDNA_CONTAINS_NON_LDH: For IDNA_USE_STD3_ASCII_RULES, indicate that
 802  *   the string contains non-LDH ASCII characters.
 803  * @IDNA_CONTAINS_MINUS: For IDNA_USE_STD3_ASCII_RULES, indicate that
 804  *   the string contains a leading or trailing hyphen-minus (U+002D).
 805  * @IDNA_INVALID_LENGTH: The final output string is not within the
 806  *   (inclusive) range 1 to 63 characters.
 807  * @IDNA_NO_ACE_PREFIX: The string does not contain the ACE prefix
 808  *   (for ToUnicode).
 809  * @IDNA_ROUNDTRIP_VERIFY_ERROR: The ToASCII operation on output
 810  *   string does not equal the input.
 811  * @IDNA_CONTAINS_ACE_PREFIX: The input contains the ACE prefix (for
 812  *   ToASCII).
 813  * @IDNA_ICONV_ERROR: Could not convert string in locale encoding.
 814  * @IDNA_MALLOC_ERROR: Could not allocate buffer (this is typically a
 815  *   fatal error).
 816  * @IDNA_DLOPEN_ERROR: Could not dlopen the libcidn DSO (only used
 817  *   internally in libc).
 818  *
 819  * Enumerated return codes of idna_to_ascii_4i(),
 820  * idna_to_unicode_44i() functions (and functions derived from those
 821  * functions).  The value 0 is guaranteed to always correspond to
 822  * success.
 823  */
 824
 825
 826 /**
 827  * Idna_flags:
 828  * @IDNA_ALLOW_UNASSIGNED: Don't reject strings containing unassigned
 829  *   Unicode code points.
 830  * @IDNA_USE_STD3_ASCII_RULES: Validate strings according to STD3
 831  *   rules (i.e., normal host name rules).
 832  *
 833  * Flags to pass to idna_to_ascii_4i(), idna_to_unicode_44i() etc.
 834  */