1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
5 * Copyright (C) 2010-2012, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
10 * tab size: 8 (not used)
13 * created on: 2010mar05
14 * created by: Markus W. Scherer
22 * \brief C++ API: Internationalizing Domain Names in Applications (IDNA)
25 #include "unicode/utypes.h"
29 #include "unicode/bytestream.h"
30 #include "unicode/stringpiece.h"
31 #include "unicode/uidna.h"
32 #include "unicode/unistr.h"
39 * Abstract base class for IDNA processing.
40 * See http://www.unicode.org/reports/tr46/
41 * and http://www.ietf.org/rfc/rfc3490.txt
43 * The IDNA class is not intended for public subclassing.
45 * This C++ API currently only implements UTS #46.
46 * The uidna.h C API implements both UTS #46 (functions using UIDNA service object)
47 * and IDNA2003 (functions that do not use a service object).
50 class U_COMMON_API IDNA : public UObject {
59 * Returns an IDNA instance which implements UTS #46.
60 * Returns an unmodifiable instance, owned by the caller.
61 * Cache it for multiple operations, and delete it when done.
62 * The instance is thread-safe, that is, it can be used concurrently.
64 * UTS #46 defines Unicode IDNA Compatibility Processing,
65 * updated to the latest version of Unicode and compatible with both
66 * IDNA2003 and IDNA2008.
68 * The worker functions use transitional processing, including deviation mappings,
69 * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
70 * is used, in which case the deviation characters are passed through without change.
72 * Disallowed characters are mapped to U+FFFD.
74 * For available options see the uidna.h header.
75 * Operations with the UTS #46 instance do not support the
76 * UIDNA_ALLOW_UNASSIGNED option.
78 * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
79 * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than
80 * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
82 * @param options Bit set to modify the processing and error checking.
83 * See option bit set values in uidna.h.
84 * @param errorCode Standard ICU error code. Its input value must
85 * pass the U_SUCCESS() test, or else the function returns
86 * immediately. Check for U_FAILURE() on output or use with
87 * function chaining. (See User Guide for details.)
88 * @return the UTS #46 IDNA instance, if successful
// Factory entry point for the UTS #46 implementation: options is a bit set of
// processing flags and errorCode is the in/out ICU status code.
// NOTE(review): the return type and any specifiers sit on a line that is not
// visible in this excerpt — confirm against the full header.
92 createUTS46Instance(uint32_t options, UErrorCode &errorCode);
95 * Converts a single domain name label into its ASCII form for DNS lookup.
96 * If any processing step fails, then info.hasErrors() will be TRUE and
97 * the result might not be an ASCII string.
98 * The label might be modified according to the types of errors.
99 * Labels with severe errors will be left in (or turned into) their Unicode form.
101 * The UErrorCode indicates an error only in exceptional cases,
102 * such as a U_MEMORY_ALLOCATION_ERROR.
104 * @param label Input domain name label
105 * @param dest Destination string object
106 * @param info Output container of IDNA processing details.
107 * @param errorCode Standard ICU error code. Its input value must
108 * pass the U_SUCCESS() test, or else the function returns
109 * immediately. Check for U_FAILURE() on output or use with
110 * function chaining. (See User Guide for details.)
// Pure virtual: every concrete IDNA implementation must supply the conversion
// of a single label to its ASCII form.
// label is read-only (const reference); dest, info and errorCode are non-const
// references used for output. The method itself is const.
// NOTE(review): the returned UnicodeString reference is presumably dest — confirm.
114 virtual UnicodeString &
115 labelToASCII(const UnicodeString &label, UnicodeString &dest,
116 IDNAInfo &info, UErrorCode &errorCode) const = 0;
119 * Converts a single domain name label into its Unicode form for human-readable display.
120 * If any processing step fails, then info.hasErrors() will be TRUE.
121 * The label might be modified according to the types of errors.
123 * The UErrorCode indicates an error only in exceptional cases,
124 * such as a U_MEMORY_ALLOCATION_ERROR.
126 * @param label Input domain name label
127 * @param dest Destination string object
128 * @param info Output container of IDNA processing details.
129 * @param errorCode Standard ICU error code. Its input value must
130 * pass the U_SUCCESS() test, or else the function returns
131 * immediately. Check for U_FAILURE() on output or use with
132 * function chaining. (See User Guide for details.)
// Pure virtual: every concrete IDNA implementation must supply the conversion
// of a single label to its Unicode (display) form.
// label is read-only (const reference); dest, info and errorCode are non-const
// references used for output. The method itself is const.
// NOTE(review): the returned UnicodeString reference is presumably dest — confirm.
136 virtual UnicodeString &
137 labelToUnicode(const UnicodeString &label, UnicodeString &dest,
138 IDNAInfo &info, UErrorCode &errorCode) const = 0;
141 * Converts a whole domain name into its ASCII form for DNS lookup.
142 * If any processing step fails, then info.hasErrors() will be TRUE and
143 * the result might not be an ASCII string.
144 * The domain name might be modified according to the types of errors.
145 * Labels with severe errors will be left in (or turned into) their Unicode form.
147 * The UErrorCode indicates an error only in exceptional cases,
148 * such as a U_MEMORY_ALLOCATION_ERROR.
150 * @param name Input domain name
151 * @param dest Destination string object
152 * @param info Output container of IDNA processing details.
153 * @param errorCode Standard ICU error code. Its input value must
154 * pass the U_SUCCESS() test, or else the function returns
155 * immediately. Check for U_FAILURE() on output or use with
156 * function chaining. (See User Guide for details.)
// Pure virtual: every concrete IDNA implementation must supply the conversion
// of a whole domain name to its ASCII form.
// name is read-only (const reference); dest, info and errorCode are non-const
// references used for output. The method itself is const.
// NOTE(review): the returned UnicodeString reference is presumably dest — confirm.
160 virtual UnicodeString &
161 nameToASCII(const UnicodeString &name, UnicodeString &dest,
162 IDNAInfo &info, UErrorCode &errorCode) const = 0;
165 * Converts a whole domain name into its Unicode form for human-readable display.
166 * If any processing step fails, then info.hasErrors() will be TRUE.
167 * The domain name might be modified according to the types of errors.
169 * The UErrorCode indicates an error only in exceptional cases,
170 * such as a U_MEMORY_ALLOCATION_ERROR.
172 * @param name Input domain name
173 * @param dest Destination string object
174 * @param info Output container of IDNA processing details.
175 * @param errorCode Standard ICU error code. Its input value must
176 * pass the U_SUCCESS() test, or else the function returns
177 * immediately. Check for U_FAILURE() on output or use with
178 * function chaining. (See User Guide for details.)
// Pure virtual: every concrete IDNA implementation must supply the conversion
// of a whole domain name to its Unicode (display) form.
// name is read-only (const reference); dest, info and errorCode are non-const
// references used for output. The method itself is const.
// NOTE(review): the returned UnicodeString reference is presumably dest — confirm.
182 virtual UnicodeString &
183 nameToUnicode(const UnicodeString &name, UnicodeString &dest,
184 IDNAInfo &info, UErrorCode &errorCode) const = 0;
186 // UTF-8 versions of the processing methods ---------------------------- ***
189 * Converts a single domain name label into its ASCII form for DNS lookup.
190 * UTF-8 version of labelToASCII(), same behavior.
192 * @param label Input domain name label
193 * @param dest Destination byte sink; Flush()ed if successful
194 * @param info Output container of IDNA processing details.
195 * @param errorCode Standard ICU error code. Its input value must
196 * pass the U_SUCCESS() test, or else the function returns
197 * immediately. Check for U_FAILURE() on output or use with
198 * function chaining. (See User Guide for details.)
// UTF-8 counterpart of labelToASCII(): the label arrives as a StringPiece
// (passed by value) and the result is emitted into the ByteSink dest.
// info and errorCode are non-const references; the method is const.
// NOTE(review): the return-type/specifier line is not visible in this excerpt,
// and the declaration has no "= 0" — presumably a default implementation
// exists elsewhere; confirm.
203 labelToASCII_UTF8(StringPiece label, ByteSink &dest,
204 IDNAInfo &info, UErrorCode &errorCode) const;
207 * Converts a single domain name label into its Unicode form for human-readable display.
208 * UTF-8 version of labelToUnicode(), same behavior.
210 * @param label Input domain name label
211 * @param dest Destination byte sink; Flush()ed if successful
212 * @param info Output container of IDNA processing details.
213 * @param errorCode Standard ICU error code. Its input value must
214 * pass the U_SUCCESS() test, or else the function returns
215 * immediately. Check for U_FAILURE() on output or use with
216 * function chaining. (See User Guide for details.)
// UTF-8 counterpart of labelToUnicode(): the label arrives as a StringPiece
// (passed by value) and the result is emitted into the ByteSink dest.
// info and errorCode are non-const references; the method is const.
// NOTE(review): the return-type/specifier line is not visible in this excerpt,
// and the declaration has no "= 0" — presumably a default implementation
// exists elsewhere; confirm.
221 labelToUnicodeUTF8(StringPiece label, ByteSink &dest,
222 IDNAInfo &info, UErrorCode &errorCode) const;
225 * Converts a whole domain name into its ASCII form for DNS lookup.
226 * UTF-8 version of nameToASCII(), same behavior.
228 * @param name Input domain name
229 * @param dest Destination byte sink; Flush()ed if successful
230 * @param info Output container of IDNA processing details.
231 * @param errorCode Standard ICU error code. Its input value must
232 * pass the U_SUCCESS() test, or else the function returns
233 * immediately. Check for U_FAILURE() on output or use with
234 * function chaining. (See User Guide for details.)
// UTF-8 counterpart of nameToASCII(): the domain name arrives as a StringPiece
// (passed by value) and the result is emitted into the ByteSink dest.
// info and errorCode are non-const references; the method is const.
// NOTE(review): the return-type/specifier line is not visible in this excerpt,
// and the declaration has no "= 0" — presumably a default implementation
// exists elsewhere; confirm.
239 nameToASCII_UTF8(StringPiece name, ByteSink &dest,
240 IDNAInfo &info, UErrorCode &errorCode) const;
243 * Converts a whole domain name into its Unicode form for human-readable display.
244 * UTF-8 version of nameToUnicode(), same behavior.
246 * @param name Input domain name
247 * @param dest Destination byte sink; Flush()ed if successful
248 * @param info Output container of IDNA processing details.
249 * @param errorCode Standard ICU error code. Its input value must
250 * pass the U_SUCCESS() test, or else the function returns
251 * immediately. Check for U_FAILURE() on output or use with
252 * function chaining. (See User Guide for details.)
// UTF-8 counterpart of nameToUnicode(): the domain name arrives as a StringPiece
// (passed by value) and the result is emitted into the ByteSink dest.
// info and errorCode are non-const references; the method is const.
// NOTE(review): the return-type/specifier line is not visible in this excerpt,
// and the declaration has no "= 0" — presumably a default implementation
// exists elsewhere; confirm.
257 nameToUnicodeUTF8(StringPiece name, ByteSink &dest,
258 IDNAInfo &info, UErrorCode &errorCode) const;
264 * Output container for IDNA processing errors.
265 * The IDNAInfo class is not suitable for subclassing.
268 class U_COMMON_API IDNAInfo : public UMemory {
271 * Constructor for stack allocation.
// Default constructor (suitable for stack allocation). Initial state:
// errors and labelErrors are zero, isTransDiff and isBiDi are FALSE,
// and isOkBiDi is TRUE.
274 IDNAInfo()
    : errors(0),
      labelErrors(0),
      isTransDiff(FALSE),
      isBiDi(FALSE),
      isOkBiDi(TRUE) {
}
276 * Were there IDNA processing errors?
277 * @return TRUE if there were processing errors
// Reports whether any error has been recorded: the result is true exactly
// when the errors value is nonzero.
280 UBool hasErrors() const {
    return !(errors == 0);
}
282 * Returns a bit set indicating IDNA processing errors.
283 * See UIDNA_ERROR_... constants in uidna.h.
284 * @return bit set of processing errors
// Accessor for the recorded error bits; hands back the errors value unchanged.
287 uint32_t getErrors() const {
    const uint32_t errorBits = errors;
    return errorBits;
}
289 * Returns TRUE if transitional and nontransitional processing produce different results.
290 * This is the case when the input label or domain name contains
291 * one or more deviation characters outside a Punycode label (see UTS #46).
292 * <ul>
293 * <li>With nontransitional processing, such characters are
294 * copied to the destination string.
295 * <li>With transitional processing, such characters are
296 * mapped (sharp s/sigma) or removed (joiner/nonjoiner).
297 * </ul>
298 * @return TRUE if transitional and nontransitional processing produce different results
// Accessor for the isTransDiff flag; hands back its current value unchanged.
301 UBool isTransitionalDifferent() const {
    const UBool differs = isTransDiff;
    return differs;
}
// Copy construction and copy assignment are only declared here; no definition
// appears in this excerpt. Per the inline notes they exist to forbid copying.
// NOTE(review): presumably these are private and never defined — the access
// specifier is not visible in this excerpt; confirm.
306 IDNAInfo(const IDNAInfo &other); // no copying
307 IDNAInfo &operator=(const IDNAInfo &other); // no copying
310 errors=labelErrors=0;
// errors: the value exposed by getErrors() and tested by hasErrors();
//         zero-initialized by the constructor.
// labelErrors: zero-initialized alongside errors.
// NOTE(review): presumably labelErrors holds the error bits for the label
// currently being processed — confirm against the implementation.
316 uint32_t errors, labelErrors;
324 #endif // UCONFIG_NO_IDNA