1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 **********************************************************************
5 * Copyright (c) 2001-2011, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 * Date Name Description
9 * 11/19/2001 aliu Creation.
10 **********************************************************************
13 #include "unicode/utypes.h"
15 #if !UCONFIG_NO_TRANSLITERATION
17 #include "unicode/utf16.h"
23 static const UChar UNIPRE[] = {85,43,0}; // "U+"
24 static const UChar BS_u[] = {92,117,0}; // "\\u"
25 static const UChar BS_U[] = {92,85,0}; // "\\U"
26 static const UChar XMLPRE[] = {38,35,120,0}; // "&#x"
27 static const UChar XML10PRE[] = {38,35,0}; // "&#"
28 static const UChar PERLPRE[] = {92,120,123,0}; // "\\x{"
29 static const UChar SEMI[] = {59,0}; // ";"
30 static const UChar RBRACE[] = {125,0}; // "}"
32 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(EscapeTransliterator)
37 static Transliterator* _createEscUnicode(const UnicodeString& ID, Transliterator::Token /*context*/) {
38 // Unicode: "U+10FFFF" hex, min=4, max=6
39 return new EscapeTransliterator(ID, UnicodeString(TRUE, UNIPRE, 2), UnicodeString(), 16, 4, TRUE, NULL);
41 static Transliterator* _createEscJava(const UnicodeString& ID, Transliterator::Token /*context*/) {
42 // Java: "\\uFFFF" hex, min=4, max=4
43 return new EscapeTransliterator(ID, UnicodeString(TRUE, BS_u, 2), UnicodeString(), 16, 4, FALSE, NULL);
45 static Transliterator* _createEscC(const UnicodeString& ID, Transliterator::Token /*context*/) {
46 // C: "\\uFFFF" hex, min=4, max=4; \\U0010FFFF hex, min=8, max=8
47 return new EscapeTransliterator(ID, UnicodeString(TRUE, BS_u, 2), UnicodeString(), 16, 4, TRUE,
48 new EscapeTransliterator(UnicodeString(), UnicodeString(TRUE, BS_U, 2), UnicodeString(), 16, 8, TRUE, NULL));
50 static Transliterator* _createEscXML(const UnicodeString& ID, Transliterator::Token /*context*/) {
51 // XML: "" hex, min=1, max=6
52 return new EscapeTransliterator(ID, UnicodeString(TRUE, XMLPRE, 3), UnicodeString(SEMI[0]), 16, 1, TRUE, NULL);
54 static Transliterator* _createEscXML10(const UnicodeString& ID, Transliterator::Token /*context*/) {
55 // XML10: "&1114111;" dec, min=1, max=7 (not really "Any-Hex")
56 return new EscapeTransliterator(ID, UnicodeString(TRUE, XML10PRE, 2), UnicodeString(SEMI[0]), 10, 1, TRUE, NULL);
58 static Transliterator* _createEscPerl(const UnicodeString& ID, Transliterator::Token /*context*/) {
59 // Perl: "\\x{263A}" hex, min=1, max=6
60 return new EscapeTransliterator(ID, UnicodeString(TRUE, PERLPRE, 3), UnicodeString(RBRACE[0]), 16, 1, TRUE, NULL);
64 * Registers standard variants with the system. Called by
65 * Transliterator during initialization.
67 void EscapeTransliterator::registerIDs() {
68 Token t = integerToken(0);
70 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Unicode"), _createEscUnicode, t);
72 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Java"), _createEscJava, t);
74 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/C"), _createEscC, t);
76 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML"), _createEscXML, t);
78 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/XML10"), _createEscXML10, t);
80 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex/Perl"), _createEscPerl, t);
82 Transliterator::_registerFactory(UNICODE_STRING_SIMPLE("Any-Hex"), _createEscJava, t);
86 * Constructs an escape transliterator with the given ID and
87 * parameters. See the class member documentation for details.
89 EscapeTransliterator::EscapeTransliterator(const UnicodeString& newID,
90 const UnicodeString& _prefix, const UnicodeString& _suffix,
91 int32_t _radix, int32_t _minDigits,
92 UBool _grokSupplementals,
93 EscapeTransliterator* adoptedSupplementalHandler) :
94 Transliterator(newID, NULL)
96 this->prefix = _prefix;
97 this->suffix = _suffix;
99 this->minDigits = _minDigits;
100 this->grokSupplementals = _grokSupplementals;
101 this->supplementalHandler = adoptedSupplementalHandler;
107 EscapeTransliterator::EscapeTransliterator(const EscapeTransliterator& o) :
112 minDigits(o.minDigits),
113 grokSupplementals(o.grokSupplementals) {
114 supplementalHandler = (o.supplementalHandler != 0) ?
115 new EscapeTransliterator(*o.supplementalHandler) : NULL;
118 EscapeTransliterator::~EscapeTransliterator() {
119 delete supplementalHandler;
123 * Transliterator API.
125 Transliterator* EscapeTransliterator::clone() const {
126 return new EscapeTransliterator(*this);
130 * Implements {@link Transliterator#handleTransliterate}.
132 void EscapeTransliterator::handleTransliterate(Replaceable& text,
134 UBool /*isIncremental*/) const
136 /* TODO: Verify that isIncremental can be ignored */
137 int32_t start = pos.start;
138 int32_t limit = pos.limit;
140 UnicodeString buf(prefix);
141 int32_t prefixLen = prefix.length();
142 UBool redoPrefix = FALSE;
144 while (start < limit) {
145 int32_t c = grokSupplementals ? text.char32At(start) : text.charAt(start);
146 int32_t charLen = grokSupplementals ? U16_LENGTH(c) : 1;
148 if ((c & 0xFFFF0000) != 0 && supplementalHandler != NULL) {
150 buf.append(supplementalHandler->prefix);
151 ICU_Utility::appendNumber(buf, c, supplementalHandler->radix,
152 supplementalHandler->minDigits);
153 buf.append(supplementalHandler->suffix);
161 buf.truncate(prefixLen);
163 ICU_Utility::appendNumber(buf, c, radix, minDigits);
167 text.handleReplaceBetween(start, start + charLen, buf);
168 start += buf.length();
169 limit += buf.length() - charLen;
172 pos.contextLimit += limit - pos.limit;
179 #endif /* #if !UCONFIG_NO_TRANSLITERATION */