2 *******************************************************************************
4 * Copyright (C) 2009-2012, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: filterednormalizer2.cpp
10 * tab size: 8 (not used)
13 * created on: 2009dec10
14 * created by: Markus W. Scherer
17 #include "unicode/utypes.h"
19 #if !UCONFIG_NO_NORMALIZATION
21 #include "unicode/normalizer2.h"
22 #include "unicode/uniset.h"
23 #include "unicode/unistr.h"
24 #include "unicode/unorm.h"
// Destructor. The body is empty: nothing is released explicitly here.
29 FilteredNormalizer2::~FilteredNormalizer2() {}
// Public entry point: validates the arguments, then hands off to the internal
// span-based normalize() overload, starting with USET_SPAN_SIMPLE.
// NOTE(review): this listing omits several lines of the function (return type,
// the dest parameter declaration, the bodies of the error branches and the
// closing brace) — confirm details against the complete file.
32 FilteredNormalizer2::normalize(const UnicodeString &src,
34 UErrorCode &errorCode) const {
// Helper defined elsewhere; presumably flags errorCode if src's buffer
// cannot be obtained — TODO confirm.
35 uprv_checkCanGetBuffer(src, errorCode);
// Error path; what is done with dest here is not visible in this listing.
36 if(U_FAILURE(errorCode)) {
// The guarding condition for this assignment is not visible here; presumably
// it rejects dest aliasing src (compare the &first==&second check in
// normalizeSecondAndAppend) — TODO confirm.
41 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
// Delegate to the internal overload, which appends to dest.
45 return normalize(src, dest, USET_SPAN_SIMPLE, errorCode);
48 // Internal: No argument checking, and appends to dest.
49 // Pass as input spanCondition the one that is likely to yield a non-zero
50 // span length at the start of src.
51 // For set=[:age=3.2:], since almost all common characters were in Unicode 3.2,
52 // USET_SPAN_SIMPLE should be passed in for the start of src
53 // and USET_SPAN_NOT_CONTAINED should be passed in if we continue after
54 // an in-filter prefix.
// Internal worker: walks src as a sequence of spans obtained from set.span(),
// alternating spanCondition between USET_SPAN_NOT_CONTAINED and
// USET_SPAN_SIMPLE. NOT_CONTAINED spans are appended to dest unchanged; the
// other spans are normalized with norm2 and the result appended to dest.
// NOTE(review): the return type, the dest parameter line, the else branch,
// any span-length guards, the failure exit and the final return are not
// visible in this listing.
56 FilteredNormalizer2::normalize(const UnicodeString &src,
58 USetSpanCondition spanCondition,
59 UErrorCode &errorCode) const {
60 UnicodeString tempDest; // Don't throw away destination buffer between iterations.
// No increment clause: prevSpanLimit is advanced at the bottom of the loop body.
61 for(int32_t prevSpanLimit=0; prevSpanLimit<src.length();) {
// End of the span that starts at prevSpanLimit under the current condition.
62 int32_t spanLimit=set.span(src, prevSpanLimit, spanCondition);
63 int32_t spanLength=spanLimit-prevSpanLimit;
// Span found with NOT_CONTAINED: copy it through without normalizing.
64 if(spanCondition==USET_SPAN_NOT_CONTAINED) {
66 dest.append(src, prevSpanLimit, spanLength);
// The next span is searched with the opposite condition.
68 spanCondition=USET_SPAN_SIMPLE;
71 // Not norm2.normalizeSecondAndAppend() because we do not want
72 // to modify the non-filter part of dest.
// Normalize just this span into tempDest, then append that result to dest.
73 dest.append(norm2.normalize(src.tempSubStringBetween(prevSpanLimit, spanLimit),
74 tempDest, errorCode));
// Failure handling; the body of this branch is not visible in this listing.
75 if(U_FAILURE(errorCode)) {
79 spanCondition=USET_SPAN_NOT_CONTAINED;
// Continue from the end of the span just processed.
81 prevSpanLimit=spanLimit;
// Public normalizeSecondAndAppend(): forwards to the four-argument internal
// overload with TRUE as the third argument.
// NOTE(review): the return type line and closing brace are not visible in
// this listing.
87 FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
88 const UnicodeString &second,
89 UErrorCode &errorCode) const {
90 return normalizeSecondAndAppend(first, second, TRUE, errorCode);
// Public append(): forwards to the four-argument internal
// normalizeSecondAndAppend() overload with FALSE as the third argument.
// NOTE(review): the return type line and closing brace are not visible in
// this listing.
94 FilteredNormalizer2::append(UnicodeString &first,
95 const UnicodeString &second,
96 UErrorCode &errorCode) const {
97 return normalizeSecondAndAppend(first, second, FALSE, errorCode);
// Internal worker shared by the public normalizeSecondAndAppend() and
// append(). It combines the part of first found by set.spanBack() with the
// prefix of second found by set.span(), via norm2, and then processes the
// remainder of second with the internal normalize() overload.
// NOTE(review): this listing omits the return type, the third (boolean)
// parameter, the conditions that choose between the norm2 calls below, the
// returns and the closing braces — confirm against the complete file.
101 FilteredNormalizer2::normalizeSecondAndAppend(UnicodeString &first,
102 const UnicodeString &second,
104 UErrorCode &errorCode) const {
// Helper defined elsewhere; presumably flags errorCode when a string's
// buffer cannot be obtained — TODO confirm.
105 uprv_checkCanGetBuffer(first, errorCode);
106 uprv_checkCanGetBuffer(second, errorCode);
107 if(U_FAILURE(errorCode)) {
// The same object must not be passed as both arguments.
110 if(&first==&second) {
111 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
// Case where first is empty.
// NOTE(review): lines are missing around this branch, so it is unclear from
// this listing whether the call below is unconditional within it.
114 if(first.isEmpty()) {
116 return normalize(second, first, errorCode);
121 // merge the in-filter suffix of the first string with the in-filter prefix of the second
// Length of the leading span of second under USET_SPAN_SIMPLE.
122 int32_t prefixLimit=set.span(second, 0, USET_SPAN_SIMPLE);
124 UnicodeString prefix(second.tempSubString(0, prefixLimit));
// Start index of the trailing span of first under USET_SPAN_SIMPLE.
125 int32_t suffixStart=set.spanBack(first, INT32_MAX, USET_SPAN_SIMPLE);
// The conditions selecting between the next two calls are not visible here;
// presumably the missing boolean parameter picks normalizeSecondAndAppend
// versus append, and this pair handles suffixStart==0 — TODO confirm.
128 norm2.normalizeSecondAndAppend(first, prefix, errorCode);
130 norm2.append(first, prefix, errorCode);
// Otherwise work on a copy of first's tail so that the text before
// suffixStart is not passed to norm2.
133 UnicodeString middle(first, suffixStart, INT32_MAX);
135 norm2.normalizeSecondAndAppend(middle, prefix, errorCode);
137 norm2.append(middle, prefix, errorCode);
// Replace first's tail with the merged result.
139 first.replace(suffixStart, INT32_MAX, middle);
// Anything left in second after the prefix begins with a span to be searched
// using USET_SPAN_NOT_CONTAINED.
142 if(prefixLimit<second.length()) {
143 UnicodeString rest(second.tempSubString(prefixLimit, INT32_MAX));
// The statements between these lines (selecting how rest is processed) are
// not visible in this listing.
145 normalize(rest, first, USET_SPAN_NOT_CONTAINED, errorCode);
// Forwards to norm2.getDecomposition() only when c is in the filter set;
// for any other c the && short-circuits and norm2 is not consulted.
// NOTE(review): the return type line and closing brace are not visible in
// this listing.
154 FilteredNormalizer2::getDecomposition(UChar32 c, UnicodeString &decomposition) const {
155 return set.contains(c) && norm2.getDecomposition(c, decomposition);
// Forwards to norm2.getRawDecomposition() only when c is in the filter set;
// for any other c the && short-circuits and norm2 is not consulted.
// NOTE(review): the return type line and closing brace are not visible in
// this listing.
159 FilteredNormalizer2::getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
160 return set.contains(c) && norm2.getRawDecomposition(c, decomposition);
// Returns norm2.composePair(a, b) only when both a and b are in the filter
// set; otherwise returns U_SENTINEL without consulting norm2.
// NOTE(review): the return type line and closing brace are not visible in
// this listing.
164 FilteredNormalizer2::composePair(UChar32 a, UChar32 b) const {
165 return (set.contains(a) && set.contains(b)) ? norm2.composePair(a, b) : U_SENTINEL;
// Returns norm2's combining class for c when c is in the filter set, and 0
// for any c outside the set.
// NOTE(review): the return type line and closing brace are not visible in
// this listing.
169 FilteredNormalizer2::getCombiningClass(UChar32 c) const {
170 return set.contains(c) ? norm2.getCombiningClass(c) : 0;
// Walks s in alternating spans obtained from set.span(): spans searched with
// USET_SPAN_SIMPLE are checked with norm2.isNormalized(); spans searched with
// USET_SPAN_NOT_CONTAINED are skipped without a check.
// NOTE(review): the return type, the else keyword, the second operand of the
// || below, and all return statements are not visible in this listing.
174 FilteredNormalizer2::isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
// Helper defined elsewhere; presumably flags errorCode if s's buffer cannot
// be obtained — TODO confirm.
175 uprv_checkCanGetBuffer(s, errorCode);
176 if(U_FAILURE(errorCode)) {
// The first span is searched with USET_SPAN_SIMPLE.
179 USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
// No increment clause: prevSpanLimit is advanced at the bottom of the loop body.
180 for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {
181 int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition);
// NOT_CONTAINED span: nothing to verify, just flip the condition.
182 if(spanCondition==USET_SPAN_NOT_CONTAINED) {
183 spanCondition=USET_SPAN_SIMPLE;
// Check only this span with the wrapped normalizer; the continuation of this
// condition and the branch body are not visible in this listing.
185 if( !norm2.isNormalized(s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode) ||
190 spanCondition=USET_SPAN_NOT_CONTAINED;
// Continue from the end of the span just examined.
192 prevSpanLimit=spanLimit;
// Quick check over s, performed span by span: spans searched with
// USET_SPAN_SIMPLE are passed to norm2.quickCheck(); spans searched with
// USET_SPAN_NOT_CONTAINED are skipped. result starts as UNORM_YES.
// NOTE(review): the bodies of the error/NO/MAYBE branches, the else keyword
// and the final return are not visible in this listing.
197 UNormalizationCheckResult
198 FilteredNormalizer2::quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
// Helper defined elsewhere; presumably flags errorCode if s's buffer cannot
// be obtained — TODO confirm.
199 uprv_checkCanGetBuffer(s, errorCode);
200 if(U_FAILURE(errorCode)) {
203 UNormalizationCheckResult result=UNORM_YES;
204 USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
// No increment clause: prevSpanLimit is advanced at the bottom of the loop body.
205 for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {
206 int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition);
// NOT_CONTAINED span: no check needed, just flip the condition.
207 if(spanCondition==USET_SPAN_NOT_CONTAINED) {
208 spanCondition=USET_SPAN_SIMPLE;
// Ask the wrapped normalizer about this span only.
210 UNormalizationCheckResult qcResult=
211 norm2.quickCheck(s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode);
// Failure or a definite NO; the body is not visible here (presumably an
// immediate return — TODO confirm).
212 if(U_FAILURE(errorCode) || qcResult==UNORM_NO) {
// MAYBE for this span; the body is not visible here (presumably it is
// recorded in result — TODO confirm).
214 } else if(qcResult==UNORM_MAYBE) {
217 spanCondition=USET_SPAN_NOT_CONTAINED;
// Continue from the end of the span just examined.
219 prevSpanLimit=spanLimit;
// Walks s in alternating spans obtained from set.span(); spans searched with
// USET_SPAN_SIMPLE are passed to norm2.spanQuickCheckYes(), and the walk
// reacts when yesLimit falls short of the end of the span.
// NOTE(review): the return type, the declaration of yesLimit, the else
// keyword and all return statements are not visible in this listing.
225 FilteredNormalizer2::spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
// Helper defined elsewhere; presumably flags errorCode if s's buffer cannot
// be obtained — TODO confirm.
226 uprv_checkCanGetBuffer(s, errorCode);
227 if(U_FAILURE(errorCode)) {
230 USetSpanCondition spanCondition=USET_SPAN_SIMPLE;
// No increment clause: prevSpanLimit is advanced at the bottom of the loop body.
231 for(int32_t prevSpanLimit=0; prevSpanLimit<s.length();) {
232 int32_t spanLimit=set.span(s, prevSpanLimit, spanCondition);
// NOT_CONTAINED span: no check needed, just flip the condition.
233 if(spanCondition==USET_SPAN_NOT_CONTAINED) {
234 spanCondition=USET_SPAN_SIMPLE;
// The start of this statement is not visible; presumably it computes
// yesLimit from the wrapped normalizer's result on this span — TODO confirm.
238 norm2.spanQuickCheckYes(
239 s.tempSubStringBetween(prevSpanLimit, spanLimit), errorCode);
// Failure, or yesLimit is before the end of this span; the branch body is
// not visible in this listing.
240 if(U_FAILURE(errorCode) || yesLimit<spanLimit) {
243 spanCondition=USET_SPAN_NOT_CONTAINED;
// Continue from the end of the span just examined.
245 prevSpanLimit=spanLimit;
// True for any c outside the filter set; for c inside the set the answer is
// norm2.hasBoundaryBefore(c).
// NOTE(review): the return type line and closing brace are not visible in
// this listing.
251 FilteredNormalizer2::hasBoundaryBefore(UChar32 c) const {
252 return !set.contains(c) || norm2.hasBoundaryBefore(c);
// True for any c outside the filter set; for c inside the set the answer is
// norm2.hasBoundaryAfter(c).
// NOTE(review): the return type line and closing brace are not visible in
// this listing.
256 FilteredNormalizer2::hasBoundaryAfter(UChar32 c) const {
257 return !set.contains(c) || norm2.hasBoundaryAfter(c);
// True for any c outside the filter set; for c inside the set the answer is
// norm2.isInert(c).
// NOTE(review): the return type line and closing brace are not visible in
// this listing.
261 FilteredNormalizer2::isInert(UChar32 c) const {
262 return !set.contains(c) || norm2.isInert(c);
267 // C API ------------------------------------------------------------------- ***
// C API wrapper: allocates a FilteredNormalizer2 built from norm2 and
// filterSet and returns it cast to UNormalizer2 *.
// Sets *pErrorCode to U_ILLEGAL_ARGUMENT_ERROR when filterSet is NULL and to
// U_MEMORY_ALLOCATION_ERROR on a condition not visible here.
// NOTE(review): the bodies of the early-exit branches and the condition
// guarding the allocation error are not visible in this listing. No null
// check on norm2 is visible either, although it is dereferenced below —
// confirm against the complete file.
271 U_CAPI UNormalizer2 * U_EXPORT2
272 unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode) {
// Do nothing further if the caller already has an error recorded.
273 if(U_FAILURE(*pErrorCode)) {
276 if(filterSet==NULL) {
277 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
// Both C handles are converted to their C++ counterparts and passed by
// reference to the constructor.
280 Normalizer2 *fn2=new FilteredNormalizer2(*(Normalizer2 *)norm2,
281 *UnicodeSet::fromUSet(filterSet));
// Presumably guarded by a check that fn2 is NULL — TODO confirm.
283 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
285 return (UNormalizer2 *)fn2;
288 #endif // !UCONFIG_NO_NORMALIZATION