1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
5 * Copyright (C) 2003-2014, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
11 * Modification History:
13 * Date Name Description
14 * 08/22/2003 ram Creation.
15 *******************************************************************************
18 // Safer use of UnicodeString: ask for `explicit` on the constructors governed by
// UNISTR_FROM_CHAR_EXPLICIT (see unicode/unistr.h), unless the build has already
// chosen a value. The definition precedes the ICU #includes that follow.
19 #ifndef UNISTR_FROM_CHAR_EXPLICIT
20 # define UNISTR_FROM_CHAR_EXPLICIT explicit
23 // Less important, but still a good idea: same treatment for the constructors
// governed by UNISTR_FROM_STRING_EXPLICIT. This is why the code in this file
// always spells out UnicodeString("...") when passing string literals.
24 #ifndef UNISTR_FROM_STRING_EXPLICIT
25 # define UNISTR_FROM_STRING_EXPLICIT explicit
28 #include "unicode/regex.h"
29 #include "unicode/unistr.h"
30 #include "unicode/parseerr.h"
37 #if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* do not compile when RegularExpressions not available */
// Capacity of the local UnicodeString arrays that receive the pieces produced by
// RegexPattern::split() in getText(), getDescription(), getCount() and getAt().
39 #define MAX_SPLIT_STRINGS 20
// Table of regular-expression source strings with UPC_LIMIT entries. getCount() and
// getAt() index it with their UParseCommentsOption `option` argument to pick the
// pattern that is matched against each "@"-delimited piece of a comment.
41 const char *patternStrings[UPC_LIMIT]={
// removeText: regex search-and-replace performed on a UChar buffer, in place.
//
// The first srcLen code units of `source` are copied into a UnicodeString, every
// match of `patString` in that copy is replaced by `replaceText`, and the result
// is extracted back into `source`.
//
// @param source      buffer holding the input text; it also receives the output.
// @param srcLen      number of UChars of input; the same value is passed to
//                    extract() as the capacity of `source` for the output.
// @param patString   regular expression to look for.
// @param options     regex flags forwarded to the RegexMatcher constructor.
// @param replaceText text substituted for each match (an empty string deletes it).
// @param status      ICU error code; tested for NULL / prior failure before any work.
// @return            whatever UnicodeString::extract() returns for the rewritten
//                    text (presumably its length in UChars - confirm against ICU docs).
47 removeText(UChar *source, int32_t srcLen,
48 UnicodeString patString,uint32_t options,
49 UnicodeString replaceText, UErrorCode *status){
// Guard: status must be a valid pointer and must not already hold a failure.
51 if(status == NULL || U_FAILURE(*status)){
// Work on a UnicodeString copy of the caller's buffer.
55 UnicodeString src(source, srcLen);
// Constructing the matcher compiles patString; a bad pattern surfaces in *status.
57 RegexMatcher myMatcher(patString, src, options, *status);
58 if(U_FAILURE(*status)){
// Replace every match in one pass.
64 dest = myMatcher.replaceAll(replaceText,*status);
// Write the result over the original text, using srcLen as the destination capacity.
67 return dest.extract(source, srcLen, *status);
// trim: strips leading and trailing whitespace from a UChar buffer, in place.
//
// Runs three removeText() passes over `src`, each replacing its match with an
// empty string and feeding the resulting length into the next pass.
//
// @param src     buffer to trim; modified in place.
// @param srcLen  number of UChars of text in `src`.
// @param status  ICU error code, forwarded to every removeText() call.
71 trim(UChar *src, int32_t srcLen, UErrorCode *status){
// NOTE(review): this pattern ends with a literal space after the character class
// (options is 0), so the run must be followed by one more space - confirm this is intended.
72 srcLen = removeText(src, srcLen, UnicodeString("^[ \\r\\n]+ "), 0, UnicodeString(), status); // remove leading spaces and new lines
73 srcLen = removeText(src, srcLen, UnicodeString("^\\s+"), 0, UnicodeString(), status); // remove leading whitespace
74 srcLen = removeText(src, srcLen, UnicodeString("\\s+$"), 0, UnicodeString(), status); // remove trailing whitespace
// removeCmtText: cleans up the text of a block comment, in place.
//
// Steps, each operating on `source` and passing the new length to the next:
//   1. trim() the whole text;
//   2. delete the " * " decoration at the start of every line (UREGEX_MULTILINE
//      is passed so that the pattern's ^ is applied per line);
//   3. replace each run of spaces, CRs and LFs with a single space.
//
// @param source  buffer holding the comment text; modified in place.
// @param srcLen  number of UChars of text in `source`.
// @param status  ICU error code, forwarded to trim() and removeText().
// @return        the value returned by the final removeText() call.
79 removeCmtText(UChar* source, int32_t srcLen, UErrorCode* status){
80 srcLen = trim(source, srcLen, status);
81 UnicodeString patString("^\\s*?\\*\\s*?"); // remove pattern like " * " at the beginning of the line
82 srcLen = removeText(source, srcLen, patString, UREGEX_MULTILINE, UnicodeString(), status);
83 return removeText(source, srcLen, UnicodeString("[ \\r\\n]+"), 0, UnicodeString(" "), status);// collapse each run of spaces/new lines into one space
// getText: extracts the text captured by `patternString` from the first
// "@"-delimited piece of `source` that the pattern matches.
//
// `source` is split on "@" into at most MAX_SPLIT_STRINGS pieces. Each piece is
// tested with lookingAt(), i.e. the pattern must match starting at the beginning
// of the piece; UREGEX_DOTALL is set for the match. For the first piece that
// matches, capture group 1 is extracted into *dest.
//
// @param source         comment text to search.
// @param srcLen         number of UChars in `source`.
// @param dest           *dest is the output buffer handed to extract().
// @param destCapacity   capacity of *dest, forwarded to extract().
// @param patternString  regular expression; must define capture group 1.
// @return               on a match, the value returned by extract() for group 1.
87 getText(const UChar* source, int32_t srcLen,
88 UChar** dest, int32_t destCapacity,
89 UnicodeString patternString,
// Guard: status must be a valid pointer and must not already hold a failure.
92 if(status == NULL || U_FAILURE(*status)){
96 UnicodeString stringArray[MAX_SPLIT_STRINGS];
// NOTE(review): compile() hands back a RegexPattern through a raw pointer and no
// release of it appears in this function - confirm the object is deleted on every path.
97 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), 0, *status);
98 UnicodeString src (source,srcLen);
// Checked before `pattern` is dereferenced below.
100 if (U_FAILURE(*status)) {
103 pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
105 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
106 if (U_FAILURE(*status)) {
// NOTE(review): iterates over all MAX_SPLIT_STRINGS slots rather than the count
// returned by split() (getCount() and getAt() use that count) - unused slots are
// default-constructed strings.
109 for(int32_t i=0; i<MAX_SPLIT_STRINGS; i++){
110 matcher.reset(stringArray[i]);
111 if(matcher.lookingAt(*status)){
112 UnicodeString out = matcher.group(1, *status);
// First match wins: copy group 1 to the caller's buffer and stop.
114 return out.extract(*dest, destCapacity,*status);
// U+0040 COMMERCIAL AT ('@'), the character that introduces a tag in a comment.
121 #define AT_SIGN 0x0040
// getDescription: extracts the text that precedes the first "@" in `source`.
//
// `source` is split on "@" into at most MAX_SPLIT_STRINGS pieces. If the first
// piece contains no '@' character it is extracted into *dest and then trimmed
// in place with trim().
//
// @param source        comment text to search.
// @param srcLen        number of UChars in `source`.
// @param dest          *dest is the output buffer handed to extract() and trim().
// @param destCapacity  capacity of *dest, forwarded to extract().
// @return              when the first piece qualifies, the value returned by trim().
124 getDescription( const UChar* source, int32_t srcLen,
125 UChar** dest, int32_t destCapacity,
// Guard: status must be a valid pointer and must not already hold a failure.
127 if(status == NULL || U_FAILURE(*status)){
131 UnicodeString stringArray[MAX_SPLIT_STRINGS];
// NOTE(review): no release of the compiled RegexPattern appears in this function -
// confirm the object is deleted on every path.
132 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
133 UnicodeString src(source, srcLen);
// Checked before `pattern` is dereferenced below.
135 if (U_FAILURE(*status)) {
138 pattern->split(src, stringArray,MAX_SPLIT_STRINGS , *status);
// Only the first piece is considered, and only if it holds no '@'.
140 if(stringArray[0].indexOf((UChar)AT_SIGN)==-1){
141 int32_t destLen = stringArray[0].extract(*dest, destCapacity, *status);
142 return trim(*dest, destLen, status);
// getCount: examines how many "@"-delimited pieces of `source` start with the
// tag selected by `option`.
//
// `source` is split on "@" into at most MAX_SPLIT_STRINGS pieces, and each piece
// actually produced by the split is tested with lookingAt() against
// patternStrings[option] (UREGEX_DOTALL set). `count` presumably tallies the
// matching pieces and is the return value - TODO confirm.
//
// Side effect: when option is UPC_TRANSLATE and count exceeds 1, a message is
// written to stderr and the process is terminated via exit(U_UNSUPPORTED_ERROR).
//
// @param source  comment text to search.
// @param srcLen  number of UChars in `source`.
// @param option  selects the entry of patternStrings to match with.
// @param status  ICU error code; tested for NULL / prior failure before any work.
148 getCount(const UChar* source, int32_t srcLen,
149 UParseCommentsOption option, UErrorCode *status){
// Guard: status must be a valid pointer and must not already hold a failure.
151 if(status == NULL || U_FAILURE(*status)){
155 UnicodeString stringArray[MAX_SPLIT_STRINGS];
// NOTE(review): no release of the compiled RegexPattern appears in this function -
// confirm the object is deleted on every path.
156 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
157 UnicodeString src (source, srcLen);
// Checked before `pattern` is dereferenced below.
160 if (U_FAILURE(*status)) {
// retLen is the value split() returns; it bounds the loop below.
163 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
165 UnicodeString patternString(patternStrings[option]);
166 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
167 if (U_FAILURE(*status)) {
171 for(int32_t i=0; i<retLen; i++){
172 matcher.reset(stringArray[i]);
173 if(matcher.lookingAt(*status)){
// More than one @translate tag is treated as fatal for the whole program.
177 if(option == UPC_TRANSLATE && count > 1){
178 fprintf(stderr, "Multiple @translate tags cannot be supported.\n");
179 exit(U_UNSUPPORTED_ERROR);
// getAt: extracts the text captured from an "@"-delimited piece of `source`
// that starts with the tag selected by `option`.
//
// `source` is split on "@" into at most MAX_SPLIT_STRINGS pieces, and each piece
// actually produced by the split is tested with lookingAt() against
// patternStrings[option] (UREGEX_DOTALL set). For a matching piece, capture
// group 1 is extracted into *dest.
// NOTE(review): the name suggests a particular occurrence is selected - confirm
// how the matching piece is chosen.
//
// @param source        comment text to search.
// @param srcLen        number of UChars in `source`.
// @param dest          *dest is the output buffer handed to extract().
// @param destCapacity  capacity of *dest, forwarded to extract().
// @param option        selects the entry of patternStrings to match with.
// @return              on a match, the value returned by extract() for group 1.
185 getAt(const UChar* source, int32_t srcLen,
186 UChar** dest, int32_t destCapacity,
188 UParseCommentsOption option,
// Guard: status must be a valid pointer and must not already hold a failure.
191 if(status == NULL || U_FAILURE(*status)){
195 UnicodeString stringArray[MAX_SPLIT_STRINGS];
// NOTE(review): no release of the compiled RegexPattern appears in this function -
// confirm the object is deleted on every path.
196 RegexPattern *pattern = RegexPattern::compile(UnicodeString("@"), UREGEX_MULTILINE, *status);
197 UnicodeString src (source, srcLen);
// Checked before `pattern` is dereferenced below.
200 if (U_FAILURE(*status)) {
// retLen is the value split() returns; it bounds the loop below.
203 int32_t retLen = pattern->split(src, stringArray, MAX_SPLIT_STRINGS, *status);
205 UnicodeString patternString(patternStrings[option]);
206 RegexMatcher matcher(patternString, UREGEX_DOTALL, *status);
207 if (U_FAILURE(*status)) {
211 for(int32_t i=0; i<retLen; i++){
212 matcher.reset(stringArray[i]);
213 if(matcher.lookingAt(*status)){
// Copy capture group 1 of the matching piece to the caller's buffer.
215 UnicodeString out = matcher.group(1, *status);
216 return out.extract(*dest, destCapacity,*status);
// getTranslate: extracts the text following a "translate" tag in `source`.
//
// Delegates to getText() with a pattern anchored on the word "translate" and
// then trims the extracted text in place.
//
// @param source        comment text to search.
// @param srcLen        number of UChars in `source`.
// @param dest          *dest is the output buffer filled by getText().
// @param destCapacity  capacity of *dest.
// @return              the value returned by trim() for the extracted text.
227 getTranslate( const UChar* source, int32_t srcLen,
228 UChar** dest, int32_t destCapacity,
// The reluctant \s*? may match nothing, so whitespace after the tag can end up
// inside capture group 1; the trim() below removes it.
230 UnicodeString notePatternString("^translate\\s*?(.*)");
232 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status);
233 return trim(*dest, destLen, status);
// getNote: extracts the text following a "note" tag in `source`.
//
// Delegates to getText() with a pattern anchored on the word "note" and then
// trims the extracted text in place.
//
// @param source        comment text to search.
// @param srcLen        number of UChars in `source`.
// @param dest          *dest is the output buffer filled by getText().
// @param destCapacity  capacity of *dest.
// @return              the value returned by trim() for the extracted text.
237 getNote(const UChar* source, int32_t srcLen,
238 UChar** dest, int32_t destCapacity,
// The reluctant \s*? may match nothing, so whitespace after the tag can end up
// inside capture group 1; the trim() below removes it.
241 UnicodeString notePatternString("^note\\s*?(.*)");
242 int32_t destLen = getText(source, srcLen, dest, destCapacity, notePatternString, status);
243 return trim(*dest, destLen, status);
247 #endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */