1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 **********************************************************************
5 * Copyright (c) 2003-2011, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
9 * Created: September 24 2003
11 **********************************************************************
14 #include "unicode/parsepos.h"
15 #include "unicode/symtable.h"
16 #include "unicode/unistr.h"
17 #include "unicode/utf16.h"
18 #include "patternprops.h"
// Upper bound on how many characters next() requests from lookahead()
// when decoding a backslash escape. 12 matches the longer of the two
// notations shown below: a surrogate pair written as two backslash-u
// escapes of six characters each.
20 /* \U87654321 or \ud800\udc00 */
21 #define MAX_U_NOTATION_LEN 12
// Constructs an iterator from the text to scan, a symbol table pointer
// and a parse position.
// NOTE(review): the member initializer list and body are not part of this
// excerpt. Presumably theText, theSym and thePos initialize the text, sym
// and pos members used by the other methods -- confirm. next() tests
// sym != 0 before using it, which suggests a null symbol table is accepted.
25 RuleCharacterIterator::RuleCharacterIterator(const UnicodeString& theText, const SymbolTable* theSym,
26 ParsePosition& thePos) :
// Reports whether iteration is finished. True only when both conditions
// hold: no buffer is active (buf is null) and the parse position index
// equals text.length().
34 UBool RuleCharacterIterator::atEnd() const {
35 return buf == 0 && pos.getIndex() == text.length();
// Reads a character and advances past it, applying the option bits that
// are tested below: PARSE_VARIABLES, SKIP_WHITESPACE and PARSE_ESCAPES.
//
// options    bit mask of the flags listed above.
// isEscaped  output flag. NOTE(review): the statements that assign it are
//            not part of this excerpt.
// ec         in/out error code. If it already signals failure on entry the
//            function returns DONE without doing anything. The visible
//            lines assign U_UNDEFINED_VARIABLE and
//            U_MALFORMED_UNICODE_ESCAPE; the conditions guarding those
//            assignments are not shown here.
//
// NOTE(review): several lines of this function are missing from the
// excerpt (declaration of c, the loop structure, some guards and the
// return statements), so the comments below describe only what is visible.
38 UChar32 RuleCharacterIterator::next(int32_t options, UBool& isEscaped, UErrorCode& ec) {
39 if (U_FAILURE(ec)) return DONE;
// Move past c by its UTF-16 length.
46 _advance(U16_LENGTH(c));
// A variable reference is only recognized when c is SYMBOL_REF, no buffer
// is active (buf is null), PARSE_VARIABLES is requested and a symbol
// table is available.
48 if (c == SymbolTable::SYMBOL_REF && buf == 0 &&
49 (options & PARSE_VARIABLES) != 0 && sym != 0) {
// Let the symbol table parse the variable name out of the text at pos.
50 UnicodeString name = sym->parseReference(text, pos, text.length());
51 // If name is empty there was an isolated SYMBOL_REF;
52 // return it. Caller must be prepared for this.
53 if (name.length() == 0) {
// The value looked up for the variable becomes the active buffer.
57 buf = sym->lookup(name);
59 ec = U_UNDEFINED_VARIABLE;
62 // Handle empty variable value
63 if (buf->length() == 0) {
// Pattern white space is matched here only when SKIP_WHITESPACE is set.
69 if ((options & SKIP_WHITESPACE) != 0 && PatternProps::isWhiteSpace(c)) {
// A backslash starts an escape only when PARSE_ESCAPES is set.
73 if (c == 0x5C /*'\\'*/ && (options & PARSE_ESCAPES) != 0) {
74 UnicodeString tempEscape;
// Decode the escape from at most MAX_U_NOTATION_LEN characters of
// lookahead; the decoded value replaces c.
76 c = lookahead(tempEscape, MAX_U_NOTATION_LEN).unescapeAt(offset);
80 ec = U_MALFORMED_UNICODE_ESCAPE;
// Copies iterator state into p. The visible line stores the current parse
// position index in p.pos.
// NOTE(review): any other field assignments are not part of this excerpt.
91 void RuleCharacterIterator::getPos(RuleCharacterIterator::Pos& p) const {
93 p.pos = pos.getIndex();
// Takes a Pos by const reference.
// NOTE(review): the body is not part of this excerpt; presumably it
// restores the state captured by getPos() -- confirm.
97 void RuleCharacterIterator::setPos(const RuleCharacterIterator::Pos& p) {
// Skips characters that the given options mark as ignorable. Only
// SKIP_WHITESPACE is examined: when that bit is clear nothing visible
// here runs.
// NOTE(review): the loop header is missing from this excerpt; the break
// below implies the three statements repeat until a character that is not
// pattern white space is seen.
103 void RuleCharacterIterator::skipIgnored(int32_t options) {
104 if ((options & SKIP_WHITESPACE) != 0) {
// Peek at the current character without consuming it.
106 UChar32 a = _current();
107 if (!PatternProps::isWhiteSpace(a)) break;
// Consume the white space character by its UTF-16 length.
108 _advance(U16_LENGTH(a));
// Extracts upcoming text into result without moving the iterator (the
// method is const).
//
// result        receives the extracted text.
// maxLookAhead  maximum length to extract; a negative value is replaced by
//               0x7FFFFFFF, i.e. effectively no limit.
//
// NOTE(review): the condition that chooses between the two extract calls
// and the return statement are not part of this excerpt. Presumably the
// buffer is used when buf is non-null and result is returned -- confirm.
113 UnicodeString& RuleCharacterIterator::lookahead(UnicodeString& result, int32_t maxLookAhead) const {
114 if (maxLookAhead < 0) {
115 maxLookAhead = 0x7FFFFFFF;
// Source 1: the active buffer, starting at bufPos.
118 buf->extract(bufPos, maxLookAhead, result);
// Source 2: the main text, starting at the parse position index.
120 text.extract(pos.getIndex(), maxLookAhead, result);
// Takes a count of positions to move forward.
// NOTE(review): the body is not part of this excerpt; presumably it
// advances the iterator by count -- confirm.
125 void RuleCharacterIterator::jumpahead(int32_t count) {
// Builds a rendering of the main text with a '|' (0x7C) marker inserted
// at the current parse position index, and returns result. Only text and
// pos are consulted here; buf and bufPos are not.
//
// result  receives the rendering; its previous contents are replaced by
//         the extract call.
130 UnicodeString& RuleCharacterIterator::toString(UnicodeString& result) const {
131 int32_t b = pos.getIndex();
// Everything before the index goes in first.
132 text.extract(0, b, result);
133 return result.append((UChar) 0x7C).append(text, b, 0x7FFFFFFF); // Insert '|' at index
// Returns the character at the current position without advancing (the
// method is const). Two sources are visible: the buffer at bufPos, or the
// main text at the parse position index. For the main text, DONE is
// returned when the index is not below text.length().
// NOTE(review): the condition selecting between the two sources is not
// part of this excerpt; presumably the buffer wins when buf is non-null.
137 UChar32 RuleCharacterIterator::_current() const {
139 return buf->char32At(bufPos);
141 int i = pos.getIndex();
142 return (i < text.length()) ? text.char32At(i) : (UChar32)DONE;
146 void RuleCharacterIterator::_advance(int32_t count) {
149 if (bufPos == buf->length()) {
153 pos.setIndex(pos.getIndex() + count);
154 if (pos.getIndex() > text.length()) {
155 pos.setIndex(text.length());