1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
5 * Copyright (C) 2010-2012, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: bytestrieiterator.cpp
10 * tab size: 8 (not used)
13 * created on: 2010nov03
14 * created by: Markus W. Scherer
17 #include "unicode/utypes.h"
18 #include "unicode/bytestrie.h"
19 #include "unicode/stringpiece.h"
25 BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength,
26 UErrorCode &errorCode)
27 : bytes_(static_cast<const uint8_t *>(trieBytes)),
28 pos_(bytes_), initialPos_(bytes_),
29 remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
30 str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
31 if(U_FAILURE(errorCode)) {
34 // str_ and stack_ are pointers so that it's easy to turn bytestrie.h into
35 // a public API header for which we would want it to depend only on
36 // other public headers.
37 // Unlike BytesTrie itself, its Iterator performs memory allocations anyway
38 // via the CharString and UVector32 implementations, so this additional
40 str_=new CharString();
41 stack_=new UVector32(errorCode);
42 if(U_SUCCESS(errorCode) && (str_==NULL || stack_==NULL)) {
43 errorCode=U_MEMORY_ALLOCATION_ERROR;
47 BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength,
48 UErrorCode &errorCode)
49 : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_),
50 remainingMatchLength_(trie.remainingMatchLength_),
51 initialRemainingMatchLength_(trie.remainingMatchLength_),
52 str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
53 if(U_FAILURE(errorCode)) {
56 str_=new CharString();
57 stack_=new UVector32(errorCode);
58 if(U_FAILURE(errorCode)) {
61 if(str_==NULL || stack_==NULL) {
62 errorCode=U_MEMORY_ALLOCATION_ERROR;
65 int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
67 // Pending linear-match node, append remaining bytes to str_.
69 if(maxLength_>0 && length>maxLength_) {
70 length=maxLength_; // This will leave remainingMatchLength>=0 as a signal.
72 str_->append(reinterpret_cast<const char *>(pos_), length, errorCode);
74 remainingMatchLength_-=length;
78 BytesTrie::Iterator::~Iterator() {
84 BytesTrie::Iterator::reset() {
86 remainingMatchLength_=initialRemainingMatchLength_;
87 int32_t length=remainingMatchLength_+1; // Remaining match length.
88 if(maxLength_>0 && length>maxLength_) {
91 str_->truncate(length);
93 remainingMatchLength_-=length;
99 BytesTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); }
102 BytesTrie::Iterator::next(UErrorCode &errorCode) {
103 if(U_FAILURE(errorCode)) {
106 const uint8_t *pos=pos_;
108 if(stack_->isEmpty()) {
111 // Pop the state off the stack and continue with the next outbound edge of
113 int32_t stackSize=stack_->size();
114 int32_t length=stack_->elementAti(stackSize-1);
115 pos=bytes_+stack_->elementAti(stackSize-2);
116 stack_->setSize(stackSize-2);
117 str_->truncate(length&0xffff);
118 length=(int32_t)((uint32_t)length>>16);
120 pos=branchNext(pos, length, errorCode);
122 return TRUE; // Reached a final value.
125 str_->append((char)*pos++, errorCode);
128 if(remainingMatchLength_>=0) {
129 // We only get here if we started in a pending linear-match node
130 // with more than maxLength remaining bytes.
131 return truncateAndStop();
135 if(node>=kMinValueLead) {
136 // Deliver value for the byte sequence so far.
137 UBool isFinal=(UBool)(node&kValueIsFinal);
138 value_=readValue(pos, node>>1);
139 if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) {
142 pos_=skipValue(pos, node);
146 if(maxLength_>0 && str_->length()==maxLength_) {
147 return truncateAndStop();
149 if(node<kMinLinearMatch) {
153 pos=branchNext(pos, node+1, errorCode);
155 return TRUE; // Reached a final value.
158 // Linear-match node, append length bytes to str_.
159 int32_t length=node-kMinLinearMatch+1;
160 if(maxLength_>0 && str_->length()+length>maxLength_) {
161 str_->append(reinterpret_cast<const char *>(pos),
162 maxLength_-str_->length(), errorCode);
163 return truncateAndStop();
165 str_->append(reinterpret_cast<const char *>(pos), length, errorCode);
172 BytesTrie::Iterator::getString() const {
173 return str_ == NULL ? StringPiece() : str_->toStringPiece();
177 BytesTrie::Iterator::truncateAndStop() {
179 value_=-1; // no real value for str
183 // Branch node, needs to take the first outbound edge and push state for the rest.
185 BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) {
186 while(length>kMaxBranchLinearSubNodeLength) {
187 ++pos; // ignore the comparison byte
188 // Push state for the greater-or-equal edge.
189 stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode);
190 stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode);
191 // Follow the less-than edge.
193 pos=jumpByDelta(pos);
195 // List of key-value pairs where values are either final values or jump deltas.
196 // Read the first (key, value) pair.
197 uint8_t trieByte=*pos++;
199 UBool isFinal=(UBool)(node&kValueIsFinal);
200 int32_t value=readValue(pos, node>>1);
201 pos=skipValue(pos, node);
202 stack_->addElement((int32_t)(pos-bytes_), errorCode);
203 stack_->addElement(((length-1)<<16)|str_->length(), errorCode);
204 str_->append((char)trieByte, errorCode);