2 *******************************************************************************
3 * Copyright (C) 2010-2012, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * file name: bytestrieiterator.cpp
8 * tab size: 8 (not used)
11 * created on: 2010nov03
12 * created by: Markus W. Scherer
15 #include "unicode/utypes.h"
16 #include "unicode/bytestrie.h"
17 #include "unicode/stringpiece.h"
23 BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength,
24 UErrorCode &errorCode)
25 : bytes_(static_cast<const uint8_t *>(trieBytes)),
26 pos_(bytes_), initialPos_(bytes_),
27 remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
28 str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
29 if(U_FAILURE(errorCode)) {
32 // str_ and stack_ are pointers so that it's easy to turn bytestrie.h into
33 // a public API header for which we would want it to depend only on
34 // other public headers.
35 // Unlike BytesTrie itself, its Iterator performs memory allocations anyway
36 // via the CharString and UVector32 implementations, so this additional
38 str_=new CharString();
39 stack_=new UVector32(errorCode);
40 if(U_SUCCESS(errorCode) && (str_==NULL || stack_==NULL)) {
41 errorCode=U_MEMORY_ALLOCATION_ERROR;
45 BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength,
46 UErrorCode &errorCode)
47 : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_),
48 remainingMatchLength_(trie.remainingMatchLength_),
49 initialRemainingMatchLength_(trie.remainingMatchLength_),
50 str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
51 if(U_FAILURE(errorCode)) {
54 str_=new CharString();
55 stack_=new UVector32(errorCode);
56 if(U_FAILURE(errorCode)) {
59 if(str_==NULL || stack_==NULL) {
60 errorCode=U_MEMORY_ALLOCATION_ERROR;
63 int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
65 // Pending linear-match node, append remaining bytes to str_.
67 if(maxLength_>0 && length>maxLength_) {
68 length=maxLength_; // This will leave remainingMatchLength>=0 as a signal.
70 str_->append(reinterpret_cast<const char *>(pos_), length, errorCode);
72 remainingMatchLength_-=length;
76 BytesTrie::Iterator::~Iterator() {
82 BytesTrie::Iterator::reset() {
84 remainingMatchLength_=initialRemainingMatchLength_;
85 int32_t length=remainingMatchLength_+1; // Remaining match length.
86 if(maxLength_>0 && length>maxLength_) {
89 str_->truncate(length);
91 remainingMatchLength_-=length;
97 BytesTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); }
100 BytesTrie::Iterator::next(UErrorCode &errorCode) {
101 if(U_FAILURE(errorCode)) {
104 const uint8_t *pos=pos_;
106 if(stack_->isEmpty()) {
109 // Pop the state off the stack and continue with the next outbound edge of
111 int32_t stackSize=stack_->size();
112 int32_t length=stack_->elementAti(stackSize-1);
113 pos=bytes_+stack_->elementAti(stackSize-2);
114 stack_->setSize(stackSize-2);
115 str_->truncate(length&0xffff);
116 length=(int32_t)((uint32_t)length>>16);
118 pos=branchNext(pos, length, errorCode);
120 return TRUE; // Reached a final value.
123 str_->append((char)*pos++, errorCode);
126 if(remainingMatchLength_>=0) {
127 // We only get here if we started in a pending linear-match node
128 // with more than maxLength remaining bytes.
129 return truncateAndStop();
133 if(node>=kMinValueLead) {
134 // Deliver value for the byte sequence so far.
135 UBool isFinal=(UBool)(node&kValueIsFinal);
136 value_=readValue(pos, node>>1);
137 if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) {
140 pos_=skipValue(pos, node);
142 sp_.set(str_->data(), str_->length());
145 if(maxLength_>0 && str_->length()==maxLength_) {
146 return truncateAndStop();
148 if(node<kMinLinearMatch) {
152 pos=branchNext(pos, node+1, errorCode);
154 return TRUE; // Reached a final value.
157 // Linear-match node, append length bytes to str_.
158 int32_t length=node-kMinLinearMatch+1;
159 if(maxLength_>0 && str_->length()+length>maxLength_) {
160 str_->append(reinterpret_cast<const char *>(pos),
161 maxLength_-str_->length(), errorCode);
162 return truncateAndStop();
164 str_->append(reinterpret_cast<const char *>(pos), length, errorCode);
171 BytesTrie::Iterator::truncateAndStop() {
173 sp_.set(str_->data(), str_->length());
174 value_=-1; // no real value for str
178 // Branch node, needs to take the first outbound edge and push state for the rest.
180 BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) {
181 while(length>kMaxBranchLinearSubNodeLength) {
182 ++pos; // ignore the comparison byte
183 // Push state for the greater-or-equal edge.
184 stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode);
185 stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode);
186 // Follow the less-than edge.
188 pos=jumpByDelta(pos);
190 // List of key-value pairs where values are either final values or jump deltas.
191 // Read the first (key, value) pair.
192 uint8_t trieByte=*pos++;
194 UBool isFinal=(UBool)(node&kValueIsFinal);
195 int32_t value=readValue(pos, node>>1);
196 pos=skipValue(pos, node);
197 stack_->addElement((int32_t)(pos-bytes_), errorCode);
198 stack_->addElement(((length-1)<<16)|str_->length(), errorCode);
199 str_->append((char)trieByte, errorCode);
202 sp_.set(str_->data(), str_->length());