2 *******************************************************************************
3 * Copyright (C) 2010-2011, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 *******************************************************************************
6 * file name: bytestrie.cpp
8 * tab size: 8 (not used)
11 * created on: 2010sep25
12 * created by: Markus W. Scherer
15 #include "unicode/utypes.h"
16 #include "unicode/bytestream.h"
17 #include "unicode/bytestrie.h"
18 #include "unicode/uobject.h"
24 BytesTrie::~BytesTrie() {
25 uprv_free(ownedArray_);
28 // lead byte already shifted right by 1.
30 BytesTrie::readValue(const uint8_t *pos, int32_t leadByte) {
32 if(leadByte<kMinTwoByteValueLead) {
33 value=leadByte-kMinOneByteValueLead;
34 } else if(leadByte<kMinThreeByteValueLead) {
35 value=((leadByte-kMinTwoByteValueLead)<<8)|*pos;
36 } else if(leadByte<kFourByteValueLead) {
37 value=((leadByte-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1];
38 } else if(leadByte==kFourByteValueLead) {
39 value=(pos[0]<<16)|(pos[1]<<8)|pos[2];
41 value=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
47 BytesTrie::jumpByDelta(const uint8_t *pos) {
49 if(delta<kMinTwoByteDeltaLead) {
51 } else if(delta<kMinThreeByteDeltaLead) {
52 delta=((delta-kMinTwoByteDeltaLead)<<8)|*pos++;
53 } else if(delta<kFourByteDeltaLead) {
54 delta=((delta-kMinThreeByteDeltaLead)<<16)|(pos[0]<<8)|pos[1];
56 } else if(delta==kFourByteDeltaLead) {
57 delta=(pos[0]<<16)|(pos[1]<<8)|pos[2];
60 delta=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
67 BytesTrie::current() const {
68 const uint8_t *pos=pos_;
70 return USTRINGTRIE_NO_MATCH;
73 return (remainingMatchLength_<0 && (node=*pos)>=kMinValueLead) ?
74 valueResult(node) : USTRINGTRIE_NO_VALUE;
79 BytesTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) {
80 // Branch according to the current byte.
85 // The length of the branch is the number of bytes to select from.
86 // The data structure encodes a binary search.
87 while(length>kMaxBranchLinearSubNodeLength) {
92 length=length-(length>>1);
96 // Drop down to linear search for the last few bytes.
97 // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
98 // and divides length by 2.
101 UStringTrieResult result;
103 U_ASSERT(node>=kMinValueLead);
104 if(node&kValueIsFinal) {
105 // Leave the final value for getValue() to read.
106 result=USTRINGTRIE_FINAL_VALUE;
108 // Use the non-final value as the jump delta.
110 // int32_t delta=readValue(pos, node>>1);
113 if(node<kMinTwoByteValueLead) {
114 delta=node-kMinOneByteValueLead;
115 } else if(node<kMinThreeByteValueLead) {
116 delta=((node-kMinTwoByteValueLead)<<8)|*pos++;
117 } else if(node<kFourByteValueLead) {
118 delta=((node-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1];
120 } else if(node==kFourByteValueLead) {
121 delta=(pos[0]<<16)|(pos[1]<<8)|pos[2];
124 delta=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
130 result= node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
141 return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
144 return USTRINGTRIE_NO_MATCH;
149 BytesTrie::nextImpl(const uint8_t *pos, int32_t inByte) {
152 if(node<kMinLinearMatch) {
153 return branchNext(pos, node, inByte);
154 } else if(node<kMinValueLead) {
155 // Match the first of length+1 bytes.
156 int32_t length=node-kMinLinearMatch; // Actual match length minus 1.
158 remainingMatchLength_=--length;
160 return (length<0 && (node=*pos)>=kMinValueLead) ?
161 valueResult(node) : USTRINGTRIE_NO_VALUE;
166 } else if(node&kValueIsFinal) {
167 // No further matching bytes.
170 // Skip intermediate value.
171 pos=skipValue(pos, node);
172 // The next node must not also be a value node.
173 U_ASSERT(*pos<kMinValueLead);
177 return USTRINGTRIE_NO_MATCH;
181 BytesTrie::next(int32_t inByte) {
182 const uint8_t *pos=pos_;
184 return USTRINGTRIE_NO_MATCH;
189 int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
191 // Remaining part of a linear-match node.
193 remainingMatchLength_=--length;
196 return (length<0 && (node=*pos)>=kMinValueLead) ?
197 valueResult(node) : USTRINGTRIE_NO_VALUE;
200 return USTRINGTRIE_NO_MATCH;
203 return nextImpl(pos, inByte);
207 BytesTrie::next(const char *s, int32_t sLength) {
208 if(sLength<0 ? *s==0 : sLength==0) {
212 const uint8_t *pos=pos_;
214 return USTRINGTRIE_NO_MATCH;
216 int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
218 // Fetch the next input byte, if there is one.
219 // Continue a linear-match node without rechecking sLength<0.
223 if((inByte=*s++)==0) {
224 remainingMatchLength_=length;
227 return (length<0 && (node=*pos)>=kMinValueLead) ?
228 valueResult(node) : USTRINGTRIE_NO_VALUE;
231 remainingMatchLength_=length;
236 return USTRINGTRIE_NO_MATCH;
244 remainingMatchLength_=length;
247 return (length<0 && (node=*pos)>=kMinValueLead) ?
248 valueResult(node) : USTRINGTRIE_NO_VALUE;
253 remainingMatchLength_=length;
258 return USTRINGTRIE_NO_MATCH;
266 if(node<kMinLinearMatch) {
267 UStringTrieResult result=branchNext(pos, node, inByte);
268 if(result==USTRINGTRIE_NO_MATCH) {
269 return USTRINGTRIE_NO_MATCH;
271 // Fetch the next input byte, if there is one.
273 if((inByte=*s++)==0) {
283 if(result==USTRINGTRIE_FINAL_VALUE) {
284 // No further matching bytes.
286 return USTRINGTRIE_NO_MATCH;
288 pos=pos_; // branchNext() advanced pos and wrote it to pos_ .
289 } else if(node<kMinValueLead) {
290 // Match length+1 bytes.
291 length=node-kMinLinearMatch; // Actual match length minus 1.
294 return USTRINGTRIE_NO_MATCH;
299 } else if(node&kValueIsFinal) {
300 // No further matching bytes.
302 return USTRINGTRIE_NO_MATCH;
304 // Skip intermediate value.
305 pos=skipValue(pos, node);
306 // The next node must not also be a value node.
307 U_ASSERT(*pos<kMinValueLead);
314 BytesTrie::findUniqueValueFromBranch(const uint8_t *pos, int32_t length,
315 UBool haveUniqueValue, int32_t &uniqueValue) {
316 while(length>kMaxBranchLinearSubNodeLength) {
317 ++pos; // ignore the comparison byte
318 if(NULL==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) {
321 length=length-(length>>1);
325 ++pos; // ignore a comparison byte
328 UBool isFinal=(UBool)(node&kValueIsFinal);
329 int32_t value=readValue(pos, node>>1);
330 pos=skipValue(pos, node);
332 if(haveUniqueValue) {
333 if(value!=uniqueValue) {
338 haveUniqueValue=TRUE;
341 if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) {
344 haveUniqueValue=TRUE;
347 return pos+1; // ignore the last comparison byte
351 BytesTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &uniqueValue) {
354 if(node<kMinLinearMatch) {
358 pos=findUniqueValueFromBranch(pos, node+1, haveUniqueValue, uniqueValue);
362 haveUniqueValue=TRUE;
363 } else if(node<kMinValueLead) {
365 pos+=node-kMinLinearMatch+1; // Ignore the match bytes.
367 UBool isFinal=(UBool)(node&kValueIsFinal);
368 int32_t value=readValue(pos, node>>1);
369 if(haveUniqueValue) {
370 if(value!=uniqueValue) {
375 haveUniqueValue=TRUE;
380 pos=skipValue(pos, node);
386 BytesTrie::getNextBytes(ByteSink &out) const {
387 const uint8_t *pos=pos_;
391 if(remainingMatchLength_>=0) {
392 append(out, *pos); // Next byte of a pending linear-match node.
396 if(node>=kMinValueLead) {
397 if(node&kValueIsFinal) {
400 pos=skipValue(pos, node);
402 U_ASSERT(node<kMinValueLead);
405 if(node<kMinLinearMatch) {
409 getNextBranchBytes(pos, ++node, out);
412 // First byte of the linear-match node.
419 BytesTrie::getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out) {
420 while(length>kMaxBranchLinearSubNodeLength) {
421 ++pos; // ignore the comparison byte
422 getNextBranchBytes(jumpByDelta(pos), length>>1, out);
423 length=length-(length>>1);
434 BytesTrie::append(ByteSink &out, int c) {