1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 *******************************************************************************
5 * Copyright (C) 2010-2011, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
8 * file name: bytestrie.cpp
10 * tab size: 8 (not used)
13 * created on: 2010sep25
14 * created by: Markus W. Scherer
17 #include "unicode/utypes.h"
18 #include "unicode/bytestream.h"
19 #include "unicode/bytestrie.h"
20 #include "unicode/uobject.h"
26 BytesTrie::~BytesTrie() {
27 uprv_free(ownedArray_);
30 // lead byte already shifted right by 1.
32 BytesTrie::readValue(const uint8_t *pos, int32_t leadByte) {
34 if(leadByte<kMinTwoByteValueLead) {
35 value=leadByte-kMinOneByteValueLead;
36 } else if(leadByte<kMinThreeByteValueLead) {
37 value=((leadByte-kMinTwoByteValueLead)<<8)|*pos;
38 } else if(leadByte<kFourByteValueLead) {
39 value=((leadByte-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1];
40 } else if(leadByte==kFourByteValueLead) {
41 value=(pos[0]<<16)|(pos[1]<<8)|pos[2];
43 value=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
49 BytesTrie::jumpByDelta(const uint8_t *pos) {
51 if(delta<kMinTwoByteDeltaLead) {
53 } else if(delta<kMinThreeByteDeltaLead) {
54 delta=((delta-kMinTwoByteDeltaLead)<<8)|*pos++;
55 } else if(delta<kFourByteDeltaLead) {
56 delta=((delta-kMinThreeByteDeltaLead)<<16)|(pos[0]<<8)|pos[1];
58 } else if(delta==kFourByteDeltaLead) {
59 delta=(pos[0]<<16)|(pos[1]<<8)|pos[2];
62 delta=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
69 BytesTrie::current() const {
70 const uint8_t *pos=pos_;
72 return USTRINGTRIE_NO_MATCH;
75 return (remainingMatchLength_<0 && (node=*pos)>=kMinValueLead) ?
76 valueResult(node) : USTRINGTRIE_NO_VALUE;
81 BytesTrie::branchNext(const uint8_t *pos, int32_t length, int32_t inByte) {
82 // Branch according to the current byte.
87 // The length of the branch is the number of bytes to select from.
88 // The data structure encodes a binary search.
89 while(length>kMaxBranchLinearSubNodeLength) {
94 length=length-(length>>1);
98 // Drop down to linear search for the last few bytes.
99 // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
100 // and divides length by 2.
103 UStringTrieResult result;
105 U_ASSERT(node>=kMinValueLead);
106 if(node&kValueIsFinal) {
107 // Leave the final value for getValue() to read.
108 result=USTRINGTRIE_FINAL_VALUE;
110 // Use the non-final value as the jump delta.
112 // int32_t delta=readValue(pos, node>>1);
115 if(node<kMinTwoByteValueLead) {
116 delta=node-kMinOneByteValueLead;
117 } else if(node<kMinThreeByteValueLead) {
118 delta=((node-kMinTwoByteValueLead)<<8)|*pos++;
119 } else if(node<kFourByteValueLead) {
120 delta=((node-kMinThreeByteValueLead)<<16)|(pos[0]<<8)|pos[1];
122 } else if(node==kFourByteValueLead) {
123 delta=(pos[0]<<16)|(pos[1]<<8)|pos[2];
126 delta=(pos[0]<<24)|(pos[1]<<16)|(pos[2]<<8)|pos[3];
132 result= node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
143 return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
146 return USTRINGTRIE_NO_MATCH;
151 BytesTrie::nextImpl(const uint8_t *pos, int32_t inByte) {
154 if(node<kMinLinearMatch) {
155 return branchNext(pos, node, inByte);
156 } else if(node<kMinValueLead) {
157 // Match the first of length+1 bytes.
158 int32_t length=node-kMinLinearMatch; // Actual match length minus 1.
160 remainingMatchLength_=--length;
162 return (length<0 && (node=*pos)>=kMinValueLead) ?
163 valueResult(node) : USTRINGTRIE_NO_VALUE;
168 } else if(node&kValueIsFinal) {
169 // No further matching bytes.
172 // Skip intermediate value.
173 pos=skipValue(pos, node);
174 // The next node must not also be a value node.
175 U_ASSERT(*pos<kMinValueLead);
179 return USTRINGTRIE_NO_MATCH;
183 BytesTrie::next(int32_t inByte) {
184 const uint8_t *pos=pos_;
186 return USTRINGTRIE_NO_MATCH;
191 int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
193 // Remaining part of a linear-match node.
195 remainingMatchLength_=--length;
198 return (length<0 && (node=*pos)>=kMinValueLead) ?
199 valueResult(node) : USTRINGTRIE_NO_VALUE;
202 return USTRINGTRIE_NO_MATCH;
205 return nextImpl(pos, inByte);
209 BytesTrie::next(const char *s, int32_t sLength) {
210 if(sLength<0 ? *s==0 : sLength==0) {
214 const uint8_t *pos=pos_;
216 return USTRINGTRIE_NO_MATCH;
218 int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
220 // Fetch the next input byte, if there is one.
221 // Continue a linear-match node without rechecking sLength<0.
225 if((inByte=*s++)==0) {
226 remainingMatchLength_=length;
229 return (length<0 && (node=*pos)>=kMinValueLead) ?
230 valueResult(node) : USTRINGTRIE_NO_VALUE;
233 remainingMatchLength_=length;
238 return USTRINGTRIE_NO_MATCH;
246 remainingMatchLength_=length;
249 return (length<0 && (node=*pos)>=kMinValueLead) ?
250 valueResult(node) : USTRINGTRIE_NO_VALUE;
255 remainingMatchLength_=length;
260 return USTRINGTRIE_NO_MATCH;
268 if(node<kMinLinearMatch) {
269 UStringTrieResult result=branchNext(pos, node, inByte);
270 if(result==USTRINGTRIE_NO_MATCH) {
271 return USTRINGTRIE_NO_MATCH;
273 // Fetch the next input byte, if there is one.
275 if((inByte=*s++)==0) {
285 if(result==USTRINGTRIE_FINAL_VALUE) {
286 // No further matching bytes.
288 return USTRINGTRIE_NO_MATCH;
290 pos=pos_; // branchNext() advanced pos and wrote it to pos_ .
291 } else if(node<kMinValueLead) {
292 // Match length+1 bytes.
293 length=node-kMinLinearMatch; // Actual match length minus 1.
296 return USTRINGTRIE_NO_MATCH;
301 } else if(node&kValueIsFinal) {
302 // No further matching bytes.
304 return USTRINGTRIE_NO_MATCH;
306 // Skip intermediate value.
307 pos=skipValue(pos, node);
308 // The next node must not also be a value node.
309 U_ASSERT(*pos<kMinValueLead);
316 BytesTrie::findUniqueValueFromBranch(const uint8_t *pos, int32_t length,
317 UBool haveUniqueValue, int32_t &uniqueValue) {
318 while(length>kMaxBranchLinearSubNodeLength) {
319 ++pos; // ignore the comparison byte
320 if(NULL==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) {
323 length=length-(length>>1);
327 ++pos; // ignore a comparison byte
330 UBool isFinal=(UBool)(node&kValueIsFinal);
331 int32_t value=readValue(pos, node>>1);
332 pos=skipValue(pos, node);
334 if(haveUniqueValue) {
335 if(value!=uniqueValue) {
340 haveUniqueValue=TRUE;
343 if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) {
346 haveUniqueValue=TRUE;
349 return pos+1; // ignore the last comparison byte
353 BytesTrie::findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &uniqueValue) {
356 if(node<kMinLinearMatch) {
360 pos=findUniqueValueFromBranch(pos, node+1, haveUniqueValue, uniqueValue);
364 haveUniqueValue=TRUE;
365 } else if(node<kMinValueLead) {
367 pos+=node-kMinLinearMatch+1; // Ignore the match bytes.
369 UBool isFinal=(UBool)(node&kValueIsFinal);
370 int32_t value=readValue(pos, node>>1);
371 if(haveUniqueValue) {
372 if(value!=uniqueValue) {
377 haveUniqueValue=TRUE;
382 pos=skipValue(pos, node);
388 BytesTrie::getNextBytes(ByteSink &out) const {
389 const uint8_t *pos=pos_;
393 if(remainingMatchLength_>=0) {
394 append(out, *pos); // Next byte of a pending linear-match node.
398 if(node>=kMinValueLead) {
399 if(node&kValueIsFinal) {
402 pos=skipValue(pos, node);
404 U_ASSERT(node<kMinValueLead);
407 if(node<kMinLinearMatch) {
411 getNextBranchBytes(pos, ++node, out);
414 // First byte of the linear-match node.
421 BytesTrie::getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out) {
422 while(length>kMaxBranchLinearSubNodeLength) {
423 ++pos; // ignore the comparison byte
424 getNextBranchBytes(jumpByDelta(pos), length>>1, out);
425 length=length-(length>>1);
436 BytesTrie::append(ByteSink &out, int c) {