1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
4 ***************************************************************************
5 * Copyright (C) 2002-2016 International Business Machines Corporation *
6 * and others. All rights reserved. *
7 ***************************************************************************
13 // Implementation of class RBBINode, which represents a node in the
14 // tree generated when parsing the Rules Based Break Iterator rules.
16 // This "Class" is actually closer to a struct.
17 // Code using it is expected to directly access fields much of the time.
20 #include "unicode/utypes.h"
22 #if !UCONFIG_NO_BREAK_ITERATION
24 #include "unicode/unistr.h"
25 #include "unicode/uniset.h"
26 #include "unicode/uchar.h"
27 #include "unicode/parsepos.h"
41 static int gLastSerial = 0;
45 //-------------------------------------------------------------------------
47 // Constructor. Just set the fields to reasonable default values.
49 //-------------------------------------------------------------------------
50 RBBINode::RBBINode(NodeType t) : UMemory() {
52 fSerialNum = ++gLastSerial;
62 fLookAheadEnd = FALSE;
66 fPrecedence = precZero;
68 UErrorCode status = U_ZERO_ERROR;
69 fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere
70 fLastPosSet = new UVector(status);
71 fFollowPos = new UVector(status);
72 if (t==opCat) {fPrecedence = precOpCat;}
73 else if (t==opOr) {fPrecedence = precOpOr;}
74 else if (t==opStart) {fPrecedence = precStart;}
75 else if (t==opLParen) {fPrecedence = precLParen;}
80 RBBINode::RBBINode(const RBBINode &other) : UMemory(other) {
82 fSerialNum = ++gLastSerial;
88 fInputSet = other.fInputSet;
89 fPrecedence = other.fPrecedence;
91 fFirstPos = other.fFirstPos;
92 fLastPos = other.fLastPos;
93 fNullable = other.fNullable;
96 fChainIn = other.fChainIn;
97 UErrorCode status = U_ZERO_ERROR;
98 fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere
99 fLastPosSet = new UVector(status);
100 fFollowPos = new UVector(status);
104 //-------------------------------------------------------------------------
106 // Destructor. Deletes both this node AND any child nodes,
107 // except in the case of variable reference nodes. For
108 // these, the l. child points back to the definition, which
109 // is common for all references to the variable, meaning
110 // it can't be deleted here.
112 //-------------------------------------------------------------------------
113 RBBINode::~RBBINode() {
114 // printf("deleting node %8x serial %4d\n", this, this->fSerialNum);
118 switch (this->fType) {
121 // for these node types, multiple instances point to the same "children"
122 // Storage ownership of children handled elsewhere. Don't delete here.
140 //-------------------------------------------------------------------------
142 // cloneTree Make a copy of the subtree rooted at this node.
143 // Discard any variable references encountered along the way,
144 // and replace with copies of the variable's definitions.
145 // Used to replicate the expression underneath variable
146 // references in preparation for generating the DFA tables.
148 //-------------------------------------------------------------------------
149 RBBINode *RBBINode::cloneTree() {
152 if (fType == RBBINode::varRef) {
153 // If the current node is a variable reference, skip over it
154 // and clone the definition of the variable instead.
155 n = fLeftChild->cloneTree();
156 } else if (fType == RBBINode::uset) {
159 n = new RBBINode(*this);
160 // Check for null pointer.
162 if (fLeftChild != NULL) {
163 n->fLeftChild = fLeftChild->cloneTree();
164 n->fLeftChild->fParent = n;
166 if (fRightChild != NULL) {
167 n->fRightChild = fRightChild->cloneTree();
168 n->fRightChild->fParent = n;
177 //-------------------------------------------------------------------------
179 // flattenVariables Walk a parse tree, replacing any variable
180 // references with a copy of the variable's definition.
181 // Aside from variables, the tree is not changed.
183 // Return the root of the tree. If the root was not a variable
184 // reference, it remains unchanged - the root we started with
185 // is the root we return. If, however, the root was a variable
186 // reference, the root of the newly cloned replacement tree will
187 // be returned, and the original tree deleted.
189 // This function works by recursively walking the tree
190 // without doing anything until a variable reference is
191 // found, then calling cloneTree() at that point. Any
192 // nested references are handled by cloneTree(), not here.
194 //-------------------------------------------------------------------------
195 RBBINode *RBBINode::flattenVariables() {
196 if (fType == varRef) {
197 RBBINode *retNode = fLeftChild->cloneTree();
198 if (retNode != NULL) {
199 retNode->fRuleRoot = this->fRuleRoot;
200 retNode->fChainIn = this->fChainIn;
202 delete this; // TODO: undefined behavior. Fix.
206 if (fLeftChild != NULL) {
207 fLeftChild = fLeftChild->flattenVariables();
208 fLeftChild->fParent = this;
210 if (fRightChild != NULL) {
211 fRightChild = fRightChild->flattenVariables();
212 fRightChild->fParent = this;
218 //-------------------------------------------------------------------------
220 // flattenSets Walk the parse tree, replacing any nodes of type setRef
221 // with a copy of the expression tree for the set. A set's
222 // equivalent expression tree is precomputed and saved as
223 // the left child of the uset node.
225 //-------------------------------------------------------------------------
226 void RBBINode::flattenSets() {
227 U_ASSERT(fType != setRef);
229 if (fLeftChild != NULL) {
230 if (fLeftChild->fType==setRef) {
231 RBBINode *setRefNode = fLeftChild;
232 RBBINode *usetNode = setRefNode->fLeftChild;
233 RBBINode *replTree = usetNode->fLeftChild;
234 fLeftChild = replTree->cloneTree();
235 fLeftChild->fParent = this;
238 fLeftChild->flattenSets();
242 if (fRightChild != NULL) {
243 if (fRightChild->fType==setRef) {
244 RBBINode *setRefNode = fRightChild;
245 RBBINode *usetNode = setRefNode->fLeftChild;
246 RBBINode *replTree = usetNode->fLeftChild;
247 fRightChild = replTree->cloneTree();
248 fRightChild->fParent = this;
251 fRightChild->flattenSets();
258 //-------------------------------------------------------------------------
260 // findNodes() Locate all the nodes of the specified type, starting
261 // at the specified root.
263 //-------------------------------------------------------------------------
264 void RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) {
265 /* test for buffer overflows */
266 if (U_FAILURE(status)) {
270 dest->addElement(this, status);
272 if (fLeftChild != NULL) {
273 fLeftChild->findNodes(dest, kind, status);
275 if (fRightChild != NULL) {
276 fRightChild->findNodes(dest, kind, status);
281 //-------------------------------------------------------------------------
283 // print. Print out a single node, for debugging.
285 //-------------------------------------------------------------------------
288 static int32_t serial(const RBBINode *node) {
289 return (node == NULL? -1 : node->fSerialNum);
293 void RBBINode::printNode(const RBBINode *node) {
294 static const char * const nodeTypeNames[] = {
314 RBBIDebugPrintf("%10p", (void *)node);
316 RBBIDebugPrintf("%10p %5d %12s %c%c %5d %5d %5d %6d %d ",
317 (void *)node, node->fSerialNum, nodeTypeNames[node->fType],
318 node->fRuleRoot?'R':' ', node->fChainIn?'C':' ',
319 serial(node->fLeftChild), serial(node->fRightChild), serial(node->fParent),
320 node->fFirstPos, node->fVal);
321 if (node->fType == varRef) {
322 RBBI_DEBUG_printUnicodeString(node->fText);
325 RBBIDebugPrintf("\n");
331 U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth) {
332 RBBIDebugPrintf("%*s", minWidth, CStr(s)());
337 //-------------------------------------------------------------------------
339 // print. Print out the tree of nodes rooted at "this"
341 //-------------------------------------------------------------------------
343 void RBBINode::printNodeHeader() {
344 RBBIDebugPrintf(" Address serial type LeftChild RightChild Parent position value\n");
347 void RBBINode::printTree(const RBBINode *node, UBool printHeading) {
353 // Only dump the definition under a variable reference if asked to.
354 // Unconditinally dump children of all other node types.
355 if (node->fType != varRef) {
356 if (node->fLeftChild != NULL) {
357 printTree(node->fLeftChild, FALSE);
360 if (node->fRightChild != NULL) {
361 printTree(node->fRightChild, FALSE);
372 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */