1 /*-------------------------------------------------------------------------
2 * drawElements Quality Program Test Executor
3 * ------------------------------------------
5 * Copyright 2014 The Android Open Source Project
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
11 * http://www.apache.org/licenses/LICENSE-2.0
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
22 *//*--------------------------------------------------------------------*/
24 #include "xeXMLParser.hpp"
// Initial size handed to the tokenizer's buffer (m_buf) by the Tokenizer constructor.
// NOTE(review): the enclosing declaration (presumably an anonymous enum) is not visible in this listing — confirm.
34 TOKENIZER_INITIAL_BUFFER_SIZE = 1024
/*--------------------------------------------------------------------*//*!
 * \brief Check whether a character may begin an identifier.
 * \param ch Character code to classify.
 * \return true for 'a'..'z' and 'A'..'Z' (both ends inclusive), false otherwise.
 *//*--------------------------------------------------------------------*/
static inline bool isIdentifierStartChar (int ch)
{
	const bool isLowerCase = (ch >= 'a') && (ch <= 'z');
	const bool isUpperCase = (ch >= 'A') && (ch <= 'Z');

	return isLowerCase || isUpperCase;
}
42 static inline bool isIdentifierChar (int ch)
44 return isIdentifierStartChar(ch) || de::inRange<int>(ch, '0', '9') || (ch == '-') || (ch == '_');
/*--------------------------------------------------------------------*//*!
 * \brief Check whether a character is whitespace.
 * \param ch Character code to classify.
 * \return true for space, tab, carriage return and line feed only.
 *//*--------------------------------------------------------------------*/
static inline bool isWhitespaceChar (int ch)
{
	switch (ch)
	{
		case ' ':
		case '\t':
		case '\r':
		case '\n':
			return true;

		default:
			return false;
	}
}
52 static int getNextBufferSize (int curSize, int minNewSize)
54 return de::max(curSize*2, 1<<deLog2Ceil32(minNewSize));
57 Tokenizer::Tokenizer (void)
58 : m_curToken (TOKEN_INCOMPLETE)
60 , m_state (STATE_DATA)
61 , m_buf (TOKENIZER_INITIAL_BUFFER_SIZE)
65 Tokenizer::~Tokenizer (void)
69 void Tokenizer::clear (void)
71 m_curToken = TOKEN_INCOMPLETE;
77 void Tokenizer::error (const std::string& what)
79 throw ParseError(what);
82 void Tokenizer::feed (const deUint8* bytes, int numBytes)
84 // Grow buffer if necessary.
85 if (m_buf.getNumFree() < numBytes)
87 m_buf.resize(getNextBufferSize(m_buf.getSize(), m_buf.getNumElements()+numBytes));
91 m_buf.pushFront(bytes, numBytes);
93 // If we haven't parsed complete token, re-try after data feed.
94 if (m_curToken == TOKEN_INCOMPLETE)
98 int Tokenizer::getChar (int offset) const
100 DE_ASSERT(de::inRange(offset, 0, m_buf.getNumElements()));
102 if (offset < m_buf.getNumElements())
103 return m_buf.peekBack(offset);
105 return END_OF_BUFFER;
// Scans the buffered input for the next token. The token type is stored in m_curToken;
// m_curTokenLen is used as the offset of the next character to examine and as the number of
// bytes popped from the buffer once the token has been consumed.
// NOTE(review): the numbering embedded in this listing skips values, so brace, return and
// length-update lines appear to be missing from this copy — confirm against the upstream file
// before changing any logic here.
108 void Tokenizer::advance (void)
// A token that was already reported has to be consumed before scanning continues.
110 if (m_curToken != TOKEN_INCOMPLETE)
112 // Parser should not try to advance beyond end of string.
113 DE_ASSERT(m_curToken != TOKEN_END_OF_STRING);
115 // If current token is tag end, change state to data.
116 if (m_curToken == TOKEN_TAG_END ||
117 m_curToken == TOKEN_EMPTY_ELEMENT_END ||
118 m_curToken == TOKEN_PROCESSING_INSTRUCTION_END ||
119 m_curToken == TOKEN_COMMENT ||
120 m_curToken == TOKEN_ENTITY)
121 m_state = STATE_DATA;
123 // Advance buffer by length of last token.
124 m_buf.popBack(m_curTokenLen);
127 m_curToken = TOKEN_INCOMPLETE;
130 // If we hit end of string here, report it as end of string.
131 if (getChar(0) == END_OF_STRING)
133 m_curToken = TOKEN_END_OF_STRING;
// Continue from the first character that is not yet part of the current token.
139 int curChar = getChar(m_curTokenLen);
// Data state: everything up to the next '<', '&' or end of input forms one data token.
143 if (m_state == STATE_DATA)
145 // Advance until we hit end of buffer or tag start and treat that as data token.
146 if (curChar == END_OF_STRING || curChar == (int)END_OF_BUFFER || curChar == '<' || curChar == '&')
150 else if (curChar == '&')
151 m_state = STATE_ENTITY;
153 if (m_curTokenLen > 0)
155 // Report data token.
156 m_curToken = TOKEN_DATA;
159 else if (curChar == END_OF_STRING || curChar == (int)END_OF_BUFFER)
161 // Just return incomplete token, no data parsed.
// Otherwise a '<' or '&' was seen with no pending data, so the state has already switched.
166 DE_ASSERT(m_state == STATE_TAG || m_state == STATE_ENTITY);
173 // Eat all whitespace if present.
174 if (m_curTokenLen == 0)
176 while (isWhitespaceChar(curChar))
179 curChar = getChar(0);
183 // Handle end of string / buffer.
184 if (curChar == END_OF_STRING)
185 error("Unexpected end of string");
186 else if (curChar == (int)END_OF_BUFFER)
188 DE_ASSERT(m_curToken == TOKEN_INCOMPLETE);
// At the first character of a token, the character itself selects the sub-state or token.
192 if (m_curTokenLen == 0)
194 // Expect start of identifier, value or special tag token.
195 if (curChar == '\'' || curChar == '"')
196 m_state = STATE_VALUE;
197 else if (isIdentifierStartChar(curChar))
198 m_state = STATE_IDENTIFIER;
199 else if (curChar == '<' || curChar == '?' || curChar == '/')
201 else if (curChar == '&')
202 DE_ASSERT(m_state == STATE_ENTITY);
203 else if (curChar == '=')
205 m_curToken = TOKEN_EQUAL;
209 else if (curChar == '>')
211 m_curToken = TOKEN_TAG_END;
216 error("Unexpected character");
// Identifier: the token is reported at the first character that isIdentifierChar() rejects.
218 else if (m_state == STATE_IDENTIFIER)
220 if (!isIdentifierChar(curChar))
222 m_curToken = TOKEN_IDENTIFIER;
// Quoted value: a quote character is checked against the first character of the token.
226 else if (m_state == STATE_VALUE)
228 // \todo [2012-06-07 pyry] Escapes.
229 if (curChar == '\'' || curChar == '"')
231 // \todo [2012-10-17 pyry] Should we actually do the check against getChar(0)?
232 if (curChar != getChar(0))
233 error("Mismatched quote");
234 m_curToken = TOKEN_STRING;
239 else if (m_state == STATE_COMMENT)
241 DE_ASSERT(m_curTokenLen >= 2); // 2 characters have been parsed if we are in comment state.
// The first characters of the token are validated as the comment start.
243 if (m_curTokenLen <= 3)
246 error("Invalid comment start");
// prev2 / prev1 are the two characters before the current one, or 0 while m_curTokenLen is
// not greater than 5 / 4 respectively.
250 int prev2 = m_curTokenLen > 5 ? getChar(m_curTokenLen-2) : 0;
251 int prev1 = m_curTokenLen > 4 ? getChar(m_curTokenLen-1) : 0;
// A preceding "--" is where the comment end is checked.
253 if (prev2 == '-' && prev1 == '-')
256 error("Invalid comment end");
257 m_curToken = TOKEN_COMMENT;
263 else if (m_state == STATE_ENTITY)
// NOTE(review): the condition that completes the entity token is not visible in this listing.
265 if (m_curTokenLen >= 1)
269 m_curToken = TOKEN_ENTITY;
// Characters other than digits and ASCII letters are rejected here.
273 else if (!de::inRange<int>(curChar, '0', '9') &&
274 !de::inRange<int>(curChar, 'a', 'z') &&
275 !de::inRange<int>(curChar, 'A', 'Z'))
276 error("Invalid entity");
281 // Special tokens are at most 2 characters.
282 DE_ASSERT(m_state == STATE_TAG && m_curTokenLen == 1);
// The special token is classified from its first character (prevChar) and the current one.
284 int prevChar = getChar(m_curTokenLen-1);
291 m_curToken = TOKEN_END_TAG_START;
295 else if (curChar == '?')
297 m_curToken = TOKEN_PROCESSING_INSTRUCTION_START;
301 else if (curChar == '!')
303 m_state = STATE_COMMENT;
307 m_curToken = TOKEN_TAG_START;
312 else if (prevChar == '?')
315 error("Invalid processing instruction end");
316 m_curToken = TOKEN_PROCESSING_INSTRUCTION_END;
320 else if (prevChar == '/')
323 error("Invalid empty element end");
324 m_curToken = TOKEN_EMPTY_ELEMENT_END;
329 error("Could not parse special token");
// Fetch the character at the current token length for the next step.
334 curChar = getChar(m_curTokenLen);
338 void Tokenizer::getString (std::string& dst) const
340 DE_ASSERT(m_curToken == TOKEN_STRING);
341 dst.resize(m_curTokenLen-2);
342 for (int ndx = 0; ndx < m_curTokenLen-2; ndx++)
343 dst[ndx] = m_buf.peekBack(ndx+1);
346 Parser::Parser (void)
347 : m_element (ELEMENT_INCOMPLETE)
348 , m_state (STATE_DATA)
352 Parser::~Parser (void)
356 void Parser::clear (void)
359 m_elementName.clear();
360 m_attributes.clear();
361 m_attribName.clear();
362 m_entityValue.clear();
364 m_element = ELEMENT_INCOMPLETE;
365 m_state = STATE_DATA;
368 void Parser::error (const std::string& what)
370 throw ParseError(what);
373 void Parser::feed (const deUint8* bytes, int numBytes)
375 m_tokenizer.feed(bytes, numBytes);
377 if (m_element == ELEMENT_INCOMPLETE)
// Moves the parser forward: reads tokens from m_tokenizer, updates the parser state (m_state)
// for each one and sets m_element when an element is ready to be reported.
// NOTE(review): the numbering embedded in this listing skips values, so the loop / switch
// scaffolding, case labels, break and return lines appear to be missing from this copy —
// confirm against the upstream file before changing any logic here.
381 void Parser::advance (void)
// Attributes collected for the start element reported last are dropped first.
383 if (m_element == ELEMENT_START)
384 m_attributes.clear();
386 // \note No token is advanced when element end is reported.
387 if (m_state == STATE_YIELD_EMPTY_ELEMENT_END)
389 DE_ASSERT(m_element == ELEMENT_START);
390 m_element = ELEMENT_END;
391 m_state = STATE_DATA;
// An element that was already reported is consumed by advancing the tokenizer once.
395 if (m_element != ELEMENT_INCOMPLETE)
397 m_tokenizer.advance();
398 m_element = ELEMENT_INCOMPLETE;
403 Token curToken = m_tokenizer.getToken();
// Comment tokens are skipped here.
406 while (curToken == TOKEN_COMMENT)
408 m_tokenizer.advance();
409 curToken = m_tokenizer.getToken();
// Tokenizer has no complete token yet; m_element is expected to be incomplete as well.
412 if (curToken == TOKEN_INCOMPLETE)
414 DE_ASSERT(m_element == ELEMENT_INCOMPLETE);
421 m_state = STATE_DATA;
422 // Fall-through to STATE_DATA processing.
// Data state: the token type selects the reported element or the next state.
428 m_element = ELEMENT_DATA;
431 case TOKEN_END_OF_STRING:
432 m_element = ELEMENT_END_OF_STRING;
435 case TOKEN_TAG_START:
436 m_state = STATE_START_TAG_OPEN;
439 case TOKEN_END_TAG_START:
440 m_state = STATE_END_TAG_OPEN;
443 case TOKEN_PROCESSING_INSTRUCTION_START:
444 m_state = STATE_IN_PROCESSING_INSTRUCTION;
// Entities are reported to the caller as data elements.
448 m_state = STATE_ENTITY;
449 m_element = ELEMENT_DATA;
454 error("Unexpected token");
// Processing instruction: nothing is stored; only identifier, '=' and string tokens are accepted.
458 case STATE_IN_PROCESSING_INSTRUCTION:
459 if (curToken == TOKEN_PROCESSING_INSTRUCTION_END)
460 m_state = STATE_DATA;
462 if (curToken != TOKEN_IDENTIFIER && curToken != TOKEN_EQUAL && curToken != TOKEN_STRING)
463 error("Unexpected token in processing instruction");
// Start tag: the identifier following the tag start becomes the element name.
466 case STATE_START_TAG_OPEN:
467 if (curToken != TOKEN_IDENTIFIER)
468 error("Expected identifier");
469 m_tokenizer.getTokenStr(m_elementName);
470 m_state = STATE_ATTRIBUTE_LIST;
// End tag: element name, then the closing tag end.
473 case STATE_END_TAG_OPEN:
474 if (curToken != TOKEN_IDENTIFIER)
475 error("Expected identifier");
476 m_tokenizer.getTokenStr(m_elementName);
477 m_state = STATE_EXPECTING_END_TAG_CLOSE;
480 case STATE_EXPECTING_END_TAG_CLOSE:
481 if (curToken != TOKEN_TAG_END)
482 error("Expected tag end");
483 m_state = STATE_DATA;
484 m_element = ELEMENT_END;
// Attribute list: either another attribute name or the end of the start tag.
487 case STATE_ATTRIBUTE_LIST:
488 if (curToken == TOKEN_IDENTIFIER)
490 m_tokenizer.getTokenStr(m_attribName);
491 m_state = STATE_EXPECTING_ATTRIBUTE_EQ;
// Empty element: ELEMENT_START is reported now; the matching ELEMENT_END is produced on the
// next call through STATE_YIELD_EMPTY_ELEMENT_END.
493 else if (curToken == TOKEN_EMPTY_ELEMENT_END)
495 m_state = STATE_YIELD_EMPTY_ELEMENT_END;
496 m_element = ELEMENT_START;
499 else if (curToken == TOKEN_TAG_END)
501 m_state = STATE_DATA;
502 m_element = ELEMENT_START;
506 error("Unexpected token");
509 case STATE_EXPECTING_ATTRIBUTE_EQ:
510 if (curToken != TOKEN_EQUAL)
511 error("Expected '='");
512 m_state = STATE_EXPECTING_ATTRIBUTE_VALUE;
515 case STATE_EXPECTING_ATTRIBUTE_VALUE:
516 if (curToken != TOKEN_STRING)
517 error("Expected value");
// An attribute name may appear only once per element.
518 if (hasAttribute(m_attribName.c_str()))
519 error("Duplicate attribute");
// The string token is written directly into the attribute entry for m_attribName.
521 m_tokenizer.getString(m_attributes[m_attribName]);
522 m_state = STATE_ATTRIBUTE_LIST;
// Move on to the next token.
529 m_tokenizer.advance();
// Looks up an entity name in the s_entities table and returns the value of the entry whose
// name compares equal to `entity`.
// NOTE(review): the s_entities definition and the return used when nothing matches are not
// visible in this listing — confirm both against the upstream file.
533 static char getEntityValue (const std::string& entity)
// Linear scan over every table entry.
548 for (int ndx = 0; ndx < DE_LENGTH_OF_ARRAY(s_entities); ndx++)
550 if (entity == s_entities[ndx].name)
551 return s_entities[ndx].value;
// Translates the current entity token into its character value and stores it in m_entityValue
// as a one-character string. Must be called in STATE_ENTITY with TOKEN_ENTITY as current token.
// NOTE(review): the declaration of `entity` and the condition guarding the error() call are not
// visible in this listing (the embedded numbering skips 561 and 565) — confirm against the
// upstream file; as shown, error() would be reached unconditionally.
557 void Parser::parseEntityValue (void)
559 DE_ASSERT(m_state == STATE_ENTITY && m_tokenizer.getToken() == TOKEN_ENTITY);
// Fetch the text of the entity token.
562 m_tokenizer.getTokenStr(entity);
564 const char value = getEntityValue(entity);
566 error("Invalid entity '" + entity + "'");
// The resolved entity always occupies exactly one character.
568 m_entityValue.resize(1);
569 m_entityValue[0] = value;