Add packaging for TIZEN
[platform/upstream/VK-GL-CTS.git] / executor / xeXMLParser.cpp
1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program Test Executor
3  * ------------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief XML Parser.
22  *//*--------------------------------------------------------------------*/
23
24 #include "xeXMLParser.hpp"
25 #include "deInt32.h"
26
27 namespace xe
28 {
29 namespace xml
30 {
31
//! File-local tokenizer constants.
enum
{
	//! Size argument the Tokenizer constructor passes to its input buffer.
	TOKENIZER_INITIAL_BUFFER_SIZE = 1024
};
36
//! Returns true if ch may begin an identifier, i.e. it is one of 'a'..'z' or 'A'..'Z'.
static inline bool isIdentifierStartChar (int ch)
{
	const bool isLowerCase = ('a' <= ch) && (ch <= 'z');
	const bool isUpperCase = ('A' <= ch) && (ch <= 'Z');

	return isLowerCase || isUpperCase;
}
41
42 static inline bool isIdentifierChar (int ch)
43 {
44         return isIdentifierStartChar(ch) || de::inRange<int>(ch, '0', '9') || (ch == '-') || (ch == '_');
45 }
46
//! Returns true if ch is a space, tab, carriage return or line feed.
static inline bool isWhitespaceChar (int ch)
{
	switch (ch)
	{
		case ' ':
		case '\t':
		case '\r':
		case '\n':
			return true;

		default:
			return false;
	}
}
51
52 static int getNextBufferSize (int curSize, int minNewSize)
53 {
54         return de::max(curSize*2, 1<<deLog2Ceil32(minNewSize));
55 }
56
57 Tokenizer::Tokenizer (void)
58         : m_curToken    (TOKEN_INCOMPLETE)
59         , m_curTokenLen (0)
60         , m_state               (STATE_DATA)
61         , m_buf                 (TOKENIZER_INITIAL_BUFFER_SIZE)
62 {
63 }
64
65 Tokenizer::~Tokenizer (void)
66 {
67 }
68
69 void Tokenizer::clear (void)
70 {
71         m_curToken              = TOKEN_INCOMPLETE;
72         m_curTokenLen   = 0;
73         m_state                 = STATE_DATA;
74         m_buf.clear();
75 }
76
77 void Tokenizer::error (const std::string& what)
78 {
79         throw ParseError(what);
80 }
81
82 void Tokenizer::feed (const deUint8* bytes, int numBytes)
83 {
84         // Grow buffer if necessary.
85         if (m_buf.getNumFree() < numBytes)
86         {
87                 m_buf.resize(getNextBufferSize(m_buf.getSize(), m_buf.getNumElements()+numBytes));
88         }
89
90         // Append to front.
91         m_buf.pushFront(bytes, numBytes);
92
93         // If we haven't parsed complete token, re-try after data feed.
94         if (m_curToken == TOKEN_INCOMPLETE)
95                 advance();
96 }
97
98 int Tokenizer::getChar (int offset) const
99 {
100         DE_ASSERT(de::inRange(offset, 0, m_buf.getNumElements()));
101
102         if (offset < m_buf.getNumElements())
103                 return m_buf.peekBack(offset);
104         else
105                 return END_OF_BUFFER;
106 }
107
108 void Tokenizer::advance (void)
109 {
110         if (m_curToken != TOKEN_INCOMPLETE)
111         {
112                 // Parser should not try to advance beyond end of string.
113                 DE_ASSERT(m_curToken != TOKEN_END_OF_STRING);
114
115                 // If current token is tag end, change state to data.
116                 if (m_curToken == TOKEN_TAG_END                                         ||
117                         m_curToken == TOKEN_EMPTY_ELEMENT_END                   ||
118                         m_curToken == TOKEN_PROCESSING_INSTRUCTION_END  ||
119                         m_curToken == TOKEN_COMMENT                                             ||
120                         m_curToken == TOKEN_ENTITY)
121                         m_state = STATE_DATA;
122
123                 // Advance buffer by length of last token.
124                 m_buf.popBack(m_curTokenLen);
125
126                 // Reset state.
127                 m_curToken              = TOKEN_INCOMPLETE;
128                 m_curTokenLen   = 0;
129
130                 // If we hit end of string here, report it as end of string.
131                 if (getChar(0) == END_OF_STRING)
132                 {
133                         m_curToken              = TOKEN_END_OF_STRING;
134                         m_curTokenLen   = 1;
135                         return;
136                 }
137         }
138
139         int curChar = getChar(m_curTokenLen);
140
141         for (;;)
142         {
143                 if (m_state == STATE_DATA)
144                 {
145                         // Advance until we hit end of buffer or tag start and treat that as data token.
146                         if (curChar == END_OF_STRING || curChar == (int)END_OF_BUFFER || curChar == '<' || curChar == '&')
147                         {
148                                 if (curChar == '<')
149                                         m_state = STATE_TAG;
150                                 else if (curChar == '&')
151                                         m_state = STATE_ENTITY;
152
153                                 if (m_curTokenLen > 0)
154                                 {
155                                         // Report data token.
156                                         m_curToken = TOKEN_DATA;
157                                         return;
158                                 }
159                                 else if (curChar == END_OF_STRING || curChar == (int)END_OF_BUFFER)
160                                 {
161                                         // Just return incomplete token, no data parsed.
162                                         return;
163                                 }
164                                 else
165                                 {
166                                         DE_ASSERT(m_state == STATE_TAG || m_state == STATE_ENTITY);
167                                         continue;
168                                 }
169                         }
170                 }
171                 else
172                 {
173                         // Eat all whitespace if present.
174                         if (m_curTokenLen == 0)
175                         {
176                                 while (isWhitespaceChar(curChar))
177                                 {
178                                         m_buf.popBack();
179                                         curChar = getChar(0);
180                                 }
181                         }
182
183                         // Handle end of string / buffer.
184                         if (curChar == END_OF_STRING)
185                                 error("Unexpected end of string");
186                         else if (curChar == (int)END_OF_BUFFER)
187                         {
188                                 DE_ASSERT(m_curToken == TOKEN_INCOMPLETE);
189                                 return;
190                         }
191
192                         if (m_curTokenLen == 0)
193                         {
194                                 // Expect start of identifier, value or special tag token.
195                                 if (curChar == '\'' || curChar == '"')
196                                         m_state = STATE_VALUE;
197                                 else if (isIdentifierStartChar(curChar))
198                                         m_state = STATE_IDENTIFIER;
199                                 else if (curChar == '<' || curChar == '?' || curChar == '/')
200                                         m_state = STATE_TAG;
201                                 else if (curChar == '&')
202                                         DE_ASSERT(m_state == STATE_ENTITY);
203                                 else if (curChar == '=')
204                                 {
205                                         m_curToken              = TOKEN_EQUAL;
206                                         m_curTokenLen   = 1;
207                                         return;
208                                 }
209                                 else if (curChar == '>')
210                                 {
211                                         m_curToken              = TOKEN_TAG_END;
212                                         m_curTokenLen   = 1;
213                                         return;
214                                 }
215                                 else
216                                         error("Unexpected character");
217                         }
218                         else if (m_state == STATE_IDENTIFIER)
219                         {
220                                 if (!isIdentifierChar(curChar))
221                                 {
222                                         m_curToken = TOKEN_IDENTIFIER;
223                                         return;
224                                 }
225                         }
226                         else if (m_state == STATE_VALUE)
227                         {
228                                 // \todo [2012-06-07 pyry] Escapes.
229                                 if (curChar == '\'' || curChar == '"')
230                                 {
231                                         // \todo [2012-10-17 pyry] Should we actually do the check against getChar(0)?
232                                         if (curChar != getChar(0))
233                                                 error("Mismatched quote");
234                                         m_curToken               = TOKEN_STRING;
235                                         m_curTokenLen   += 1;
236                                         return;
237                                 }
238                         }
239                         else if (m_state == STATE_COMMENT)
240                         {
241                                 DE_ASSERT(m_curTokenLen >= 2); // 2 characters have been parsed if we are in comment state.
242
243                                 if (m_curTokenLen <= 3)
244                                 {
245                                         if (curChar != '-')
246                                                 error("Invalid comment start");
247                                 }
248                                 else
249                                 {
250                                         int prev2 = m_curTokenLen > 5 ? getChar(m_curTokenLen-2) : 0;
251                                         int prev1 = m_curTokenLen > 4 ? getChar(m_curTokenLen-1) : 0;
252
253                                         if (prev2 == '-' && prev1 == '-')
254                                         {
255                                                 if (curChar != '>')
256                                                         error("Invalid comment end");
257                                                 m_curToken               = TOKEN_COMMENT;
258                                                 m_curTokenLen   += 1;
259                                                 return;
260                                         }
261                                 }
262                         }
263                         else if (m_state == STATE_ENTITY)
264                         {
265                                 if (m_curTokenLen >= 1)
266                                 {
267                                         if (curChar == ';')
268                                         {
269                                                 m_curToken               = TOKEN_ENTITY;
270                                                 m_curTokenLen   += 1;
271                                                 return;
272                                         }
273                                         else if (!de::inRange<int>(curChar, '0', '9')   &&
274                                                          !de::inRange<int>(curChar, 'a', 'z')   &&
275                                                          !de::inRange<int>(curChar, 'A', 'Z'))
276                                                 error("Invalid entity");
277                                 }
278                         }
279                         else
280                         {
281                                 // Special tokens are at most 2 characters.
282                                 DE_ASSERT(m_state == STATE_TAG && m_curTokenLen == 1);
283
284                                 int prevChar = getChar(m_curTokenLen-1);
285
286                                 if (prevChar == '<')
287                                 {
288                                         // Tag start.
289                                         if (curChar == '/')
290                                         {
291                                                 m_curToken              = TOKEN_END_TAG_START;
292                                                 m_curTokenLen   = 2;
293                                                 return;
294                                         }
295                                         else if (curChar == '?')
296                                         {
297                                                 m_curToken              = TOKEN_PROCESSING_INSTRUCTION_START;
298                                                 m_curTokenLen   = 2;
299                                                 return;
300                                         }
301                                         else if (curChar == '!')
302                                         {
303                                                 m_state = STATE_COMMENT;
304                                         }
305                                         else
306                                         {
307                                                 m_curToken              = TOKEN_TAG_START;
308                                                 m_curTokenLen   = 1;
309                                                 return;
310                                         }
311                                 }
312                                 else if (prevChar == '?')
313                                 {
314                                         if (curChar != '>')
315                                                 error("Invalid processing instruction end");
316                                         m_curToken              = TOKEN_PROCESSING_INSTRUCTION_END;
317                                         m_curTokenLen   = 2;
318                                         return;
319                                 }
320                                 else if (prevChar == '/')
321                                 {
322                                         if (curChar != '>')
323                                                 error("Invalid empty element end");
324                                         m_curToken              = TOKEN_EMPTY_ELEMENT_END;
325                                         m_curTokenLen   = 2;
326                                         return;
327                                 }
328                                 else
329                                         error("Could not parse special token");
330                         }
331                 }
332
333                 m_curTokenLen   += 1;
334                 curChar                  = getChar(m_curTokenLen);
335         }
336 }
337
338 void Tokenizer::getString (std::string& dst) const
339 {
340         DE_ASSERT(m_curToken == TOKEN_STRING);
341         dst.resize(m_curTokenLen-2);
342         for (int ndx = 0; ndx < m_curTokenLen-2; ndx++)
343                 dst[ndx] = m_buf.peekBack(ndx+1);
344 }
345
346 Parser::Parser (void)
347         : m_element             (ELEMENT_INCOMPLETE)
348         , m_state               (STATE_DATA)
349 {
350 }
351
352 Parser::~Parser (void)
353 {
354 }
355
356 void Parser::clear (void)
357 {
358         m_tokenizer.clear();
359         m_elementName.clear();
360         m_attributes.clear();
361         m_attribName.clear();
362         m_entityValue.clear();
363
364         m_element       = ELEMENT_INCOMPLETE;
365         m_state         = STATE_DATA;
366 }
367
368 void Parser::error (const std::string& what)
369 {
370         throw ParseError(what);
371 }
372
373 void Parser::feed (const deUint8* bytes, int numBytes)
374 {
375         m_tokenizer.feed(bytes, numBytes);
376
377         if (m_element == ELEMENT_INCOMPLETE)
378                 advance();
379 }
380
381 void Parser::advance (void)
382 {
383         if (m_element == ELEMENT_START)
384                 m_attributes.clear();
385
386         // \note No token is advanced when element end is reported.
387         if (m_state == STATE_YIELD_EMPTY_ELEMENT_END)
388         {
389                 DE_ASSERT(m_element == ELEMENT_START);
390                 m_element       = ELEMENT_END;
391                 m_state         = STATE_DATA;
392                 return;
393         }
394
395         if (m_element != ELEMENT_INCOMPLETE)
396         {
397                 m_tokenizer.advance();
398                 m_element = ELEMENT_INCOMPLETE;
399         }
400
401         for (;;)
402         {
403                 Token curToken = m_tokenizer.getToken();
404
405                 // Skip comments.
406                 while (curToken == TOKEN_COMMENT)
407                 {
408                         m_tokenizer.advance();
409                         curToken = m_tokenizer.getToken();
410                 }
411
412                 if (curToken == TOKEN_INCOMPLETE)
413                 {
414                         DE_ASSERT(m_element == ELEMENT_INCOMPLETE);
415                         return;
416                 }
417
418                 switch (m_state)
419                 {
420                         case STATE_ENTITY:
421                                 m_state = STATE_DATA;
422                                 // Fall-through to STATE_DATA processing.
423
424                         case STATE_DATA:
425                                 switch (curToken)
426                                 {
427                                         case TOKEN_DATA:
428                                                 m_element = ELEMENT_DATA;
429                                                 return;
430
431                                         case TOKEN_END_OF_STRING:
432                                                 m_element = ELEMENT_END_OF_STRING;
433                                                 return;
434
435                                         case TOKEN_TAG_START:
436                                                 m_state = STATE_START_TAG_OPEN;
437                                                 break;
438
439                                         case TOKEN_END_TAG_START:
440                                                 m_state = STATE_END_TAG_OPEN;
441                                                 break;
442
443                                         case TOKEN_PROCESSING_INSTRUCTION_START:
444                                                 m_state = STATE_IN_PROCESSING_INSTRUCTION;
445                                                 break;
446
447                                         case TOKEN_ENTITY:
448                                                 m_state         = STATE_ENTITY;
449                                                 m_element       = ELEMENT_DATA;
450                                                 parseEntityValue();
451                                                 return;
452
453                                         default:
454                                                 error("Unexpected token");
455                                 }
456                                 break;
457
458                         case STATE_IN_PROCESSING_INSTRUCTION:
459                                 if (curToken == TOKEN_PROCESSING_INSTRUCTION_END)
460                                         m_state = STATE_DATA;
461                                 else
462                                         if (curToken != TOKEN_IDENTIFIER && curToken != TOKEN_EQUAL && curToken != TOKEN_STRING)
463                                                 error("Unexpected token in processing instruction");
464                                 break;
465
466                         case STATE_START_TAG_OPEN:
467                                 if (curToken != TOKEN_IDENTIFIER)
468                                         error("Expected identifier");
469                                 m_tokenizer.getTokenStr(m_elementName);
470                                 m_state = STATE_ATTRIBUTE_LIST;
471                                 break;
472
473                         case STATE_END_TAG_OPEN:
474                                 if (curToken != TOKEN_IDENTIFIER)
475                                         error("Expected identifier");
476                                 m_tokenizer.getTokenStr(m_elementName);
477                                 m_state = STATE_EXPECTING_END_TAG_CLOSE;
478                                 break;
479
480                         case STATE_EXPECTING_END_TAG_CLOSE:
481                                 if (curToken != TOKEN_TAG_END)
482                                         error("Expected tag end");
483                                 m_state         = STATE_DATA;
484                                 m_element       = ELEMENT_END;
485                                 return;
486
487                         case STATE_ATTRIBUTE_LIST:
488                                 if (curToken == TOKEN_IDENTIFIER)
489                                 {
490                                         m_tokenizer.getTokenStr(m_attribName);
491                                         m_state = STATE_EXPECTING_ATTRIBUTE_EQ;
492                                 }
493                                 else if (curToken == TOKEN_EMPTY_ELEMENT_END)
494                                 {
495                                         m_state         = STATE_YIELD_EMPTY_ELEMENT_END;
496                                         m_element       = ELEMENT_START;
497                                         return;
498                                 }
499                                 else if (curToken == TOKEN_TAG_END)
500                                 {
501                                         m_state         = STATE_DATA;
502                                         m_element       = ELEMENT_START;
503                                         return;
504                                 }
505                                 else
506                                         error("Unexpected token");
507                                 break;
508
509                         case STATE_EXPECTING_ATTRIBUTE_EQ:
510                                 if (curToken != TOKEN_EQUAL)
511                                         error("Expected '='");
512                                 m_state = STATE_EXPECTING_ATTRIBUTE_VALUE;
513                                 break;
514
515                         case STATE_EXPECTING_ATTRIBUTE_VALUE:
516                                 if (curToken != TOKEN_STRING)
517                                         error("Expected value");
518                                 if (hasAttribute(m_attribName.c_str()))
519                                         error("Duplicate attribute");
520
521                                 m_tokenizer.getString(m_attributes[m_attribName]);
522                                 m_state = STATE_ATTRIBUTE_LIST;
523                                 break;
524
525                         default:
526                                 DE_ASSERT(false);
527                 }
528
529                 m_tokenizer.advance();
530         }
531 }
532
//! Maps a predefined entity reference, written in full including the leading
//! '&' and trailing ';', to the character it stands for. Returns 0 if
//! \a entity is not one of the five known references.
static char getEntityValue (const std::string& entity)
{
	struct EntityMapping
	{
		const char*	name;
		char		value;
	};

	static const EntityMapping s_mappings[] =
	{
		{ "&lt;",	'<'		},
		{ "&gt;",	'>'		},
		{ "&amp;",	'&'		},
		{ "&apos;",	'\''	},
		{ "&quot;",	'"'		},
	};

	const EntityMapping* const mappingsEnd = s_mappings + sizeof(s_mappings)/sizeof(s_mappings[0]);

	for (const EntityMapping* mapping = s_mappings; mapping != mappingsEnd; ++mapping)
	{
		if (entity == mapping->name)
			return mapping->value;
	}

	// Unknown entity.
	return 0;
}
556
557 void Parser::parseEntityValue (void)
558 {
559         DE_ASSERT(m_state == STATE_ENTITY && m_tokenizer.getToken() == TOKEN_ENTITY);
560
561         std::string entity;
562         m_tokenizer.getTokenStr(entity);
563
564         const char value = getEntityValue(entity);
565         if (value == 0)
566                 error("Invalid entity '" + entity + "'");
567
568         m_entityValue.resize(1);
569         m_entityValue[0] = value;
570 }
571
572 } // xml
573 } // xe