/*
 * Copyright (c) 2020-2021 Samsung Electronics Co., Ltd. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
32 #include "tvgXmlParser.h"
/************************************************************************/
/* Internal Class Implementation */
/************************************************************************/
38 #ifdef THORVG_LOG_ENABLED
42 string simpleXmlNodeTypeToString(SvgNodeType type)
45 case SvgNodeType::Doc: return "Svg";
46 case SvgNodeType::G: return "G";
47 case SvgNodeType::Defs: return "Defs";
48 case SvgNodeType::Animation: return "Animation";
49 case SvgNodeType::Arc: return "Arc";
50 case SvgNodeType::Circle: return "Circle";
51 case SvgNodeType::Ellipse: return "Ellipse";
52 case SvgNodeType::Image: return "Image";
53 case SvgNodeType::Line: return "Line";
54 case SvgNodeType::Path: return "Path";
55 case SvgNodeType::Polygon: return "Polygon";
56 case SvgNodeType::Polyline: return "Polyline";
57 case SvgNodeType::Rect: return "Rect";
58 case SvgNodeType::Text: return "Text";
59 case SvgNodeType::TextArea: return "TextArea";
60 case SvgNodeType::Tspan: return "Tspan";
61 case SvgNodeType::Use: return "Use";
62 case SvgNodeType::Video: return "Video";
63 case SvgNodeType::ClipPath: return "ClipPath";
64 case SvgNodeType::Mask: return "Mask";
65 default: return "Unknown";
//Tells whether an unsupported element should be left out of the "unsupported element" log.
//tagName: NUL-terminated element name. A null pointer is never ignored.
//Returns true only when tagName is exactly one of the listed element names.
bool isIgnoreUnsupportedLogElements(const char* tagName)
{
    if (!tagName) return false;

    const char* const elements[] = { "title" };

    //Compare whole names: limiting the comparison to strlen(tagName) would also accept
    //every prefix of a listed name, the empty string included.
    for (const auto element : elements) {
        if (!strcmp(tagName, element)) return true;
    }
    return false;
}
//Tells whether an unsupported attribute should be left out of the "unsupported attribute" log.
//tagAttribute: NUL-terminated attribute name. A null pointer is never ignored.
//tagValue: NUL-terminated attribute value, may be null.
//An entry applies when the name matches (exactly, or as a prefix for wildcard entries) and,
//if both the entry and the caller supply a value, when the values are equal.
bool _isIgnoreUnsupportedLogAttributes(const char* tagAttribute, const char* tagValue)
{
    if (!tagAttribute) return false;

    const struct
    {
        const char* tag;
        bool tagWildcard; //If true, it is assumed that a wildcard is used after the tag. (ex: tagName*)
        const char* value; //If set, the entry only applies to this exact value.
    } attributes[] = {
        {"id", false, nullptr},
        {"data-name", false, nullptr},
        {"overflow", false, "visible"},
        {"version", false, nullptr},
        {"xmlns", true, nullptr},
        {"xml:space", false, nullptr},
    };

    for (const auto& attribute : attributes) {
        //Whole-name comparison unless the entry is a wildcard; bounding the comparison by
        //strlen(tagAttribute) would accept any prefix of the entry, the empty string included.
        const bool nameMatches = attribute.tagWildcard
            ? (strncmp(tagAttribute, attribute.tag, strlen(attribute.tag)) == 0)
            : (strcmp(tagAttribute, attribute.tag) == 0);
        if (!nameMatches) continue;

        //A value constraint is only checked when the caller actually supplied a value.
        if (attribute.value && tagValue && strcmp(tagValue, attribute.value) != 0) continue;

        return true;
    }
    return false;
}
//Returns the first white-space character in [itr, itrEnd), or itrEnd when there is none.
static const char* _simpleXmlFindWhiteSpace(const char* itr, const char* itrEnd)
{
    //isspace() requires a value representable as unsigned char, hence the cast.
    while ((itr < itrEnd) && !isspace(static_cast<unsigned char>(*itr))) ++itr;
    return itr;
}
//Returns the first character in [itr, itrEnd) that is not white space, or itrEnd when
//the whole range is white space.
static const char* _simpleXmlSkipWhiteSpace(const char* itr, const char* itrEnd)
{
    //isspace() requires a value representable as unsigned char, hence the cast.
    while ((itr < itrEnd) && isspace(static_cast<unsigned char>(*itr))) ++itr;
    return itr;
}
//Moves the end position itr backwards over trailing white space, never going below itrStart.
//Returns the new end position (one past the last kept character).
//The character at itrStart itself is never examined, so a range made only of white space
//is trimmed down to itrStart + 1, not to itrStart.
static const char* _simpleXmlUnskipWhiteSpace(const char* itr, const char* itrStart)
{
    //Distances are compared instead of decrementing first, so that no pointer below
    //itrStart is ever formed.
    while (((itr - itrStart) > 1) && isspace(static_cast<unsigned char>(itr[-1]))) --itr;
    return itr;
}
142 static const char* _simpleXmlSkipXmlEntities(const char* itr, const char* itrEnd)
145 while (*itr == '&' && itr < itrEnd) {
146 for (int i = 0; i < NUMBER_OF_XML_ENTITIES; ++i) {
147 if (strncmp(itr, xmlEntity[i], xmlEntityLength[i]) == 0) {
148 itr += xmlEntityLength[i];
159 static const char* _simpleXmlUnskipXmlEntities(const char* itr, const char* itrStart)
162 while (*(itr - 1) == ';' && itr > itrStart) {
163 for (int i = 0; i < NUMBER_OF_XML_ENTITIES; ++i) {
164 if (itr - xmlEntityLength[i] > itrStart &&
165 strncmp(itr - xmlEntityLength[i], xmlEntity[i], xmlEntityLength[i]) == 0) {
166 itr -= xmlEntityLength[i];
177 static const char* _skipWhiteSpacesAndXmlEntities(const char* itr, const char* itrEnd)
179 itr = _simpleXmlSkipWhiteSpace(itr, itrEnd);
182 if (p != (itr = _simpleXmlSkipXmlEntities(itr, itrEnd))) p = itr;
184 if (p != (itr = _simpleXmlSkipWhiteSpace(itr, itrEnd))) p = itr;
191 static const char* _unskipWhiteSpacesAndXmlEntities(const char* itr, const char* itrStart)
193 itr = _simpleXmlUnskipWhiteSpace(itr, itrStart);
196 if (p != (itr = _simpleXmlUnskipXmlEntities(itr, itrStart))) p = itr;
198 if (p != (itr = _simpleXmlUnskipWhiteSpace(itr, itrStart))) p = itr;
//Returns the first '<' in [itr, itrEnd), or nullptr when there is none.
static const char* _simpleXmlFindStartTag(const char* itr, const char* itrEnd)
{
    //An empty or inverted range must not reach memchr(): a negative distance would be
    //converted to a huge unsigned length.
    if (itr >= itrEnd) return nullptr;
    return static_cast<const char*>(memchr(itr, '<', itrEnd - itr));
}
//Returns the first '>' or '<' in [itr, itrEnd) that is not inside a quoted value,
//or nullptr when there is none. Both "..." and '...' values are honoured; a quote
//character of the other kind inside a quoted value is treated as plain text.
static const char* _simpleXmlFindEndTag(const char* itr, const char* itrEnd)
{
    char quote = '\0'; //Quote character currently open, '\0' while outside a quoted value.

    for (; itr < itrEnd; itr++) {
        if (quote) {
            if (*itr == quote) quote = '\0';
        } else if ((*itr == '"') || (*itr == '\'')) {
            quote = *itr;
        } else if ((*itr == '>') || (*itr == '<')) {
            return itr;
        }
    }
    return nullptr;
}
//Looks for the comment terminator "-->" lying completely inside [itr, itrEnd).
//Returns a pointer to its '>' character, or nullptr when there is no terminator.
static const char* _simpleXmlFindEndCommentTag(const char* itr, const char* itrEnd)
{
    //Only positions with at least three characters left can hold the terminator.
    for (; (itrEnd - itr) >= 3; itr++) {
        if (!memcmp(itr, "-->", 3)) return itr + 2;
    }
    return nullptr;
}
//Looks for the CDATA terminator "]]>" lying completely inside [itr, itrEnd).
//Returns a pointer to its '>' character, or nullptr when there is no terminator.
static const char* _simpleXmlFindEndCdataTag(const char* itr, const char* itrEnd)
{
    //Only positions with at least three characters left can hold the terminator.
    for (; (itrEnd - itr) >= 3; itr++) {
        if (!memcmp(itr, "]]>", 3)) return itr + 2;
    }
    return nullptr;
}
//Returns the first '>' in [itr, itrEnd), or nullptr when there is none.
static const char* _simpleXmlFindDoctypeChildEndTag(const char* itr, const char* itrEnd)
{
    while (itr < itrEnd) {
        if (*itr == '>') return itr;
        ++itr;
    }
    return nullptr;
}
/************************************************************************/
/* External Class Implementation */
/************************************************************************/
257 bool simpleXmlParseAttributes(const char* buf, unsigned bufLength, simpleXMLAttributeCb func, const void* data)
259 const char *itr = buf, *itrEnd = buf + bufLength;
260 char* tmpBuf = (char*)alloca(bufLength + 1);
262 if (!buf || !func) return false;
264 while (itr < itrEnd) {
265 const char* p = _skipWhiteSpacesAndXmlEntities(itr, itrEnd);
266 const char *key, *keyEnd, *value, *valueEnd;
269 if (p == itrEnd) return true;
272 for (keyEnd = key; keyEnd < itrEnd; keyEnd++) {
273 if ((*keyEnd == '=') || (isspace((unsigned char)*keyEnd))) break;
275 if (keyEnd == itrEnd) return false;
276 if (keyEnd == key) continue;
278 if (*keyEnd == '=') value = keyEnd + 1;
280 value = (const char*)memchr(keyEnd, '=', itrEnd - keyEnd);
281 if (!value) return false;
284 keyEnd = _simpleXmlUnskipXmlEntities(keyEnd, key);
286 value = _skipWhiteSpacesAndXmlEntities(value, itrEnd);
287 if (value == itrEnd) return false;
289 if ((*value == '"') || (*value == '\'')) {
290 valueEnd = (const char*)memchr(value + 1, *value, itrEnd - value);
291 if (!valueEnd) return false;
294 valueEnd = _simpleXmlFindWhiteSpace(value, itrEnd);
299 value = _skipWhiteSpacesAndXmlEntities(value, itrEnd);
300 valueEnd = _unskipWhiteSpacesAndXmlEntities(valueEnd, value);
302 memcpy(tmpBuf, key, keyEnd - key);
303 tmpBuf[keyEnd - key] = '\0';
305 tval = tmpBuf + (keyEnd - key) + 1;
307 while (value < valueEnd) {
308 value = _simpleXmlSkipXmlEntities(value, valueEnd);
314 #ifdef THORVG_LOG_ENABLED
315 if (!func((void*)data, tmpBuf, tval)) {
316 if (!_isIgnoreUnsupportedLogAttributes(tmpBuf, tval)) printf("SVG: Unsupported attributes used [Elements type: %s][Id : %s][Attribute: %s][Value: %s]\n", simpleXmlNodeTypeToString(((SvgLoaderData*)data)->svgParse->node->type).c_str(), ((SvgLoaderData*)data)->svgParse->node->id ? ((SvgLoaderData*)data)->svgParse->node->id->c_str() : "NO_ID", tmpBuf, tval ? tval : "NONE");
319 func((void*)data, tmpBuf, tval);
326 bool simpleXmlParse(const char* buf, unsigned bufLength, bool strip, simpleXMLCb func, const void* data)
328 const char *itr = buf, *itrEnd = buf + bufLength;
330 if (!buf || !func) return false;
332 #define CB(type, start, end) \
334 size_t _sz = end - start; \
336 _ret = func((void*)data, type, start, _sz); \
341 while (itr < itrEnd) {
343 if (itr + 1 >= itrEnd) {
344 CB(SimpleXMLType::Error, itr, itrEnd);
352 type = SimpleXMLType::Close;
354 } else if (itr[1] == '?') {
355 type = SimpleXMLType::Processing;
357 } else if (itr[1] == '!') {
358 if ((itr + sizeof("<!DOCTYPE>") - 1 < itrEnd) && (!memcmp(itr + 2, "DOCTYPE", sizeof("DOCTYPE") - 1)) && ((itr[2 + sizeof("DOCTYPE") - 1] == '>') || (isspace((unsigned char)itr[2 + sizeof("DOCTYPE") - 1])))) {
359 type = SimpleXMLType::Doctype;
360 toff = sizeof("!DOCTYPE") - 1;
361 } else if ((itr + sizeof("<!---->") - 1 < itrEnd) && (!memcmp(itr + 2, "--", sizeof("--") - 1))) {
362 type = SimpleXMLType::Comment;
363 toff = sizeof("!--") - 1;
364 } else if ((itr + sizeof("<![CDATA[]]>") - 1 < itrEnd) && (!memcmp(itr + 2, "[CDATA[", sizeof("[CDATA[") - 1))) {
365 type = SimpleXMLType::CData;
366 toff = sizeof("![CDATA[") - 1;
367 } else if (itr + sizeof("<!>") - 1 < itrEnd) {
368 type = SimpleXMLType::DoctypeChild;
369 toff = sizeof("!") - 1;
371 type = SimpleXMLType::Open;
375 type = SimpleXMLType::Open;
379 if (type == SimpleXMLType::CData) p = _simpleXmlFindEndCdataTag(itr + 1 + toff, itrEnd);
380 else if (type == SimpleXMLType::DoctypeChild) p = _simpleXmlFindDoctypeChildEndTag(itr + 1 + toff, itrEnd);
381 else if (type == SimpleXMLType::Comment) p = _simpleXmlFindEndCommentTag(itr + 1 + toff, itrEnd);
382 else p = _simpleXmlFindEndTag(itr + 1 + toff, itrEnd);
384 if ((p) && (*p == '<')) {
385 type = SimpleXMLType::Error;
390 const char *start, *end;
392 start = itr + 1 + toff;
396 case SimpleXMLType::Open: {
398 type = SimpleXMLType::OpenEmpty;
403 case SimpleXMLType::CData: {
404 if (!memcmp(p - 2, "]]", 2)) end -= 2;
407 case SimpleXMLType::Processing: {
408 if (p[-1] == '?') end--;
411 case SimpleXMLType::Comment: {
412 if (!memcmp(p - 2, "--", 2)) end -= 2;
420 if ((strip) && (type != SimpleXMLType::Error) && (type != SimpleXMLType::CData)) {
421 start = _skipWhiteSpacesAndXmlEntities(start, end);
422 end = _unskipWhiteSpacesAndXmlEntities(end, start);
425 CB(type, start, end);
427 if (type != SimpleXMLType::Error) itr = p + 1;
430 CB(SimpleXMLType::Error, itr, itrEnd);
439 p = _skipWhiteSpacesAndXmlEntities(p, itrEnd);
441 CB(SimpleXMLType::Ignored, itr, p);
446 p = _simpleXmlFindStartTag(itr, itrEnd);
450 if (strip) end = _unskipWhiteSpacesAndXmlEntities(end, itr);
452 if (itr != end) CB(SimpleXMLType::Data, itr, end);
454 if ((strip) && (end < p)) CB(SimpleXMLType::Ignored, end, p);
466 bool simpleXmlParseW3CAttribute(const char* buf, simpleXMLAttributeCb func, const void* data)
473 if (!buf) return false;
475 end = buf + strlen(buf);
476 key = (char*)alloca(end - buf + 1);
477 val = (char*)alloca(end - buf + 1);
479 if (buf == end) return true;
482 char* sep = (char*)strchr(buf, ':');
483 next = (char*)strchr(buf, ';');
488 if (next == nullptr && sep != nullptr) {
489 memcpy(key, buf, sep - buf);
490 key[sep - buf] = '\0';
492 memcpy(val, sep + 1, end - sep - 1);
493 val[end - sep - 1] = '\0';
494 } else if (sep < next && sep != nullptr) {
495 memcpy(key, buf, sep - buf);
496 key[sep - buf] = '\0';
498 memcpy(val, sep + 1, next - sep - 1);
499 val[next - sep - 1] = '\0';
501 memcpy(key, buf, next - buf);
502 key[next - buf] = '\0';
506 if (!func((void*)data, key, val)) return false;
510 } while (next != nullptr);
516 const char* simpleXmlFindAttributesTag(const char* buf, unsigned bufLength)
518 const char *itr = buf, *itrEnd = buf + bufLength;
520 for (; itr < itrEnd; itr++) {
521 if (!isspace((unsigned char)*itr)) {
522 //User skip tagname and already gave it the attributes.
523 if (*itr == '=') return buf;
525 itr = _simpleXmlUnskipXmlEntities(itr, buf);
526 if (itr == itrEnd) return nullptr;