2 * Copyright (C) 2000 Peter Kelly (pmk@post.com)
3 * Copyright (C) 2005, 2006, 2008, 2014 Apple Inc. All rights reserved.
4 * Copyright (C) 2006 Alexey Proskuryakov (ap@webkit.org)
5 * Copyright (C) 2007 Samuel Weinig (sam@webkit.org)
6 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
7 * Copyright (C) 2008 Holger Hans Peter Freyther
8 * Copyright (C) 2008 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Library General Public
12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Library General Public License for more details.
20 * You should have received a copy of the GNU Library General Public License
21 * along with this library; see the file COPYING.LIB. If not, write to
22 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23 * Boston, MA 02110-1301, USA.
27 #include "core/xml/parser/XMLDocumentParser.h"
29 #include "bindings/core/v8/ExceptionState.h"
30 #include "bindings/core/v8/ExceptionStatePlaceholder.h"
31 #include "bindings/core/v8/ScriptController.h"
32 #include "bindings/core/v8/ScriptSourceCode.h"
33 #include "core/FetchInitiatorTypeNames.h"
34 #include "core/HTMLNames.h"
35 #include "core/XMLNSNames.h"
36 #include "core/dom/CDATASection.h"
37 #include "core/dom/Comment.h"
38 #include "core/dom/Document.h"
39 #include "core/dom/DocumentFragment.h"
40 #include "core/dom/DocumentType.h"
41 #include "core/dom/ProcessingInstruction.h"
42 #include "core/dom/ScriptLoader.h"
43 #include "core/dom/TransformSource.h"
44 #include "core/fetch/ResourceFetcher.h"
45 #include "core/fetch/ScriptResource.h"
46 #include "core/frame/LocalFrame.h"
47 #include "core/frame/UseCounter.h"
48 #include "core/html/HTMLHtmlElement.h"
49 #include "core/html/HTMLTemplateElement.h"
50 #include "core/html/parser/HTMLEntityParser.h"
51 #include "core/html/parser/TextResourceDecoder.h"
52 #include "core/loader/FrameLoader.h"
53 #include "core/loader/ImageLoader.h"
54 #include "core/svg/graphics/SVGImage.h"
55 #include "core/xml/XMLTreeViewer.h"
56 #include "core/xml/parser/SharedBufferReader.h"
57 #include "core/xml/parser/XMLDocumentParserScope.h"
58 #include "core/xml/parser/XMLParserInput.h"
59 #include "platform/RuntimeEnabledFeatures.h"
60 #include "platform/SharedBuffer.h"
61 #include "platform/TraceEvent.h"
62 #include "platform/network/ResourceError.h"
63 #include "platform/network/ResourceRequest.h"
64 #include "platform/network/ResourceResponse.h"
65 #include "platform/weborigin/SecurityOrigin.h"
66 #include "wtf/StringExtras.h"
67 #include "wtf/TemporaryChange.h"
68 #include "wtf/Threading.h"
69 #include "wtf/Vector.h"
70 #include "wtf/unicode/UTF8.h"
71 #include <libxml/catalog.h>
72 #include <libxml/parser.h>
73 #include <libxml/parserInternals.h>
74 #include <libxslt/xslt.h>
78 using namespace HTMLNames;
// Upper bound on the size of m_currentNodeStack: pushCurrentNode() reports a
// fatal "Excessive node nesting." error once the stack grows past this value.
80 // FIXME: HTMLConstructionSite has a limit of 512, should these match?
81 static const unsigned maxXMLTreeDepth = 5000;
// Builds a String from the first |length| bytes of a libxml character buffer,
// decoding them with String::fromUTF8().
83 static inline String toString(const xmlChar* string, size_t length)
85 return String::fromUTF8(reinterpret_cast<const char*>(string), length);
// Length-less overload: passes the whole buffer to String::fromUTF8(), which
// determines the length itself.
88 static inline String toString(const xmlChar* string)
90 return String::fromUTF8(reinterpret_cast<const char*>(string));
// Builds an AtomicString from the first |length| bytes of a libxml character
// buffer, decoding them with AtomicString::fromUTF8().
93 static inline AtomicString toAtomicString(const xmlChar* string, size_t length)
95 return AtomicString::fromUTF8(reinterpret_cast<const char*>(string), length);
// Length-less overload: passes the whole buffer to AtomicString::fromUTF8().
98 static inline AtomicString toAtomicString(const xmlChar* string)
100 return AtomicString::fromUTF8(reinterpret_cast<const char*>(string));
// Runs a series of checks on |document|: whether it saw elements in known
// namespaces or has a transform source document, whether it is attached to a
// frame and page, whether that frame has a parent, and whether it is hosted
// inside an SVGImage.
// NOTE(review): only the "not in a top frame" outcome (false) is spelled out
// below; confirm the result returned for the other conditions.
103 static inline bool hasNoStyleInformation(Document* document)
105 if (document->sawElementsInKnownNamespaces() || document->transformSourceDocument())
108 if (!document->frame() || !document->frame()->page())
111 if (document->frame()->tree().parent())
112 return false; // This document is not in a top frame
114 if (SVGImage::isInSVGImage(document))
// Queued form of a startElementNs() notification. The constructor takes deep
// copies of the libxml namespace and attribute arrays so that call() can
// replay the notification on the parser later, and the destructor releases
// those copies.
120 class PendingStartElementNSCallback FINAL : public XMLDocumentParser::PendingCallback {
122 PendingStartElementNSCallback(const AtomicString& localName, const AtomicString& prefix, const AtomicString& uri,
123 int namespaceCount, const xmlChar** namespaces, int attributeCount, int defaultedCount, const xmlChar** attributes)
124 : m_localName(localName)
127 , m_namespaceCount(namespaceCount)
128 , m_attributeCount(attributeCount)
129 , m_defaultedCount(defaultedCount)
// The namespace array holds two pointers per namespace; each one is duplicated.
131 m_namespaces = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * namespaceCount * 2));
132 for (int i = 0; i < namespaceCount * 2 ; ++i)
133 m_namespaces[i] = xmlStrdup(namespaces[i]);
134 m_attributes = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * attributeCount * 5));
135 for (int i = 0; i < attributeCount; ++i) {
136 // Each attribute has 5 elements in the array:
137 // name, prefix, uri, value and an end pointer.
// The first three slots are duplicated as whole strings.
138 for (int j = 0; j < 3; ++j)
139 m_attributes[i * 5 + j] = xmlStrdup(attributes[i * 5 + j]);
// The value is delimited by the end pointer, so exactly (end - value) bytes
// are copied, and the stored end pointer is recomputed to point into the copy.
140 int length = attributes[i * 5 + 4] - attributes[i * 5 + 3];
141 m_attributes[i * 5 + 3] = xmlStrndup(attributes[i * 5 + 3], length);
142 m_attributes[i * 5 + 4] = m_attributes[i * 5 + 3] + length;
146 virtual ~PendingStartElementNSCallback()
148 for (int i = 0; i < m_namespaceCount * 2; ++i)
149 xmlFree(m_namespaces[i]);
150 xmlFree(m_namespaces);
// Only slots 0..3 are freed: slot 4 points inside the slot-3 allocation made
// by the constructor and is not a separate allocation.
151 for (int i = 0; i < m_attributeCount; ++i)
152 for (int j = 0; j < 4; ++j)
153 xmlFree(m_attributes[i * 5 + j]);
154 xmlFree(m_attributes);
// Replays the stored notification on |parser| using the copied arrays.
157 virtual void call(XMLDocumentParser* parser) OVERRIDE
159 parser->startElementNs(m_localName, m_prefix, m_uri,
160 m_namespaceCount, const_cast<const xmlChar**>(m_namespaces),
161 m_attributeCount, m_defaultedCount, const_cast<const xmlChar**>(m_attributes));
165 AtomicString m_localName;
166 AtomicString m_prefix;
168 int m_namespaceCount;
// Copy of the namespace array, 2 * m_namespaceCount entries.
169 xmlChar** m_namespaces;
170 int m_attributeCount;
171 int m_defaultedCount;
// Copy of the attribute array, 5 * m_attributeCount entries.
172 xmlChar** m_attributes;
// Queued form of an endElementNs() notification; it carries no data and
// call() simply invokes endElementNs() on the parser.
175 class PendingEndElementNSCallback FINAL : public XMLDocumentParser::PendingCallback {
177 virtual void call(XMLDocumentParser* parser) OVERRIDE
179 parser->endElementNs();
// Queued form of a characters() notification. The constructor copies |length|
// bytes of |chars| with xmlStrndup(), and call() passes the copy and its
// length back to the parser.
183 class PendingCharactersCallback FINAL : public XMLDocumentParser::PendingCallback {
185 PendingCharactersCallback(const xmlChar* chars, int length)
186 : m_chars(xmlStrndup(chars, length))
// NOTE(review): m_chars is an xmlStrndup() allocation; confirm the destructor
// releases it.
191 virtual ~PendingCharactersCallback()
196 virtual void call(XMLDocumentParser* parser) OVERRIDE
198 parser->characters(m_chars, m_length);
// Queued form of a processingInstruction() notification; call() replays it on
// the parser with the stored target and data.
206 class PendingProcessingInstructionCallback FINAL : public XMLDocumentParser::PendingCallback {
208 PendingProcessingInstructionCallback(const String& target, const String& data)
214 virtual void call(XMLDocumentParser* parser) OVERRIDE
216 parser->processingInstruction(m_target, m_data);
// Queued form of a cdataBlock() notification; stores the text and hands it
// back to the parser in call().
224 class PendingCDATABlockCallback FINAL : public XMLDocumentParser::PendingCallback {
226 explicit PendingCDATABlockCallback(const String& text) : m_text(text) { }
228 virtual void call(XMLDocumentParser* parser) OVERRIDE
230 parser->cdataBlock(m_text);
// Queued form of a comment() notification; stores the text and hands it back
// to the parser in call().
237 class PendingCommentCallback FINAL : public XMLDocumentParser::PendingCallback {
239 explicit PendingCommentCallback(const String& text) : m_text(text) { }
241 virtual void call(XMLDocumentParser* parser) OVERRIDE
243 parser->comment(m_text);
// Queued form of an internalSubset() notification; call() replays it on the
// parser with the stored name, external ID and system ID.
250 class PendingInternalSubsetCallback FINAL : public XMLDocumentParser::PendingCallback {
252 PendingInternalSubsetCallback(const String& name, const String& externalID, const String& systemID)
254 , m_externalID(externalID)
255 , m_systemID(systemID)
259 virtual void call(XMLDocumentParser* parser) OVERRIDE
261 parser->internalSubset(m_name, m_externalID, m_systemID);
// Queued form of an error report. The message is duplicated with xmlStrdup()
// and the line/column are captured at construction time; call() forwards them
// to handleError() as a TextPosition.
270 class PendingErrorCallback FINAL : public XMLDocumentParser::PendingCallback {
272 PendingErrorCallback(XMLErrors::ErrorType type, const xmlChar* message, OrdinalNumber lineNumber, OrdinalNumber columnNumber)
274 , m_message(xmlStrdup(message))
275 , m_lineNumber(lineNumber)
276 , m_columnNumber(columnNumber)
// NOTE(review): m_message is an xmlStrdup() allocation; confirm the destructor
// releases it.
280 virtual ~PendingErrorCallback()
285 virtual void call(XMLDocumentParser* parser) OVERRIDE
287 parser->handleError(m_type, reinterpret_cast<char*>(m_message), TextPosition(m_lineNumber, m_columnNumber));
291 XMLErrors::ErrorType m_type;
293 OrdinalNumber m_lineNumber;
294 OrdinalNumber m_columnNumber;
// Saves the node that is current on entry onto m_currentNodeStack and reports
// a fatal error when the stack becomes deeper than maxXMLTreeDepth.
// NOTE(review): presumably |n| then becomes m_currentNode; confirm.
297 void XMLDocumentParser::pushCurrentNode(ContainerNode* n)
300 ASSERT(m_currentNode);
305 m_currentNodeStack.append(m_currentNode);
307 if (m_currentNodeStack.size() > maxXMLTreeDepth)
308 handleError(XMLErrors::ErrorTypeFatal, "Excessive node nesting.", textPosition());
// Restores the most recently saved node from m_currentNodeStack as the
// current node. The stack must not be empty.
311 void XMLDocumentParser::popCurrentNode()
315 ASSERT(m_currentNodeStack.size());
// The document itself is never deref'd here; any other current node is.
317 if (m_currentNode != document())
318 m_currentNode->deref();
320 m_currentNode = m_currentNodeStack.last();
321 m_currentNodeStack.removeLast();
// Drops the current node, the pending leaf text node and every entry still on
// m_currentNodeStack. The document is never deref'd.
324 void XMLDocumentParser::clearCurrentNodeStack()
327 if (m_currentNode && m_currentNode != document())
328 m_currentNode->deref();
330 m_currentNode = nullptr;
331 m_leafTextNode = nullptr;
333 if (m_currentNodeStack.size()) { // Aborted parsing.
// Entries above index 0 are deref'd unconditionally, walking down from the
// top of the stack; the loop stops before index 0.
335 for (size_t i = m_currentNodeStack.size() - 1; i != 0; --i)
336 m_currentNodeStack[i]->deref();
// The bottom entry is handled separately because it may be null or the document.
337 if (m_currentNodeStack[0] && m_currentNodeStack[0] != document())
338 m_currentNodeStack[0]->deref();
340 m_currentNodeStack.clear();
// Not expected to be called on this parser: reaching it trips
// ASSERT_NOT_REACHED().
344 void XMLDocumentParser::insert(const SegmentedString&)
346 ASSERT_NOT_REACHED();
// Accepts the next piece of decoded source text. Depending on parser state the
// text is recorded for a later XSL transform, queued while the parser is
// paused, or handed to doWrite().
349 void XMLDocumentParser::append(PassRefPtr<StringImpl> inputSource)
351 SegmentedString source(inputSource);
// Source is accumulated until the first element has been seen, and continues
// to be accumulated once an XSL transform has been seen.
352 if (m_sawXSLTransform || !m_sawFirstElement)
353 m_originalSourceForTransform.append(source);
355 if (isStopped() || m_sawXSLTransform)
358 if (m_parserPaused) {
359 m_pendingSrc.append(source);
363 // JavaScript can detach the parser. Make sure this is not released
364 // before the end of this method.
365 RefPtrWillBeRawPtr<XMLDocumentParser> protect(this);
367 doWrite(source.toString());
// Records the error with m_xmlErrors, then applies extra handling for
// non-warning errors and for fatal errors.
// NOTE(review): the actions taken in those two cases are not shown here;
// confirm against the full function.
370 void XMLDocumentParser::handleError(XMLErrors::ErrorType type, const char* formattedMessage, TextPosition position)
372 m_xmlErrors.handleError(type, formattedMessage, position);
373 if (type != XMLErrors::ErrorTypeWarning)
375 if (type == XMLErrors::ErrorTypeFatal)
// Starts a run of character data: creates an empty Text node, remembers it as
// m_leafTextNode and appends it to the current node. Requires that no text is
// buffered and that no leaf text node is already open.
379 void XMLDocumentParser::enterText()
381 ASSERT(m_bufferedText.size() == 0);
382 ASSERT(!m_leafTextNode);
383 m_leafTextNode = Text::create(m_currentNode->document(), "");
384 m_currentNode->parserAppendChild(m_leafTextNode.get());
// Ends a run of character data: converts the buffered bytes to a String,
// appends them to the open leaf text node, then clears the buffer and forgets
// the node.
387 void XMLDocumentParser::exitText()
395 m_leafTextNode->appendData(toString(m_bufferedText.data(), m_bufferedText.size()));
396 m_bufferedText.clear();
397 m_leafTextNode = nullptr;
// Releases the node stack before delegating to the base-class detach().
400 void XMLDocumentParser::detach()
402 clearCurrentNodeStack();
403 ScriptableDocumentParser::detach();
// Completes a whole-document parse (fragment parsing is excluded by the
// assertion). The visible tail of the sequence is: prepare to stop parsing,
// move the document to the Interactive ready state, clear the node stack and
// notify the document that parsing finished.
// NOTE(review): the conditions guarding insertErrorMessageBlock() and
// styleResolverChanged() are not shown here; confirm them.
406 void XMLDocumentParser::end()
408 TRACE_EVENT0("blink", "XMLDocumentParser::end");
409 // XMLDocumentParserLibxml2 will do bad things to the document if doEnd() is called.
410 // I don't believe XMLDocumentParserQt needs doEnd called in the fragment case.
411 ASSERT(!m_parsingFragment);
415 // doEnd() call above can detach the parser and null out its document.
416 // In that case, we just bail out.
420 // doEnd() could process a script tag, thus pausing parsing.
425 insertErrorMessageBlock();
428 document()->styleResolverChanged();
432 prepareToStopParsing();
433 document()->setReadyState(Document::Interactive);
434 clearCurrentNodeStack();
435 document()->finishedParsing();
// Records that finish() has been requested by setting m_finishCalled.
438 void XMLDocumentParser::finish()
440 // FIXME: We should ASSERT(!m_parserStopped) here, since it does not
441 // make sense to call any methods on DocumentParser once it's been stopped.
442 // However, FrameLoader::stop calls DocumentParser::finish unconditionally.
445 m_finishCalled = true;
// Thin forwarder to XMLErrors::insertErrorMessageBlock() on m_xmlErrors.
450 void XMLDocumentParser::insertErrorMessageBlock()
452 m_xmlErrors.insertErrorMessageBlock();
// Resource-client notification for the pending script. Captures the script
// source and its error/cancel state, unregisters this parser as a client, and
// then either dispatches an error event or executes the script and dispatches
// a load event.
455 void XMLDocumentParser::notifyFinished(Resource* unusedResource)
457 ASSERT_UNUSED(unusedResource, unusedResource == m_pendingScript);
// State is read from m_pendingScript before the parser unregisters from it.
459 ScriptSourceCode sourceCode(m_pendingScript.get());
460 bool errorOccurred = m_pendingScript->errorOccurred();
461 bool wasCanceled = m_pendingScript->wasCanceled();
463 m_pendingScript->removeClient(this);
// Keep a local reference to the script element and clear the member.
466 RefPtrWillBeRawPtr<Element> e = m_scriptElement;
467 m_scriptElement = nullptr;
469 ScriptLoader* scriptLoader = toScriptLoaderIfPossible(e.get());
470 ASSERT(scriptLoader);
472 // JavaScript can detach this parser, make sure it's kept alive even if
474 RefPtrWillBeRawPtr<XMLDocumentParser> protect(this);
477 scriptLoader->dispatchErrorEvent();
// A canceled load neither executes the script nor fires a load event.
478 } else if (!wasCanceled) {
479 scriptLoader->executeScript(sourceCode);
480 scriptLoader->dispatchLoadEvent();
483 m_scriptElement = nullptr;
// NOTE(review): the action taken when the parser is still attached and not in
// the middle of requesting a script is not shown here; confirm.
485 if (!isDetached() && !m_requestingScript)
// True while a pending script resource is set.
489 bool XMLDocumentParser::isWaitingForScripts() const
491 return m_pendingScript;
// Marks the parser as paused. This is a no-op when parsing a fragment.
494 void XMLDocumentParser::pauseParsing()
496 if (!m_parsingFragment)
497 m_parserPaused = true;
// Parses |chunk| into |fragment|. When the context element is a script or
// style element the chunk is appended verbatim as a single text node;
// otherwise a fragment-mode XMLDocumentParser is created and fed the chunk
// through appendFragmentSource().
500 bool XMLDocumentParser::parseDocumentFragment(const String& chunk, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy)
505 // FIXME: We need to implement the HTML5 XML Fragment parsing algorithm:
506 // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-xhtml-syntax.html#xml-fragment-parsing-algorithm
507 // For now we have a hack for script/style innerHTML support:
508 if (contextElement && (contextElement->hasLocalName(scriptTag.localName()) || contextElement->hasLocalName(styleTag.localName()))) {
509 fragment->parserAppendChild(fragment->document().createTextNode(chunk));
513 RefPtrWillBeRawPtr<XMLDocumentParser> parser = XMLDocumentParser::create(fragment, contextElement, parserContentPolicy);
514 bool wellFormed = parser->appendFragmentSource(chunk);
516 // Do not call finish(). Current finish() and doEnd() implementations touch
517 // the main Document/loader and can cause crashes in the fragment case.
519 // Allows ~DocumentParser to assert it was detached before destruction.
521 // appendFragmentSource()'s wellFormed is more permissive than wellFormed().
// Sentinel whose address openFunc() returns for loads it refuses; readFunc()
// and closeFunc() compare their context pointer against it.
525 static int globalDescriptor = 0;
// Thread recorded by initializeLibXMLIfNecessary(); matchFunc() only matches
// requests made on this thread.
526 static ThreadIdentifier libxmlLoaderThread = 0;
// libxml I/O "match" callback. It claims a request only when a fetcher is
// currently in scope and the call is made on the recorded loader thread; the
// URI itself is ignored.
528 static int matchFunc(const char*)
530 // Only match loads initiated due to uses of libxml2 from within
531 // XMLDocumentParser to avoid interfering with client applications that also
532 // use libxml2. http://bugs.webkit.org/show_bug.cgi?id=17353
533 return XMLDocumentParserScope::currentFetcher && currentThread() == libxmlLoaderThread;
// Applies |attributeVector| to |element|. When the content policy does not
// allow scripting content, scripting attributes are stripped from the vector
// first (the vector is modified in place).
536 static inline void setAttributes(Element* element, Vector<Attribute>& attributeVector, ParserContentPolicy parserContentPolicy)
538 if (!scriptingContentIsAllowed(parserContentPolicy))
539 element->stripScriptingAttributes(attributeVector);
540 element->parserSetAttributes(attributeVector);
// Forces the libxml context to the encoding that matches the in-memory string
// representation: ISO-8859-1, or UTF-16 in the host byte order.
// NOTE(review): presumably |is8Bit| selects between the two; confirm.
543 static void switchEncoding(xmlParserCtxtPtr ctxt, bool is8Bit)
545 // Hack around libxml2's lack of encoding override support by manually
546 // resetting the encoding to UTF-16 before every chunk. Otherwise libxml
547 // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks and
548 // switch encodings, causing the parse to fail.
550 xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
// Byte-order probe: look at the first byte in memory of the BOM code unit.
// 0xFF first means the low byte is stored first, i.e. little-endian.
554 const UChar BOM = 0xFEFF;
555 const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
556 xmlSwitchEncoding(ctxt, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
// Feeds |chunk| to libxml as a non-terminating chunk (last argument 0) after
// switching the context to the matching encoding. The byte count is the
// character count scaled by the character width (LChar or UChar).
559 static void parseChunk(xmlParserCtxtPtr ctxt, const String& chunk)
561 bool is8Bit = chunk.is8Bit();
562 switchEncoding(ctxt, is8Bit);
564 xmlParseChunk(ctxt, reinterpret_cast<const char*>(chunk.characters8()), sizeof(LChar) * chunk.length(), 0);
566 xmlParseChunk(ctxt, reinterpret_cast<const char*>(chunk.characters16()), sizeof(UChar) * chunk.length(), 0);
// Signals end of input to libxml: an empty chunk with the terminate flag set.
569 static void finishParsing(xmlParserCtxtPtr ctxt)
571 xmlParseChunk(ctxt, 0, 0, 1);
// Poisons xmlParseChunk for the rest of this file: any later direct use expands
// to the tokens below and fails to compile, steering callers to parseChunk()
// and finishParsing() defined above.
574 #define xmlParseChunk #error "Use parseChunk instead to select the correct encoding."
// Tests |urlString| against the catalog locations libxml probes for: the fixed
// /etc/xml/catalog file URL, or any file URL ending in /etc/catalog.
// NOTE(review): the |false| argument to startsWith/endsWith presumably selects
// case-insensitive matching; confirm.
576 static bool isLibxmlDefaultCatalogFile(const String& urlString)
578 // On non-Windows platforms libxml asks for this URL, the
579 // "XML_XML_DEFAULT_CATALOG", on initialization.
580 if (urlString == "file:///etc/xml/catalog")
583 // On Windows, libxml computes a URL relative to where its DLL resides.
584 if (urlString.startsWith("file:///", false) && urlString.endsWith("/etc/catalog", false))
// Decides whether libxml may load |url|. It checks, in order: the libxml
// default catalog file, the W3C XHTML DTD prefix, the W3C SVG prefix, and
// finally whether the current fetcher's document security origin may request
// the URL (printing an access-denied message when it may not).
// NOTE(review): the results returned by the individual checks are not shown
// here; the comments imply these loads are refused. Confirm.
589 static bool shouldAllowExternalLoad(const KURL& url)
591 String urlString = url.string();
593 // This isn't really necessary now that initializeLibXMLIfNecessary
594 // disables catalog support in libxml, but keeping it for defense in depth.
595 if (isLibxmlDefaultCatalogFile(url))
598 // The most common DTD. There isn't much point in hammering www.w3.org by
599 // requesting this URL for every XHTML document.
600 if (urlString.startsWith("http://www.w3.org/TR/xhtml", false))
603 // Similarly, there isn't much point in requesting the SVG DTD.
604 if (urlString.startsWith("http://www.w3.org/Graphics/SVG", false))
607 // The libxml doesn't give us a lot of context for deciding whether to allow
608 // this request. In the worst case, this load could be for an external
609 // entity and the resulting document could simply read the retrieved
610 // content. If we had more context, we could potentially allow the parser to
611 // load a DTD. As things stand, we take the conservative route and allow
612 // same-origin requests only.
613 if (!XMLDocumentParserScope::currentFetcher->document()->securityOrigin()->canRequest(url)) {
614 XMLDocumentParserScope::currentFetcher->printAccessDeniedMessage(url);
// libxml I/O "open" callback. Refused loads return the address of
// globalDescriptor; allowed loads are fetched synchronously through the
// current fetcher and returned as a heap-allocated SharedBufferReader, which
// is handed to readFunc()/closeFunc() as their context.
621 static void* openFunc(const char* uri)
623 ASSERT(XMLDocumentParserScope::currentFetcher);
624 ASSERT(currentThread() == libxmlLoaderThread);
626 KURL url(KURL(), uri);
628 if (!shouldAllowExternalLoad(url))
629 return &globalDescriptor;
632 RefPtr<SharedBuffer> data;
// The current fetcher is captured first because the scope object constructed
// next is given 0 for the duration of the load.
635 ResourceFetcher* fetcher = XMLDocumentParserScope::currentFetcher;
636 XMLDocumentParserScope scope(0);
637 // FIXME: We should restore the original global error handler as well.
639 if (fetcher->frame()) {
640 FetchRequest request(ResourceRequest(url), FetchInitiatorTypeNames::xml, ResourceFetcher::defaultResourceOptions());
641 ResourcePtr<Resource> resource = fetcher->fetchSynchronously(request);
642 if (resource && !resource->errorOccurred()) {
643 data = resource->resourceBuffer();
644 finalURL = resource->response().url();
649 // We have to check the URL again after the load to catch redirects.
650 // See <https://bugs.webkit.org/show_bug.cgi?id=21963>.
651 if (!shouldAllowExternalLoad(finalURL))
652 return &globalDescriptor;
654 return new SharedBufferReader(data);
// libxml I/O "read" callback. |context| is whatever openFunc() returned:
// either the globalDescriptor sentinel or a SharedBufferReader, from which up
// to |len| bytes are read into |buffer|.
657 static int readFunc(void* context, char* buffer, int len)
659 // Do 0-byte reads in case of a null descriptor
660 if (context == &globalDescriptor)
663 SharedBufferReader* data = static_cast<SharedBufferReader*>(context);
664 return data->readData(buffer, len);
// libxml I/O "write" callback; all three arguments are ignored.
667 static int writeFunc(void*, const char*, int)
669 // Always just do 0-byte writes
// libxml I/O "close" callback. A context other than the globalDescriptor
// sentinel is the SharedBufferReader allocated by openFunc().
// NOTE(review): confirm the reader is deleted here, since openFunc() creates
// it with new.
673 static int closeFunc(void* context)
675 if (context != &globalDescriptor) {
676 SharedBufferReader* data = static_cast<SharedBufferReader*>(context);
// printf-style error callback that ignores all of its arguments.
682 static void errorFunc(void*, const char*, ...)
684 // FIXME: It would be nice to display error messages somewhere.
// Process-wide libxml setup, tracked by the function-local |didInit| flag:
// disables catalog loading, registers this file's input and output callbacks,
// and records the calling thread as the loader thread.
687 static void initializeLibXMLIfNecessary()
689 static bool didInit = false;
693 // We don't want libxml to try and load catalogs.
694 // FIXME: It's not nice to set global settings in libxml, embedders of Blink
695 // could be trying to use libxml themselves.
696 xmlCatalogSetDefaults(XML_CATA_ALLOW_NONE);
// Input and output registrations share matchFunc/openFunc/closeFunc and differ
// only in the read vs. write callback.
698 xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
699 xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
700 libxmlLoaderThread = currentThread();
// Creates a libxml push-parser context with no initial data, stores |userData|
// in its _private slot, turns on entity replacement, and wraps the context in
// a ref-counted XMLParserContext.
705 PassRefPtr<XMLParserContext> XMLParserContext::createStringParser(xmlSAXHandlerPtr handlers, void* userData)
707 initializeLibXMLIfNecessary();
708 xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(handlers, 0, 0, 0, 0);
709 parser->_private = userData;
710 parser->replaceEntities = true;
711 return adoptRef(new XMLParserContext(parser));
// Creates a libxml memory-parser context over |chunk|, installs a copy of
// |handlers| as its SAX handler, and pre-configures it to start in the CONTENT
// state. |userData| is stored in the context's _private slot.
714 // Chunk should be encoded in UTF-8
715 PassRefPtr<XMLParserContext> XMLParserContext::createMemoryParser(xmlSAXHandlerPtr handlers, void* userData, const CString& chunk)
717 initializeLibXMLIfNecessary();
719 // appendFragmentSource() checks that the length doesn't overflow an int.
720 xmlParserCtxtPtr parser = xmlCreateMemoryParserCtxt(chunk.data(), chunk.length());
725 // Copy the sax handler
726 memcpy(parser->sax, handlers, sizeof(xmlSAXHandler));
728 // Set parser options.
729 // XML_PARSE_NODICT: default dictionary option.
730 // XML_PARSE_NOENT: force entities substitutions.
731 xmlCtxtUseOptions(parser, XML_PARSE_NODICT | XML_PARSE_NOENT);
733 // Internal initialization
735 parser->instate = XML_PARSER_CONTENT; // We are parsing a CONTENT
// Look up the "xml", "xmlns" and XML-namespace strings in the context's
// dictionary and store them in the context's str_* fields.
737 parser->str_xml = xmlDictLookup(parser->dict, BAD_CAST "xml", 3);
738 parser->str_xmlns = xmlDictLookup(parser->dict, BAD_CAST "xmlns", 5);
739 parser->str_xml_ns = xmlDictLookup(parser->dict, XML_XML_NAMESPACE, 36);
740 parser->_private = userData;
742 return adoptRef(new XMLParserContext(parser));
745 // --------------------------------
// Only the exact version string "1.0" is reported as supported.
747 bool XMLDocumentParser::supportsXMLVersion(const String& version)
749 return version == "1.0";
// Whole-document constructor: the document is the initial current node,
// m_parsingFragment is false, and m_hasView records whether a FrameView was
// supplied.
752 XMLDocumentParser::XMLDocumentParser(Document& document, FrameView* frameView)
753 : ScriptableDocumentParser(document)
754 , m_hasView(frameView)
756 , m_currentNode(&document)
757 , m_isCurrentlyParsing8BitChunk(false)
760 , m_sawXSLTransform(false)
761 , m_sawFirstElement(false)
762 , m_isXHTMLDocument(false)
763 , m_parserPaused(false)
764 , m_requestingScript(false)
765 , m_finishCalled(false)
766 , m_xmlErrors(&document)
768 , m_scriptStartPosition(TextPosition::belowRangePosition())
769 , m_parsingFragment(false)
771 // This is XML being used as a document resource.
772 if (frameView && document.isXMLDocument())
773 UseCounter::count(document, UseCounter::XMLDocument);
// Fragment constructor: the fragment is the initial current node and
// m_parsingFragment is true. It also seeds the default namespace and the
// prefix-to-namespace map from xmlns attributes on |parentElement| and its
// element ancestors.
776 XMLDocumentParser::XMLDocumentParser(DocumentFragment* fragment, Element* parentElement, ParserContentPolicy parserContentPolicy)
777 : ScriptableDocumentParser(fragment->document(), parserContentPolicy)
780 , m_currentNode(fragment)
781 , m_isCurrentlyParsing8BitChunk(false)
784 , m_sawXSLTransform(false)
785 , m_sawFirstElement(false)
786 , m_isXHTMLDocument(false)
787 , m_parserPaused(false)
788 , m_requestingScript(false)
789 , m_finishCalled(false)
790 , m_xmlErrors(&fragment->document())
792 , m_scriptStartPosition(TextPosition::belowRangePosition())
793 , m_parsingFragment(true)
799 // Add namespaces based on the parent node
// Collect the context element and its element ancestors, innermost first.
// The loop reassigns |parentElement| as it climbs.
800 WillBeHeapVector<RawPtrWillBeMember<Element> > elemStack;
801 while (parentElement) {
802 elemStack.append(parentElement);
804 ContainerNode* n = parentElement->parentNode();
805 if (!n || !n->isElementNode())
807 parentElement = toElement(n);
810 if (elemStack.isEmpty())
// Pop from the back, i.e. outermost ancestor first, so declarations on inner
// elements overwrite those made further out.
813 for (; !elemStack.isEmpty(); elemStack.removeLast()) {
814 Element* element = elemStack.last();
815 AttributeCollection attributes = element->attributes();
816 AttributeCollection::iterator end = attributes.end();
817 for (AttributeCollection::iterator it = attributes.begin(); it != end; ++it) {
// A local name of "xmlns" sets the default namespace; an "xmlns" prefix
// declares a prefix-to-namespace mapping.
818 if (it->localName() == xmlnsAtom)
819 m_defaultNamespaceURI = it->value();
820 else if (it->prefix() == xmlnsAtom)
821 m_prefixToNamespaceMap.set(it->localName(), it->value());
825 // If the parent element is not in document tree, there may be no xmlns attribute; just default to the parent's namespace.
// NOTE(review): |parentElement| was reassigned by the climb above, so here it
// is the outermost element that was reached rather than the original argument.
826 if (m_defaultNamespaceURI.isNull() && !parentElement->inDocument())
827 m_defaultNamespaceURI = parentElement->namespaceURI();
// Frees the document attached to the libxml context, if there is one, and
// then the context itself.
830 XMLParserContext::~XMLParserContext()
832 if (m_context->myDoc)
833 xmlFreeDoc(m_context->myDoc);
834 xmlFreeParserCtxt(m_context);
// Asserts that the node stack and current node were already released, and
// unregisters this parser as a client of the pending script.
// NOTE(review): presumably removeClient() is guarded by a check that
// m_pendingScript is set; confirm.
837 XMLDocumentParser::~XMLDocumentParser()
840 // The XMLDocumentParser will always be detached before being destroyed.
841 ASSERT(m_currentNodeStack.isEmpty());
842 ASSERT(!m_currentNode);
845 // FIXME: m_pendingScript handling should be moved into XMLDocumentParser.cpp!
847 m_pendingScript->removeClient(this);
// Reports this parser's node references and m_xmlErrors to |visitor|, then
// delegates to the base-class trace().
850 void XMLDocumentParser::trace(Visitor* visitor)
852 visitor->trace(m_currentNode);
854 visitor->trace(m_currentNodeStack);
856 visitor->trace(m_leafTextNode);
857 visitor->trace(m_xmlErrors);
858 visitor->trace(m_scriptElement);
859 ScriptableDocumentParser::trace(visitor);
// Pushes |parseString| through libxml. The parser must not be detached on
// entry. Empty strings skip the libxml call. Afterwards a decoding error
// recorded on the document is reported as a fatal "Encoding error" at the
// libxml input's current line and column.
862 void XMLDocumentParser::doWrite(const String& parseString)
864 TRACE_EVENT0("blink", "XMLDocumentParser::doWrite");
865 ASSERT(!isDetached());
867 initializeParserContext();
869 // Protect the libxml context from deletion during a callback
870 RefPtr<XMLParserContext> context = m_context;
872 // libXML throws an error if you try to switch the encoding for an empty
874 if (parseString.length()) {
875 // JavaScript may cause the parser to detach during parseChunk
876 // keep this alive until this function is done.
877 RefPtrWillBeRawPtr<XMLDocumentParser> protect(this);
// Both objects are scoped to this block: one installs the document's fetcher,
// the other sets the 8-bit flag for the duration of the chunk.
879 XMLDocumentParserScope scope(document()->fetcher());
880 TemporaryChange<bool> encodingScope(m_isCurrentlyParsing8BitChunk, parseString.is8Bit());
881 parseChunk(context->context(), parseString);
883 // JavaScript (which may be run under the parseChunk callstack) may
884 // cause the parser to be stopped or detached.
889 // FIXME: Why is this here? And why is it after we process the passed
891 if (document()->sawDecodingError()) {
892 // If the decoder saw an error, report it as fatal (stops parsing)
893 TextPosition position(OrdinalNumber::fromOneBasedInt(context->context()->input->line), OrdinalNumber::fromOneBasedInt(context->context()->input->col));
894 handleError(XMLErrors::ErrorTypeFatal, "Encoding error", position);
// Typed overlay for the flat namespace array libxml passes to startElementNs:
// handleNamespaceAttributes() reinterprets the const xmlChar** as an array of
// these records and reads their prefix and uri members.
898 struct xmlSAX2Namespace {
899 const xmlChar* prefix;
// Converts libxml's namespace declarations into xmlns attributes appended to
// |prefixedAttributes|. A declaration without a prefix becomes "xmlns"; one
// with a prefix becomes "xmlns:<prefix>". Name parsing failures are reported
// through |exceptionState|.
903 static inline void handleNamespaceAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlNamespaces, int nbNamespaces, ExceptionState& exceptionState)
905 xmlSAX2Namespace* namespaces = reinterpret_cast<xmlSAX2Namespace*>(libxmlNamespaces);
906 for (int i = 0; i < nbNamespaces; ++i) {
907 AtomicString namespaceQName = xmlnsAtom;
908 AtomicString namespaceURI = toAtomicString(namespaces[i].uri);
909 if (namespaces[i].prefix)
910 namespaceQName = "xmlns:" + toString(namespaces[i].prefix);
// The attribute itself lives in the xmlns namespace; its value is the
// declared namespace URI.
912 QualifiedName parsedName = anyName;
913 if (!Element::parseAttributeName(parsedName, XMLNSNames::xmlnsNamespaceURI, namespaceQName, exceptionState))
916 prefixedAttributes.append(Attribute(parsedName, namespaceURI));
// Typed overlay for the flat attribute array libxml passes to startElementNs:
// handleElementAttributes() reinterprets the const xmlChar** as an array of
// these records. The attribute value is delimited by an end pointer rather
// than measured as a string (see the length computation there).
920 struct xmlSAX2Attributes {
921 const xmlChar* localname;
922 const xmlChar* prefix;
924 const xmlChar* value;
// Converts libxml's attribute records into Attribute objects appended to
// |prefixedAttributes|. Name parsing failures are reported through
// |exceptionState|.
928 static inline void handleElementAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlAttributes, int nbAttributes, ExceptionState& exceptionState)
930 xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
931 for (int i = 0; i < nbAttributes; ++i) {
// The value length comes from the distance between the value and end pointers.
932 int valueLength = static_cast<int>(attributes[i].end - attributes[i].value);
933 AtomicString attrValue = toAtomicString(attributes[i].value, valueLength);
934 String attrPrefix = toString(attributes[i].prefix);
// Attributes without a prefix get a null namespace URI and a bare local name;
// prefixed ones get their URI and a "prefix:localname" qualified name.
935 AtomicString attrURI = attrPrefix.isEmpty() ? AtomicString() : toAtomicString(attributes[i].uri);
936 AtomicString attrQName = attrPrefix.isEmpty() ? toAtomicString(attributes[i].localname) : attrPrefix + ":" + toString(attributes[i].localname);
938 QualifiedName parsedName = anyName;
939 if (!Element::parseAttributeName(parsedName, attrURI, attrQName, exceptionState))
942 prefixedAttributes.append(Attribute(parsedName, attrValue));
// Start-of-element handler. While the parser is paused the notification is
// queued; otherwise the element is created, given its namespace and regular
// attributes, appended to the current node and pushed as the new current node.
946 void XMLDocumentParser::startElementNs(const AtomicString& localName, const AtomicString& prefix, const AtomicString& uri, int nbNamespaces,
947 const xmlChar** libxmlNamespaces, int nbAttributes, int nbDefaulted, const xmlChar** libxmlAttributes)
952 if (m_parserPaused) {
953 m_pendingCallbacks.append(adoptPtr(new PendingStartElementNSCallback(localName, prefix, uri, nbNamespaces, libxmlNamespaces,
954 nbAttributes, nbDefaulted, libxmlAttributes)));
// In fragment mode a null URI is resolved from the namespace context captured
// by the fragment constructor: the prefix map or the default namespace.
960 AtomicString adjustedURI = uri;
961 if (m_parsingFragment && adjustedURI.isNull()) {
962 if (!prefix.isNull())
963 adjustedURI = m_prefixToNamespaceMap.get(prefix);
965 adjustedURI = m_defaultNamespaceURI;
// Remember whether this is the first element before setting the flag.
968 bool isFirstElement = !m_sawFirstElement;
969 m_sawFirstElement = true;
971 QualifiedName qName(prefix, localName, adjustedURI);
972 RefPtrWillBeRawPtr<Element> newElement = m_currentNode->document().createElement(qName, true);
// Attributes are gathered in two passes into one vector, namespace
// declarations first. Whatever was collected is applied to the element even
// when a pass raised an exception.
978 Vector<Attribute> prefixedAttributes;
979 TrackExceptionState exceptionState;
980 handleNamespaceAttributes(prefixedAttributes, libxmlNamespaces, nbNamespaces, exceptionState);
981 if (exceptionState.hadException()) {
982 setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy());
987 handleElementAttributes(prefixedAttributes, libxmlAttributes, nbAttributes, exceptionState);
988 setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy());
989 if (exceptionState.hadException()) {
994 newElement->beginParsingChildren();
996 ScriptLoader* scriptLoader = toScriptLoaderIfPossible(newElement.get());
998 m_scriptStartPosition = textPosition();
1000 m_currentNode->parserAppendChild(newElement.get());
// Children of a <template> element are parsed into its content() rather than
// into the element itself.
1002 if (isHTMLTemplateElement(*newElement))
1003 pushCurrentNode(toHTMLTemplateElement(*newElement).content());
1005 pushCurrentNode(newElement.get());
1007 if (isHTMLHtmlElement(*newElement))
1008 toHTMLHtmlElement(*newElement).insertedByParser();
// The loader is told about the document element only for the first element of
// a whole-document parse that has a frame.
1010 if (!m_parsingFragment && isFirstElement && document()->frame())
1011 document()->frame()->loader().dispatchDocumentElementAvailable();
// End-of-element handler. While the parser is paused the notification is
// queued; otherwise the current node finishes parsing its children, and
// script elements are prepared and either executed immediately or registered
// as the pending script.
// NOTE(review): the early-exit paths (script element removed by content
// policy, non-element or view-less parse, element not in document,
// non-script element) have bodies that are not shown here; confirm them.
1014 void XMLDocumentParser::endElementNs()
1019 if (m_parserPaused) {
1020 m_pendingCallbacks.append(adoptPtr(new PendingEndElementNSCallback()));
1024 // JavaScript can detach the parser. Make sure this is not released before
1025 // the end of this method.
1026 RefPtrWillBeRawPtr<XMLDocumentParser> protect(this);
1030 RefPtrWillBeRawPtr<ContainerNode> n = m_currentNode;
1031 if (m_currentNode->isElementNode())
1032 toElement(n.get())->finishParsingChildren();
// Script elements are removed from the tree when the content policy does not
// allow scripting content.
1034 if (!scriptingContentIsAllowed(parserContentPolicy()) && n->isElementNode() && toScriptLoaderIfPossible(toElement(n))) {
1036 n->remove(IGNORE_EXCEPTION);
1040 if (!n->isElementNode() || !m_hasView) {
1045 Element* element = toElement(n);
1047 // The element's parent may have already been removed from document.
1048 // Parsing continues in this case, but scripts aren't executed.
1049 if (!element->inDocument()) {
1054 ScriptLoader* scriptLoader = toScriptLoaderIfPossible(element);
1055 if (!scriptLoader) {
1060 // Don't load external scripts for standalone documents (for now).
1061 ASSERT(!m_pendingScript);
// m_requestingScript is set for the duration of script preparation and is
// reset at the end of this function; notifyFinished() checks it.
1062 m_requestingScript = true;
1064 if (scriptLoader->prepareScript(m_scriptStartPosition, ScriptLoader::AllowLegacyTypeInTypeAttribute)) {
1065 // FIXME: Script execution should be shared between
1066 // the libxml2 and Qt XMLDocumentParser implementations.
1068 if (scriptLoader->readyToBeParserExecuted()) {
1069 scriptLoader->executeScript(ScriptSourceCode(scriptLoader->scriptContent(), document()->url(), m_scriptStartPosition));
1070 } else if (scriptLoader->willBeParserExecuted()) {
1071 m_pendingScript = scriptLoader->resource();
1072 m_scriptElement = element;
1073 m_pendingScript->addClient(this);
1075 // m_pendingScript will be 0 if script was already loaded and
1076 // addClient() executed it.
1077 if (m_pendingScript)
1080 m_scriptElement = nullptr;
1083 // JavaScript may have detached the parser
1087 m_requestingScript = false;
// Character-data handler. While the parser is paused the data is queued;
// otherwise the bytes are appended to m_bufferedText, which exitText() later
// flushes into the leaf text node.
// NOTE(review): presumably enterText() is called when no leaf text node is
// open; confirm.
1091 void XMLDocumentParser::characters(const xmlChar* chars, int length)
1096 if (m_parserPaused) {
1097 m_pendingCallbacks.append(adoptPtr(new PendingCharactersCallback(chars, length)));
1101 if (!m_leafTextNode)
1103 m_bufferedText.append(chars, length);
// Formats a printf-style libxml error message into a fixed 1024-byte buffer,
// then either queues it (with the current line and column) while the parser
// is paused or reports it immediately at the current text position.
1106 void XMLDocumentParser::error(XMLErrors::ErrorType type, const char* message, va_list args)
1111 char formattedMessage[1024];
// Output is capped at one byte less than the buffer size.
1112 vsnprintf(formattedMessage, sizeof(formattedMessage) - 1, message, args);
1114 if (m_parserPaused) {
1115 m_pendingCallbacks.append(adoptPtr(new PendingErrorCallback(type, reinterpret_cast<const xmlChar*>(formattedMessage), lineNumber(), columnNumber())));
1119 handleError(type, formattedMessage, textPosition());
// Handles a processing instruction with the given target and data.
// When m_parserPaused is set, a PendingProcessingInstructionCallback is queued.
// The ProcessingInstruction node is created through the current node's
// document (creation failures are tracked via exceptionState), flagged as
// parser-created while it is appended to m_currentNode, and unflagged after.
// RuntimeEnabledFeatures::xsltEnabled() is consulted before the XSL check.
1122 void XMLDocumentParser::processingInstruction(const String& target, const String& data)
1127 if (m_parserPaused) {
1128 m_pendingCallbacks.append(adoptPtr(new PendingProcessingInstructionCallback(target, data)));
1134 // ### handle exceptions
1135 TrackExceptionState exceptionState;
1136 RefPtrWillBeRawPtr<ProcessingInstruction> pi = m_currentNode->document().createProcessingInstruction(target, data, exceptionState);
1137 if (exceptionState.hadException())
1140 pi->setCreatedByParser(true);
1142 m_currentNode->parserAppendChild(pi.get());
1144 pi->setCreatedByParser(false);
1149 if (!RuntimeEnabledFeatures::xsltEnabled())
// An XSL PI only counts as a transform if no element has been seen yet.
1152 m_sawXSLTransform = !m_sawFirstElement && pi->isXSL();
1153 if (m_sawXSLTransform && !document()->transformSourceDocument()) {
1154 // This behavior is very tricky. We call stopParsing() here because we
1155 // want to stop processing the document until we're ready to apply the
1156 // transform, but we actually still want to be fed decoded string pieces
1157 // to accumulate in m_originalSourceForTransform. So, we call
1158 // stopParsing() here and check isStopped() in element callbacks.
1159 // FIXME: This contradicts the contract of DocumentParser.
// Handles a CDATA section. When m_parserPaused is set, a
// PendingCDATABlockCallback carrying the text is queued. The CDATASection
// node is created in the current node's document and appended to
// m_currentNode.
1164 void XMLDocumentParser::cdataBlock(const String& text)
1169 if (m_parserPaused) {
1170 m_pendingCallbacks.append(adoptPtr(new PendingCDATABlockCallback(text)));
1176 m_currentNode->parserAppendChild(CDATASection::create(m_currentNode->document(), text));
// Handles an XML comment. When m_parserPaused is set, a
// PendingCommentCallback carrying the text is queued. The Comment node is
// created in the current node's document and appended to m_currentNode.
1179 void XMLDocumentParser::comment(const String& text)
1184 if (m_parserPaused) {
1185 m_pendingCallbacks.append(adoptPtr(new PendingCommentCallback(text)));
1191 m_currentNode->parserAppendChild(Comment::create(m_currentNode->document(), text));
// Interpretation of the integer `standalone` value handed to startDocument(),
// which casts its argument to this type. startDocument() also refers to the
// NoXMlDeclaration and StandaloneYes enumerators.
1194 enum StandaloneInfo {
1195 StandaloneUnspecified = -2,
// Records XML-declaration information on the document.
// `standalone` is interpreted as a StandaloneInfo value. For NoXMlDeclaration
// the document is marked as having no XML declaration. A non-null version, a
// specified standalone flag and a non-null encoding are copied to the
// document, which is then marked as having an XML declaration.
// NOTE(review): presumably the NoXMlDeclaration branch returns early - confirm.
1201 void XMLDocumentParser::startDocument(const String& version, const String& encoding, int standalone)
1203 StandaloneInfo standaloneInfo = static_cast<StandaloneInfo>(standalone);
1204 if (standaloneInfo == NoXMlDeclaration) {
1205 document()->setHasXMLDeclaration(false);
1209 if (!version.isNull())
1210 document()->setXMLVersion(version, ASSERT_NO_EXCEPTION);
1211 if (standalone != StandaloneUnspecified)
1212 document()->setXMLStandalone(standaloneInfo == StandaloneYes, ASSERT_NO_EXCEPTION);
1213 if (!encoding.isNull())
1214 document()->setXMLEncoding(encoding);
1215 document()->setHasXMLDeclaration(true);
// End-of-document notification. Invoked by endDocumentHandler() and by
// appendFragmentSource(), whose call site notes that it closes any open text
// nodes.
1218 void XMLDocumentParser::endDocument()
// Handles the DOCTYPE internal subset. When m_parserPaused is set, a
// PendingInternalSubsetCallback with the three strings is queued. A
// DocumentType built from name, externalID and systemID is appended to the
// document.
1223 void XMLDocumentParser::internalSubset(const String& name, const String& externalID, const String& systemID)
1228 if (m_parserPaused) {
1229 m_pendingCallbacks.append(adoptPtr(new PendingInternalSubsetCallback(name, externalID, systemID)));
1234 document()->parserAppendChild(DocumentType::create(document(), name, externalID, systemID));
// Recovers the XMLDocumentParser from a SAX callback closure: the closure is
// treated as the libxml parser context, and the parser is read from its
// `_private` field.
1237 static inline XMLDocumentParser* getParser(void* closure)
1239 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1240 return static_cast<XMLDocumentParser*>(ctxt->_private);
// SAX2 startElementNs callback: converts localName, prefix and uri to
// AtomicStrings and forwards everything to XMLDocumentParser::startElementNs().
1243 static void startElementNsHandler(void* closure, const xmlChar* localName, const xmlChar* prefix, const xmlChar* uri, int nbNamespaces, const xmlChar** namespaces, int nbAttributes, int nbDefaulted, const xmlChar** libxmlAttributes)
1245 getParser(closure)->startElementNs(toAtomicString(localName), toAtomicString(prefix), toAtomicString(uri), nbNamespaces, namespaces, nbAttributes, nbDefaulted, libxmlAttributes);
// SAX2 endElementNs callback: the name arguments are ignored and the call is
// forwarded to XMLDocumentParser::endElementNs().
1248 static void endElementNsHandler(void* closure, const xmlChar*, const xmlChar*, const xmlChar*)
1250 getParser(closure)->endElementNs();
// SAX characters callback: forwards the raw buffer and its length to
// XMLDocumentParser::characters().
1253 static void charactersHandler(void* closure, const xmlChar* chars, int length)
1255 getParser(closure)->characters(chars, length);
// SAX processingInstruction callback: converts target and data to String and
// forwards them to XMLDocumentParser::processingInstruction().
1258 static void processingInstructionHandler(void* closure, const xmlChar* target, const xmlChar* data)
1260 getParser(closure)->processingInstruction(toString(target), toString(data));
// SAX cdataBlock callback: converts the length-delimited text to a String and
// forwards it to XMLDocumentParser::cdataBlock().
1263 static void cdataBlockHandler(void* closure, const xmlChar* text, int length)
1265 getParser(closure)->cdataBlock(toString(text, length));
// SAX comment callback: converts the text to a String and forwards it to
// XMLDocumentParser::comment().
1268 static void commentHandler(void* closure, const xmlChar* text)
1270 getParser(closure)->comment(toString(text));
// Variadic SAX warning callback (printf-style; format is argument 2).
// Forwards the format string and argument list to XMLDocumentParser::error()
// with XMLErrors::ErrorTypeWarning.
1273 WTF_ATTRIBUTE_PRINTF(2, 3)
1274 static void warningHandler(void* closure, const char* message, ...)
1277 va_start(args, message);
1278 getParser(closure)->error(XMLErrors::ErrorTypeWarning, message, args);
// Variadic SAX fatalError callback (printf-style; format is argument 2).
// Forwards the format string and argument list to XMLDocumentParser::error()
// with XMLErrors::ErrorTypeFatal.
1282 WTF_ATTRIBUTE_PRINTF(2, 3)
1283 static void fatalErrorHandler(void* closure, const char* message, ...)
1286 va_start(args, message);
1287 getParser(closure)->error(XMLErrors::ErrorTypeFatal, message, args);
// Variadic SAX error callback (printf-style; format is argument 2).
// Forwards the format string and argument list to XMLDocumentParser::error()
// with XMLErrors::ErrorTypeNonFatal.
1291 WTF_ATTRIBUTE_PRINTF(2, 3)
1292 static void normalErrorHandler(void* closure, const char* message, ...)
1295 va_start(args, message);
1296 getParser(closure)->error(XMLErrors::ErrorTypeNonFatal, message, args);
1300 // Using a static entity and marking it XML_INTERNAL_PREDEFINED_ENTITY is a hack
1301 // to avoid malloc/free. Using a global variable like this could cause trouble
1302 // if libxml implementation details were to change
// This zero-initialized 9-byte buffer backs both `orig` and `content` of the
// shared entity; getXHTMLEntity() writes the UTF-8 form of the decoded entity
// into it.
1303 static xmlChar sharedXHTMLEntityResult[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
// Provides the single function-static xmlEntity used for XHTML named
// entities. The entity is typed XML_ENTITY_DECL, its orig and content both
// point at sharedXHTMLEntityResult, and its etype is
// XML_INTERNAL_PREDEFINED_ENTITY.
1305 static xmlEntityPtr sharedXHTMLEntity()
1307 static xmlEntity entity;
1309 entity.type = XML_ENTITY_DECL;
1310 entity.orig = sharedXHTMLEntityResult;
1311 entity.content = sharedXHTMLEntityResult;
1312 entity.etype = XML_INTERNAL_PREDEFINED_ENTITY;
// Converts `numberOfCodeUnits` UTF-16 code units starting at utf16Entity into
// UTF-8, writing into `target`, whose capacity is `targetSize` bytes.
// On success returns the number of UTF-8 bytes written (the distance the
// target pointer advanced).
// NOTE(review): the result for a failed conversion is presumably 0, since
// getXHTMLEntity() treats a zero length as failure - confirm.
1317 static size_t convertUTF16EntityToUTF8(const UChar* utf16Entity, size_t numberOfCodeUnits, char* target, size_t targetSize)
// Remember the start so the written length can be computed afterwards.
1319 const char* originalTarget = target;
1320 WTF::Unicode::ConversionResult conversionResult = WTF::Unicode::convertUTF16ToUTF8(&utf16Entity,
1321 utf16Entity + numberOfCodeUnits, &target, target + targetSize);
1322 if (conversionResult != WTF::Unicode::conversionOK)
1325 // Even though we must pass the length, libxml expects the entity string to be null terminated.
1326 ASSERT(target > originalTarget + 1);
1328 return target - originalTarget;
// Resolves `name` as a named entity for XHTML documents.
// decodeNamedEntityToUCharArray() yields up to 4 UTF-16 code units; these are
// converted to UTF-8 in sharedXHTMLEntityResult. The shared entity then gets
// the UTF-8 length and the requested name. A zero code-unit count or a zero
// UTF-8 length is checked for as a failure condition.
1331 static xmlEntityPtr getXHTMLEntity(const xmlChar* name)
1333 UChar utf16DecodedEntity[4];
1334 size_t numberOfCodeUnits = decodeNamedEntityToUCharArray(reinterpret_cast<const char*>(name), utf16DecodedEntity);
1335 if (!numberOfCodeUnits)
1338 ASSERT(numberOfCodeUnits <= 4);
1339 size_t entityLengthInUTF8 = convertUTF16EntityToUTF8(utf16DecodedEntity, numberOfCodeUnits,
1340 reinterpret_cast<char*>(sharedXHTMLEntityResult), WTF_ARRAY_LENGTH(sharedXHTMLEntityResult));
1341 if (!entityLengthInUTF8)
1344 xmlEntityPtr entity = sharedXHTMLEntity();
1345 entity->length = entityLengthInUTF8;
1346 entity->name = name;
// SAX getEntity callback. Lookup order: libxml's predefined entities, then
// the entities of the context's document (xmlGetDocEntity), and, when still
// unresolved and the parser reports an XHTML document, getXHTMLEntity().
// A predefined hit is tagged XML_INTERNAL_PREDEFINED_ENTITY; after the XHTML
// lookup the etype is set to XML_INTERNAL_GENERAL_ENTITY.
1350 static xmlEntityPtr getEntityHandler(void* closure, const xmlChar* name)
1352 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1353 xmlEntityPtr ent = xmlGetPredefinedEntity(name);
1355 ent->etype = XML_INTERNAL_PREDEFINED_ENTITY;
1359 ent = xmlGetDocEntity(ctxt->myDoc, name);
1360 if (!ent && getParser(closure)->isXHTMLDocument()) {
1361 ent = getXHTMLEntity(name);
1363 ent->etype = XML_INTERNAL_GENERAL_ENTITY;
// SAX startDocument callback. Switches the context's encoding based on
// whether the parser is currently parsing an 8-bit chunk, reports the
// context's version, encoding and standalone value to the parser, and then
// chains to libxml's default xmlSAX2StartDocument().
1369 static void startDocumentHandler(void* closure)
1371 xmlParserCtxt* ctxt = static_cast<xmlParserCtxt*>(closure);
1372 XMLDocumentParser* parser = getParser(closure);
1373 switchEncoding(ctxt, parser->isCurrentlyParsing8BitChunk());
1374 parser->startDocument(toString(ctxt->version), toString(ctxt->encoding), ctxt->standalone);
1375 xmlSAX2StartDocument(closure);
// SAX endDocument callback: notifies the parser first, then chains to
// libxml's default xmlSAX2EndDocument().
1378 static void endDocumentHandler(void* closure)
1380 getParser(closure)->endDocument();
1381 xmlSAX2EndDocument(closure);
// SAX internalSubset callback: reports the DOCTYPE name and identifiers to
// the parser as Strings, then chains to libxml's xmlSAX2InternalSubset() with
// the original arguments.
1384 static void internalSubsetHandler(void* closure, const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
1386 getParser(closure)->internalSubset(toString(name), toString(externalID), toString(systemID));
1387 xmlSAX2InternalSubset(closure, name, externalID, systemID);
// SAX externalSubset callback. Only the external (public) identifier is
// used: if it exactly matches one of the XHTML DTD identifiers listed below,
// the parser is flagged as handling an XHTML document.
1390 static void externalSubsetHandler(void* closure, const xmlChar*, const xmlChar* externalId, const xmlChar*)
1392 String extId = toString(externalId);
1393 if (extId == "-//W3C//DTD XHTML 1.0 Transitional//EN"
1394 || extId == "-//W3C//DTD XHTML 1.1//EN"
1395 || extId == "-//W3C//DTD XHTML 1.0 Strict//EN"
1396 || extId == "-//W3C//DTD XHTML 1.0 Frameset//EN"
1397 || extId == "-//W3C//DTD XHTML Basic 1.0//EN"
1398 || extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN"
1399 || extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
1400 || extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN"
1401 || extId == "-//WAPFORUM//DTD XHTML Mobile 1.1//EN"
1402 || extId == "-//WAPFORUM//DTD XHTML Mobile 1.2//EN") {
1403 // Controls if we replace entities or not.
1404 getParser(closure)->setIsXHTMLDocument(true);
// SAX ignorableWhitespace callback. All arguments are ignored; the handler
// exists only so that the SAX slot is non-null (see the bug links below).
1408 static void ignorableWhitespaceHandler(void*, const xmlChar*, int)
1410 // Nothing to do, but we need this to work around a crasher.
1411 // http://bugzilla.gnome.org/show_bug.cgi?id=172255
1412 // http://bugs.webkit.org/show_bug.cgi?id=5792
// Prepares a libxml parser context for this parser.
// A zeroed SAX handler table is wired to the static callbacks of this file
// (entityDecl uses libxml's xmlSAX2EntityDecl) and marked as SAX2 via
// XML_SAX2_MAGIC. DocumentParser::startParsing() is called and the
// XSL-transform / first-element flags are reset. m_context is created as a
// memory parser over `chunk` when parsing a fragment, or as a string parser,
// in which case `chunk` is asserted to carry no data.
1415 void XMLDocumentParser::initializeParserContext(const CString& chunk)
1418 memset(&sax, 0, sizeof(sax));
1420 sax.error = normalErrorHandler;
1421 sax.fatalError = fatalErrorHandler;
1422 sax.characters = charactersHandler;
1423 sax.processingInstruction = processingInstructionHandler;
1424 sax.cdataBlock = cdataBlockHandler;
1425 sax.comment = commentHandler;
1426 sax.warning = warningHandler;
1427 sax.startElementNs = startElementNsHandler;
1428 sax.endElementNs = endElementNsHandler;
1429 sax.getEntity = getEntityHandler;
1430 sax.startDocument = startDocumentHandler;
1431 sax.endDocument = endDocumentHandler;
1432 sax.internalSubset = internalSubsetHandler;
1433 sax.externalSubset = externalSubsetHandler;
1434 sax.ignorableWhitespace = ignorableWhitespaceHandler;
1435 sax.entityDecl = xmlSAX2EntityDecl;
1436 sax.initialized = XML_SAX2_MAGIC;
1437 DocumentParser::startParsing();
1440 m_sawXSLTransform = false;
1441 m_sawFirstElement = false;
// The scope object is constructed with the document's fetcher and stays alive
// while the context is created.
1443 XMLDocumentParserScope scope(document()->fetcher());
1444 if (m_parsingFragment) {
1445 m_context = XMLParserContext::createMemoryParser(&sax, this, chunk);
1447 ASSERT(!chunk.data());
1448 m_context = XMLParserContext::createStringParser(&sax, this);
// Finishes parsing. libxml is told the input is complete (finishParsing on
// the current context, under an XMLDocumentParserScope) and m_context is
// released. Afterwards one of two post-processing paths may run:
//  - XML viewer mode, when no error, CSS or XSL transform was seen and the
//    document has no style information: the document is turned into the
//    tree view by XMLTreeViewer.
//  - XSL transform seen: the accumulated original source is re-parsed into an
//    xmlDoc, installed as the document's TransformSource, and the document is
//    temporarily marked as not parsing so that stylesheets get applied.
1452 void XMLDocumentParser::doEnd()
1456 // Tell libxml we're done.
1458 XMLDocumentParserScope scope(document()->fetcher());
1459 finishParsing(context());
1462 m_context = nullptr;
1466 bool xmlViewerMode = !m_sawError && !m_sawCSS && !m_sawXSLTransform && hasNoStyleInformation(document());
1467 if (xmlViewerMode) {
1468 XMLTreeViewer xmlTreeViewer(document());
1469 xmlTreeViewer.transformDocumentToTreeView();
1470 } else if (m_sawXSLTransform) {
1471 xmlDocPtr doc = xmlDocPtrForString(document()->fetcher(), m_originalSourceForTransform.toString(), document()->url().string());
1472 document()->setTransformSource(adoptPtr(new TransformSource(doc)));
1473 // Make the document think it's done, so it will apply XSL stylesheets.
1474 document()->setParsing(false);
1475 document()->styleResolverChanged();
1477 // styleResolverChanged() call can detach the parser and null out its
1478 // document. In that case, we just bail out.
// Restore the parsing flag before stopping through the base class.
1482 document()->setParsing(true);
1483 DocumentParser::stopParsing();
// Parses `source` in a single chunk into a libxml document and returns it.
// Parsing runs under an XMLDocumentParserScope built from `fetcher` and
// errorFunc; xmlReadMemory() receives the XMLParserInput's data, size and
// encoding, `url` converted to Latin-1, and XSLT_PARSE_OPTIONS.
// An empty `source` is special-cased up front.
1487 xmlDocPtr xmlDocPtrForString(ResourceFetcher* fetcher, const String& source, const String& url)
1489 if (source.isEmpty())
1491 // Parse in a single chunk into an xmlDocPtr
1492 // FIXME: Hook up error handlers so that a failure to parse the main
1493 // document results in good error messages.
1494 XMLDocumentParserScope scope(fetcher, errorFunc, 0);
1495 XMLParserInput input(source);
1496 return xmlReadMemory(input.data(), input.size(), url.latin1().data(), input.encoding(), XSLT_PARSE_OPTIONS);
// Current line of the libxml input, wrapped as a one-based OrdinalNumber.
// Falls back to line 1 when there is no parser context.
1499 OrdinalNumber XMLDocumentParser::lineNumber() const
1501 return OrdinalNumber::fromOneBasedInt(context() ? context()->input->line : 1);
// Current column of the libxml input, wrapped as a one-based OrdinalNumber.
// Falls back to column 1 when there is no parser context.
1504 OrdinalNumber XMLDocumentParser::columnNumber() const
1506 return OrdinalNumber::fromOneBasedInt(context() ? context()->input->col : 1);
// Current line/column of the libxml input as a TextPosition built from
// one-based ordinals. TextPosition::minimumPosition() is the fallback value.
// NOTE(review): the fallback is presumably taken when context() is null -
// confirm.
1509 TextPosition XMLDocumentParser::textPosition() const
1511 xmlParserCtxtPtr context = this->context();
1513 return TextPosition::minimumPosition();
1514 return TextPosition(OrdinalNumber::fromOneBasedInt(context->input->line), OrdinalNumber::fromOneBasedInt(context->input->col));
// Stops parsing: first through the DocumentParser base class, then by asking
// libxml to stop the current context via xmlStopParser().
1517 void XMLDocumentParser::stopParsing()
1519 DocumentParser::stopParsing();
1521 xmlStopParser(context());
// Resumes a paused parser. Requires a non-detached parser that is currently
// paused (both asserted). Clears m_parserPaused, then works in three stages:
// replay the queued callbacks in queue order, feed the source text that
// accumulated in m_pendingSrc through append(), and finally check whether
// finish() was already requested with nothing left queued.
1524 void XMLDocumentParser::resumeParsing()
1526 ASSERT(!isDetached());
1527 ASSERT(m_parserPaused);
1529 m_parserPaused = false;
1531 // First, execute any pending callbacks
1532 while (!m_pendingCallbacks.isEmpty()) {
1533 OwnPtr<PendingCallback> callback = m_pendingCallbacks.takeFirst();
1534 callback->call(this);
1536 // A callback paused the parser
1541 // Then, write any pending data
// Copy the pending source and clear the member before appending.
1542 SegmentedString rest = m_pendingSrc;
1543 m_pendingSrc.clear();
1544 // There is normally only one string left, so toString() shouldn't copy.
1545 // In any case, the XML parser runs on the main thread and it's OK if
1546 // the passed string has more than one reference.
1547 append(rest.toString().impl());
1549 // Finally, if finish() has been called and write() didn't result
1550 // in any further callbacks being queued, call end()
1551 if (m_finishCalled && m_pendingCallbacks.isEmpty())
// Parses `chunk` as fragment content (asserts m_parsingFragment).
// The chunk is converted to UTF-8 and its length checked against INT_MAX,
// because libxml2 takes lengths as int. A parser context is initialized over
// the UTF-8 bytes, xmlParseContent() is run, and endDocument() is called to
// close open text nodes. The number of bytes libxml consumed is then compared
// with the chunk length as a sanity check.
// Returns true when the context is well formed or libxml recorded no error
// for it.
1555 bool XMLDocumentParser::appendFragmentSource(const String& chunk)
1558 ASSERT(m_parsingFragment);
1560 CString chunkAsUtf8 = chunk.utf8();
1562 // libxml2 takes an int for a length, and therefore can't handle XML chunks
1563 // larger than 2 GiB.
1564 if (chunkAsUtf8.length() > INT_MAX)
1567 TRACE_EVENT0("blink", "XMLDocumentParser::appendFragmentSource");
1568 initializeParserContext(chunkAsUtf8);
1569 xmlParseContent(context());
1570 endDocument(); // Close any open text nodes.
1572 // FIXME: If this code is actually needed, it should probably move to
1574 // XMLDocumentParserQt has a similar check (m_stream.error() ==
1575 // QXmlStreamReader::PrematureEndOfDocumentError) in doEnd(). Check if all
1576 // the chunk has been processed.
1577 long bytesProcessed = xmlByteConsumed(context());
1578 if (bytesProcessed == -1 || static_cast<unsigned long>(bytesProcessed) != chunkAsUtf8.length()) {
1579 // FIXME: I don't believe we can hit this case without also having seen
1580 // an error or a null byte. If we hit this ASSERT, we've found a test
1581 // case which demonstrates the need for this code.
1582 ASSERT(m_sawError || (bytesProcessed >= 0 && !chunkAsUtf8.data()[bytesProcessed]));
1586 // No error if the chunk is well formed or it is not but we have no error.
1587 return context()->wellFormed || !xmlCtxtGetLastError(context());
1590 // --------------------------------
// State handed to attributesStartElementNsHandler() through the parser
// context's `_private` pointer. `attributes` collects qualified attribute
// names mapped to their values; the handler also sets a gotAttributes flag
// on this state.
1592 struct AttributeParseState {
1593 HashMap<String, String> attributes;
// SAX2 startElementNs handler installed by parseAttributes().
// The element's local name is compared against "attrs". The
// AttributeParseState is taken from the context's `_private` pointer, marked
// as having received attributes, and filled with one entry per attribute:
// the key is "prefix:localName" when the prefix is non-empty and the bare
// local name otherwise, the value is the bytes between `value` and `end`.
// NOTE(review): presumably returns early for elements not named "attrs" -
// confirm.
1597 static void attributesStartElementNsHandler(void* closure, const xmlChar* xmlLocalName, const xmlChar* /*xmlPrefix*/,
1598 const xmlChar* /*xmlURI*/, int /*nbNamespaces*/, const xmlChar** /*namespaces*/,
1599 int nbAttributes, int /*nbDefaulted*/, const xmlChar** libxmlAttributes)
1601 if (strcmp(reinterpret_cast<const char*>(xmlLocalName), "attrs") != 0)
1604 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1605 AttributeParseState* state = static_cast<AttributeParseState*>(ctxt->_private);
1607 state->gotAttributes = true;
// libxml hands attributes over as a flat pointer array; it is viewed here as
// an array of xmlSAX2Attributes records.
1609 xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
1610 for (int i = 0; i < nbAttributes; ++i) {
1611 String attrLocalName = toString(attributes[i].localname);
1612 int valueLength = (int) (attributes[i].end - attributes[i].value);
1613 String attrValue = toString(attributes[i].value, valueLength);
1614 String attrPrefix = toString(attributes[i].prefix);
1615 String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + ":" + attrLocalName;
1617 state->attributes.set(attrQName, attrValue);
// Parses `string` as an XML attribute list and returns the attributes as a
// qualified-name -> value map.
// The text is wrapped in a synthetic document, <?xml version="1.0"?><attrs
// ... />, and run through a string parser whose only SAX callback is
// attributesStartElementNsHandler. `attrsOK` receives whether that handler
// recorded attributes (state.gotAttributes).
1621 HashMap<String, String> parseAttributes(const String& string, bool& attrsOK)
1623 AttributeParseState state;
1624 state.gotAttributes = false;
1627 memset(&sax, 0, sizeof(sax));
1628 sax.startElementNs = attributesStartElementNsHandler;
1629 sax.initialized = XML_SAX2_MAGIC;
1630 RefPtr<XMLParserContext> parser = XMLParserContext::createStringParser(&sax, &state);
1631 String parseString = "<?xml version=\"1.0\"?><attrs " + string + " />";
1632 parseChunk(parser->context(), parseString);
1633 finishParsing(parser->context());
1634 attrsOK = state.gotAttributes;
1635 return state.attributes;
1638 } // namespace blink