2 * Copyright (C) 2000 Peter Kelly (pmk@post.com)
3 * Copyright (C) 2005, 2006, 2008, 2014 Apple Inc. All rights reserved.
4 * Copyright (C) 2006 Alexey Proskuryakov (ap@webkit.org)
5 * Copyright (C) 2007 Samuel Weinig (sam@webkit.org)
6 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
7 * Copyright (C) 2008 Holger Hans Peter Freyther
8 * Copyright (C) 2008 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Library General Public
12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Library General Public License for more details.
20 * You should have received a copy of the GNU Library General Public License
21 * along with this library; see the file COPYING.LIB. If not, write to
22 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23 * Boston, MA 02110-1301, USA.
27 #include "core/xml/parser/XMLDocumentParser.h"
29 #include "bindings/core/v8/ExceptionState.h"
30 #include "bindings/core/v8/ExceptionStatePlaceholder.h"
31 #include "bindings/core/v8/ScriptController.h"
32 #include "bindings/core/v8/ScriptSourceCode.h"
33 #include "bindings/core/v8/V8Document.h"
34 #include "core/FetchInitiatorTypeNames.h"
35 #include "core/HTMLNames.h"
36 #include "core/XMLNSNames.h"
37 #include "core/dom/CDATASection.h"
38 #include "core/dom/Comment.h"
39 #include "core/dom/Document.h"
40 #include "core/dom/DocumentFragment.h"
41 #include "core/dom/DocumentType.h"
42 #include "core/dom/ProcessingInstruction.h"
43 #include "core/dom/ScriptLoader.h"
44 #include "core/dom/TransformSource.h"
45 #include "core/fetch/ResourceFetcher.h"
46 #include "core/fetch/ScriptResource.h"
47 #include "core/frame/LocalFrame.h"
48 #include "core/frame/UseCounter.h"
49 #include "core/html/HTMLHtmlElement.h"
50 #include "core/html/HTMLTemplateElement.h"
51 #include "core/html/parser/HTMLEntityParser.h"
52 #include "core/html/parser/TextResourceDecoder.h"
53 #include "core/loader/FrameLoader.h"
54 #include "core/loader/ImageLoader.h"
55 #include "core/svg/graphics/SVGImage.h"
56 #include "core/xml/parser/SharedBufferReader.h"
57 #include "core/xml/parser/XMLDocumentParserScope.h"
58 #include "core/xml/parser/XMLParserInput.h"
59 #include "platform/RuntimeEnabledFeatures.h"
60 #include "platform/SharedBuffer.h"
61 #include "platform/TraceEvent.h"
62 #include "platform/network/ResourceError.h"
63 #include "platform/network/ResourceRequest.h"
64 #include "platform/network/ResourceResponse.h"
65 #include "platform/weborigin/SecurityOrigin.h"
66 #include "wtf/StringExtras.h"
67 #include "wtf/TemporaryChange.h"
68 #include "wtf/Threading.h"
69 #include "wtf/Vector.h"
70 #include "wtf/unicode/UTF8.h"
71 #include <libxml/catalog.h>
72 #include <libxml/parser.h>
73 #include <libxml/parserInternals.h>
74 #include <libxslt/xslt.h>
78 using namespace HTMLNames;
// Upper bound on the size of m_currentNodeStack: pushCurrentNode() reports a
// fatal "Excessive node nesting." error once the stack grows past this value.
80 // FIXME: HTMLConstructionSite has a limit of 512, should these match?
81 static const unsigned maxXMLTreeDepth = 5000;
// Converts `length` bytes of libxml2 text (xmlChar, treated as UTF-8) into a
// String via String::fromUTF8().
83 static inline String toString(const xmlChar* string, size_t length)
85 return String::fromUTF8(reinterpret_cast<const char*>(string), length);
// Length-less overload: hands only the pointer to String::fromUTF8().
// NOTE(review): presumably requires NUL-terminated input — confirm.
88 static inline String toString(const xmlChar* string)
90 return String::fromUTF8(reinterpret_cast<const char*>(string));
// Converts `length` bytes of libxml2 text (xmlChar, treated as UTF-8) into an
// AtomicString via AtomicString::fromUTF8().
93 static inline AtomicString toAtomicString(const xmlChar* string, size_t length)
95 return AtomicString::fromUTF8(reinterpret_cast<const char*>(string), length);
// Length-less overload: hands only the pointer to AtomicString::fromUTF8().
// NOTE(review): presumably requires NUL-terminated input — confirm.
98 static inline AtomicString toAtomicString(const xmlChar* string)
100 return AtomicString::fromUTF8(reinterpret_cast<const char*>(string));
// Predicate over `document`. The visible checks look at: elements seen in known
// namespaces or an XSLT source document, presence of a frame and page, whether
// the frame has a parent, and whether the document lives inside an SVGImage.
// NOTE(review): only one return statement is visible in this view (false for a
// document that is not in a top frame); confirm the other outcomes against the
// full file.
103 static inline bool hasNoStyleInformation(Document* document)
105 if (document->sawElementsInKnownNamespaces() || document->transformSourceDocument())
108 if (!document->frame() || !document->frame()->page())
111 if (document->frame()->tree().parent())
112 return false; // This document is not in a top frame
114 if (SVGImage::isInSVGImage(document))
// Queued start-element event. Holds its own copies of the libxml2 namespace and
// attribute arrays, which call() passes back to XMLDocumentParser::startElementNs().
// NOTE(review): some member initialisers and declarations are not visible in
// this view.
120 class PendingStartElementNSCallback FINAL : public XMLDocumentParser::PendingCallback {
122 PendingStartElementNSCallback(const AtomicString& localName, const AtomicString& prefix, const AtomicString& uri,
123 int namespaceCount, const xmlChar** namespaces, int attributeCount, int defaultedCount, const xmlChar** attributes)
124 : m_localName(localName)
127 , m_namespaceCount(namespaceCount)
128 , m_attributeCount(attributeCount)
129 , m_defaultedCount(defaultedCount)
// Each namespace occupies two slots in the array; every slot is duplicated.
131 m_namespaces = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * namespaceCount * 2));
132 for (int i = 0; i < namespaceCount * 2 ; ++i)
133 m_namespaces[i] = xmlStrdup(namespaces[i]);
134 m_attributes = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * attributeCount * 5));
135 for (int i = 0; i < attributeCount; ++i) {
136 // Each attribute has 5 elements in the array:
137 // name, prefix, uri, value and an end pointer.
138 for (int j = 0; j < 3; ++j)
139 m_attributes[i * 5 + j] = xmlStrdup(attributes[i * 5 + j]);
// The value's length is the distance between its start and end pointers, so it
// is copied with xmlStrndup and the end pointer is rebuilt relative to the copy.
140 int length = attributes[i * 5 + 4] - attributes[i * 5 + 3];
141 m_attributes[i * 5 + 3] = xmlStrndup(attributes[i * 5 + 3], length);
142 m_attributes[i * 5 + 4] = m_attributes[i * 5 + 3] + length;
// Releases every duplicated string and both pointer arrays.
146 virtual ~PendingStartElementNSCallback()
148 for (int i = 0; i < m_namespaceCount * 2; ++i)
149 xmlFree(m_namespaces[i]);
150 xmlFree(m_namespaces);
// Only slots 0-3 are freed: slot 4 points into slot 3's allocation (see the
// constructor), so it must not be freed separately.
151 for (int i = 0; i < m_attributeCount; ++i)
152 for (int j = 0; j < 4; ++j)
153 xmlFree(m_attributes[i * 5 + j]);
154 xmlFree(m_attributes);
// Replays the stored event on `parser`.
157 virtual void call(XMLDocumentParser* parser) OVERRIDE
159 parser->startElementNs(m_localName, m_prefix, m_uri,
160 m_namespaceCount, const_cast<const xmlChar**>(m_namespaces),
161 m_attributeCount, m_defaultedCount, const_cast<const xmlChar**>(m_attributes));
165 AtomicString m_localName;
166 AtomicString m_prefix;
168 int m_namespaceCount;
169 xmlChar** m_namespaces; // 2 * m_namespaceCount owned strings.
170 int m_attributeCount;
171 int m_defaultedCount;
172 xmlChar** m_attributes; // 5 slots per attribute; see the constructor.
// Queued end-element event; call() invokes XMLDocumentParser::endElementNs().
175 class PendingEndElementNSCallback FINAL : public XMLDocumentParser::PendingCallback {
177 virtual void call(XMLDocumentParser* parser) OVERRIDE
179 parser->endElementNs();
// Queued character-data event. The constructor copies `length` bytes of `chars`
// with xmlStrndup; call() forwards the copy and its length to
// XMLDocumentParser::characters().
// NOTE(review): the destructor body and the m_length initialiser are not
// visible in this view.
183 class PendingCharactersCallback FINAL : public XMLDocumentParser::PendingCallback {
185 PendingCharactersCallback(const xmlChar* chars, int length)
186 : m_chars(xmlStrndup(chars, length))
191 virtual ~PendingCharactersCallback()
196 virtual void call(XMLDocumentParser* parser) OVERRIDE
198 parser->characters(m_chars, m_length);
// Queued processing-instruction event; call() forwards the stored target and
// data to XMLDocumentParser::processingInstruction().
// NOTE(review): the member initialisers are not visible in this view.
206 class PendingProcessingInstructionCallback FINAL : public XMLDocumentParser::PendingCallback {
208 PendingProcessingInstructionCallback(const String& target, const String& data)
214 virtual void call(XMLDocumentParser* parser) OVERRIDE
216 parser->processingInstruction(m_target, m_data);
// Queued CDATA event; stores the text and call() forwards it to
// XMLDocumentParser::cdataBlock().
224 class PendingCDATABlockCallback FINAL : public XMLDocumentParser::PendingCallback {
226 explicit PendingCDATABlockCallback(const String& text) : m_text(text) { }
228 virtual void call(XMLDocumentParser* parser) OVERRIDE
230 parser->cdataBlock(m_text);
// Queued comment event; stores the text and call() forwards it to
// XMLDocumentParser::comment().
237 class PendingCommentCallback FINAL : public XMLDocumentParser::PendingCallback {
239 explicit PendingCommentCallback(const String& text) : m_text(text) { }
241 virtual void call(XMLDocumentParser* parser) OVERRIDE
243 parser->comment(m_text);
// Queued internal-subset (doctype) event; call() forwards the stored name,
// external ID and system ID to XMLDocumentParser::internalSubset().
// NOTE(review): the m_name initialiser is not visible in this view.
250 class PendingInternalSubsetCallback FINAL : public XMLDocumentParser::PendingCallback {
252 PendingInternalSubsetCallback(const String& name, const String& externalID, const String& systemID)
254 , m_externalID(externalID)
255 , m_systemID(systemID)
259 virtual void call(XMLDocumentParser* parser) OVERRIDE
261 parser->internalSubset(m_name, m_externalID, m_systemID);
// Queued error event. The message is duplicated with xmlStrdup and the source
// position is stored as line/column ordinals; call() hands them to
// XMLDocumentParser::handleError() as a TextPosition.
// NOTE(review): the destructor body and the m_type initialiser are not visible
// in this view.
270 class PendingErrorCallback FINAL : public XMLDocumentParser::PendingCallback {
272 PendingErrorCallback(XMLErrors::ErrorType type, const xmlChar* message, OrdinalNumber lineNumber, OrdinalNumber columnNumber)
274 , m_message(xmlStrdup(message))
275 , m_lineNumber(lineNumber)
276 , m_columnNumber(columnNumber)
280 virtual ~PendingErrorCallback()
285 virtual void call(XMLDocumentParser* parser) OVERRIDE
287 parser->handleError(m_type, reinterpret_cast<char*>(m_message), TextPosition(m_lineNumber, m_columnNumber));
291 XMLErrors::ErrorType m_type;
293 OrdinalNumber m_lineNumber;
294 OrdinalNumber m_columnNumber;
// Saves the current node on m_currentNodeStack and reports a fatal error once
// the stack grows beyond maxXMLTreeDepth.
// NOTE(review): the statements that make `n` the current node are not visible
// in this view — confirm against the full file.
297 void XMLDocumentParser::pushCurrentNode(ContainerNode* n)
300 ASSERT(m_currentNode);
305 m_currentNodeStack.append(m_currentNode);
307 if (m_currentNodeStack.size() > maxXMLTreeDepth)
308 handleError(XMLErrors::ErrorTypeFatal, "Excessive node nesting.", textPosition());
// Restores the previous current node from the top of m_currentNodeStack. The
// node being left is deref'd first unless it is the document itself.
311 void XMLDocumentParser::popCurrentNode()
315 ASSERT(m_currentNodeStack.size());
317 if (m_currentNode != document())
318 m_currentNode->deref();
320 m_currentNode = m_currentNodeStack.last();
321 m_currentNodeStack.removeLast();
// Derefs the current node (unless it is null or the document), resets
// m_currentNode and m_leafTextNode, then derefs and clears whatever is still on
// m_currentNodeStack.
324 void XMLDocumentParser::clearCurrentNodeStack()
327 if (m_currentNode && m_currentNode != document())
328 m_currentNode->deref();
330 m_currentNode = nullptr;
331 m_leafTextNode = nullptr;
333 if (m_currentNodeStack.size()) { // Aborted parsing.
// Walk from the top of the stack down to index 1; index 0 is handled separately
// because it is skipped when it is null or the document.
335 for (size_t i = m_currentNodeStack.size() - 1; i != 0; --i)
336 m_currentNodeStack[i]->deref();
337 if (m_currentNodeStack[0] && m_currentNodeStack[0] != document())
338 m_currentNodeStack[0]->deref();
340 m_currentNodeStack.clear();
// Not expected to be called on this parser: the body asserts unreachability.
344 void XMLDocumentParser::insert(const SegmentedString&)
346 ASSERT_NOT_REACHED();
// Accepts a chunk of input. While an XSL transform has been seen, or before the
// first element has been seen, the chunk is also accumulated in
// m_originalSourceForTransform. While the parser is paused the chunk is queued
// in m_pendingSrc; otherwise it is passed to doWrite().
// NOTE(review): the early-return lines after the isStopped()/paused checks are
// not visible in this view.
349 void XMLDocumentParser::append(PassRefPtr<StringImpl> inputSource)
351 SegmentedString source(inputSource);
352 if (m_sawXSLTransform || !m_sawFirstElement)
353 m_originalSourceForTransform.append(source);
355 if (isStopped() || m_sawXSLTransform)
358 if (m_parserPaused) {
359 m_pendingSrc.append(source);
363 // JavaScript can detach the parser. Make sure this is not released
364 // before the end of this method.
365 RefPtrWillBeRawPtr<XMLDocumentParser> protect(this);
367 doWrite(source.toString());
// Records the error in m_xmlErrors, then takes extra action for non-warning and
// for fatal errors.
// NOTE(review): the statements guarded by the two conditions below are not
// visible in this view.
370 void XMLDocumentParser::handleError(XMLErrors::ErrorType type, const char* formattedMessage, TextPosition position)
372 m_xmlErrors.handleError(type, formattedMessage, position);
373 if (type != XMLErrors::ErrorTypeWarning)
375 if (type == XMLErrors::ErrorTypeFatal)
// Starts a new text run: creates an empty Text node in the current node's
// document, remembers it as m_leafTextNode and appends it to the current node.
// Expects no buffered text and no open leaf text node.
379 void XMLDocumentParser::enterText()
381 ASSERT(m_bufferedText.size() == 0);
382 ASSERT(!m_leafTextNode);
383 m_leafTextNode = Text::create(m_currentNode->document(), "");
384 m_currentNode->parserAppendChild(m_leafTextNode.get());
// Ends the current text run: converts the buffered UTF-8 bytes to a String,
// appends them to m_leafTextNode, then clears the buffer and the leaf pointer.
// NOTE(review): the guard lines at the top of this function are not visible in
// this view.
387 void XMLDocumentParser::exitText()
395 m_leafTextNode->appendData(toString(m_bufferedText.data(), m_bufferedText.size()));
396 m_bufferedText.clear();
397 m_leafTextNode = nullptr;
// Clears the current-node stack before delegating to the base-class detach().
400 void XMLDocumentParser::detach()
402 clearCurrentNodeStack();
403 ScriptableDocumentParser::detach();
// Final stage of a full-document parse (asserts this is not a fragment parse).
// The visible steps, in order: insertErrorMessageBlock(), a style-resolver
// change notification, prepareToStopParsing(), setting the document's ready
// state to Interactive, clearCurrentNodeStack(), and finishedParsing().
// NOTE(review): the doEnd() call and the conditions and early returns that
// guard several of these steps are not visible in this view.
406 void XMLDocumentParser::end()
408 TRACE_EVENT0("blink", "XMLDocumentParser::end");
409 // XMLDocumentParserLibxml2 will do bad things to the document if doEnd() is called.
410 // I don't believe XMLDocumentParserQt needs doEnd called in the fragment case.
411 ASSERT(!m_parsingFragment);
415 // doEnd() call above can detach the parser and null out its document.
416 // In that case, we just bail out.
420 // doEnd() could process a script tag, thus pausing parsing.
425 insertErrorMessageBlock();
428 document()->styleResolverChanged();
432 prepareToStopParsing();
433 document()->setReadyState(Document::Interactive);
434 clearCurrentNodeStack();
435 document()->finishedParsing();
// Marks the parser as finished (m_finishCalled) while holding a protecting
// reference to itself.
// NOTE(review): the flush call and the lines after the flag is set are not
// visible in this view.
438 void XMLDocumentParser::finish()
440 // FIXME: We should ASSERT(!m_parserStopped) here, since it does not
441 // make sense to call any methods on DocumentParser once it's been stopped.
442 // However, FrameLoader::stop calls DocumentParser::finish unconditionally.
444 // flush may end up executing arbitrary script, and possibly detach the parser.
445 RefPtrWillBeRawPtr<XMLDocumentParser> protect(this);
451 m_finishCalled = true;
// Delegates to m_xmlErrors to insert the error message block.
456 void XMLDocumentParser::insertErrorMessageBlock()
458 m_xmlErrors.insertErrorMessageBlock();
// Resource-client callback for the pending script. Captures the script source
// and its error/cancel state, unregisters this parser as a client, takes the
// script element out of m_scriptElement, then either dispatches an error event
// or (when the load was not cancelled) executes the script and dispatches the
// load event.
// NOTE(review): the line clearing m_pendingScript, the `if (errorOccurred)`
// line and the statement guarded by the final condition are not visible in
// this view.
461 void XMLDocumentParser::notifyFinished(Resource* unusedResource)
463 ASSERT_UNUSED(unusedResource, unusedResource == m_pendingScript);
// Read everything needed from the resource before unregistering from it.
465 ScriptSourceCode sourceCode(m_pendingScript.get());
466 bool errorOccurred = m_pendingScript->errorOccurred();
467 bool wasCanceled = m_pendingScript->wasCanceled();
469 m_pendingScript->removeClient(this);
472 RefPtrWillBeRawPtr<Element> e = m_scriptElement;
473 m_scriptElement = nullptr;
475 ScriptLoader* scriptLoader = toScriptLoaderIfPossible(e.get());
476 ASSERT(scriptLoader);
478 // JavaScript can detach this parser, make sure it's kept alive even if
480 RefPtrWillBeRawPtr<XMLDocumentParser> protect(this);
483 scriptLoader->dispatchErrorEvent();
484 } else if (!wasCanceled) {
485 scriptLoader->executeScript(sourceCode);
486 scriptLoader->dispatchLoadEvent();
489 m_scriptElement = nullptr;
491 if (!isDetached() && !m_requestingScript)
// True while a pending script resource is set (m_pendingScript is non-null).
495 bool XMLDocumentParser::isWaitingForScripts() const
497 return m_pendingScript;
// Sets the paused flag; has no effect while parsing a fragment.
500 void XMLDocumentParser::pauseParsing()
502 if (!m_parsingFragment)
503 m_parserPaused = true;
// Parses `chunk` into `fragment`. For a <script> or <style> context element the
// chunk is appended verbatim as a single text node; otherwise a fragment parser
// is created and fed through appendFragmentSource().
// NOTE(review): the return statements, the early-exit lines at the top and the
// detach call are not visible in this view.
506 bool XMLDocumentParser::parseDocumentFragment(const String& chunk, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy)
511 // FIXME: We need to implement the HTML5 XML Fragment parsing algorithm:
512 // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-xhtml-syntax.html#xml-fragment-parsing-algorithm
513 // For now we have a hack for script/style innerHTML support:
514 if (contextElement && (contextElement->hasLocalName(scriptTag.localName()) || contextElement->hasLocalName(styleTag.localName()))) {
515 fragment->parserAppendChild(fragment->document().createTextNode(chunk));
519 RefPtrWillBeRawPtr<XMLDocumentParser> parser = XMLDocumentParser::create(fragment, contextElement, parserContentPolicy);
520 bool wellFormed = parser->appendFragmentSource(chunk);
522 // Do not call finish(). Current finish() and doEnd() implementations touch
523 // the main Document/loader and can cause crashes in the fragment case.
525 // Allows ~DocumentParser to assert it was detached before destruction.
527 // appendFragmentSource()'s wellFormed is more permissive than wellFormed().
// The address of globalDescriptor is used as a sentinel I/O context by
// openFunc(), readFunc() and closeFunc(); its value is never read there.
531 static int globalDescriptor = 0;
// Thread recorded by initializeLibXMLIfNecessary(); matchFunc() and openFunc()
// compare currentThread() against it.
532 static ThreadIdentifier libxmlLoaderThread = 0;
// libxml2 I/O "match" callback. The URI argument is ignored; the callbacks
// apply only when a current fetcher is set and the call is on the recorded
// loader thread.
534 static int matchFunc(const char*)
536 // Only match loads initiated due to uses of libxml2 from within
537 // XMLDocumentParser to avoid interfering with client applications that also
538 // use libxml2. http://bugs.webkit.org/show_bug.cgi?id=17353
539 return XMLDocumentParserScope::currentFetcher && currentThread() == libxmlLoaderThread;
// Applies parsed attributes to `element`. When the content policy disallows
// scripting content, scripting attributes are stripped from the vector first.
542 static inline void setAttributes(Element* element, Vector<Attribute>& attributeVector, ParserContentPolicy parserContentPolicy)
544 if (!scriptingContentIsAllowed(parserContentPolicy))
545 element->stripScriptingAttributes(attributeVector);
546 element->parserSetAttributes(attributeVector);
// Forces the libxml2 input encoding for the next chunk: ISO-8859-1 on one
// path, native-endian UTF-16 on the other.
// NOTE(review): the `is8Bit` branch line itself is not visible in this view;
// presumably the 8859-1 call is the 8-bit path — confirm.
549 static void switchEncoding(xmlParserCtxtPtr ctxt, bool is8Bit)
551 // Hack around libxml2's lack of encoding override support by manually
552 // resetting the encoding to UTF-16 before every chunk. Otherwise libxml
553 // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks and
554 // switch encodings, causing the parse to fail.
556 xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
// Detect host byte order from the first byte in memory of U+FEFF: 0xFF means
// the low-order byte is stored first, so UTF-16LE is selected.
560 const UChar BOM = 0xFEFF;
561 const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
562 xmlSwitchEncoding(ctxt, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
// Feeds `chunk` to libxml2 after switching the context's encoding to match the
// string's storage width. The size passed to xmlParseChunk is in bytes
// (character size times length) and the final argument 0 means "not the last
// chunk".
// NOTE(review): the if/else lines selecting between the two calls are not
// visible in this view.
565 static void parseChunk(xmlParserCtxtPtr ctxt, const String& chunk)
567 bool is8Bit = chunk.is8Bit();
568 switchEncoding(ctxt, is8Bit);
570 xmlParseChunk(ctxt, reinterpret_cast<const char*>(chunk.characters8()), sizeof(LChar) * chunk.length(), 0);
572 xmlParseChunk(ctxt, reinterpret_cast<const char*>(chunk.characters16()), sizeof(UChar) * chunk.length(), 0);
// Signals end of input to libxml2: no data, zero length, terminate flag set.
575 static void finishParsing(xmlParserCtxtPtr ctxt)
577 xmlParseChunk(ctxt, 0, 0, 1);
// Poisons direct use of xmlParseChunk from here on: any later use expands to
// tokens that do not compile, steering callers to parseChunk()/finishParsing().
580 #define xmlParseChunk #error "Use parseChunk instead to select the correct encoding."
// Recognises the URLs libxml asks for when looking up its default catalog: the
// fixed "file:///etc/xml/catalog" path, or any "file:///" URL ending in
// "/etc/catalog".
// NOTE(review): the return statements are not visible in this view.
582 static bool isLibxmlDefaultCatalogFile(const String& urlString)
584 // On non-Windows platforms libxml asks for this URL, the
585 // "XML_XML_DEFAULT_CATALOG", on initialization.
586 if (urlString == "file:///etc/xml/catalog")
589 // On Windows, libxml computes a URL relative to where its DLL resides.
590 if (urlString.startsWith("file:///", false) && urlString.endsWith("/etc/catalog", false))
// Policy check for external loads requested by libxml. Special-cases the libxml
// default catalog and the well-known W3C XHTML/SVG DTD URL prefixes, then
// requires that the current fetcher's document origin may request `url`,
// printing an access-denied message when it may not.
// NOTE(review): the return statements are not visible in this view.
595 static bool shouldAllowExternalLoad(const KURL& url)
597 String urlString = url.string();
599 // This isn't really necessary now that initializeLibXMLIfNecessary
600 // disables catalog support in libxml, but keeping it for defense in depth.
// NOTE(review): `url` (a KURL) is passed where the callee takes a String, even
// though `urlString` was computed above; presumably an implicit conversion
// applies — confirm, or pass `urlString`.
601 if (isLibxmlDefaultCatalogFile(url))
604 // The most common DTD. There isn't much point in hammering www.w3c.org by
605 // requesting this URL for every XHTML document.
606 if (urlString.startsWith("http://www.w3.org/TR/xhtml", false))
609 // Similarly, there isn't much point in requesting the SVG DTD.
610 if (urlString.startsWith("http://www.w3.org/Graphics/SVG", false))
613 // The libxml doesn't give us a lot of context for deciding whether to allow
614 // this request. In the worst case, this load could be for an external
615 // entity and the resulting document could simply read the retrieved
616 // content. If we had more context, we could potentially allow the parser to
617 // load a DTD. As things stand, we take the conservative route and allow
618 // same-origin requests only.
619 if (!XMLDocumentParserScope::currentFetcher->document()->securityOrigin()->canRequest(url)) {
620 XMLDocumentParserScope::currentFetcher->printAccessDeniedMessage(url);
// libxml2 I/O "open" callback. Resolves `uri` to a KURL, checks it with
// shouldAllowExternalLoad(), fetches it synchronously through the current
// fetcher, re-checks the final (post-redirect) URL, and returns a heap-allocated
// SharedBufferReader over the data. Disallowed loads return the address of
// globalDescriptor as a sentinel context.
// NOTE(review): the declaration of finalURL and the braces delimiting the
// lifetime of `scope` are not visible in this view.
627 static void* openFunc(const char* uri)
629 ASSERT(XMLDocumentParserScope::currentFetcher);
630 ASSERT(currentThread() == libxmlLoaderThread);
632 KURL url(KURL(), uri);
634 if (!shouldAllowExternalLoad(url))
635 return &globalDescriptor;
638 RefPtr<SharedBuffer> data;
// Capture the fetcher before constructing a scope with a null fetcher.
// NOTE(review): presumably the scope clears currentFetcher while the
// synchronous fetch runs — confirm in XMLDocumentParserScope.
641 ResourceFetcher* fetcher = XMLDocumentParserScope::currentFetcher;
642 XMLDocumentParserScope scope(0);
643 // FIXME: We should restore the original global error handler as well.
645 if (fetcher->frame()) {
646 FetchRequest request(ResourceRequest(url), FetchInitiatorTypeNames::xml, ResourceFetcher::defaultResourceOptions());
647 ResourcePtr<Resource> resource = fetcher->fetchSynchronously(request);
648 if (resource && !resource->errorOccurred()) {
649 data = resource->resourceBuffer();
650 finalURL = resource->response().url();
655 // We have to check the URL again after the load to catch redirects.
656 // See <https://bugs.webkit.org/show_bug.cgi?id=21963>.
657 if (!shouldAllowExternalLoad(finalURL))
658 return &globalDescriptor;
660 UseCounter::count(XMLDocumentParserScope::currentFetcher->document(), UseCounter::XMLExternalResourceLoad);
662 return new SharedBufferReader(data);
// libxml2 I/O "read" callback. For a real context, reads up to `len` bytes into
// `buffer` through the SharedBufferReader created by openFunc().
// NOTE(review): the return for the sentinel-context case is not visible in
// this view.
665 static int readFunc(void* context, char* buffer, int len)
667 // Do 0-byte reads in case of a null descriptor
668 if (context == &globalDescriptor)
671 SharedBufferReader* data = static_cast<SharedBufferReader*>(context);
672 return data->readData(buffer, len);
// libxml2 I/O "write" callback; all arguments are ignored.
// NOTE(review): the return statement is not visible in this view.
675 static int writeFunc(void*, const char*, int)
677 // Always just do 0-byte writes
// libxml2 I/O "close" callback. The sentinel context (&globalDescriptor) is
// skipped; any other context is treated as the SharedBufferReader from openFunc().
// NOTE(review): the statement that releases the reader and the return value
// are not visible in this view.
681 static int closeFunc(void* context)
683 if (context != &globalDescriptor) {
684 SharedBufferReader* data = static_cast<SharedBufferReader*>(context);
// printf-style error sink with no visible body; all arguments are ignored.
690 static void errorFunc(void*, const char*, ...)
692 // FIXME: It would be nice to display error messages somewhere.
// Process-wide libxml2 setup: disables catalog loading, registers this file's
// input and output I/O callbacks, and records the calling thread as the libxml
// loader thread.
// NOTE(review): the lines that test and set `didInit` are not visible in this
// view; presumably they make this a one-time initialisation — confirm.
695 static void initializeLibXMLIfNecessary()
697 static bool didInit = false;
701 // We don't want libxml to try and load catalogs.
702 // FIXME: It's not nice to set global settings in libxml, embedders of Blink
703 // could be trying to use libxml themselves.
704 xmlCatalogSetDefaults(XML_CATA_ALLOW_NONE);
706 xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
707 xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
708 libxmlLoaderThread = currentThread();
// Creates a libxml2 push parser with the given SAX handlers and no initial
// data, stores `userData` in the context's _private field, enables entity
// replacement, and wraps the context in a ref-counted XMLParserContext.
// NOTE(review): the result of xmlCreatePushParserCtxt is dereferenced with no
// null check visible in this view.
713 PassRefPtr<XMLParserContext> XMLParserContext::createStringParser(xmlSAXHandlerPtr handlers, void* userData)
715 initializeLibXMLIfNecessary();
716 xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(handlers, 0, 0, 0, 0);
717 parser->_private = userData;
718 parser->replaceEntities = true;
719 return adoptRef(new XMLParserContext(parser));
// Creates a libxml2 memory parser over `chunk`, copies the SAX handler table
// into it, and pre-sets the parser state to XML_PARSER_CONTENT before wrapping
// the context in a ref-counted XMLParserContext. `userData` is stored in
// _private.
// NOTE(review): some lines, including any null check on `parser`, are not
// visible in this view.
722 // Chunk should be encoded in UTF-8
723 PassRefPtr<XMLParserContext> XMLParserContext::createMemoryParser(xmlSAXHandlerPtr handlers, void* userData, const CString& chunk)
725 initializeLibXMLIfNecessary();
727 // appendFragmentSource() checks that the length doesn't overflow an int.
728 xmlParserCtxtPtr parser = xmlCreateMemoryParserCtxt(chunk.data(), chunk.length());
733 // Copy the sax handler
734 memcpy(parser->sax, handlers, sizeof(xmlSAXHandler));
736 // Set parser options.
737 // XML_PARSE_NODICT: default dictionary option.
738 // XML_PARSE_NOENT: force entities substitutions.
739 xmlCtxtUseOptions(parser, XML_PARSE_NODICT | XML_PARSE_NOENT);
741 // Internal initialization
743 parser->instate = XML_PARSER_CONTENT; // We are parsing a CONTENT
// Intern the well-known names in the context's dictionary; the trailing
// integer is the byte length of each string.
745 parser->str_xml = xmlDictLookup(parser->dict, BAD_CAST "xml", 3);
746 parser->str_xmlns = xmlDictLookup(parser->dict, BAD_CAST "xmlns", 5);
747 parser->str_xml_ns = xmlDictLookup(parser->dict, XML_XML_NAMESPACE, 36);
748 parser->_private = userData;
750 return adoptRef(new XMLParserContext(parser));
753 // --------------------------------
// Only XML version "1.0" is reported as supported.
755 bool XMLDocumentParser::supportsXMLVersion(const String& version)
757 return version == "1.0";
// Constructs a parser for a whole document. Parsing starts with the document
// itself as the current node, m_hasView reflects whether a FrameView was
// supplied, and m_parsingFragment is false.
// NOTE(review): a few member initialisers are not visible in this view.
760 XMLDocumentParser::XMLDocumentParser(Document& document, FrameView* frameView)
761 : ScriptableDocumentParser(document)
762 , m_hasView(frameView)
764 , m_currentNode(&document)
765 , m_isCurrentlyParsing8BitChunk(false)
768 , m_sawXSLTransform(false)
769 , m_sawFirstElement(false)
770 , m_isXHTMLDocument(false)
771 , m_parserPaused(false)
772 , m_requestingScript(false)
773 , m_finishCalled(false)
774 , m_xmlErrors(&document)
776 , m_scriptStartPosition(TextPosition::belowRangePosition())
777 , m_parsingFragment(false)
779 // This is XML being used as a document resource.
780 if (frameView && document.isXMLDocument())
781 UseCounter::count(document, UseCounter::XMLDocument);
// Constructs a parser for a document fragment. Parsing starts with `fragment`
// as the current node and m_parsingFragment set. The in-scope namespaces are
// seeded from the xmlns declarations found on `parentElement` and its ancestors.
// NOTE(review): a few member initialisers and the loop-exit/early-return lines
// are not visible in this view.
784 XMLDocumentParser::XMLDocumentParser(DocumentFragment* fragment, Element* parentElement, ParserContentPolicy parserContentPolicy)
785 : ScriptableDocumentParser(fragment->document(), parserContentPolicy)
788 , m_currentNode(fragment)
789 , m_isCurrentlyParsing8BitChunk(false)
792 , m_sawXSLTransform(false)
793 , m_sawFirstElement(false)
794 , m_isXHTMLDocument(false)
795 , m_parserPaused(false)
796 , m_requestingScript(false)
797 , m_finishCalled(false)
798 , m_xmlErrors(&fragment->document())
800 , m_scriptStartPosition(TextPosition::belowRangePosition())
801 , m_parsingFragment(true)
807 // Add namespaces based on the parent node
// Collect the context element and its ancestors, innermost first.
808 WillBeHeapVector<RawPtrWillBeMember<Element> > elemStack;
809 while (parentElement) {
810 elemStack.append(parentElement);
812 Element* grandParentElement = parentElement->parentElement();
813 if (!grandParentElement)
815 parentElement = grandParentElement;
818 if (elemStack.isEmpty())
// Process from the last collected (outermost) element to the first, so a
// declaration on an inner element overwrites one made by an outer element.
821 for (; !elemStack.isEmpty(); elemStack.removeLast()) {
822 Element* element = elemStack.last();
823 AttributeCollection attributes = element->attributes();
824 AttributeCollection::iterator end = attributes.end();
825 for (AttributeCollection::iterator it = attributes.begin(); it != end; ++it) {
826 if (it->localName() == xmlnsAtom)
827 m_defaultNamespaceURI = it->value();
828 else if (it->prefix() == xmlnsAtom)
829 m_prefixToNamespaceMap.set(it->localName(), it->value());
833 // If the parent element is not in document tree, there may be no xmlns attribute; just default to the parent's namespace.
// NOTE(review): `parentElement` was advanced by the loop above, so here it
// presumably refers to the outermost ancestor reached — confirm.
834 if (m_defaultNamespaceURI.isNull() && !parentElement->inDocument())
835 m_defaultNamespaceURI = parentElement->namespaceURI();
// Frees the document attached to the libxml2 context, if any, and then the
// context itself.
838 XMLParserContext::~XMLParserContext()
840 if (m_context->myDoc)
841 xmlFreeDoc(m_context->myDoc);
842 xmlFreeParserCtxt(m_context);
// Asserts that the node stack and current node were already cleared, and
// unregisters this parser as a client of the pending script.
// NOTE(review): preprocessor guards and the condition guarding removeClient()
// are not visible in this view.
845 XMLDocumentParser::~XMLDocumentParser()
848 // The XMLDocumentParser will always be detached before being destroyed.
849 ASSERT(m_currentNodeStack.isEmpty());
850 ASSERT(!m_currentNode);
853 // FIXME: m_pendingScript handling should be moved into XMLDocumentParser.cpp!
855 m_pendingScript->removeClient(this);
// Visits the node-holding members (current node, node stack, leaf text node,
// error reporter, script element) and then the base class.
// NOTE(review): preprocessor guards around some lines are not visible in this
// view.
858 void XMLDocumentParser::trace(Visitor* visitor)
860 visitor->trace(m_currentNode);
862 visitor->trace(m_currentNodeStack);
864 visitor->trace(m_leafTextNode);
865 visitor->trace(m_xmlErrors);
866 visitor->trace(m_scriptElement);
867 ScriptableDocumentParser::trace(visitor);
// Pushes `parseString` through libxml2. A non-empty string is parsed via
// parseChunk() with the document's fetcher installed as the current scope and
// m_isCurrentlyParsing8BitChunk temporarily set to the string's width. A
// decoding error recorded on the document is then reported as a fatal
// "Encoding error" at libxml2's current input line and column.
// NOTE(review): the guard before initializeParserContext() and the early
// returns after parseChunk() are not visible in this view.
870 void XMLDocumentParser::doWrite(const String& parseString)
872 TRACE_EVENT0("blink", "XMLDocumentParser::doWrite");
873 ASSERT(!isDetached());
875 initializeParserContext();
877 // Protect the libxml context from deletion during a callback
878 RefPtr<XMLParserContext> context = m_context;
880 // libXML throws an error if you try to switch the encoding for an empty
882 if (parseString.length()) {
883 // JavaScript may cause the parser to detach during parseChunk
884 // keep this alive until this function is done.
885 RefPtrWillBeRawPtr<XMLDocumentParser> protect(this);
887 XMLDocumentParserScope scope(document()->fetcher());
888 TemporaryChange<bool> encodingScope(m_isCurrentlyParsing8BitChunk, parseString.is8Bit());
889 parseChunk(context->context(), parseString);
891 // JavaScript (which may be run under the parseChunk callstack) may
892 // cause the parser to be stopped or detached.
897 // FIXME: Why is this here? And why is it after we process the passed
899 if (document()->sawDecodingError()) {
900 // If the decoder saw an error, report it as fatal (stops parsing)
901 TextPosition position(OrdinalNumber::fromOneBasedInt(context->context()->input->line), OrdinalNumber::fromOneBasedInt(context->context()->input->col));
902 handleError(XMLErrors::ErrorTypeFatal, "Encoding error", position);
// Typed view over one entry of libxml2's flat namespace array, used by
// handleNamespaceAttributes() via reinterpret_cast.
// NOTE(review): only `prefix` is visible here; a `uri` member is read by
// handleNamespaceAttributes(), so it is presumably declared on a line not shown.
906 struct xmlSAX2Namespace {
907 const xmlChar* prefix;
// Converts libxml2 namespace declarations into xmlns attributes appended to
// `prefixedAttributes`. An entry without a prefix becomes "xmlns"; an entry
// with a prefix becomes WTF::xmlnsWithColon followed by that prefix. Each name
// is validated through Element::parseAttributeName() in the XMLNS namespace.
// NOTE(review): the statement taken when parseAttributeName() fails is not
// visible in this view.
911 static inline void handleNamespaceAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlNamespaces, int nbNamespaces, ExceptionState& exceptionState)
913 xmlSAX2Namespace* namespaces = reinterpret_cast<xmlSAX2Namespace*>(libxmlNamespaces);
914 for (int i = 0; i < nbNamespaces; ++i) {
915 AtomicString namespaceQName = xmlnsAtom;
916 AtomicString namespaceURI = toAtomicString(namespaces[i].uri);
917 if (namespaces[i].prefix)
918 namespaceQName = WTF::xmlnsWithColon + namespaces[i].prefix;
920 QualifiedName parsedName = anyName;
921 if (!Element::parseAttributeName(parsedName, XMLNSNames::xmlnsNamespaceURI, namespaceQName, exceptionState))
924 prefixedAttributes.append(Attribute(parsedName, namespaceURI));
// Typed view over one entry of libxml2's flat attribute array, used by
// handleElementAttributes() via reinterpret_cast.
// NOTE(review): `uri` and `end` members are read by handleElementAttributes()
// but their declarations are not visible in this view.
928 struct xmlSAX2Attributes {
929 const xmlChar* localname;
930 const xmlChar* prefix;
932 const xmlChar* value;
// Converts libxml2 attributes into Attribute objects appended to
// `prefixedAttributes`. The value's length is the distance between its `value`
// and `end` pointers. An attribute without a prefix gets a null namespace URI
// and its local name as the qualified name; otherwise the qualified name is
// "prefix:localname".
// NOTE(review): the statement taken when parseAttributeName() fails is not
// visible in this view.
936 static inline void handleElementAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlAttributes, int nbAttributes, ExceptionState& exceptionState)
938 xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
939 for (int i = 0; i < nbAttributes; ++i) {
940 int valueLength = static_cast<int>(attributes[i].end - attributes[i].value);
941 AtomicString attrValue = toAtomicString(attributes[i].value, valueLength);
942 String attrPrefix = toString(attributes[i].prefix);
943 AtomicString attrURI = attrPrefix.isEmpty() ? AtomicString() : toAtomicString(attributes[i].uri);
944 AtomicString attrQName = attrPrefix.isEmpty() ? toAtomicString(attributes[i].localname) : attrPrefix + ":" + toString(attributes[i].localname);
946 QualifiedName parsedName = anyName;
947 if (!Element::parseAttributeName(parsedName, attrURI, attrQName, exceptionState))
950 prefixedAttributes.append(Attribute(parsedName, attrValue));
// Handles a start-element event. While paused, the event is queued as a
// PendingStartElementNSCallback. Otherwise an element is created for the
// (possibly adjusted) qualified name, namespace and regular attributes are
// applied to it, it is appended to the current node and pushed as the new
// current node (for an HTML <template>, the template's content is pushed
// instead).
// NOTE(review): several early returns, error-handling statements and guards
// are not visible in this view.
954 void XMLDocumentParser::startElementNs(const AtomicString& localName, const AtomicString& prefix, const AtomicString& uri, int nbNamespaces,
955 const xmlChar** libxmlNamespaces, int nbAttributes, int nbDefaulted, const xmlChar** libxmlAttributes)
960 if (m_parserPaused) {
961 m_pendingCallbacks.append(adoptPtr(new PendingStartElementNSCallback(localName, prefix, uri, nbNamespaces, libxmlNamespaces,
962 nbAttributes, nbDefaulted, libxmlAttributes)));
// In fragment parsing a null namespace URI is resolved from the namespaces
// collected from the context element: by prefix when one is given, otherwise
// the default namespace.
968 AtomicString adjustedURI = uri;
969 if (m_parsingFragment && adjustedURI.isNull()) {
970 if (!prefix.isNull())
971 adjustedURI = m_prefixToNamespaceMap.get(prefix);
973 adjustedURI = m_defaultNamespaceURI;
976 bool isFirstElement = !m_sawFirstElement;
977 m_sawFirstElement = true;
979 QualifiedName qName(prefix, localName, adjustedURI);
980 RefPtrWillBeRawPtr<Element> newElement = m_currentNode->document().createElement(qName, true);
986 Vector<Attribute> prefixedAttributes;
987 TrackExceptionState exceptionState;
988 handleNamespaceAttributes(prefixedAttributes, libxmlNamespaces, nbNamespaces, exceptionState);
// On failure the attributes collected so far are still applied to the element.
989 if (exceptionState.hadException()) {
990 setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy());
995 handleElementAttributes(prefixedAttributes, libxmlAttributes, nbAttributes, exceptionState);
996 setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy());
997 if (exceptionState.hadException()) {
1002 newElement->beginParsingChildren();
1004 ScriptLoader* scriptLoader = toScriptLoaderIfPossible(newElement.get());
1006 m_scriptStartPosition = textPosition();
1008 m_currentNode->parserAppendChild(newElement.get());
1010 // Event handlers may synchronously trigger removal of the
1011 // document and cancellation of this parser.
1017 if (isHTMLTemplateElement(*newElement))
1018 pushCurrentNode(toHTMLTemplateElement(*newElement).content());
1020 pushCurrentNode(newElement.get());
1022 if (isHTMLHtmlElement(*newElement))
1023 toHTMLHtmlElement(*newElement).insertedByParser();
// Notify the frame loader when the first element of a full document is seen.
1025 if (!m_parsingFragment && isFirstElement && document()->frame())
1026 document()->frame()->loader().dispatchDocumentElementAvailable();
// Handles an end-element event. While paused, the event is queued as a
// PendingEndElementNSCallback. Otherwise the current node finishes parsing its
// children. A script element is removed when scripting content is not allowed.
// For a script element that is in the document and parsed with a view, the
// script is prepared and then either executed immediately or registered as the
// pending script.
// NOTE(review): the popCurrentNode()/return statements on most exit paths, and
// the statements guarded by some conditions, are not visible in this view.
1029 void XMLDocumentParser::endElementNs()
1034 if (m_parserPaused) {
1035 m_pendingCallbacks.append(adoptPtr(new PendingEndElementNSCallback()));
1039 // JavaScript can detach the parser. Make sure this is not released before
1040 // the end of this method.
1041 RefPtrWillBeRawPtr<XMLDocumentParser> protect(this);
1045 RefPtrWillBeRawPtr<ContainerNode> n = m_currentNode;
1046 if (m_currentNode->isElementNode())
1047 toElement(n.get())->finishParsingChildren();
1049 if (!scriptingContentIsAllowed(parserContentPolicy()) && n->isElementNode() && toScriptLoaderIfPossible(toElement(n))) {
1051 n->remove(IGNORE_EXCEPTION);
1055 if (!n->isElementNode() || !m_hasView) {
1060 Element* element = toElement(n);
1062 // The element's parent may have already been removed from document.
1063 // Parsing continues in this case, but scripts aren't executed.
1064 if (!element->inDocument()) {
1069 ScriptLoader* scriptLoader = toScriptLoaderIfPossible(element);
1070 if (!scriptLoader) {
1075 // Don't load external scripts for standalone documents (for now).
1076 ASSERT(!m_pendingScript);
// m_requestingScript is set around script preparation; notifyFinished()
// consults it before acting.
1077 m_requestingScript = true;
1079 if (scriptLoader->prepareScript(m_scriptStartPosition, ScriptLoader::AllowLegacyTypeInTypeAttribute)) {
1080 // FIXME: Script execution should be shared between
1081 // the libxml2 and Qt XMLDocumentParser implementations.
1083 if (scriptLoader->readyToBeParserExecuted()) {
1084 scriptLoader->executeScript(ScriptSourceCode(scriptLoader->scriptContent(), document()->url(), m_scriptStartPosition));
1085 } else if (scriptLoader->willBeParserExecuted()) {
1086 m_pendingScript = scriptLoader->resource();
1087 m_scriptElement = element;
1088 m_pendingScript->addClient(this);
1090 // m_pendingScript will be 0 if script was already loaded and
1091 // addClient() executed it.
1092 if (m_pendingScript)
1095 m_scriptElement = nullptr;
1098 // JavaScript may have detached the parser
1102 m_requestingScript = false;
// SAX character-data callback: appends `length` characters from `chars` to
// m_bufferedText, or queues the call as a PendingCharactersCallback while the
// parser is paused.
// NOTE(review): lines are elided here; the statement guarded by the
// m_leafTextNode check is not visible in this excerpt.
1106 void XMLDocumentParser::characters(const xmlChar* chars, int length)
1111 if (m_parserPaused) {
1112 m_pendingCallbacks.append(adoptPtr(new PendingCharactersCallback(chars, length)));
1116 if (!m_leafTextNode)
1118 m_bufferedText.append(chars, length);
// Formats a printf-style libxml2 diagnostic and reports it.
// `type` is the error severity, `message`/`args` the format string and its
// arguments. While the parser is paused the formatted text is queued together
// with the current line and column; the visible non-paused path hands it to
// handleError() with the current text position.
// NOTE(review): the lines between the two paths are elided; presumably the
// paused branch returns early — confirm.
1121 void XMLDocumentParser::error(XMLErrors::ErrorType type, const char* message, va_list args)
// Fixed-size stack buffer; vsnprintf is given one byte less than its size.
1126 char formattedMessage[1024];
1127 vsnprintf(formattedMessage, sizeof(formattedMessage) - 1, message, args);
1129 if (m_parserPaused) {
1130 m_pendingCallbacks.append(adoptPtr(new PendingErrorCallback(type, reinterpret_cast<const xmlChar*>(formattedMessage), lineNumber(), columnNumber())));
1134 handleError(type, formattedMessage, textPosition());
// SAX processing-instruction callback. Creates a ProcessingInstruction node in
// the current node's document, appends it to m_currentNode, and records in
// m_sawXSLTransform whether an XSL PI was seen before the first element.
// NOTE(review): several lines of this body are missing from the excerpt (the
// embedded numbering has gaps), including whatever follows the paused,
// exception and xsltEnabled checks and the body of the final `if`.
1137 void XMLDocumentParser::processingInstruction(const String& target, const String& data)
// While paused, the call is queued on m_pendingCallbacks.
1142 if (m_parserPaused) {
1143 m_pendingCallbacks.append(adoptPtr(new PendingProcessingInstructionCallback(target, data)));
1149 // ### handle exceptions
1150 TrackExceptionState exceptionState;
1151 RefPtrWillBeRawPtr<ProcessingInstruction> pi = m_currentNode->document().createProcessingInstruction(target, data, exceptionState);
1152 if (exceptionState.hadException())
// The created-by-parser flag is set before the append and cleared after it.
1155 pi->setCreatedByParser(true);
1157 m_currentNode->parserAppendChild(pi.get());
1159 pi->setCreatedByParser(false);
1164 if (!RuntimeEnabledFeatures::xsltEnabled())
// An XSL PI only counts as a transform if no element has been seen yet.
1167 m_sawXSLTransform = !m_sawFirstElement && pi->isXSL();
1168 if (m_sawXSLTransform && !document()->transformSourceDocument()) {
1169 // This behavior is very tricky. We call stopParsing() here because we
1170 // want to stop processing the document until we're ready to apply the
1171 // transform, but we actually still want to be fed decoded string pieces
1172 // to accumulate in m_originalSourceForTransform. So, we call
1173 // stopParsing() here and check isStopped() in element callbacks.
1174 // FIXME: This contradicts the contract of DocumentParser.
// SAX CDATA callback: appends a CDATASection holding `text` to m_currentNode,
// or queues the call as a PendingCDATABlockCallback while the parser is paused.
// NOTE(review): lines between the visible statements are elided.
1179 void XMLDocumentParser::cdataBlock(const String& text)
1184 if (m_parserPaused) {
1185 m_pendingCallbacks.append(adoptPtr(new PendingCDATABlockCallback(text)));
1191 m_currentNode->parserAppendChild(CDATASection::create(m_currentNode->document(), text));
// SAX comment callback: appends a Comment node holding `text` to
// m_currentNode, or queues the call as a PendingCommentCallback while the
// parser is paused.
// NOTE(review): lines between the visible statements are elided.
1194 void XMLDocumentParser::comment(const String& text)
1199 if (m_parserPaused) {
1200 m_pendingCallbacks.append(adoptPtr(new PendingCommentCallback(text)));
1206 m_currentNode->parserAppendChild(Comment::create(m_currentNode->document(), text));
// Interpretation of the integer `standalone` value that startDocument()
// receives. Only the first enumerator is visible in this excerpt; the
// remaining ones (startDocument() refers to NoXMlDeclaration and
// StandaloneYes) are on elided lines.
1209 enum StandaloneInfo {
1210 StandaloneUnspecified = -2,
// Copies XML declaration information onto the document.
// `version` and `encoding` are applied only when non-null; `standalone` is
// interpreted as a StandaloneInfo value and applied unless it is
// StandaloneUnspecified.
// NOTE(review): lines are elided after setHasXMLDeclaration(false);
// presumably the no-declaration case returns there — confirm.
1216 void XMLDocumentParser::startDocument(const String& version, const String& encoding, int standalone)
1218 StandaloneInfo standaloneInfo = static_cast<StandaloneInfo>(standalone);
1219 if (standaloneInfo == NoXMlDeclaration) {
1220 document()->setHasXMLDeclaration(false);
1224 if (!version.isNull())
1225 document()->setXMLVersion(version, ASSERT_NO_EXCEPTION);
1226 if (standalone != StandaloneUnspecified)
1227 document()->setXMLStandalone(standaloneInfo == StandaloneYes, ASSERT_NO_EXCEPTION);
1228 if (!encoding.isNull())
1229 document()->setXMLEncoding(encoding);
1230 document()->setHasXMLDeclaration(true);
// End-of-document hook, invoked from endDocumentHandler() and from
// appendFragmentSource() (which uses it to close any open text nodes).
// NOTE(review): the body of this function is not visible in this excerpt.
1233 void XMLDocumentParser::endDocument()
// SAX internal-subset (DOCTYPE) callback: appends a DocumentType node built
// from `name`, `externalID` and `systemID` to the document, or queues the call
// as a PendingInternalSubsetCallback while the parser is paused.
// NOTE(review): lines between the visible statements are elided.
1238 void XMLDocumentParser::internalSubset(const String& name, const String& externalID, const String& systemID)
1243 if (m_parserPaused) {
1244 m_pendingCallbacks.append(adoptPtr(new PendingInternalSubsetCallback(name, externalID, systemID)));
1249 document()->parserAppendChild(DocumentType::create(document(), name, externalID, systemID));
// Recovers the XMLDocumentParser from a SAX callback's `closure` argument:
// the closure is treated as the libxml2 parser context and the parser is read
// from that context's `_private` field.
1252 static inline XMLDocumentParser* getParser(void* closure)
1254 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1255 return static_cast<XMLDocumentParser*>(ctxt->_private);
// libxml2 startElementNs trampoline: converts the local name, prefix and URI
// to atomic strings and forwards everything to
// XMLDocumentParser::startElementNs().
1258 static void startElementNsHandler(void* closure, const xmlChar* localName, const xmlChar* prefix, const xmlChar* uri, int nbNamespaces, const xmlChar** namespaces, int nbAttributes, int nbDefaulted, const xmlChar** libxmlAttributes)
1260 getParser(closure)->startElementNs(toAtomicString(localName), toAtomicString(prefix), toAtomicString(uri), nbNamespaces, namespaces, nbAttributes, nbDefaulted, libxmlAttributes);
// libxml2 endElementNs trampoline: the element name arguments are ignored and
// the call is forwarded to XMLDocumentParser::endElementNs().
1263 static void endElementNsHandler(void* closure, const xmlChar*, const xmlChar*, const xmlChar*)
1265 getParser(closure)->endElementNs();
// libxml2 characters trampoline: forwards the raw buffer and its length to
// XMLDocumentParser::characters().
1268 static void charactersHandler(void* closure, const xmlChar* chars, int length)
1270 getParser(closure)->characters(chars, length);
// libxml2 processingInstruction trampoline: converts target and data to String
// and forwards them to XMLDocumentParser::processingInstruction().
1273 static void processingInstructionHandler(void* closure, const xmlChar* target, const xmlChar* data)
1275 getParser(closure)->processingInstruction(toString(target), toString(data));
// libxml2 cdataBlock trampoline: converts the length-delimited text to a
// String and forwards it to XMLDocumentParser::cdataBlock().
1278 static void cdataBlockHandler(void* closure, const xmlChar* text, int length)
1280 getParser(closure)->cdataBlock(toString(text, length));
// libxml2 comment trampoline: converts the text to a String and forwards it to
// XMLDocumentParser::comment().
1283 static void commentHandler(void* closure, const xmlChar* text)
1285 getParser(closure)->comment(toString(text));
// libxml2 warning callback (installed as sax.warning): forwards the
// printf-style message to XMLDocumentParser::error() as ErrorTypeWarning.
// NOTE(review): the va_list declaration and the matching va_end are on lines
// not visible in this excerpt.
1288 WTF_ATTRIBUTE_PRINTF(2, 3)
1289 static void warningHandler(void* closure, const char* message, ...)
1292 va_start(args, message);
1293 getParser(closure)->error(XMLErrors::ErrorTypeWarning, message, args);
// libxml2 fatal-error callback (installed as sax.fatalError): forwards the
// printf-style message to XMLDocumentParser::error() as ErrorTypeFatal.
// NOTE(review): the va_list declaration and the matching va_end are on lines
// not visible in this excerpt.
1297 WTF_ATTRIBUTE_PRINTF(2, 3)
1298 static void fatalErrorHandler(void* closure, const char* message, ...)
1301 va_start(args, message);
1302 getParser(closure)->error(XMLErrors::ErrorTypeFatal, message, args);
// libxml2 error callback (installed as sax.error): forwards the printf-style
// message to XMLDocumentParser::error() as ErrorTypeNonFatal.
// NOTE(review): the va_list declaration and the matching va_end are on lines
// not visible in this excerpt.
1306 WTF_ATTRIBUTE_PRINTF(2, 3)
1307 static void normalErrorHandler(void* closure, const char* message, ...)
1310 va_start(args, message);
1311 getParser(closure)->error(XMLErrors::ErrorTypeNonFatal, message, args);
1315 // Using a static entity and marking it XML_INTERNAL_PREDEFINED_ENTITY is a hack
1316 // to avoid malloc/free. Using a global variable like this could cause trouble
1317 // if libxml implementation details were to change.
// This 9-byte buffer backs both `orig` and `content` of the shared entity and
// is filled by getXHTMLEntity() through convertUTF16EntityToUTF8().
1318 static xmlChar sharedXHTMLEntityResult[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
// Accessor for the single function-local static xmlEntity used for XHTML
// named entities. The visible statements set it up as an XML_ENTITY_DECL whose
// `orig` and `content` point at sharedXHTMLEntityResult.
// NOTE(review): the guard around the initialization and the return statement
// are on elided lines; presumably initialization happens once and &entity is
// returned — confirm.
1320 static xmlEntityPtr sharedXHTMLEntity()
1322 static xmlEntity entity;
1324 entity.type = XML_ENTITY_DECL;
1325 entity.orig = sharedXHTMLEntityResult;
1326 entity.content = sharedXHTMLEntityResult;
1327 entity.etype = XML_INTERNAL_PREDEFINED_ENTITY;
// Converts `numberOfCodeUnits` UTF-16 code units from `utf16Entity` into UTF-8
// in `target`, which has room for `targetSize` bytes.
// Returns the distance the `target` pointer advanced from its start.
// NOTE(review): the statement taken when the conversion fails and the line
// between the ASSERT and the return are elided; presumably failure returns 0
// (getXHTMLEntity() tests the result with `!`) and the elided line writes the
// terminator mentioned in the comment — confirm.
1332 static size_t convertUTF16EntityToUTF8(const UChar* utf16Entity, size_t numberOfCodeUnits, char* target, size_t targetSize)
// Remember the start so the amount written can be computed afterwards.
1334 const char* originalTarget = target;
1335 WTF::Unicode::ConversionResult conversionResult = WTF::Unicode::convertUTF16ToUTF8(&utf16Entity,
1336 utf16Entity + numberOfCodeUnits, &target, target + targetSize);
1337 if (conversionResult != WTF::Unicode::conversionOK)
1340 // Even though we must pass the length, libxml expects the entity string to be null terminated.
1341 ASSERT(target > originalTarget + 1);
1343 return target - originalTarget;
// Resolves an XHTML named entity. `name` is decoded to at most four UTF-16
// code units, converted to UTF-8 into sharedXHTMLEntityResult, and the shared
// static entity is updated with the resulting length and with `name`.
// NOTE(review): the statements taken when decoding or conversion yields zero,
// and the final return, are on elided lines; presumably a null entity is
// returned on failure and `entity` on success — confirm.
1346 static xmlEntityPtr getXHTMLEntity(const xmlChar* name)
1348 UChar utf16DecodedEntity[4];
1349 size_t numberOfCodeUnits = decodeNamedEntityToUCharArray(reinterpret_cast<const char*>(name), utf16DecodedEntity);
1350 if (!numberOfCodeUnits)
1353 ASSERT(numberOfCodeUnits <= 4);
1354 size_t entityLengthInUTF8 = convertUTF16EntityToUTF8(utf16DecodedEntity, numberOfCodeUnits,
1355 reinterpret_cast<char*>(sharedXHTMLEntityResult), WTF_ARRAY_LENGTH(sharedXHTMLEntityResult));
1356 if (!entityLengthInUTF8)
// The entity keeps the caller's `name` pointer; it is not copied here.
1359 xmlEntityPtr entity = sharedXHTMLEntity();
1360 entity->length = entityLengthInUTF8;
1361 entity->name = name;
// libxml2 getEntity callback. Looks `name` up as a predefined XML entity, then
// as an entity of the document being parsed, and finally, for XHTML documents
// only, as an XHTML named entity via getXHTMLEntity().
// NOTE(review): the guards around the two `etype` assignments and the return
// statements are on elided lines.
1365 static xmlEntityPtr getEntityHandler(void* closure, const xmlChar* name)
1367 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1368 xmlEntityPtr ent = xmlGetPredefinedEntity(name);
1370 ent->etype = XML_INTERNAL_PREDEFINED_ENTITY;
1374 ent = xmlGetDocEntity(ctxt->myDoc, name);
1375 if (!ent && getParser(closure)->isXHTMLDocument()) {
1376 ent = getXHTMLEntity(name);
1378 ent->etype = XML_INTERNAL_GENERAL_ENTITY;
// libxml2 startDocument callback: switches the context's encoding according
// to whether the parser is currently handling an 8-bit chunk, passes the
// context's version, encoding and standalone values to
// XMLDocumentParser::startDocument(), and then runs libxml2's own
// xmlSAX2StartDocument().
1384 static void startDocumentHandler(void* closure)
1386 xmlParserCtxt* ctxt = static_cast<xmlParserCtxt*>(closure);
1387 XMLDocumentParser* parser = getParser(closure);
1388 switchEncoding(ctxt, parser->isCurrentlyParsing8BitChunk());
1389 parser->startDocument(toString(ctxt->version), toString(ctxt->encoding), ctxt->standalone);
1390 xmlSAX2StartDocument(closure);
// libxml2 endDocument callback: notifies the parser first, then runs
// libxml2's own xmlSAX2EndDocument().
1393 static void endDocumentHandler(void* closure)
1395 getParser(closure)->endDocument();
1396 xmlSAX2EndDocument(closure);
// libxml2 internalSubset callback: forwards the DOCTYPE name and identifiers
// to XMLDocumentParser::internalSubset(), then runs libxml2's own
// xmlSAX2InternalSubset() with the original arguments.
1399 static void internalSubsetHandler(void* closure, const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
1401 getParser(closure)->internalSubset(toString(name), toString(externalID), toString(systemID));
1402 xmlSAX2InternalSubset(closure, name, externalID, systemID);
// libxml2 externalSubset callback: flags the parser as handling an XHTML
// document when the external identifier exactly matches one of the XHTML
// DTD public identifiers listed below. The other two string arguments are
// ignored.
1405 static void externalSubsetHandler(void* closure, const xmlChar*, const xmlChar* externalId, const xmlChar*)
1407 String extId = toString(externalId);
1408 if (extId == "-//W3C//DTD XHTML 1.0 Transitional//EN"
1409 || extId == "-//W3C//DTD XHTML 1.1//EN"
1410 || extId == "-//W3C//DTD XHTML 1.0 Strict//EN"
1411 || extId == "-//W3C//DTD XHTML 1.0 Frameset//EN"
1412 || extId == "-//W3C//DTD XHTML Basic 1.0//EN"
1413 || extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN"
1414 || extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
1415 || extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN"
1416 || extId == "-//WAPFORUM//DTD XHTML Mobile 1.1//EN"
1417 || extId == "-//WAPFORUM//DTD XHTML Mobile 1.2//EN") {
1418 // Controls if we replace entities or not.
1419 getParser(closure)->setIsXHTMLDocument(true);
// Intentionally empty libxml2 ignorableWhitespace callback; all arguments are
// unused. It is installed only so that the handler slot is non-null.
1423 static void ignorableWhitespaceHandler(void*, const xmlChar*, int)
1425 // Nothing to do, but we need this to work around a crasher.
1426 // http://bugzilla.gnome.org/show_bug.cgi?id=172255
1427 // http://bugs.webkit.org/show_bug.cgi?id=5792
// Builds the SAX handler table and creates the libxml2 parser context stored
// in m_context. For fragment parsing a memory parser over `chunk` is created;
// the other visible path asserts that `chunk` carries no data and creates a
// string parser.
// NOTE(review): the declaration of `sax`, the `else` line and closing braces
// are on lines not visible in this excerpt.
1430 void XMLDocumentParser::initializeParserContext(const CString& chunk)
// Start from an all-zero table so that handlers not assigned below stay null.
1433 memset(&sax, 0, sizeof(sax));
1435 sax.error = normalErrorHandler;
1436 sax.fatalError = fatalErrorHandler;
1437 sax.characters = charactersHandler;
1438 sax.processingInstruction = processingInstructionHandler;
1439 sax.cdataBlock = cdataBlockHandler;
1440 sax.comment = commentHandler;
1441 sax.warning = warningHandler;
1442 sax.startElementNs = startElementNsHandler;
1443 sax.endElementNs = endElementNsHandler;
1444 sax.getEntity = getEntityHandler;
1445 sax.startDocument = startDocumentHandler;
1446 sax.endDocument = endDocumentHandler;
1447 sax.internalSubset = internalSubsetHandler;
1448 sax.externalSubset = externalSubsetHandler;
1449 sax.ignorableWhitespace = ignorableWhitespaceHandler;
1450 sax.entityDecl = xmlSAX2EntityDecl;
1451 sax.initialized = XML_SAX2_MAGIC;
// Reset the per-parse flags before a new context is created.
1454 m_sawXSLTransform = false;
1455 m_sawFirstElement = false;
1457 XMLDocumentParserScope scope(document()->fetcher());
1458 if (m_parsingFragment) {
1459 m_context = XMLParserContext::createMemoryParser(&sax, this, chunk);
1461 ASSERT(!chunk.data());
1462 m_context = XMLParserContext::createStringParser(&sax, this);
// End-of-input processing: finishes the libxml2 parse, releases the context,
// and then either switches the document into the XML tree-view mode or, when
// an XSL transform PI was seen, re-parses the accumulated original source
// into a transform source for the document.
// NOTE(review): several lines are elided (guards before finishParsing, the
// bail-out after styleResolverChanged(), closing braces), so the exact
// conditions around the visible statements are not confirmed here.
1466 void XMLDocumentParser::doEnd()
1470 // Tell libxml we're done.
1472 XMLDocumentParserScope scope(document()->fetcher());
1473 finishParsing(context());
1476 m_context = nullptr;
// Tree-view mode is used only when no error, CSS or XSL transform was seen
// and hasNoStyleInformation() holds for the document.
1480 bool xmlViewerMode = !m_sawError && !m_sawCSS && !m_sawXSLTransform && hasNoStyleInformation(document());
1481 if (xmlViewerMode) {
1482 const char noStyleMessage[] = "This XML file does not appear to have any style information associated with it. The document tree is shown below.";
1483 document()->setIsViewSource(true);
1484 V8Document::PrivateScript::transformDocumentToTreeViewMethod(document()->frame(), document(), noStyleMessage);
1485 } else if (m_sawXSLTransform) {
1486 xmlDocPtr doc = xmlDocPtrForString(document()->fetcher(), m_originalSourceForTransform.toString(), document()->url().string());
1487 document()->setTransformSource(adoptPtr(new TransformSource(doc)));
1488 // Make the document think it's done, so it will apply XSL stylesheets.
1489 document()->setParsing(false);
1490 document()->styleResolverChanged();
1492 // styleResolverChanged() call can detach the parser and null out its
1493 // document. In that case, we just bail out.
// The parsing flag is restored before the base-class stopParsing() runs.
1497 document()->setParsing(true);
1498 DocumentParser::stopParsing();
// Parses `source` in one shot with xmlReadMemory() using XSLT_PARSE_OPTIONS
// and returns the resulting libxml2 document. `url` is passed to libxml2 after
// conversion to Latin-1; `fetcher` is handed to the XMLDocumentParserScope
// that is active for the duration of the parse.
// NOTE(review): the statement taken for an empty `source` is on an elided
// line; presumably a null document is returned — confirm.
1502 xmlDocPtr xmlDocPtrForString(ResourceFetcher* fetcher, const String& source, const String& url)
1504 if (source.isEmpty())
1506 // Parse in a single chunk into an xmlDocPtr
1507 // FIXME: Hook up error handlers so that a failure to parse the main
1508 // document results in good error messages.
1509 XMLDocumentParserScope scope(fetcher, errorFunc, 0);
1510 XMLParserInput input(source);
1511 return xmlReadMemory(input.data(), input.size(), url.latin1().data(), input.encoding(), XSLT_PARSE_OPTIONS);
// Returns the current input line of the libxml2 context as a one-based
// OrdinalNumber, or line 1 when there is no context.
1514 OrdinalNumber XMLDocumentParser::lineNumber() const
1516 return OrdinalNumber::fromOneBasedInt(context() ? context()->input->line : 1);
// Returns the current input column of the libxml2 context as a one-based
// OrdinalNumber, or column 1 when there is no context.
1519 OrdinalNumber XMLDocumentParser::columnNumber() const
1521 return OrdinalNumber::fromOneBasedInt(context() ? context()->input->col : 1);
// Returns the current line/column of the libxml2 context as a TextPosition
// built from one-based values.
// NOTE(review): the guard in front of the minimumPosition() return is on an
// elided line; presumably it is a null-context check — confirm.
1524 TextPosition XMLDocumentParser::textPosition() const
1526 xmlParserCtxtPtr context = this->context();
1528 return TextPosition::minimumPosition();
1529 return TextPosition(OrdinalNumber::fromOneBasedInt(context->input->line), OrdinalNumber::fromOneBasedInt(context->input->col));
// Stops parsing: runs the base-class DocumentParser::stopParsing() and then
// asks libxml2 to stop via xmlStopParser() on the current context.
// NOTE(review): a line between the two calls is elided; it may guard the
// xmlStopParser() call — confirm.
1532 void XMLDocumentParser::stopParsing()
1534 DocumentParser::stopParsing();
1536 xmlStopParser(context());
// Resumes a paused parser in three steps: replay the queued SAX callbacks,
// feed the source text that accumulated in m_pendingSrc back through
// append(), and finally end the document if finish() was already called and
// nothing new was queued.
// Requires a non-detached parser that is currently paused (both asserted).
// NOTE(review): lines are elided after the "A callback paused the parser"
// comment and after the final `if`; the statements there are not visible.
1539 void XMLDocumentParser::resumeParsing()
1541 ASSERT(!isDetached());
1542 ASSERT(m_parserPaused);
1544 m_parserPaused = false;
1546 // First, execute any pending callbacks
1547 while (!m_pendingCallbacks.isEmpty()) {
// Each callback is taken off the front of the queue before it is invoked.
1548 OwnPtr<PendingCallback> callback = m_pendingCallbacks.takeFirst();
1549 callback->call(this);
1551 // A callback paused the parser
1556 // Then, write any pending data
// Copy and clear m_pendingSrc before calling append().
1557 SegmentedString rest = m_pendingSrc;
1558 m_pendingSrc.clear();
1559 // There is normally only one string left, so toString() shouldn't copy.
1560 // In any case, the XML parser runs on the main thread and it's OK if
1561 // the passed string has more than one reference.
1562 append(rest.toString().impl());
1564 // Finally, if finish() has been called and write() didn't result
1565 // in any further callbacks being queued, call end()
1566 if (m_finishCalled && m_pendingCallbacks.isEmpty())
// Parses `chunk` as fragment content. The chunk is converted to UTF-8, a
// fresh parser context is created over it, and xmlParseContent() is run.
// Returns true when the context reports the content as well formed, or when
// it does not but libxml2 recorded no error.
// Only valid for fragment parsers (asserted).
// NOTE(review): the statements taken for an oversized chunk and inside the
// bytes-processed check are on elided lines; presumably both return false —
// confirm.
1570 bool XMLDocumentParser::appendFragmentSource(const String& chunk)
1573 ASSERT(m_parsingFragment);
1575 CString chunkAsUtf8 = chunk.utf8();
1577 // libxml2 takes an int for a length, and therefore can't handle XML chunks
1578 // larger than 2 GiB.
1579 if (chunkAsUtf8.length() > INT_MAX)
1582 TRACE_EVENT0("blink", "XMLDocumentParser::appendFragmentSource");
1583 initializeParserContext(chunkAsUtf8);
1584 xmlParseContent(context());
1585 endDocument(); // Close any open text nodes.
1587 // FIXME: If this code is actually needed, it should probably move to
1589 // XMLDocumentParserQt has a similar check (m_stream.error() ==
1590 // QXmlStreamReader::PrematureEndOfDocumentError) in doEnd(). Check if all
1591 // the chunk has been processed.
// xmlByteConsumed() reporting -1, or a count different from the chunk length,
// is treated as incomplete processing.
1592 long bytesProcessed = xmlByteConsumed(context());
1593 if (bytesProcessed == -1 || static_cast<unsigned long>(bytesProcessed) != chunkAsUtf8.length()) {
1594 // FIXME: I don't believe we can hit this case without also having seen
1595 // an error or a null byte. If we hit this ASSERT, we've found a test
1596 // case which demonstrates the need for this code.
1597 ASSERT(m_sawError || (bytesProcessed >= 0 && !chunkAsUtf8.data()[bytesProcessed]));
1601 // No error if the chunk is well formed or it is not but we have no error.
1602 return context()->wellFormed || !xmlCtxtGetLastError(context());
1605 // --------------------------------
// State shared between parseAttributes() and its SAX start-element handler:
// `attributes` maps each attribute's qualified name to its value.
// NOTE(review): the remaining members are on elided lines; the handler and
// parseAttributes() both use a `gotAttributes` flag on this struct.
1607 struct AttributeParseState {
1608 HashMap<String, String> attributes;
// SAX startElementNs handler used by parseAttributes(). It compares the
// element's local name with "attrs", marks the AttributeParseState found in
// the context's `_private` field as having seen attributes, and stores every
// attribute as qualified-name -> value in state->attributes.
// NOTE(review): the statement taken when the local name is not "attrs" is on
// an elided line; presumably the handler returns — confirm.
1612 static void attributesStartElementNsHandler(void* closure, const xmlChar* xmlLocalName, const xmlChar* /*xmlPrefix*/,
1613 const xmlChar* /*xmlURI*/, int /*nbNamespaces*/, const xmlChar** /*namespaces*/,
1614 int nbAttributes, int /*nbDefaulted*/, const xmlChar** libxmlAttributes)
1616 if (strcmp(reinterpret_cast<const char*>(xmlLocalName), "attrs") != 0)
1619 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1620 AttributeParseState* state = static_cast<AttributeParseState*>(ctxt->_private);
1622 state->gotAttributes = true;
// The raw attribute array is reinterpreted as an array of xmlSAX2Attributes
// records.
1624 xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
1625 for (int i = 0; i < nbAttributes; ++i) {
1626 String attrLocalName = toString(attributes[i].localname);
// The value is delimited by the [value, end) pointer pair.
1627 int valueLength = (int) (attributes[i].end - attributes[i].value);
1628 String attrValue = toString(attributes[i].value, valueLength);
1629 String attrPrefix = toString(attributes[i].prefix);
// Qualified name is "prefix:local" when a prefix is present, else "local".
1630 String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + ":" + attrLocalName;
1632 state->attributes.set(attrQName, attrValue);
// Parses `string` as a run of XML attributes by wrapping it in a synthetic
// `<attrs ... />` element (preceded by an XML declaration) and running a SAX
// parse whose only installed handler is attributesStartElementNsHandler.
// Returns the collected qualified-name -> value map; `attrsOK` is set to
// whether the handler saw the `attrs` element.
// NOTE(review): the declaration of `sax` is on an elided line.
1636 HashMap<String, String> parseAttributes(const String& string, bool& attrsOK)
1638 AttributeParseState state;
1639 state.gotAttributes = false;
1642 memset(&sax, 0, sizeof(sax));
1643 sax.startElementNs = attributesStartElementNsHandler;
1644 sax.initialized = XML_SAX2_MAGIC;
// `state` is passed as the context's private data so the handler can reach it.
1645 RefPtr<XMLParserContext> parser = XMLParserContext::createStringParser(&sax, &state);
1646 String parseString = "<?xml version=\"1.0\"?><attrs " + string + " />";
1647 parseChunk(parser->context(), parseString);
1648 finishParsing(parser->context());
1649 attrsOK = state.gotAttributes;
1650 return state.attributes;
1653 } // namespace blink