2 * Copyright (C) 2000 Peter Kelly (pmk@post.com)
3 * Copyright (C) 2005, 2006, 2008 Apple Inc. All rights reserved.
4 * Copyright (C) 2006 Alexey Proskuryakov (ap@webkit.org)
5 * Copyright (C) 2007 Samuel Weinig (sam@webkit.org)
6 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
7 * Copyright (C) 2008 Holger Hans Peter Freyther
8 * Copyright (C) 2008 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Library General Public
12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Library General Public License for more details.
20 * You should have received a copy of the GNU Library General Public License
21 * along with this library; see the file COPYING.LIB. If not, write to
22 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
23 * Boston, MA 02110-1301, USA.
27 #include "core/xml/parser/XMLDocumentParser.h"
29 #include <libxml/catalog.h>
30 #include <libxml/parser.h>
31 #include <libxml/parserInternals.h>
32 #include <libxslt/xslt.h>
33 #include "FetchInitiatorTypeNames.h"
34 #include "HTMLNames.h"
35 #include "RuntimeEnabledFeatures.h"
36 #include "XMLNSNames.h"
37 #include "bindings/v8/ExceptionState.h"
38 #include "bindings/v8/ExceptionStatePlaceholder.h"
39 #include "bindings/v8/ScriptController.h"
40 #include "bindings/v8/ScriptSourceCode.h"
41 #include "core/dom/CDATASection.h"
42 #include "core/dom/Comment.h"
43 #include "core/dom/Document.h"
44 #include "core/dom/DocumentFragment.h"
45 #include "core/dom/DocumentType.h"
46 #include "core/dom/ProcessingInstruction.h"
47 #include "core/dom/ScriptLoader.h"
48 #include "core/dom/TransformSource.h"
49 #include "core/fetch/ResourceFetcher.h"
50 #include "core/fetch/ScriptResource.h"
51 #include "core/frame/Frame.h"
52 #include "core/html/HTMLHtmlElement.h"
53 #include "core/html/HTMLTemplateElement.h"
54 #include "core/html/parser/HTMLEntityParser.h"
55 #include "core/html/parser/TextResourceDecoder.h"
56 #include "core/loader/FrameLoader.h"
57 #include "core/loader/ImageLoader.h"
58 #include "core/frame/UseCounter.h"
59 #include "core/xml/XMLTreeViewer.h"
60 #include "core/xml/parser/XMLDocumentParserScope.h"
61 #include "core/xml/parser/XMLParserInput.h"
62 #include "platform/SharedBuffer.h"
63 #include "platform/network/ResourceError.h"
64 #include "platform/network/ResourceRequest.h"
65 #include "platform/network/ResourceResponse.h"
66 #include "platform/weborigin/SecurityOrigin.h"
67 #include "wtf/StringExtras.h"
68 #include "wtf/TemporaryChange.h"
69 #include "wtf/Threading.h"
70 #include "wtf/Vector.h"
71 #include "wtf/unicode/UTF8.h"
77 using namespace HTMLNames;
// Upper bound on the size of the parser's open-node stack; pushCurrentNode()
// reports a fatal "Excessive node nesting." error once the stack grows past it.
79 // FIXME: HTMLConstructionSite has a limit of 512, should these match?
80 static const unsigned maxXMLTreeDepth = 5000;
// Builds a String from the first |length| bytes of a libxml2 character buffer,
// decoding them as UTF-8.
82 static inline String toString(const xmlChar* string, size_t length)
84 return String::fromUTF8(reinterpret_cast<const char*>(string), length);
// Builds a String from a libxml2 character buffer, decoding it as UTF-8.
// No length is passed, so the input is presumably NUL-terminated.
87 static inline String toString(const xmlChar* string)
89 return String::fromUTF8(reinterpret_cast<const char*>(string));
// Builds an AtomicString from the first |length| bytes of a libxml2 character
// buffer, decoding them as UTF-8.
92 static inline AtomicString toAtomicString(const xmlChar* string, size_t length)
94 return AtomicString::fromUTF8(reinterpret_cast<const char*>(string), length);
// Builds an AtomicString from a libxml2 character buffer, decoding it as UTF-8.
// No length is passed, so the input is presumably NUL-terminated.
97 static inline AtomicString toAtomicString(const xmlChar* string)
99 return AtomicString::fromUTF8(reinterpret_cast<const char*>(string));
// Decides whether |document| carries no style information.
// NOTE(review): the statements executed by the first two checks and the final
// return are not visible in this excerpt; only the sub-frame case is shown
// returning false.
102 static inline bool hasNoStyleInformation(Document* document)
// Case 1: elements in known namespaces were seen, or a transform source exists.
104 if (document->sawElementsInKnownNamespaces() || document->transformSourceDocument())
// Case 2: the document has no frame, or the frame has no page.
107 if (!document->frame() || !document->frame()->page())
// Case 3: the frame has a parent in the frame tree.
110 if (document->frame()->tree().parent())
111 return false; // This document is not in a top frame
// Queued copy of a startElementNs() event. The constructor duplicates the
// libxml2 namespace and attribute arrays with xmlStrdup()/xmlStrndup(), the
// destructor releases them with xmlFree(), and call() replays the event on the
// parser with the stored copies.
116 class PendingStartElementNSCallback FINAL : public XMLDocumentParser::PendingCallback {
118 PendingStartElementNSCallback(const AtomicString& localName, const AtomicString& prefix, const AtomicString& uri,
119 int namespaceCount, const xmlChar** namespaces, int attributeCount, int defaultedCount, const xmlChar** attributes)
120 : m_localName(localName)
123 , m_namespaceCount(namespaceCount)
124 , m_attributeCount(attributeCount)
125 , m_defaultedCount(defaultedCount)
// Each namespace occupies two array slots, so 2 * namespaceCount strings are copied.
127 m_namespaces = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * namespaceCount * 2));
128 for (int i = 0; i < namespaceCount * 2 ; i++)
129 m_namespaces[i] = xmlStrdup(namespaces[i]);
130 m_attributes = static_cast<xmlChar**>(xmlMalloc(sizeof(xmlChar*) * attributeCount * 5));
131 for (int i = 0; i < attributeCount; i++) {
132 // Each attribute has 5 elements in the array:
133 // name, prefix, uri, value and an end pointer.
// Slots 0-2 (name, prefix, uri) are duplicated as whole strings.
134 for (int j = 0; j < 3; j++)
135 m_attributes[i * 5 + j] = xmlStrdup(attributes[i * 5 + j]);
// The value's extent is given by the value/end pointer pair: copy exactly
// |length| bytes and rebuild the end pointer relative to the copy.
136 int length = attributes[i * 5 + 4] - attributes[i * 5 + 3];
137 m_attributes[i * 5 + 3] = xmlStrndup(attributes[i * 5 + 3], length);
138 m_attributes[i * 5 + 4] = m_attributes[i * 5 + 3] + length;
142 virtual ~PendingStartElementNSCallback()
144 for (int i = 0; i < m_namespaceCount * 2; i++)
145 xmlFree(m_namespaces[i]);
146 xmlFree(m_namespaces);
// Only slots 0-3 are freed per attribute: slot 4 is an end pointer into slot
// 3's allocation (see the constructor), not a separate allocation.
147 for (int i = 0; i < m_attributeCount; i++)
148 for (int j = 0; j < 4; j++)
149 xmlFree(m_attributes[i * 5 + j]);
150 xmlFree(m_attributes);
// Replays the stored event; the const_casts only add constness back to the
// owned arrays to match the startElementNs() signature.
153 virtual void call(XMLDocumentParser* parser) OVERRIDE
155 parser->startElementNs(m_localName, m_prefix, m_uri,
156 m_namespaceCount, const_cast<const xmlChar**>(m_namespaces),
157 m_attributeCount, m_defaultedCount, const_cast<const xmlChar**>(m_attributes));
161 AtomicString m_localName;
162 AtomicString m_prefix;
164 int m_namespaceCount;
// Owned array of 2 * m_namespaceCount duplicated strings.
165 xmlChar** m_namespaces;
166 int m_attributeCount;
167 int m_defaultedCount;
// Owned array of 5 * m_attributeCount slots (see the constructor for the layout).
168 xmlChar** m_attributes;
// Queued endElementNs() event; carries no data and simply replays the call.
171 class PendingEndElementNSCallback FINAL : public XMLDocumentParser::PendingCallback {
173 virtual void call(XMLDocumentParser* parser) OVERRIDE
175 parser->endElementNs();
// Queued characters() event. The constructor takes a private copy of the text
// with xmlStrndup(); call() replays it with the stored pointer and length.
// NOTE(review): the destructor body is not visible in this excerpt; it
// presumably releases m_chars.
179 class PendingCharactersCallback FINAL : public XMLDocumentParser::PendingCallback {
181 PendingCharactersCallback(const xmlChar* chars, int length)
182 : m_chars(xmlStrndup(chars, length))
187 virtual ~PendingCharactersCallback()
192 virtual void call(XMLDocumentParser* parser) OVERRIDE
194 parser->characters(m_chars, m_length);
// Queued processingInstruction() event; call() replays it with the stored
// target and data.
202 class PendingProcessingInstructionCallback FINAL : public XMLDocumentParser::PendingCallback {
204 PendingProcessingInstructionCallback(const String& target, const String& data)
210 virtual void call(XMLDocumentParser* parser) OVERRIDE
212 parser->processingInstruction(m_target, m_data);
// Queued cdataBlock() event; stores the section text and replays it in call().
220 class PendingCDATABlockCallback FINAL : public XMLDocumentParser::PendingCallback {
222 explicit PendingCDATABlockCallback(const String& text) : m_text(text) { }
224 virtual void call(XMLDocumentParser* parser) OVERRIDE
226 parser->cdataBlock(m_text);
// Queued comment() event; stores the comment text and replays it in call().
233 class PendingCommentCallback FINAL : public XMLDocumentParser::PendingCallback {
235 explicit PendingCommentCallback(const String& text) : m_text(text) { }
237 virtual void call(XMLDocumentParser* parser) OVERRIDE
239 parser->comment(m_text);
// Queued internalSubset() event; call() replays it with the stored name,
// external ID and system ID.
246 class PendingInternalSubsetCallback FINAL : public XMLDocumentParser::PendingCallback {
248 PendingInternalSubsetCallback(const String& name, const String& externalID, const String& systemID)
250 , m_externalID(externalID)
251 , m_systemID(systemID)
255 virtual void call(XMLDocumentParser* parser) OVERRIDE
257 parser->internalSubset(m_name, m_externalID, m_systemID);
// Queued error report. The message is duplicated with xmlStrdup() and the
// line/column are captured at construction; call() forwards them to
// handleError() as a TextPosition.
// NOTE(review): the destructor body is not visible in this excerpt; it
// presumably releases m_message.
266 class PendingErrorCallback FINAL : public XMLDocumentParser::PendingCallback {
268 PendingErrorCallback(XMLErrors::ErrorType type, const xmlChar* message, OrdinalNumber lineNumber, OrdinalNumber columnNumber)
270 , m_message(xmlStrdup(message))
271 , m_lineNumber(lineNumber)
272 , m_columnNumber(columnNumber)
276 virtual ~PendingErrorCallback()
281 virtual void call(XMLDocumentParser* parser) OVERRIDE
283 parser->handleError(m_type, reinterpret_cast<char*>(m_message), TextPosition(m_lineNumber, m_columnNumber));
287 XMLErrors::ErrorType m_type;
289 OrdinalNumber m_lineNumber;
290 OrdinalNumber m_columnNumber;
// Saves the current node on m_currentNodeStack before descending into |n|.
// NOTE(review): the statements that reference |n| and make it the current node
// are not visible in this excerpt.
293 void XMLDocumentParser::pushCurrentNode(ContainerNode* n)
296 ASSERT(m_currentNode);
299 m_currentNodeStack.append(m_currentNode);
// Nesting deeper than maxXMLTreeDepth is reported as a fatal parse error.
301 if (m_currentNodeStack.size() > maxXMLTreeDepth)
302 handleError(XMLErrors::fatal, "Excessive node nesting.", textPosition());
// Leaves the current node: drops the parser's reference on it (the document
// itself is never deref'ed here) and restores the previous node from the top
// of m_currentNodeStack.
305 void XMLDocumentParser::popCurrentNode()
309 ASSERT(m_currentNodeStack.size());
311 if (m_currentNode != document())
312 m_currentNode->deref();
314 m_currentNode = m_currentNodeStack.last();
315 m_currentNodeStack.removeLast();
// Drops the references held on the current node and on every node still on
// m_currentNodeStack, then empties the stack. The document is never deref'ed.
318 void XMLDocumentParser::clearCurrentNodeStack()
320 if (m_currentNode && m_currentNode != document())
321 m_currentNode->deref();
325 if (m_currentNodeStack.size()) { // Aborted parsing.
// Entries size()-1 down to 1 are deref'ed unconditionally; the loop stops
// before index 0, which is handled separately below.
326 for (size_t i = m_currentNodeStack.size() - 1; i != 0; --i)
327 m_currentNodeStack[i]->deref();
// The bottom entry may be null or the document, so it is checked first.
328 if (m_currentNodeStack[0] && m_currentNodeStack[0] != document())
329 m_currentNodeStack[0]->deref();
330 m_currentNodeStack.clear();
// Not supported by this parser: reaching this function trips an assertion.
334 void XMLDocumentParser::insert(const SegmentedString&)
336 ASSERT_NOT_REACHED();
// Receives a chunk of source text and either buffers it or feeds it to
// doWrite().
// NOTE(review): the early returns after the stopped and paused checks are not
// visible in this excerpt.
339 void XMLDocumentParser::append(PassRefPtr<StringImpl> inputSource)
341 SegmentedString source(inputSource);
// Accumulate the original source while an XSL transform has been seen, or
// while no element has been seen yet.
342 if (m_sawXSLTransform || !m_sawFirstElement)
343 m_originalSourceForTransform.append(source);
345 if (isStopped() || m_sawXSLTransform)
// While paused, the text is queued in m_pendingSrc instead of being parsed.
348 if (m_parserPaused) {
349 m_pendingSrc.append(source);
353 // JavaScript can detach the parser. Make sure this is not released
354 // before the end of this method.
355 RefPtr<XMLDocumentParser> protect(this);
357 doWrite(source.toString());
// Pending beforeload events are dispatched only when the document still has a
// frame and that frame can execute scripts.
362 if (document()->frame() && document()->frame()->script().canExecuteScripts(NotAboutToExecuteScript))
363 ImageLoader::dispatchPendingBeforeLoadEvents();
// Records the error in m_xmlErrors, then applies extra handling for
// non-warnings and for fatal errors.
// NOTE(review): the statements guarded by the two conditions below are not
// visible in this excerpt.
366 void XMLDocumentParser::handleError(XMLErrors::ErrorType type, const char* formattedMessage, TextPosition position)
368 m_xmlErrors.handleError(type, formattedMessage, position);
369 if (type != XMLErrors::warning)
371 if (type == XMLErrors::fatal)
// Starts a text run: creates an empty Text node and appends it to the current
// node. Asserts that no text is buffered and no leaf text node is already open.
375 void XMLDocumentParser::enterText()
377 ASSERT(m_bufferedText.size() == 0);
378 ASSERT(!m_leafTextNode);
379 m_leafTextNode = Text::create(m_currentNode->document(), "");
380 m_currentNode->parserAppendChild(m_leafTextNode.get());
// Ends a text run: converts the buffered bytes to a String, appends it to the
// open leaf text node, and empties the buffer.
// NOTE(review): any guard clauses and the reset of m_leafTextNode are not
// visible in this excerpt.
383 void XMLDocumentParser::exitText()
391 m_leafTextNode->appendData(toString(m_bufferedText.data(), m_bufferedText.size()));
392 m_bufferedText.clear();
// Releases the node references held by the parser, then runs the base-class
// detach.
396 void XMLDocumentParser::detach()
398 clearCurrentNodeStack();
399 ScriptableDocumentParser::detach();
// Finishes a whole-document parse; asserted never to run for fragment parsing.
// NOTE(review): the doEnd() call and the conditions guarding the steps below
// are not visible in this excerpt.
402 void XMLDocumentParser::end()
404 // XMLDocumentParserLibxml2 will do bad things to the document if doEnd() is called.
405 // I don't believe XMLDocumentParserQt needs doEnd called in the fragment case.
406 ASSERT(!m_parsingFragment);
410 // doEnd() call above can detach the parser and null out its document.
411 // In that case, we just bail out.
415 // doEnd() could process a script tag, thus pausing parsing.
420 insertErrorMessageBlock();
423 document()->styleResolverChanged(RecalcStyleImmediately);
// Shutdown sequence: stop the parser, mark the document Interactive, release
// the open-node stack, then tell the document that parsing has finished.
427 prepareToStopParsing();
428 document()->setReadyState(Document::Interactive);
429 clearCurrentNodeStack();
430 document()->finishedParsing();
// Records that finish() was called by setting m_finishCalled.
// NOTE(review): the statements before and after that assignment are not
// visible in this excerpt.
433 void XMLDocumentParser::finish()
435 // FIXME: We should ASSERT(!m_parserStopped) here, since it does not
436 // make sense to call any methods on DocumentParser once it's been stopped.
437 // However, FrameLoader::stop calls DocumentParser::finish unconditionally.
440 m_finishCalled = true;
// Delegates to m_xmlErrors to insert the error message block.
445 void XMLDocumentParser::insertErrorMessageBlock()
447 m_xmlErrors.insertErrorMessageBlock();
// Called when the pending script resource has finished. Captures the script
// source and its error/cancel status, unregisters this parser as a client of
// the resource, then dispatches an error event or executes the script and
// dispatches its load event.
// NOTE(review): several statements (clearing m_pendingScript/m_scriptElement,
// the error-branch condition, and the final resume step) are not visible here.
450 void XMLDocumentParser::notifyFinished(Resource* unusedResource)
452 ASSERT_UNUSED(unusedResource, unusedResource == m_pendingScript);
453 ASSERT(m_pendingScript->accessCount() > 0);
// Snapshot everything needed from the resource before removing ourselves as a client.
455 ScriptSourceCode sourceCode(m_pendingScript.get());
456 bool errorOccurred = m_pendingScript->errorOccurred();
457 bool wasCanceled = m_pendingScript->wasCanceled();
459 m_pendingScript->removeClient(this);
// Hold a local reference to the script element for the rest of this function.
462 RefPtr<Element> e = m_scriptElement;
465 ScriptLoader* scriptLoader = toScriptLoaderIfPossible(e.get());
466 ASSERT(scriptLoader);
468 // JavaScript can detach this parser, make sure it's kept alive even if detached.
469 RefPtr<XMLDocumentParser> protect(this);
472 scriptLoader->dispatchErrorEvent();
// A canceled load neither executes the script nor fires a load event.
473 else if (!wasCanceled) {
474 scriptLoader->executeScript(sourceCode);
475 scriptLoader->dispatchLoadEvent();
// The follow-up step runs only if the parser is still attached and is not in
// the middle of requesting a script from endElementNs().
480 if (!isDetached() && !m_requestingScript)
// True while a pending script resource is set.
484 bool XMLDocumentParser::isWaitingForScripts() const
486 return m_pendingScript;
// Marks the parser as paused.
// NOTE(review): the statement guarded by the fragment check is not visible in
// this excerpt; presumably fragment parsing returns without pausing.
489 void XMLDocumentParser::pauseParsing()
491 if (m_parsingFragment)
494 m_parserPaused = true;
// Parses |chunk| as XML into |fragment|, using |contextElement| as the parsing
// context. Returns the well-formedness result of appendFragmentSource().
// NOTE(review): the opening statements and the return inside the script/style
// special case are not visible in this excerpt.
497 bool XMLDocumentParser::parseDocumentFragment(const String& chunk, DocumentFragment* fragment, Element* contextElement, ParserContentPolicy parserContentPolicy)
502 // FIXME: We need to implement the HTML5 XML Fragment parsing algorithm:
503 // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-xhtml-syntax.html#xml-fragment-parsing-algorithm
504 // For now we have a hack for script/style innerHTML support:
// For a script or style context element, the chunk is inserted verbatim as a
// single text node instead of being parsed.
505 if (contextElement && (contextElement->hasLocalName(HTMLNames::scriptTag) || contextElement->hasLocalName(HTMLNames::styleTag))) {
506 fragment->parserAppendChild(fragment->document().createTextNode(chunk));
510 RefPtr<XMLDocumentParser> parser = XMLDocumentParser::create(fragment, contextElement, parserContentPolicy);
511 bool wellFormed = parser->appendFragmentSource(chunk);
512 // Do not call finish(). Current finish() and doEnd() implementations touch the main Document/loader
513 // and can cause crashes in the fragment case.
514 parser->detach(); // Allows ~DocumentParser to assert it was detached before destruction.
515 return wellFormed; // appendFragmentSource()'s wellFormed is more permissive than wellFormed().
// Sentinel: openFunc() returns the address of this variable for loads it
// refuses, and readFunc()/closeFunc() compare their context against it.
518 static int globalDescriptor = 0;
// Thread recorded by initializeLibXMLIfNecessary(); matchFunc() only matches
// loads issued from this thread.
519 static ThreadIdentifier libxmlLoaderThread = 0;
// libxml2 I/O "match" callback: claims a load only when a fetcher is currently
// installed in XMLDocumentParserScope and the call is on the recorded libxml
// loader thread. The URI argument is ignored.
521 static int matchFunc(const char*)
523 // Only match loads initiated due to uses of libxml2 from within XMLDocumentParser to avoid
524 // interfering with client applications that also use libxml2. http://bugs.webkit.org/show_bug.cgi?id=17353
525 return XMLDocumentParserScope::currentFetcher && currentThread() == libxmlLoaderThread;
// OffsetBuffer: a byte buffer copied at construction plus a read cursor.
// NOTE(review): the class header line and the return statement of
// readOutBytes() are not visible in this excerpt.
529 WTF_MAKE_FAST_ALLOCATED;
531 OffsetBuffer(const Vector<char>& b) : m_buffer(b), m_currentOffset(0) { }
// Copies up to |askedToRead| bytes from the cursor into |outputBuffer| and
// advances the cursor by the number of bytes copied.
533 int readOutBytes(char* outputBuffer, unsigned askedToRead)
535 unsigned bytesLeft = m_buffer.size() - m_currentOffset;
// Never copy more than what remains in the buffer.
536 unsigned lenToCopy = min(askedToRead, bytesLeft);
538 memcpy(outputBuffer, m_buffer.data() + m_currentOffset, lenToCopy);
539 m_currentOffset += lenToCopy;
545 Vector<char> m_buffer;
546 unsigned m_currentOffset;
// Applies |attributeVector| to |element|. When the content policy does not
// allow scripting content, scripting attributes are stripped from the vector
// first (the vector is modified in place).
549 static inline void setAttributes(Element* element, Vector<Attribute>& attributeVector, ParserContentPolicy parserContentPolicy)
551 if (!scriptingContentIsAllowed(parserContentPolicy))
552 element->stripScriptingAttributes(attributeVector);
553 element->parserSetAttributes(attributeVector);
// Forces the libxml2 context's input encoding to match the in-memory string
// representation: ISO-8859-1 for one case, native-endian UTF-16 for the other.
// NOTE(review): the condition selecting between the two branches is not
// visible in this excerpt; presumably it tests |is8Bit|.
556 static void switchEncoding(xmlParserCtxtPtr ctxt, bool is8Bit)
558 // Hack around libxml2's lack of encoding override support by manually
559 // resetting the encoding to UTF-16 before every chunk. Otherwise libxml
560 // will detect <?xml version="1.0" encoding="<encoding name>"?> blocks
561 // and switch encodings, causing the parse to fail.
563 xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
// Inspect the first in-memory byte of a BOM code unit: 0xFF there means the
// low byte is stored first, i.e. UChar is little-endian on this host.
567 const UChar BOM = 0xFEFF;
568 const unsigned char BOMHighByte = *reinterpret_cast<const unsigned char*>(&BOM);
569 xmlSwitchEncoding(ctxt, BOMHighByte == 0xFF ? XML_CHAR_ENCODING_UTF16LE : XML_CHAR_ENCODING_UTF16BE);
// Feeds |chunk| to the libxml2 push parser after switching the context to the
// encoding that matches the string's storage. The byte count passed is the
// character count times the code-unit size; the final 0 means "not the last
// chunk".
// NOTE(review): the branch selecting between the two calls is not visible in
// this excerpt; presumably it tests |is8Bit|.
572 static void parseChunk(xmlParserCtxtPtr ctxt, const String& chunk)
574 bool is8Bit = chunk.is8Bit();
575 switchEncoding(ctxt, is8Bit);
577 xmlParseChunk(ctxt, reinterpret_cast<const char*>(chunk.characters8()), sizeof(LChar) * chunk.length(), 0);
579 xmlParseChunk(ctxt, reinterpret_cast<const char*>(chunk.characters16()), sizeof(UChar) * chunk.length(), 0);
// Signals end of input to the libxml2 push parser: no data, terminate flag set.
582 static void finishParsing(xmlParserCtxtPtr ctxt)
584 xmlParseChunk(ctxt, 0, 0, 1);
// Poison direct use of xmlParseChunk from here on: any later call expands to
// text that fails to compile, steering callers to parseChunk()/finishParsing().
587 #define xmlParseChunk #error "Use parseChunk instead to select the correct encoding."
// Recognizes the URLs libxml asks for when looking up its default catalog.
// NOTE(review): the return statements are not visible in this excerpt.
589 static bool isLibxmlDefaultCatalogFile(const String& urlString)
591 // On non-Windows platforms libxml asks for this URL, the
592 // "XML_XML_DEFAULT_CATALOG", on initialization.
593 if (urlString == "file:///etc/xml/catalog")
596 // On Windows, libxml computes a URL relative to where its DLL resides.
597 if (urlString.startsWith("file:///", false) && urlString.endsWith("/etc/catalog", false))
// Policy check for loads requested by libxml2. It screens out the default
// catalog file and the well-known W3C XHTML/SVG DTD URLs, then requires that
// the current fetcher's document origin can request |url|; a denied request
// is logged through the fetcher.
// NOTE(review): the return statements are not visible in this excerpt.
602 static bool shouldAllowExternalLoad(const KURL& url)
604 String urlString = url.string();
606 // This isn't really necessary now that initializeLibXMLIfNecessary
607 // disables catalog support in libxml, but keeping it for defense in depth.
// NOTE(review): |url| is passed where the helper takes a String; |urlString|
// computed above could be passed directly.
608 if (isLibxmlDefaultCatalogFile(url))
611 // The most common DTD. There isn't much point in hammering www.w3.org
612 // by requesting this URL for every XHTML document.
613 if (urlString.startsWith("http://www.w3.org/TR/xhtml", false))
616 // Similarly, there isn't much point in requesting the SVG DTD.
617 if (urlString.startsWith("http://www.w3.org/Graphics/SVG", false))
620 // The libxml doesn't give us a lot of context for deciding whether to
621 // allow this request. In the worst case, this load could be for an
622 // external entity and the resulting document could simply read the
623 // retrieved content. If we had more context, we could potentially allow
624 // the parser to load a DTD. As things stand, we take the conservative
625 // route and allow same-origin requests only.
626 if (!XMLDocumentParserScope::currentFetcher->document()->securityOrigin()->canRequest(url)) {
627 XMLDocumentParserScope::currentFetcher->printAccessDeniedMessage(url);
// libxml2 I/O "open" callback. Returns either the address of globalDescriptor
// (load refused; readFunc() treats it specially) or a heap-allocated
// OffsetBuffer holding the fetched bytes.
// NOTE(review): the declarations of |data| and |finalURL| are not visible in
// this excerpt.
634 static void* openFunc(const char* uri)
636 ASSERT(XMLDocumentParserScope::currentFetcher);
637 ASSERT(currentThread() == libxmlLoaderThread);
639 KURL url(KURL(), uri);
641 if (!shouldAllowExternalLoad(url))
642 return &globalDescriptor;
// Remember the fetcher, then install a scope constructed with 0 for the
// duration of the synchronous fetch.
648 ResourceFetcher* fetcher = XMLDocumentParserScope::currentFetcher;
649 XMLDocumentParserScope scope(0);
650 // FIXME: We should restore the original global error handler as well.
652 if (fetcher->frame()) {
653 FetchRequest request(ResourceRequest(url), FetchInitiatorTypeNames::xml, ResourceFetcher::defaultResourceOptions());
654 ResourcePtr<Resource> resource = fetcher->fetchSynchronously(request);
// On success, take the response bytes and record the URL of the response.
655 if (resource && !resource->errorOccurred()) {
656 resource->resourceBuffer()->moveTo(data);
657 finalURL = resource->response().url();
662 // We have to check the URL again after the load to catch redirects.
663 // See <https://bugs.webkit.org/show_bug.cgi?id=21963>.
664 if (!shouldAllowExternalLoad(finalURL))
665 return &globalDescriptor;
// Ownership of the new buffer passes to libxml2 as the opaque context handle.
667 return new OffsetBuffer(data);
// libxml2 I/O "read" callback: reads up to |len| bytes from the OffsetBuffer
// behind |context| into |buffer|.
// NOTE(review): the statement taken for the sentinel descriptor is not visible
// in this excerpt; per the comment below it is a 0-byte read.
670 static int readFunc(void* context, char* buffer, int len)
672 // Do 0-byte reads in case of a null descriptor
673 if (context == &globalDescriptor)
676 OffsetBuffer* data = static_cast<OffsetBuffer*>(context);
677 return data->readOutBytes(buffer, len);
// libxml2 I/O "write" callback; all arguments are ignored.
// NOTE(review): the return statement is not visible in this excerpt.
680 static int writeFunc(void*, const char*, int)
682 // Always just do 0-byte writes
// libxml2 I/O "close" callback. The sentinel descriptor is not an OffsetBuffer
// and is skipped; any other context is treated as an OffsetBuffer.
// NOTE(review): the statements that dispose of |data| and the return value are
// not visible in this excerpt.
686 static int closeFunc(void* context)
688 if (context != &globalDescriptor) {
689 OffsetBuffer* data = static_cast<OffsetBuffer*>(context);
// Variadic error callback that ignores all of its arguments; no visible body
// beyond the FIXME below.
695 static void errorFunc(void*, const char*, ...)
697 // FIXME: It would be nice to display error messages somewhere.
// Process-wide libxml2 setup: disables catalog loading, registers this file's
// I/O callbacks for input and output, and records the calling thread as the
// libxml loader thread.
// NOTE(review): the check and update of |didInit| are not visible in this
// excerpt; presumably the setup runs only once.
700 static void initializeLibXMLIfNecessary()
702 static bool didInit = false;
706 // We don't want libxml to try and load catalogs.
707 // FIXME: It's not nice to set global settings in libxml, embedders of Blink
708 // could be trying to use libxml themselves.
709 xmlCatalogSetDefaults(XML_CATA_ALLOW_NONE);
711 xmlRegisterInputCallbacks(matchFunc, openFunc, readFunc, closeFunc);
712 xmlRegisterOutputCallbacks(matchFunc, openFunc, writeFunc, closeFunc);
713 libxmlLoaderThread = currentThread();
// Creates a libxml2 push-parser context with no initial data, stores
// |userData| in its _private field, enables entity replacement, and wraps the
// context in a ref-counted XMLParserContext.
718 PassRefPtr<XMLParserContext> XMLParserContext::createStringParser(xmlSAXHandlerPtr handlers, void* userData)
720 initializeLibXMLIfNecessary();
721 xmlParserCtxtPtr parser = xmlCreatePushParserCtxt(handlers, 0, 0, 0, 0);
722 parser->_private = userData;
723 parser->replaceEntities = true;
724 return adoptRef(new XMLParserContext(parser));
// Creates a libxml2 in-memory parser context over |chunk|, installs a copy of
// |handlers|, and sets the context up to parse element content directly.
// NOTE(review): the handling of a failed context creation and some internal
// initialization statements are not visible in this excerpt.
727 // Chunk should be encoded in UTF-8
728 PassRefPtr<XMLParserContext> XMLParserContext::createMemoryParser(xmlSAXHandlerPtr handlers, void* userData, const CString& chunk)
730 initializeLibXMLIfNecessary();
732 // appendFragmentSource() checks that the length doesn't overflow an int.
733 xmlParserCtxtPtr parser = xmlCreateMemoryParserCtxt(chunk.data(), chunk.length());
738 // Copy the sax handler
739 memcpy(parser->sax, handlers, sizeof(xmlSAXHandler));
741 // Set parser options.
742 // XML_PARSE_NODICT: default dictionary option.
743 // XML_PARSE_NOENT: force entities substitutions.
744 xmlCtxtUseOptions(parser, XML_PARSE_NODICT | XML_PARSE_NOENT);
746 // Internal initialization
748 parser->instate = XML_PARSER_CONTENT; // We are parsing a CONTENT
// Populate the context's cached "xml"/"xmlns"/XML-namespace strings from its
// dictionary; the numeric arguments are the string lengths.
750 parser->str_xml = xmlDictLookup(parser->dict, BAD_CAST "xml", 3);
751 parser->str_xmlns = xmlDictLookup(parser->dict, BAD_CAST "xmlns", 5);
752 parser->str_xml_ns = xmlDictLookup(parser->dict, XML_XML_NAMESPACE, 36);
753 parser->_private = userData;
755 return adoptRef(new XMLParserContext(parser));
758 // --------------------------------
// Only XML version "1.0" is reported as supported.
760 bool XMLDocumentParser::supportsXMLVersion(const String& version)
762 return version == "1.0";
// Constructor for parsing a whole document: the document itself is the initial
// current node, all state flags start false, and m_parsingFragment is false.
// NOTE(review): some member initializers are not visible in this excerpt.
765 XMLDocumentParser::XMLDocumentParser(Document* document, FrameView* frameView)
766 : ScriptableDocumentParser(document)
769 , m_currentNode(document)
770 , m_isCurrentlyParsing8BitChunk(false)
773 , m_sawXSLTransform(false)
774 , m_sawFirstElement(false)
775 , m_isXHTMLDocument(false)
776 , m_parserPaused(false)
777 , m_requestingScript(false)
778 , m_finishCalled(false)
779 , m_xmlErrors(document)
781 , m_scriptStartPosition(TextPosition::belowRangePosition())
782 , m_parsingFragment(false)
784 // This is XML being used as a document resource.
785 UseCounter::count(*document, UseCounter::XMLDocument);
// Constructor for fragment parsing: the fragment is the initial current node
// and m_parsingFragment is true. The body seeds the default namespace and the
// prefix-to-namespace map from xmlns declarations on |parentElement| and its
// element ancestors.
// NOTE(review): some member initializers, the loop exit inside the ancestor
// walk, and the early return for an empty stack are not visible in this excerpt.
788 XMLDocumentParser::XMLDocumentParser(DocumentFragment* fragment, Element* parentElement, ParserContentPolicy parserContentPolicy)
789 : ScriptableDocumentParser(&fragment->document(), parserContentPolicy)
792 , m_currentNode(fragment)
793 , m_isCurrentlyParsing8BitChunk(false)
796 , m_sawXSLTransform(false)
797 , m_sawFirstElement(false)
798 , m_isXHTMLDocument(false)
799 , m_parserPaused(false)
800 , m_requestingScript(false)
801 , m_finishCalled(false)
802 , m_xmlErrors(&fragment->document())
804 , m_scriptStartPosition(TextPosition::belowRangePosition())
805 , m_parsingFragment(true)
809 // Add namespaces based on the parent node
// Collect the context element and its element ancestors, innermost first.
810 Vector<Element*> elemStack;
811 while (parentElement) {
812 elemStack.append(parentElement);
814 ContainerNode* n = parentElement->parentNode();
815 if (!n || !n->isElementNode())
817 parentElement = toElement(n);
820 if (elemStack.isEmpty())
// Walk from the last collected (outermost) element to the first (innermost),
// so declarations on nearer elements overwrite those from farther ancestors.
823 for (; !elemStack.isEmpty(); elemStack.removeLast()) {
824 Element* element = elemStack.last();
825 if (element->hasAttributes()) {
826 for (unsigned i = 0; i < element->attributeCount(); i++) {
827 const Attribute* attribute = element->attributeItem(i);
// An attribute whose local name is "xmlns" sets the default namespace; one
// whose prefix is "xmlns" binds its local name as a namespace prefix.
828 if (attribute->localName() == xmlnsAtom)
829 m_defaultNamespaceURI = attribute->value();
830 else if (attribute->prefix() == xmlnsAtom)
831 m_prefixToNamespaceMap.set(attribute->localName(), attribute->value());
836 // If the parent element is not in document tree, there may be no xmlns attribute; just default to the parent's namespace.
// NOTE(review): the ancestor walk above reassigns parentElement, so at this
// point it refers to the outermost element reached rather than the element
// originally passed in; confirm this is intended.
837 if (m_defaultNamespaceURI.isNull() && !parentElement->inDocument())
838 m_defaultNamespaceURI = parentElement->namespaceURI();
// Frees the document attached to the libxml2 context, if any, and then the
// context itself.
841 XMLParserContext::~XMLParserContext()
843 if (m_context->myDoc)
844 xmlFreeDoc(m_context->myDoc);
845 xmlFreeParserCtxt(m_context);
// Asserts that the parser was already detached (no node stack, no current
// node) and unregisters from the pending script resource.
// NOTE(review): the condition guarding removeClient() is not visible in this
// excerpt; presumably it checks that m_pendingScript is set.
848 XMLDocumentParser::~XMLDocumentParser()
850 // The XMLDocumentParser will always be detached before being destroyed.
851 ASSERT(m_currentNodeStack.isEmpty());
852 ASSERT(!m_currentNode);
854 // FIXME: m_pendingScript handling should be moved into XMLDocumentParser.cpp!
856 m_pendingScript->removeClient(this);
// Feeds |parseString| to libxml2 through parseChunk(), then reports a fatal
// "Encoding error" if the document saw a decoding error.
// NOTE(review): the guard before initializeParserContext() and the check made
// after parseChunk() for a stopped or detached parser are not visible here.
859 void XMLDocumentParser::doWrite(const String& parseString)
861 ASSERT(!isDetached());
863 initializeParserContext();
865 // Protect the libxml context from deletion during a callback
866 RefPtr<XMLParserContext> context = m_context;
868 // libXML throws an error if you try to switch the encoding for an empty string.
869 if (parseString.length()) {
870 // JavaScript may cause the parser to detach during parseChunk
871 // keep this alive until this function is done.
872 RefPtr<XMLDocumentParser> protect(this);
// Install the document's fetcher for this parse, and record whether the chunk
// is 8-bit in m_isCurrentlyParsing8BitChunk for the duration of the call.
874 XMLDocumentParserScope scope(document()->fetcher());
875 TemporaryChange<bool> encodingScope(m_isCurrentlyParsing8BitChunk, parseString.is8Bit());
876 parseChunk(context->context(), parseString);
878 // JavaScript (which may be run under the parseChunk callstack) may
879 // cause the parser to be stopped or detached.
884 // FIXME: Why is this here? And why is it after we process the passed source?
885 if (document()->sawDecodingError()) {
886 // If the decoder saw an error, report it as fatal (stops parsing)
887 TextPosition position(OrdinalNumber::fromOneBasedInt(context->context()->input->line), OrdinalNumber::fromOneBasedInt(context->context()->input->col));
888 handleError(XMLErrors::fatal, "Encoding error", position);
// Overlay used by handleNamespaceAttributes() to read libxml2's flat
// namespace array as records.
// NOTE(review): the |uri| member read by that function is not visible in this
// excerpt.
892 struct _xmlSAX2Namespace {
893 const xmlChar* prefix;
896 typedef struct _xmlSAX2Namespace xmlSAX2Namespace;
// Converts libxml2's namespace declarations into xmlns attributes and appends
// them to |prefixedAttributes|. A declaration without a prefix becomes
// "xmlns"; one with a prefix becomes "xmlns:<prefix>".
// NOTE(review): the statement executed when parseAttributeName() fails is not
// visible in this excerpt.
898 static inline void handleNamespaceAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlNamespaces, int nbNamespaces, ExceptionState& exceptionState)
// Reinterpret the flat pointer array as an array of namespace records.
900 xmlSAX2Namespace* namespaces = reinterpret_cast<xmlSAX2Namespace*>(libxmlNamespaces);
901 for (int i = 0; i < nbNamespaces; i++) {
902 AtomicString namespaceQName = xmlnsAtom;
903 AtomicString namespaceURI = toAtomicString(namespaces[i].uri);
904 if (namespaces[i].prefix)
905 namespaceQName = "xmlns:" + toString(namespaces[i].prefix);
907 QualifiedName parsedName = anyName;
908 if (!Element::parseAttributeName(parsedName, XMLNSNames::xmlnsNamespaceURI, namespaceQName, exceptionState))
911 prefixedAttributes.append(Attribute(parsedName, namespaceURI));
// Overlay used by handleElementAttributes() to read libxml2's flat attribute
// array as records.
// NOTE(review): the |uri| and |end| members read by that function are not
// visible in this excerpt.
915 struct _xmlSAX2Attributes {
916 const xmlChar* localname;
917 const xmlChar* prefix;
919 const xmlChar* value;
922 typedef struct _xmlSAX2Attributes xmlSAX2Attributes;
// Converts libxml2's attribute records into Attribute objects and appends them
// to |prefixedAttributes|.
// NOTE(review): the statement executed when parseAttributeName() fails is not
// visible in this excerpt.
924 static inline void handleElementAttributes(Vector<Attribute>& prefixedAttributes, const xmlChar** libxmlAttributes, int nbAttributes, ExceptionState& exceptionState)
// Reinterpret the flat pointer array as an array of attribute records.
926 xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
927 for (int i = 0; i < nbAttributes; i++) {
// The value's length is the distance between its value and end pointers.
928 int valueLength = static_cast<int>(attributes[i].end - attributes[i].value);
929 AtomicString attrValue = toAtomicString(attributes[i].value, valueLength);
930 String attrPrefix = toString(attributes[i].prefix);
// Unprefixed attributes get a null namespace URI and a bare local name as
// their qualified name; prefixed ones use "<prefix>:<localname>".
931 AtomicString attrURI = attrPrefix.isEmpty() ? AtomicString() : toAtomicString(attributes[i].uri);
932 AtomicString attrQName = attrPrefix.isEmpty() ? toAtomicString(attributes[i].localname) : attrPrefix + ":" + toString(attributes[i].localname);
934 QualifiedName parsedName = anyName;
935 if (!Element::parseAttributeName(parsedName, attrURI, attrQName, exceptionState))
938 prefixedAttributes.append(Attribute(parsedName, attrValue));
// Handles a start-element event: creates the element, applies its namespace
// declarations and attributes, appends it to the current node, and makes it
// (or a template's content) the new current node.
// NOTE(review): several statements are not visible in this excerpt, including
// the early returns, the handling after attribute exceptions, and the
// conditions around some branches below.
942 void XMLDocumentParser::startElementNs(const AtomicString& localName, const AtomicString& prefix, const AtomicString& uri, int nbNamespaces,
943 const xmlChar** libxmlNamespaces, int nbAttributes, int nbDefaulted, const xmlChar** libxmlAttributes)
// While paused, the event is queued with copies of its arguments.
948 if (m_parserPaused) {
949 m_pendingCallbacks.append(adoptPtr(new PendingStartElementNSCallback(localName, prefix, uri, nbNamespaces, libxmlNamespaces,
950 nbAttributes, nbDefaulted, libxmlAttributes)));
// In fragment parsing, an element without a namespace URI takes one from the
// constructor-seeded prefix map or default namespace.
956 AtomicString adjustedURI = uri;
957 if (m_parsingFragment && adjustedURI.isNull()) {
958 if (!prefix.isNull())
959 adjustedURI = m_prefixToNamespaceMap.get(prefix);
961 adjustedURI = m_defaultNamespaceURI;
964 bool isFirstElement = !m_sawFirstElement;
965 m_sawFirstElement = true;
967 QualifiedName qName(prefix, localName, adjustedURI);
968 RefPtr<Element> newElement = m_currentNode->document().createElement(qName, true);
// Namespace declarations are converted first, then regular attributes; both
// accumulate in the same vector.
974 Vector<Attribute> prefixedAttributes;
975 TrackExceptionState exceptionState;
976 handleNamespaceAttributes(prefixedAttributes, libxmlNamespaces, nbNamespaces, exceptionState);
977 if (exceptionState.hadException()) {
978 setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy());
983 handleElementAttributes(prefixedAttributes, libxmlAttributes, nbAttributes, exceptionState);
984 setAttributes(newElement.get(), prefixedAttributes, parserContentPolicy());
985 if (exceptionState.hadException()) {
990 newElement->beginParsingChildren();
992 ScriptLoader* scriptLoader = toScriptLoaderIfPossible(newElement.get());
994 m_scriptStartPosition = textPosition();
996 m_currentNode->parserAppendChild(newElement.get());
// Children of a <template> element are parsed into its content rather than
// into the element itself.
998 if (newElement->hasTagName(HTMLNames::templateTag))
999 pushCurrentNode(toHTMLTemplateElement(newElement.get())->content());
1001 pushCurrentNode(newElement.get());
1003 if (newElement->hasTagName(HTMLNames::htmlTag))
1004 toHTMLHtmlElement(newElement)->insertedByParser();
// The first element of a non-fragment parse is announced to the frame loader.
1006 if (!m_parsingFragment && isFirstElement && document()->frame())
1007 document()->frame()->loader().dispatchDocumentElementAvailable();
// Handles an end-element event: finishes the current element and, for script
// elements, prepares and either executes the script or registers for its
// pending resource.
// NOTE(review): many statements are not visible in this excerpt, including the
// popCurrentNode() calls, the early returns inside each guard, and the
// pause/detach handling near the end.
1010 void XMLDocumentParser::endElementNs()
// While paused, the event is queued for later replay.
1015 if (m_parserPaused) {
1016 m_pendingCallbacks.append(adoptPtr(new PendingEndElementNSCallback()));
1020 // JavaScript can detach the parser. Make sure this is not released
1021 // before the end of this method.
1022 RefPtr<XMLDocumentParser> protect(this);
// Hold a local reference to the node being closed for the rest of this function.
1026 RefPtr<ContainerNode> n = m_currentNode;
1027 if (m_currentNode->isElementNode())
1028 toElement(n.get())->finishParsingChildren();
// When scripting content is not allowed, script elements are removed from the tree.
1030 if (!scriptingContentIsAllowed(parserContentPolicy()) && n->isElementNode() && toScriptLoaderIfPossible(toElement(n))) {
1032 n->remove(IGNORE_EXCEPTION);
1036 if (!n->isElementNode() || !m_view) {
1041 Element* element = toElement(n);
1043 // The element's parent may have already been removed from document.
1044 // Parsing continues in this case, but scripts aren't executed.
1045 if (!element->inDocument()) {
1050 ScriptLoader* scriptLoader = toScriptLoaderIfPossible(element);
1051 if (!scriptLoader) {
1056 // Don't load external scripts for standalone documents (for now).
1057 ASSERT(!m_pendingScript);
// m_requestingScript is set for the duration of script preparation;
// notifyFinished() checks it before its final step.
1058 m_requestingScript = true;
1060 if (scriptLoader->prepareScript(m_scriptStartPosition, ScriptLoader::AllowLegacyTypeInTypeAttribute)) {
1061 // FIXME: Script execution should be shared between
1062 // the libxml2 and Qt XMLDocumentParser implementations.
// A script that is ready runs immediately with its own content; otherwise the
// parser registers as a client of the script's resource.
1064 if (scriptLoader->readyToBeParserExecuted()) {
1065 scriptLoader->executeScript(ScriptSourceCode(scriptLoader->scriptContent(), document()->url(), m_scriptStartPosition));
1066 } else if (scriptLoader->willBeParserExecuted()) {
1067 m_pendingScript = scriptLoader->resource();
1068 m_scriptElement = element;
1069 m_pendingScript->addClient(this);
1071 // m_pendingScript will be 0 if script was already loaded and addClient() executed it.
1072 if (m_pendingScript)
1075 m_scriptElement = 0;
1078 // JavaScript may have detached the parser
1082 m_requestingScript = false;
// Handles character data: queues a copy while paused, otherwise appends the
// bytes to m_bufferedText.
// NOTE(review): the early returns and the statement taken when no leaf text
// node is open are not visible in this excerpt.
1086 void XMLDocumentParser::characters(const xmlChar* chars, int length)
1091 if (m_parserPaused) {
1092 m_pendingCallbacks.append(adoptPtr(new PendingCharactersCallback(chars, length)));
1096 if (!m_leafTextNode)
1098 m_bufferedText.append(chars, length);
// Formats a printf-style libxml2 error message and either queues it (while
// paused) with the current line/column, or reports it immediately at the
// current text position.
// NOTE(review): the early returns are not visible in this excerpt.
1101 void XMLDocumentParser::error(XMLErrors::ErrorType type, const char* message, va_list args)
// Messages longer than the buffer are truncated by vsnprintf.
1106 char formattedMessage[1024];
1107 vsnprintf(formattedMessage, sizeof(formattedMessage) - 1, message, args);
1109 if (m_parserPaused) {
1110 m_pendingCallbacks.append(adoptPtr(new PendingErrorCallback(type, reinterpret_cast<const xmlChar*>(formattedMessage), lineNumber(), columnNumber())));
1114 handleError(type, formattedMessage, textPosition());
// SAX processing-instruction callback.
// Queues the event while paused. Otherwise it creates a ProcessingInstruction
// in the current node's document, appends it under m_currentNode, and then
// checks whether it is an XSL stylesheet PI.
// NOTE(review): lines are elided in this excerpt -- the early returns and the
// stopParsing() call described by the trailing comment are not visible.
1117 void XMLDocumentParser::processingInstruction(const String& target, const String& data)
1122 if (m_parserPaused) {
1123 m_pendingCallbacks.append(adoptPtr(new PendingProcessingInstructionCallback(target ,data)));
1129 // ### handle exceptions
// Creation reports failure through exceptionState, which is tested right after.
1130 TrackExceptionState exceptionState;
1131 RefPtr<ProcessingInstruction> pi = m_currentNode->document().createProcessingInstruction(target, data, exceptionState);
1132 if (exceptionState.hadException())
// The created-by-parser flag is set only around the append below.
1135 pi->setCreatedByParser(true);
1137 m_currentNode->parserAppendChild(pi.get());
1139 pi->setCreatedByParser(false);
// The XSLT handling that follows is gated on the runtime feature flag.
1144 if (!RuntimeEnabledFeatures::xsltEnabled())
// Only an XSL PI seen before the first element marks the document for transform.
1147 m_sawXSLTransform = !m_sawFirstElement && pi->isXSL();
1148 if (m_sawXSLTransform && !document()->transformSourceDocument()) {
1149 // This behavior is very tricky. We call stopParsing() here because we want to stop processing the document
1150 // until we're ready to apply the transform, but we actually still want to be fed decoded string pieces to
1151 // accumulate in m_originalSourceForTransform. So, we call stopParsing() here and
1152 // check isStopped() in element callbacks.
1153 // FIXME: This contradicts the contract of DocumentParser.
// SAX CDATA callback.
// Queued as a PendingCDATABlockCallback while paused; otherwise a CDATASection
// holding `text` is created in the current node's document and appended under
// m_currentNode.
// NOTE(review): lines are elided in this excerpt (e.g. the return after queuing).
1158 void XMLDocumentParser::cdataBlock(const String& text)
1163 if (m_parserPaused) {
1164 m_pendingCallbacks.append(adoptPtr(new PendingCDATABlockCallback(text)));
1170 RefPtr<CDATASection> newNode = CDATASection::create(m_currentNode->document(), text);
1171 m_currentNode->parserAppendChild(newNode.get());
// SAX comment callback.
// Queued as a PendingCommentCallback while paused; otherwise a Comment node
// holding `text` is created in the current node's document and appended under
// m_currentNode.
// NOTE(review): lines are elided in this excerpt (e.g. the return after queuing).
1174 void XMLDocumentParser::comment(const String& text)
1179 if (m_parserPaused) {
1180 m_pendingCallbacks.append(adoptPtr(new PendingCommentCallback(text)));
1186 RefPtr<Comment> newNode = Comment::create(m_currentNode->document(), text);
1187 m_currentNode->parserAppendChild(newNode.get());
// Interpretation of the integer `standalone` value handed to
// XMLDocumentParser::startDocument(), which casts it to this enum.
// NOTE(review): only the first enumerator is visible in this excerpt;
// startDocument() also refers to NoXMlDeclaration and StandaloneYes.
1190 enum StandaloneInfo {
1191 StandaloneUnspecified = -2,
// Records the XML declaration on the document.
// `standalone` is interpreted as a StandaloneInfo value. When it signals that
// there was no XML declaration, the document is flagged accordingly.
// Otherwise the version, standalone flag and encoding are copied onto the
// document when present, and the document is flagged as having a declaration.
// NOTE(review): lines are elided in this excerpt -- presumably the
// no-declaration branch returns early; confirm.
1197 void XMLDocumentParser::startDocument(const String& version, const String& encoding, int standalone)
1199 StandaloneInfo standaloneInfo = (StandaloneInfo)standalone;
1200 if (standaloneInfo == NoXMlDeclaration) {
1201 document()->setHasXMLDeclaration(false);
// Null strings mean the corresponding pseudo-attribute was not supplied.
1205 if (!version.isNull())
1206 document()->setXMLVersion(version, ASSERT_NO_EXCEPTION);
// The standalone flag is set only when the value is not StandaloneUnspecified.
1207 if (standalone != StandaloneUnspecified)
1208 document()->setXMLStandalone(standaloneInfo == StandaloneYes, ASSERT_NO_EXCEPTION);
1209 if (!encoding.isNull())
1210 document()->setXMLEncoding(encoding);
1211 document()->setHasXMLDeclaration(true);
// End-of-document hook, invoked from endDocumentHandler() and from
// appendFragmentSource() (which annotates the call "Close any open text nodes").
// NOTE(review): the body is not visible in this excerpt.
1214 void XMLDocumentParser::endDocument()
// SAX internal-subset (DOCTYPE) callback.
// Queued as a PendingInternalSubsetCallback while paused; otherwise a
// DocumentType built from name / externalID / systemID is appended directly to
// the document.
// NOTE(review): lines are elided in this excerpt (e.g. the return after
// queuing and any guard on document() are not visible).
1219 void XMLDocumentParser::internalSubset(const String& name, const String& externalID, const String& systemID)
1224 if (m_parserPaused) {
1225 m_pendingCallbacks.append(adoptPtr(new PendingInternalSubsetCallback(name, externalID, systemID)));
1230 document()->parserAppendChild(DocumentType::create(document(), name, externalID, systemID));
// Recovers the XMLDocumentParser from a SAX callback's `closure` argument:
// the closure is treated as the libxml2 parser context, whose _private field
// holds the parser pointer.
1233 static inline XMLDocumentParser* getParser(void* closure)
1235 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1236 return static_cast<XMLDocumentParser*>(ctxt->_private);
// libxml2 startElementNs trampoline: converts the element's local name, prefix
// and namespace URI with toAtomicString() and forwards everything else
// unchanged to XMLDocumentParser::startElementNs().
1239 static void startElementNsHandler(void* closure, const xmlChar* localName, const xmlChar* prefix, const xmlChar* uri, int nbNamespaces, const xmlChar** namespaces, int nbAttributes, int nbDefaulted, const xmlChar** libxmlAttributes)
1241 getParser(closure)->startElementNs(toAtomicString(localName), toAtomicString(prefix), toAtomicString(uri), nbNamespaces, namespaces, nbAttributes, nbDefaulted, libxmlAttributes);
// libxml2 endElementNs trampoline: the name, prefix and URI arguments are
// ignored and the event is forwarded to XMLDocumentParser::endElementNs().
1244 static void endElementNsHandler(void* closure, const xmlChar*, const xmlChar*, const xmlChar*)
1246 getParser(closure)->endElementNs();
// libxml2 characters trampoline: forwards the raw byte run and its length to
// XMLDocumentParser::characters().
1249 static void charactersHandler(void* closure, const xmlChar* chars, int length)
1251 getParser(closure)->characters(chars, length);
// libxml2 processingInstruction trampoline: converts target and data with
// toString() and forwards them to XMLDocumentParser::processingInstruction().
1254 static void processingInstructionHandler(void* closure, const xmlChar* target, const xmlChar* data)
1256 getParser(closure)->processingInstruction(toString(target), toString(data));
// libxml2 cdataBlock trampoline: converts the length-delimited text with
// toString() and forwards it to XMLDocumentParser::cdataBlock().
1259 static void cdataBlockHandler(void* closure, const xmlChar* text, int length)
1261 getParser(closure)->cdataBlock(toString(text, length));
// libxml2 comment trampoline: converts the text with toString() and forwards
// it to XMLDocumentParser::comment().
1264 static void commentHandler(void* closure, const xmlChar* text)
1266 getParser(closure)->comment(toString(text));
// Variadic libxml2 warning callback: forwards the format string and its
// argument list to XMLDocumentParser::error() with severity XMLErrors::warning.
// WTF_ATTRIBUTE_PRINTF(2, 3) is presumably a printf-format annotation for
// `message` and the variadic arguments -- confirm against the macro definition.
// NOTE(review): the va_list declaration and the matching va_end are not
// visible in this excerpt.
1269 WTF_ATTRIBUTE_PRINTF(2, 3)
1270 static void warningHandler(void* closure, const char* message, ...)
1273 va_start(args, message);
1274 getParser(closure)->error(XMLErrors::warning, message, args);
// Variadic libxml2 fatal-error callback: forwards the format string and its
// argument list to XMLDocumentParser::error() with severity XMLErrors::fatal.
// WTF_ATTRIBUTE_PRINTF(2, 3) is presumably a printf-format annotation for
// `message` and the variadic arguments -- confirm against the macro definition.
// NOTE(review): the va_list declaration and the matching va_end are not
// visible in this excerpt.
1278 WTF_ATTRIBUTE_PRINTF(2, 3)
1279 static void fatalErrorHandler(void* closure, const char* message, ...)
1282 va_start(args, message);
1283 getParser(closure)->error(XMLErrors::fatal, message, args);
// Variadic libxml2 (recoverable) error callback: forwards the format string
// and its argument list to XMLDocumentParser::error() with severity
// XMLErrors::nonFatal.
// WTF_ATTRIBUTE_PRINTF(2, 3) is presumably a printf-format annotation for
// `message` and the variadic arguments -- confirm against the macro definition.
// NOTE(review): the va_list declaration and the matching va_end are not
// visible in this excerpt.
1287 WTF_ATTRIBUTE_PRINTF(2, 3)
1288 static void normalErrorHandler(void* closure, const char* message, ...)
1291 va_start(args, message);
1292 getParser(closure)->error(XMLErrors::nonFatal, message, args);
1296 // Using a static entity and marking it XML_INTERNAL_PREDEFINED_ENTITY is
1297 // a hack to avoid malloc/free. Using a global variable like this could cause trouble
1298 // if libxml implementation details were to change
// Zero-initialised 9-byte scratch buffer. getXHTMLEntity() writes the UTF-8
// expansion of a named entity into it, and sharedXHTMLEntity() points the
// shared entity's orig/content fields at it, so each lookup overwrites the
// previous result.
1299 static xmlChar sharedXHTMLEntityResult[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
// Provides the single function-local static xmlEntity used for XHTML named
// entities. It is set up as an entity declaration whose orig and content both
// point at sharedXHTMLEntityResult, with etype XML_INTERNAL_PREDEFINED_ENTITY.
// NOTE(review): lines are elided in this excerpt -- any one-time-initialisation
// guard and the return statement are not visible.
1301 static xmlEntityPtr sharedXHTMLEntity()
1303 static xmlEntity entity;
1305 entity.type = XML_ENTITY_DECL;
1306 entity.orig = sharedXHTMLEntityResult;
1307 entity.content = sharedXHTMLEntityResult;
1308 entity.etype = XML_INTERNAL_PREDEFINED_ENTITY;
// Converts `numberOfCodeUnits` UTF-16 code units starting at `utf16Entity`
// into UTF-8 in `target`, which has room for `targetSize` bytes.
// Returns the number of UTF-8 bytes produced (the distance the target pointer
// advanced). The caller, getXHTMLEntity(), treats a result of 0 as failure.
// NOTE(review): lines are elided in this excerpt -- the statement taken when
// the conversion fails and the one that writes the NUL terminator mentioned
// below are not visible. If the terminator is stored at the advanced `target`
// position, a conversion that fills all `targetSize` bytes would put it one
// past the buffer; confirm callers leave headroom.
1313 static size_t convertUTF16EntityToUTF8(const UChar* utf16Entity, size_t numberOfCodeUnits, char* target, size_t targetSize)
// Remember where output started; convertUTF16ToUTF8 takes `target` by address.
1315 const char* originalTarget = target;
1316 WTF::Unicode::ConversionResult conversionResult = WTF::Unicode::convertUTF16ToUTF8(&utf16Entity,
1317 utf16Entity + numberOfCodeUnits, &target, target + targetSize);
1318 if (conversionResult != WTF::Unicode::conversionOK)
1321 // Even though we must pass the length, libxml expects the entity string to be null terminated.
// The assertion requires more than one byte of output.
1322 ASSERT(target > originalTarget + 1);
1324 return target - originalTarget;
// Resolves an XHTML named entity to the shared xmlEntity.
// The name is decoded to at most four UTF-16 code units, converted to UTF-8
// into sharedXHTMLEntityResult, and the shared entity's length and name are
// then updated to describe the result.
// NOTE(review): lines are elided in this excerpt -- the statements taken when
// decoding or conversion yields 0, and the final return, are not visible.
1327 static xmlEntityPtr getXHTMLEntity(const xmlChar* name)
1329 UChar utf16DecodedEntity[4];
1330 size_t numberOfCodeUnits = decodeNamedEntityToUCharArray(reinterpret_cast<const char*>(name), utf16DecodedEntity);
1331 if (!numberOfCodeUnits)
// Guards the fixed-size utf16DecodedEntity array above.
1334 ASSERT(numberOfCodeUnits <= 4);
1335 size_t entityLengthInUTF8 = convertUTF16EntityToUTF8(utf16DecodedEntity, numberOfCodeUnits,
1336 reinterpret_cast<char*>(sharedXHTMLEntityResult), WTF_ARRAY_LENGTH(sharedXHTMLEntityResult));
1337 if (!entityLengthInUTF8)
// The entity's name field is pointed at the caller's `name`; it is not copied here.
1340 xmlEntityPtr entity = sharedXHTMLEntity();
1341 entity->length = entityLengthInUTF8;
1342 entity->name = name;
// libxml2 getEntity callback.
// Lookup order in the visible code: libxml2's predefined entities, then
// entities declared in the context's document, then -- only when the parser
// reports an XHTML document -- the XHTML named-entity table.
// NOTE(review): lines are elided in this excerpt -- the conditions guarding
// the two etype assignments and the return statements are not visible.
1346 static xmlEntityPtr getEntityHandler(void* closure, const xmlChar* name)
1348 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1349 xmlEntityPtr ent = xmlGetPredefinedEntity(name);
1351 ent->etype = XML_INTERNAL_PREDEFINED_ENTITY;
1355 ent = xmlGetDocEntity(ctxt->myDoc, name);
1356 if (!ent && getParser(closure)->isXHTMLDocument()) {
1357 ent = getXHTMLEntity(name);
// Overrides the XML_INTERNAL_PREDEFINED_ENTITY etype that sharedXHTMLEntity() sets.
1359 ent->etype = XML_INTERNAL_GENERAL_ENTITY;
// libxml2 startDocument callback.
// Calls switchEncoding() according to whether the parser is currently handling
// an 8-bit chunk, passes the context's version / encoding / standalone values
// to XMLDocumentParser::startDocument(), and then runs libxml2's default
// xmlSAX2StartDocument().
1365 static void startDocumentHandler(void* closure)
1367 xmlParserCtxt* ctxt = static_cast<xmlParserCtxt*>(closure);
1368 XMLDocumentParser* parser = getParser(closure);
1369 switchEncoding(ctxt, parser->isCurrentlyParsing8BitChunk());
1370 parser->startDocument(toString(ctxt->version), toString(ctxt->encoding), ctxt->standalone);
1371 xmlSAX2StartDocument(closure);
// libxml2 endDocument callback: notifies the XMLDocumentParser first, then
// runs libxml2's default xmlSAX2EndDocument().
1374 static void endDocumentHandler(void* closure)
1376 getParser(closure)->endDocument();
1377 xmlSAX2EndDocument(closure);
// libxml2 internalSubset callback: forwards the DOCTYPE name and identifiers
// (converted with toString()) to XMLDocumentParser::internalSubset(), then
// runs libxml2's default xmlSAX2InternalSubset() with the original arguments.
1380 static void internalSubsetHandler(void* closure, const xmlChar* name, const xmlChar* externalID, const xmlChar* systemID)
1382 getParser(closure)->internalSubset(toString(name), toString(externalID), toString(systemID));
1383 xmlSAX2InternalSubset(closure, name, externalID, systemID);
// libxml2 externalSubset callback.
// Only the external (public) identifier is examined: if it exactly matches one
// of the XHTML / XHTML Mobile DTD identifiers listed below, the parser is
// flagged as handling an XHTML document. getEntityHandler() consults that flag
// before resolving XHTML named entities.
1386 static void externalSubsetHandler(void* closure, const xmlChar*, const xmlChar* externalId, const xmlChar*)
1388 String extId = toString(externalId);
1389 if ((extId == "-//W3C//DTD XHTML 1.0 Transitional//EN")
1390 || (extId == "-//W3C//DTD XHTML 1.1//EN")
1391 || (extId == "-//W3C//DTD XHTML 1.0 Strict//EN")
1392 || (extId == "-//W3C//DTD XHTML 1.0 Frameset//EN")
1393 || (extId == "-//W3C//DTD XHTML Basic 1.0//EN")
1394 || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN")
1395 || (extId == "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN")
1396 || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.0//EN")
1397 || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.1//EN")
1398 || (extId == "-//WAPFORUM//DTD XHTML Mobile 1.2//EN"))
1399 getParser(closure)->setIsXHTMLDocument(true); // controls if we replace entities or not.
// libxml2 ignorableWhitespace callback. It does nothing and is registered only
// for the reason given in the comment below.
1402 static void ignorableWhitespaceHandler(void*, const xmlChar*, int)
1404 // nothing to do, but we need this to work around a crasher
1405 // http://bugzilla.gnome.org/show_bug.cgi?id=172255
1406 // http://bugs.webkit.org/show_bug.cgi?id=5792
// Builds the libxml2 SAX handler table and creates the parser context.
// The table is zeroed, then populated with this file's static trampolines
// (plus libxml2's own xmlSAX2EntityDecl) and tagged XML_SAX2_MAGIC.
// Fragment parsing creates a memory parser over `chunk`; the other path
// asserts that `chunk` carries no data and creates a string parser.
// NOTE(review): lines are elided in this excerpt -- the declaration of `sax`,
// some state resets, and the `else` introducing the non-fragment path are not
// visible.
1409 void XMLDocumentParser::initializeParserContext(const CString& chunk)
// Zeroing first leaves every callback that is not assigned below null.
1412 memset(&sax, 0, sizeof(sax));
1414 sax.error = normalErrorHandler;
1415 sax.fatalError = fatalErrorHandler;
1416 sax.characters = charactersHandler;
1417 sax.processingInstruction = processingInstructionHandler;
1418 sax.cdataBlock = cdataBlockHandler;
1419 sax.comment = commentHandler;
1420 sax.warning = warningHandler;
1421 sax.startElementNs = startElementNsHandler;
1422 sax.endElementNs = endElementNsHandler;
1423 sax.getEntity = getEntityHandler;
1424 sax.startDocument = startDocumentHandler;
1425 sax.endDocument = endDocumentHandler;
1426 sax.internalSubset = internalSubsetHandler;
1427 sax.externalSubset = externalSubsetHandler;
1428 sax.ignorableWhitespace = ignorableWhitespaceHandler;
1429 sax.entityDecl = xmlSAX2EntityDecl;
1430 sax.initialized = XML_SAX2_MAGIC;
1431 DocumentParser::startParsing();
// Reset the per-document flags read by processingInstruction() and doEnd().
1434 m_sawXSLTransform = false;
1435 m_sawFirstElement = false;
// The scope object is constructed with the document's fetcher and stays alive
// while the context is created.
1437 XMLDocumentParserScope scope(document()->fetcher());
1438 if (m_parsingFragment)
1439 m_context = XMLParserContext::createMemoryParser(&sax, this, chunk);
1441 ASSERT(!chunk.data());
1442 m_context = XMLParserContext::createStringParser(&sax, this);
// Finishes parsing and performs the end-of-document work.
// After libxml2 is told the input is complete, one of two visible paths runs:
//  - XML viewer mode (no error, no CSS, no XSL transform, and the document has
//    no style information): the document is turned into a tree view.
//  - An XSL transform PI was seen: the accumulated original source is
//    re-parsed into an xmlDoc, installed as the document's transform source,
//    and style resolution is triggered with parsing temporarily marked off.
// NOTE(review): lines are elided in this excerpt -- the guards around
// finishParsing() and the bail-out after styleResolverChanged() are not
// visible.
1446 void XMLDocumentParser::doEnd()
1450 // Tell libxml we're done.
1452 XMLDocumentParserScope scope(document()->fetcher());
1453 finishParsing(context());
1460 bool xmlViewerMode = !m_sawError && !m_sawCSS && !m_sawXSLTransform && hasNoStyleInformation(document());
1461 if (xmlViewerMode) {
1462 XMLTreeViewer xmlTreeViewer(document());
1463 xmlTreeViewer.transformDocumentToTreeView();
1464 } else if (m_sawXSLTransform) {
// m_originalSourceForTransform is the source accumulated for this transform.
1465 xmlDocPtr doc = xmlDocPtrForString(document()->fetcher(), m_originalSourceForTransform.toString(), document()->url().string());
1466 document()->setTransformSource(adoptPtr(new TransformSource(doc)));
1468 document()->setParsing(false); // Make the document think it's done, so it will apply XSL stylesheets.
1469 document()->styleResolverChanged(RecalcStyleImmediately);
1471 // styleResolverChanged() call can detach the parser and null out its document.
1472 // In that case, we just bail out.
// Restore the parsing flag cleared above, then stop via the base class.
1476 document()->setParsing(true);
1477 DocumentParser::stopParsing();
// Parses `source` in one shot into a libxml2 document via xmlReadMemory(),
// using `url` (converted with latin1()) as the document URL and
// XSLT_PARSE_OPTIONS as the parser options. An XMLDocumentParserScope is
// constructed from `fetcher` and errorFunc for the duration of the parse.
// NOTE(review): the statement taken for an empty `source` is elided in this
// excerpt (presumably returns a null document -- confirm).
1481 xmlDocPtr xmlDocPtrForString(ResourceFetcher* fetcher, const String& source, const String& url)
1483 if (source.isEmpty())
1485 // Parse in a single chunk into an xmlDocPtr
1486 // FIXME: Hook up error handlers so that a failure to parse the main document results in
1487 // good error messages.
1488 XMLDocumentParserScope scope(fetcher, errorFunc, 0);
// The input wrapper supplies the buffer, its size and its encoding name.
1489 XMLParserInput input(source);
1490 return xmlReadMemory(input.data(), input.size(), url.latin1().data(), input.encoding(), XSLT_PARSE_OPTIONS);
// Current line of the libxml2 input, built from a one-based int; falls back to
// line 1 when there is no parser context.
1493 OrdinalNumber XMLDocumentParser::lineNumber() const
1495 return OrdinalNumber::fromOneBasedInt(context() ? context()->input->line : 1);
// Current column of the libxml2 input, built from a one-based int; falls back
// to column 1 when there is no parser context.
1498 OrdinalNumber XMLDocumentParser::columnNumber() const
1500 return OrdinalNumber::fromOneBasedInt(context() ? context()->input->col : 1);
// Current line/column of the libxml2 input as a TextPosition, built from
// one-based ints.
// NOTE(review): the condition guarding the minimumPosition() return is elided
// in this excerpt (presumably a null-context check -- confirm).
1503 TextPosition XMLDocumentParser::textPosition() const
1505 xmlParserCtxtPtr context = this->context();
1507 return TextPosition::minimumPosition();
1508 return TextPosition(OrdinalNumber::fromOneBasedInt(context->input->line),
1509 OrdinalNumber::fromOneBasedInt(context->input->col));
// Stops parsing: runs the base-class stopParsing() and then asks libxml2 to
// stop the parser context.
// NOTE(review): a line between the two calls is elided in this excerpt
// (possibly a guard on context() -- confirm).
1512 void XMLDocumentParser::stopParsing()
1514 DocumentParser::stopParsing();
1516 xmlStopParser(context());
// Resumes a paused parser in three stages: replay queued SAX callbacks, feed
// any source text that arrived while paused, and finally end the document if
// finish() was already requested and nothing new was queued.
// Preconditions (asserted): the parser is not detached and is currently paused.
// NOTE(review): lines are elided in this excerpt -- the early-out taken when a
// replayed callback pauses the parser again, and the statement executed by the
// final `if`, are not visible.
1519 void XMLDocumentParser::resumeParsing()
1521 ASSERT(!isDetached());
1522 ASSERT(m_parserPaused);
1524 m_parserPaused = false;
1526 // First, execute any pending callbacks
1527 while (!m_pendingCallbacks.isEmpty()) {
// takeFirst() removes the callback from the queue before it is invoked.
1528 OwnPtr<PendingCallback> callback = m_pendingCallbacks.takeFirst();
1529 callback->call(this);
1531 // A callback paused the parser
1536 // Then, write any pending data
// Copy and clear first so that append() starts from an empty m_pendingSrc.
1537 SegmentedString rest = m_pendingSrc;
1538 m_pendingSrc.clear();
1539 // There is normally only one string left, so toString() shouldn't copy.
1540 // In any case, the XML parser runs on the main thread and it's OK if
1541 // the passed string has more than one reference.
1542 append(rest.toString().impl());
1544 // Finally, if finish() has been called and write() didn't result
1545 // in any further callbacks being queued, call end()
1546 if (m_finishCalled && m_pendingCallbacks.isEmpty())
// Parses `chunk` as fragment content in a single pass.
// The chunk is converted to UTF-8, a parser context is created over it,
// xmlParseContent() is run, and endDocument() is called to flush open text.
// Returns true when libxml2 reports the content as well-formed, or when it is
// not well-formed but the context has no recorded last error.
// Precondition (asserted): the parser is in fragment mode.
// NOTE(review): lines are elided in this excerpt -- the statements taken for
// an over-long chunk and for a partially consumed chunk are not visible
// (presumably both return false -- confirm).
1550 bool XMLDocumentParser::appendFragmentSource(const String& chunk)
1553 ASSERT(m_parsingFragment);
1555 CString chunkAsUtf8 = chunk.utf8();
1557 // libxml2 takes an int for a length, and therefore can't handle XML chunks larger than 2 GiB.
1558 if (chunkAsUtf8.length() > INT_MAX)
1561 initializeParserContext(chunkAsUtf8);
1562 xmlParseContent(context());
1563 endDocument(); // Close any open text nodes.
1565 // FIXME: If this code is actually needed, it should probably move to finish()
1566 // XMLDocumentParserQt has a similar check (m_stream.error() == QXmlStreamReader::PrematureEndOfDocumentError) in doEnd().
1567 // Check if all the chunk has been processed.
// A result of -1 from xmlByteConsumed() is treated like a partially consumed chunk.
1568 long bytesProcessed = xmlByteConsumed(context());
1569 if (bytesProcessed == -1 || ((unsigned long)bytesProcessed) != chunkAsUtf8.length()) {
1570 // FIXME: I don't believe we can hit this case without also having seen an error or a null byte.
1571 // If we hit this ASSERT, we've found a test case which demonstrates the need for this code.
1572 ASSERT(m_sawError || (bytesProcessed >= 0 && !chunkAsUtf8.data()[bytesProcessed]));
1576 // No error if the chunk is well formed or it is not but we have no error.
1577 return context()->wellFormed || !xmlCtxtGetLastError(context());
1580 // --------------------------------
// Scratch state for parseAttributes(): the SAX handler stores each attribute's
// qualified name and value in `attributes`.
// NOTE(review): the remaining members are elided in this excerpt; the code
// that uses this struct also reads and writes a `gotAttributes` flag.
1582 struct AttributeParseState {
1583 HashMap<String, String> attributes;
// startElementNs handler used only by parseAttributes().
// It looks for an element whose local name is "attrs". For that element it
// marks the AttributeParseState (held in the context's _private field) as
// having received attributes, and stores each attribute as qualified name ->
// value.
// NOTE(review): the statement taken when the element is not "attrs" is elided
// in this excerpt (presumably an early return -- confirm).
1587 static void attributesStartElementNsHandler(void* closure, const xmlChar* xmlLocalName, const xmlChar* /*xmlPrefix*/,
1588 const xmlChar* /*xmlURI*/, int /*nbNamespaces*/, const xmlChar** /*namespaces*/,
1589 int nbAttributes, int /*nbDefaulted*/, const xmlChar** libxmlAttributes)
1591 if (strcmp(reinterpret_cast<const char*>(xmlLocalName), "attrs") != 0)
1594 xmlParserCtxtPtr ctxt = static_cast<xmlParserCtxtPtr>(closure);
1595 AttributeParseState* state = static_cast<AttributeParseState*>(ctxt->_private);
1597 state->gotAttributes = true;
// The flat libxml2 pointer array is viewed as an array of xmlSAX2Attributes records.
1599 xmlSAX2Attributes* attributes = reinterpret_cast<xmlSAX2Attributes*>(libxmlAttributes);
1600 for (int i = 0; i < nbAttributes; i++) {
1601 String attrLocalName = toString(attributes[i].localname);
// The value is delimited by a [value, end) pointer pair.
1602 int valueLength = (int) (attributes[i].end - attributes[i].value);
1603 String attrValue = toString(attributes[i].value, valueLength);
1604 String attrPrefix = toString(attributes[i].prefix);
// Qualified name is "prefix:local" when a prefix is present, else the local name.
1605 String attrQName = attrPrefix.isEmpty() ? attrLocalName : attrPrefix + ":" + attrLocalName;
1607 state->attributes.set(attrQName, attrValue);
// Parses `string` as an XML attribute list.
// The text is wrapped in a synthetic `<attrs ... />` element and run through a
// throwaway SAX parser whose only callback is attributesStartElementNsHandler.
// Returns the collected qualified-name -> value map. `attrsOK` is set to
// whether the handler saw the `attrs` element.
// NOTE(review): the declaration of `sax` is elided in this excerpt.
1611 HashMap<String, String> parseAttributes(const String& string, bool& attrsOK)
1613 AttributeParseState state;
1614 state.gotAttributes = false;
// Zeroing first leaves every callback other than startElementNs null.
1617 memset(&sax, 0, sizeof(sax));
1618 sax.startElementNs = attributesStartElementNsHandler;
1619 sax.initialized = XML_SAX2_MAGIC;
// `state` is passed as the second argument; the handler reads it back from
// the context's _private field.
1620 RefPtr<XMLParserContext> parser = XMLParserContext::createStringParser(&sax, &state);
1621 String parseString = "<?xml version=\"1.0\"?><attrs " + string + " />";
1622 parseChunk(parser->context(), parseString);
1623 finishParsing(parser->context());
1624 attrsOK = state.gotAttributes;
1625 return state.attributes;
1628 } // namespace WebCore