/*
 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
 * Copyright (C) 2009, 2010 Google Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
28 #include "MarkupAccumulator.h"
30 #include "CDATASection.h"
32 #include "DocumentFragment.h"
33 #include "DocumentType.h"
35 #include "HTMLElement.h"
36 #include "HTMLNames.h"
38 #include "ProcessingInstruction.h"
39 #include "XMLNSNames.h"
40 #include <wtf/unicode/CharacterNames.h>
44 using namespace HTMLNames;
46 void appendCharactersReplacingEntities(StringBuilder& result, const UChar* content, size_t length, EntityMask entityMask)
48 DEFINE_STATIC_LOCAL(const String, ampReference, ("&"));
49 DEFINE_STATIC_LOCAL(const String, ltReference, ("<"));
50 DEFINE_STATIC_LOCAL(const String, gtReference, (">"));
51 DEFINE_STATIC_LOCAL(const String, quotReference, ("""));
52 DEFINE_STATIC_LOCAL(const String, nbspReference, (" "));
54 static const EntityDescription entityMaps[] = {
55 { '&', ampReference, EntityAmp },
56 { '<', ltReference, EntityLt },
57 { '>', gtReference, EntityGt },
58 { '"', quotReference, EntityQuot },
59 { noBreakSpace, nbspReference, EntityNbsp },
62 size_t positionAfterLastEntity = 0;
63 for (size_t i = 0; i < length; ++i) {
64 for (size_t m = 0; m < WTF_ARRAY_LENGTH(entityMaps); ++m) {
65 if (content[i] == entityMaps[m].entity && entityMaps[m].mask & entityMask) {
66 result.append(content + positionAfterLastEntity, i - positionAfterLastEntity);
67 result.append(entityMaps[m].reference);
68 positionAfterLastEntity = i + 1;
73 result.append(content + positionAfterLastEntity, length - positionAfterLastEntity);
76 MarkupAccumulator::MarkupAccumulator(Vector<Node*>* nodes, EAbsoluteURLs resolveUrlsMethod, const Range* range)
79 , m_resolveURLsMethod(resolveUrlsMethod)
83 MarkupAccumulator::~MarkupAccumulator()
87 String MarkupAccumulator::serializeNodes(Node* targetNode, Node* nodeToSkip, EChildrenOnly childrenOnly)
89 return serializeNodes(targetNode, nodeToSkip, childrenOnly, 0);
92 String MarkupAccumulator::serializeNodes(Node* targetNode, Node* nodeToSkip, EChildrenOnly childrenOnly, Vector<QualifiedName>* tagNamesToSkip)
94 serializeNodesWithNamespaces(targetNode, nodeToSkip, childrenOnly, 0, tagNamesToSkip);
95 return m_markup.toString();
98 void MarkupAccumulator::serializeNodesWithNamespaces(Node* targetNode, Node* nodeToSkip, EChildrenOnly childrenOnly, const Namespaces* namespaces, Vector<QualifiedName>* tagNamesToSkip)
100 if (targetNode == nodeToSkip)
103 if (tagNamesToSkip) {
104 for (size_t i = 0; i < tagNamesToSkip->size(); ++i) {
105 if (targetNode->hasTagName(tagNamesToSkip->at(i)))
110 Namespaces namespaceHash;
112 namespaceHash = *namespaces;
115 appendStartTag(targetNode, &namespaceHash);
117 if (!(targetNode->document()->isHTMLDocument() && elementCannotHaveEndTag(targetNode))) {
118 for (Node* current = targetNode->firstChild(); current; current = current->nextSibling())
119 serializeNodesWithNamespaces(current, nodeToSkip, IncludeNode, &namespaceHash, tagNamesToSkip);
123 appendEndTag(targetNode);
126 String MarkupAccumulator::resolveURLIfNeeded(const Element* element, const String& urlString) const
128 switch (m_resolveURLsMethod) {
130 return element->document()->completeURL(urlString).string();
132 case ResolveNonLocalURLs:
133 if (!element->document()->url().isLocalFile())
134 return element->document()->completeURL(urlString).string();
137 case DoNotResolveURLs:
143 void MarkupAccumulator::appendString(const String& string)
145 m_markup.append(string);
148 void MarkupAccumulator::appendStartTag(Node* node, Namespaces* namespaces)
150 appendStartMarkup(m_markup, node, namespaces);
152 m_nodes->append(node);
155 void MarkupAccumulator::appendEndTag(Node* node)
157 appendEndMarkup(m_markup, node);
160 size_t MarkupAccumulator::totalLength(const Vector<String>& strings)
163 for (size_t i = 0; i < strings.size(); ++i)
164 length += strings[i].length();
168 void MarkupAccumulator::concatenateMarkup(StringBuilder& result)
170 result.append(m_markup);
173 void MarkupAccumulator::appendAttributeValue(StringBuilder& result, const String& attribute, bool documentIsHTML)
175 appendCharactersReplacingEntities(result, attribute.characters(), attribute.length(),
176 documentIsHTML ? EntityMaskInHTMLAttributeValue : EntityMaskInAttributeValue);
179 void MarkupAccumulator::appendCustomAttributes(StringBuilder&, Element*, Namespaces*)
183 void MarkupAccumulator::appendQuotedURLAttributeValue(StringBuilder& result, const Element* element, const Attribute& attribute)
185 ASSERT(element->isURLAttribute(attribute));
186 const String resolvedURLString = resolveURLIfNeeded(element, attribute.value());
187 UChar quoteChar = '"';
188 String strippedURLString = resolvedURLString.stripWhiteSpace();
189 if (protocolIsJavaScript(strippedURLString)) {
190 // minimal escaping for javascript urls
191 if (strippedURLString.contains('"')) {
192 if (strippedURLString.contains('\''))
193 strippedURLString.replaceWithLiteral('"', """);
197 result.append(quoteChar);
198 result.append(strippedURLString);
199 result.append(quoteChar);
203 // FIXME: This does not fully match other browsers. Firefox percent-escapes non-ASCII characters for innerHTML.
204 result.append(quoteChar);
205 appendAttributeValue(result, resolvedURLString, false);
206 result.append(quoteChar);
209 void MarkupAccumulator::appendNodeValue(StringBuilder& result, const Node* node, const Range* range, EntityMask entityMask)
211 String str = node->nodeValue();
212 const UChar* characters = str.characters();
213 size_t length = str.length();
217 if (node == range->endContainer(ec))
218 length = range->endOffset(ec);
219 if (node == range->startContainer(ec)) {
220 size_t start = range->startOffset(ec);
226 appendCharactersReplacingEntities(result, characters, length, entityMask);
229 bool MarkupAccumulator::shouldAddNamespaceElement(const Element* element)
231 // Don't add namespace attribute if it is already defined for this elem.
232 const AtomicString& prefix = element->prefix();
233 if (prefix.isEmpty())
234 return !element->hasAttribute(xmlnsAtom);
236 DEFINE_STATIC_LOCAL(String, xmlnsWithColon, ("xmlns:"));
237 return !element->hasAttribute(xmlnsWithColon + prefix);
240 bool MarkupAccumulator::shouldAddNamespaceAttribute(const Attribute& attribute, Namespaces& namespaces)
242 namespaces.checkConsistency();
244 // Don't add namespace attributes twice
245 if (attribute.name() == XMLNSNames::xmlnsAttr) {
246 namespaces.set(emptyAtom.impl(), attribute.value().impl());
250 QualifiedName xmlnsPrefixAttr(xmlnsAtom, attribute.localName(), XMLNSNames::xmlnsNamespaceURI);
251 if (attribute.name() == xmlnsPrefixAttr) {
252 namespaces.set(attribute.localName().impl(), attribute.value().impl());
259 void MarkupAccumulator::appendNamespace(StringBuilder& result, const AtomicString& prefix, const AtomicString& namespaceURI, Namespaces& namespaces)
261 namespaces.checkConsistency();
262 if (namespaceURI.isEmpty())
265 // Use emptyAtoms's impl() for both null and empty strings since the HashMap can't handle 0 as a key
266 AtomicStringImpl* pre = prefix.isEmpty() ? emptyAtom.impl() : prefix.impl();
267 AtomicStringImpl* foundNS = namespaces.get(pre);
268 if (foundNS != namespaceURI.impl()) {
269 namespaces.set(pre, namespaceURI.impl());
271 result.append(xmlnsAtom.string());
272 if (!prefix.isEmpty()) {
274 result.append(prefix);
279 appendAttributeValue(result, namespaceURI, false);
284 EntityMask MarkupAccumulator::entityMaskForText(Text* text) const
286 const QualifiedName* parentName = 0;
287 if (text->parentElement())
288 parentName = &static_cast<Element*>(text->parentElement())->tagQName();
290 if (parentName && (*parentName == scriptTag || *parentName == styleTag || *parentName == xmpTag))
291 return EntityMaskInCDATA;
293 return text->document()->isHTMLDocument() ? EntityMaskInHTMLPCDATA : EntityMaskInPCDATA;
296 void MarkupAccumulator::appendText(StringBuilder& result, Text* text)
298 appendNodeValue(result, text, m_range, entityMaskForText(text));
301 void MarkupAccumulator::appendComment(StringBuilder& result, const String& comment)
303 // FIXME: Comment content is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "-->".
304 static const char commentBegin[] = "<!--";
305 result.append(commentBegin, sizeof(commentBegin) - 1);
306 result.append(comment);
307 static const char commentEnd[] = "-->";
308 result.append(commentEnd, sizeof(commentEnd) - 1);
311 void MarkupAccumulator::appendXMLDeclaration(StringBuilder& result, const Document* document)
313 if (!document->hasXMLDeclaration())
316 static const char xmlDeclStart[] = "<?xml version=\"";
317 result.append(xmlDeclStart, sizeof(xmlDeclStart) - 1);
318 result.append(document->xmlVersion());
319 const String& encoding = document->xmlEncoding();
320 if (!encoding.isEmpty()) {
321 static const char xmlEncoding[] = "\" encoding=\"";
322 result.append(xmlEncoding, sizeof(xmlEncoding) - 1);
323 result.append(encoding);
325 if (document->xmlStandaloneStatus() != Document::StandaloneUnspecified) {
326 static const char xmlStandalone[] = "\" standalone=\"";
327 result.append(xmlStandalone, sizeof(xmlStandalone) - 1);
328 if (document->xmlStandalone()) {
329 static const char standaloneYes[] = "yes";
330 result.append(standaloneYes, sizeof(standaloneYes) - 1);
332 static const char standaloneNo[] = "no";
333 result.append(standaloneNo, sizeof(standaloneNo) - 1);
337 static const char xmlDeclEnd[] = "\"?>";
338 result.append(xmlDeclEnd, sizeof(xmlDeclEnd) - 1);
341 void MarkupAccumulator::appendDocumentType(StringBuilder& result, const DocumentType* n)
343 if (n->name().isEmpty())
346 static const char doctypeString[] = "<!DOCTYPE ";
347 result.append(doctypeString, sizeof(doctypeString) - 1);
348 result.append(n->name());
349 if (!n->publicId().isEmpty()) {
350 static const char publicString[] = " PUBLIC \"";
351 result.append(publicString, sizeof(publicString) - 1);
352 result.append(n->publicId());
354 if (!n->systemId().isEmpty()) {
357 result.append(n->systemId());
360 } else if (!n->systemId().isEmpty()) {
361 static const char systemString[] = " SYSTEM \"";
362 result.append(systemString, sizeof(systemString) - 1);
363 result.append(n->systemId());
366 if (!n->internalSubset().isEmpty()) {
369 result.append(n->internalSubset());
375 void MarkupAccumulator::appendProcessingInstruction(StringBuilder& result, const String& target, const String& data)
377 // FIXME: PI data is not escaped, but XMLSerializer (and possibly other callers) this should raise an exception if it includes "?>".
380 result.append(target);
387 void MarkupAccumulator::appendElement(StringBuilder& result, Element* element, Namespaces* namespaces)
389 appendOpenTag(result, element, namespaces);
391 if (element->hasAttributes()) {
392 unsigned length = element->attributeCount();
393 for (unsigned int i = 0; i < length; i++)
394 appendAttribute(result, element, *element->attributeItem(i), namespaces);
397 // Give an opportunity to subclasses to add their own attributes.
398 appendCustomAttributes(result, element, namespaces);
400 appendCloseTag(result, element);
403 void MarkupAccumulator::appendOpenTag(StringBuilder& result, Element* element, Namespaces* namespaces)
406 result.append(element->nodeNamePreservingCase());
407 if (!element->document()->isHTMLDocument() && namespaces && shouldAddNamespaceElement(element))
408 appendNamespace(result, element->prefix(), element->namespaceURI(), *namespaces);
411 void MarkupAccumulator::appendCloseTag(StringBuilder& result, Element* element)
413 if (shouldSelfClose(element)) {
414 if (element->isHTMLElement())
415 result.append(' '); // XHTML 1.0 <-> HTML compatibility.
421 void MarkupAccumulator::appendAttribute(StringBuilder& result, Element* element, const Attribute& attribute, Namespaces* namespaces)
423 bool documentIsHTML = element->document()->isHTMLDocument();
428 result.append(attribute.name().localName());
430 result.append(attribute.name().toString());
434 if (element->isURLAttribute(attribute))
435 appendQuotedURLAttributeValue(result, element, attribute);
438 appendAttributeValue(result, attribute.value(), documentIsHTML);
442 if (!documentIsHTML && namespaces && shouldAddNamespaceAttribute(attribute, *namespaces))
443 appendNamespace(result, attribute.prefix(), attribute.namespaceURI(), *namespaces);
446 void MarkupAccumulator::appendCDATASection(StringBuilder& result, const String& section)
448 // FIXME: CDATA content is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "]]>".
449 static const char cdataBegin[] = "<![CDATA[";
450 result.append(cdataBegin, sizeof(cdataBegin) - 1);
451 result.append(section);
452 static const char cdataEnd[] = "]]>";
453 result.append(cdataEnd, sizeof(cdataEnd) - 1);
456 void MarkupAccumulator::appendStartMarkup(StringBuilder& result, const Node* node, Namespaces* namespaces)
459 namespaces->checkConsistency();
461 switch (node->nodeType()) {
462 case Node::TEXT_NODE:
463 appendText(result, toText(const_cast<Node*>(node)));
465 case Node::COMMENT_NODE:
466 appendComment(result, static_cast<const Comment*>(node)->data());
468 case Node::DOCUMENT_NODE:
469 appendXMLDeclaration(result, static_cast<const Document*>(node));
471 case Node::DOCUMENT_FRAGMENT_NODE:
473 case Node::DOCUMENT_TYPE_NODE:
474 appendDocumentType(result, static_cast<const DocumentType*>(node));
476 case Node::PROCESSING_INSTRUCTION_NODE:
477 appendProcessingInstruction(result, static_cast<const ProcessingInstruction*>(node)->target(), static_cast<const ProcessingInstruction*>(node)->data());
479 case Node::ELEMENT_NODE:
480 appendElement(result, static_cast<Element*>(const_cast<Node*>(node)), namespaces);
482 case Node::CDATA_SECTION_NODE:
483 appendCDATASection(result, static_cast<const CDATASection*>(node)->data());
485 case Node::ATTRIBUTE_NODE:
486 case Node::ENTITY_NODE:
487 case Node::ENTITY_REFERENCE_NODE:
488 case Node::NOTATION_NODE:
489 case Node::XPATH_NAMESPACE_NODE:
490 ASSERT_NOT_REACHED();
495 // Rules of self-closure
496 // 1. No elements in HTML documents use the self-closing syntax.
497 // 2. Elements w/ children never self-close because they use a separate end tag.
498 // 3. HTML elements which do not have a "forbidden" end tag will close with a separate end tag.
499 // 4. Other elements self-close.
500 bool MarkupAccumulator::shouldSelfClose(const Node* node)
502 if (node->document()->isHTMLDocument())
504 if (node->hasChildNodes())
506 if (node->isHTMLElement() && !elementCannotHaveEndTag(node))
511 bool MarkupAccumulator::elementCannotHaveEndTag(const Node* node)
513 if (!node->isHTMLElement())
516 // FIXME: ieForbidsInsertHTML may not be the right function to call here
517 // ieForbidsInsertHTML is used to disallow setting innerHTML/outerHTML
518 // or createContextualFragment. It does not necessarily align with
519 // which elements should be serialized w/o end tags.
520 return static_cast<const HTMLElement*>(node)->ieForbidsInsertHTML();
523 void MarkupAccumulator::appendEndMarkup(StringBuilder& result, const Node* node)
525 if (!node->isElementNode() || shouldSelfClose(node) || (!node->hasChildNodes() && elementCannotHaveEndTag(node)))
530 result.append(static_cast<const Element*>(node)->nodeNamePreservingCase());