2 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2012 Apple Inc. All rights reserved.
3 * Copyright (C) 2009, 2010 Google Inc. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
18 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
20 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #include "core/editing/MarkupAccumulator.h"
30 #include "core/HTMLNames.h"
31 #include "core/XLinkNames.h"
32 #include "core/XMLNSNames.h"
33 #include "core/XMLNames.h"
34 #include "core/dom/CDATASection.h"
35 #include "core/dom/Comment.h"
36 #include "core/dom/Document.h"
37 #include "core/dom/DocumentFragment.h"
38 #include "core/dom/DocumentType.h"
39 #include "core/dom/ProcessingInstruction.h"
40 #include "core/editing/Editor.h"
41 #include "core/html/HTMLElement.h"
42 #include "core/html/HTMLTemplateElement.h"
43 #include "platform/weborigin/KURL.h"
44 #include "wtf/unicode/CharacterNames.h"
48 using namespace HTMLNames;
50 struct EntityDescription {
52 const CString& reference;
56 template <typename CharType>
57 static inline void appendCharactersReplacingEntitiesInternal(StringBuilder& result, CharType* text, unsigned length, const EntityDescription entityMaps[], unsigned entityMapsCount, EntityMask entityMask)
59 unsigned positionAfterLastEntity = 0;
60 for (unsigned i = 0; i < length; ++i) {
61 for (unsigned entityIndex = 0; entityIndex < entityMapsCount; ++entityIndex) {
62 if (text[i] == entityMaps[entityIndex].entity && entityMaps[entityIndex].mask & entityMask) {
63 result.append(text + positionAfterLastEntity, i - positionAfterLastEntity);
64 const CString& replacement = entityMaps[entityIndex].reference;
65 result.append(replacement.data(), replacement.length());
66 positionAfterLastEntity = i + 1;
71 result.append(text + positionAfterLastEntity, length - positionAfterLastEntity);
74 void MarkupAccumulator::appendCharactersReplacingEntities(StringBuilder& result, const String& source, unsigned offset, unsigned length, EntityMask entityMask)
76 DEFINE_STATIC_LOCAL(const CString, ampReference, ("&"));
77 DEFINE_STATIC_LOCAL(const CString, ltReference, ("<"));
78 DEFINE_STATIC_LOCAL(const CString, gtReference, (">"));
79 DEFINE_STATIC_LOCAL(const CString, quotReference, ("""));
80 DEFINE_STATIC_LOCAL(const CString, nbspReference, (" "));
82 static const EntityDescription entityMaps[] = {
83 { '&', ampReference, EntityAmp },
84 { '<', ltReference, EntityLt },
85 { '>', gtReference, EntityGt },
86 { '"', quotReference, EntityQuot },
87 { noBreakSpace, nbspReference, EntityNbsp },
90 if (!(offset + length))
93 ASSERT(offset + length <= source.length());
95 appendCharactersReplacingEntitiesInternal(result, source.characters8() + offset, length, entityMaps, WTF_ARRAY_LENGTH(entityMaps), entityMask);
97 appendCharactersReplacingEntitiesInternal(result, source.characters16() + offset, length, entityMaps, WTF_ARRAY_LENGTH(entityMaps), entityMask);
100 MarkupAccumulator::MarkupAccumulator(WillBeHeapVector<RawPtrWillBeMember<Node> >* nodes, EAbsoluteURLs resolveUrlsMethod, const Range* range, SerializationType serializationType)
103 , m_resolveURLsMethod(resolveUrlsMethod)
104 , m_serializationType(serializationType)
108 MarkupAccumulator::~MarkupAccumulator()
112 String MarkupAccumulator::serializeNodes(Node& targetNode, EChildrenOnly childrenOnly, Vector<QualifiedName>* tagNamesToSkip)
114 Namespaces* namespaces = 0;
115 Namespaces namespaceHash;
116 if (!serializeAsHTMLDocument(targetNode)) {
117 // Add pre-bound namespaces for XML fragments.
118 namespaceHash.set(xmlAtom, XMLNames::xmlNamespaceURI);
119 namespaces = &namespaceHash;
122 serializeNodesWithNamespaces(targetNode, childrenOnly, namespaces, tagNamesToSkip);
123 return m_markup.toString();
126 void MarkupAccumulator::serializeNodesWithNamespaces(Node& targetNode, EChildrenOnly childrenOnly, const Namespaces* namespaces, Vector<QualifiedName>* tagNamesToSkip)
128 if (tagNamesToSkip) {
129 for (size_t i = 0; i < tagNamesToSkip->size(); ++i) {
130 if (targetNode.hasTagName(tagNamesToSkip->at(i)))
135 Namespaces namespaceHash;
137 namespaceHash = *namespaces;
140 appendStartTag(targetNode, &namespaceHash);
142 if (!(serializeAsHTMLDocument(targetNode) && elementCannotHaveEndTag(targetNode))) {
143 Node* current = isHTMLTemplateElement(targetNode) ? toHTMLTemplateElement(targetNode).content()->firstChild() : targetNode.firstChild();
144 for ( ; current; current = current->nextSibling())
145 serializeNodesWithNamespaces(*current, IncludeNode, &namespaceHash, tagNamesToSkip);
149 appendEndTag(targetNode);
152 String MarkupAccumulator::resolveURLIfNeeded(const Element& element, const String& urlString) const
154 switch (m_resolveURLsMethod) {
156 return element.document().completeURL(urlString).string();
158 case ResolveNonLocalURLs:
159 if (!element.document().url().isLocalFile())
160 return element.document().completeURL(urlString).string();
163 case DoNotResolveURLs:
169 void MarkupAccumulator::appendString(const String& string)
171 m_markup.append(string);
174 void MarkupAccumulator::appendStartTag(Node& node, Namespaces* namespaces)
176 appendStartMarkup(m_markup, node, namespaces);
178 m_nodes->append(&node);
181 void MarkupAccumulator::appendEndTag(const Node& node)
183 appendEndMarkup(m_markup, node);
186 size_t MarkupAccumulator::totalLength(const Vector<String>& strings)
189 for (size_t i = 0; i < strings.size(); ++i)
190 length += strings[i].length();
194 void MarkupAccumulator::concatenateMarkup(StringBuilder& result)
196 result.append(m_markup);
199 void MarkupAccumulator::appendAttributeValue(StringBuilder& result, const String& attribute, bool documentIsHTML)
201 appendCharactersReplacingEntities(result, attribute, 0, attribute.length(),
202 documentIsHTML ? EntityMaskInHTMLAttributeValue : EntityMaskInAttributeValue);
205 void MarkupAccumulator::appendCustomAttributes(StringBuilder&, const Element&, Namespaces*)
209 void MarkupAccumulator::appendQuotedURLAttributeValue(StringBuilder& result, const Element& element, const Attribute& attribute)
211 ASSERT(element.isURLAttribute(attribute));
212 const String resolvedURLString = resolveURLIfNeeded(element, attribute.value());
213 UChar quoteChar = '"';
214 String strippedURLString = resolvedURLString.stripWhiteSpace();
215 if (protocolIsJavaScript(strippedURLString)) {
216 // minimal escaping for javascript urls
217 if (strippedURLString.contains('"')) {
218 if (strippedURLString.contains('\''))
219 strippedURLString.replaceWithLiteral('"', """);
223 result.append(quoteChar);
224 result.append(strippedURLString);
225 result.append(quoteChar);
229 // FIXME: This does not fully match other browsers. Firefox percent-escapes non-ASCII characters for innerHTML.
230 result.append(quoteChar);
231 appendAttributeValue(result, resolvedURLString, false);
232 result.append(quoteChar);
235 bool MarkupAccumulator::shouldAddNamespaceElement(const Element& element, Namespaces& namespaces)
237 // Don't add namespace attribute if it is already defined for this elem.
238 const AtomicString& prefix = element.prefix();
239 if (prefix.isEmpty()) {
240 if (element.hasAttribute(xmlnsAtom)) {
241 namespaces.set(emptyAtom, element.namespaceURI());
247 return !element.hasAttribute(WTF::xmlnsWithColon + prefix);
250 bool MarkupAccumulator::shouldAddNamespaceAttribute(const Attribute& attribute, const Element& element)
252 // xmlns and xmlns:prefix attributes should be handled by another branch in appendAttribute.
253 ASSERT(attribute.namespaceURI() != XMLNSNames::xmlnsNamespaceURI);
255 // Attributes are in the null namespace by default.
256 if (!attribute.namespaceURI())
259 // Attributes without a prefix will need one generated for them, and an xmlns attribute for that prefix.
260 if (!attribute.prefix())
263 return !element.hasAttribute(WTF::xmlnsWithColon + attribute.prefix());
266 void MarkupAccumulator::appendNamespace(StringBuilder& result, const AtomicString& prefix, const AtomicString& namespaceURI, Namespaces& namespaces)
268 if (namespaceURI.isEmpty())
271 const AtomicString& lookupKey = (!prefix) ? emptyAtom : prefix;
272 AtomicString foundURI = namespaces.get(lookupKey);
273 if (foundURI != namespaceURI) {
274 namespaces.set(lookupKey, namespaceURI);
276 result.append(xmlnsAtom.string());
277 if (!prefix.isEmpty()) {
279 result.append(prefix);
282 result.appendLiteral("=\"");
283 appendAttributeValue(result, namespaceURI, false);
288 EntityMask MarkupAccumulator::entityMaskForText(const Text& text) const
290 if (!serializeAsHTMLDocument(text))
291 return EntityMaskInPCDATA;
293 const QualifiedName* parentName = 0;
294 if (text.parentElement())
295 parentName = &(text.parentElement())->tagQName();
297 if (parentName && (*parentName == scriptTag || *parentName == styleTag || *parentName == xmpTag))
298 return EntityMaskInCDATA;
299 return EntityMaskInHTMLPCDATA;
302 void MarkupAccumulator::appendText(StringBuilder& result, Text& text)
304 const String& str = text.data();
305 unsigned length = str.length();
309 if (text == m_range->endContainer())
310 length = m_range->endOffset();
311 if (text == m_range->startContainer()) {
312 start = m_range->startOffset();
316 appendCharactersReplacingEntities(result, str, start, length, entityMaskForText(text));
319 void MarkupAccumulator::appendComment(StringBuilder& result, const String& comment)
321 // FIXME: Comment content is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "-->".
322 result.appendLiteral("<!--");
323 result.append(comment);
324 result.appendLiteral("-->");
327 void MarkupAccumulator::appendXMLDeclaration(StringBuilder& result, const Document& document)
329 if (!document.hasXMLDeclaration())
332 result.appendLiteral("<?xml version=\"");
333 result.append(document.xmlVersion());
334 const String& encoding = document.xmlEncoding();
335 if (!encoding.isEmpty()) {
336 result.appendLiteral("\" encoding=\"");
337 result.append(encoding);
339 if (document.xmlStandaloneStatus() != Document::StandaloneUnspecified) {
340 result.appendLiteral("\" standalone=\"");
341 if (document.xmlStandalone())
342 result.appendLiteral("yes");
344 result.appendLiteral("no");
347 result.appendLiteral("\"?>");
350 void MarkupAccumulator::appendDocumentType(StringBuilder& result, const DocumentType& n)
352 if (n.name().isEmpty())
355 result.appendLiteral("<!DOCTYPE ");
356 result.append(n.name());
357 if (!n.publicId().isEmpty()) {
358 result.appendLiteral(" PUBLIC \"");
359 result.append(n.publicId());
361 if (!n.systemId().isEmpty()) {
362 result.appendLiteral(" \"");
363 result.append(n.systemId());
366 } else if (!n.systemId().isEmpty()) {
367 result.appendLiteral(" SYSTEM \"");
368 result.append(n.systemId());
374 void MarkupAccumulator::appendProcessingInstruction(StringBuilder& result, const String& target, const String& data)
376 // FIXME: PI data is not escaped, but XMLSerializer (and possibly other callers) this should raise an exception if it includes "?>".
377 result.appendLiteral("<?");
378 result.append(target);
381 result.appendLiteral("?>");
384 void MarkupAccumulator::appendElement(StringBuilder& result, Element& element, Namespaces* namespaces)
386 appendOpenTag(result, element, namespaces);
388 if (element.hasAttributes()) {
389 AttributeCollection attributes = element.attributes();
390 AttributeCollection::const_iterator end = attributes.end();
391 for (AttributeCollection::const_iterator it = attributes.begin(); it != end; ++it)
392 appendAttribute(result, element, *it, namespaces);
395 // Give an opportunity to subclasses to add their own attributes.
396 appendCustomAttributes(result, element, namespaces);
398 appendCloseTag(result, element);
401 static String nodeNamePreservingCase(const Element& element)
403 return element.tagQName().toString();
406 void MarkupAccumulator::appendOpenTag(StringBuilder& result, const Element& element, Namespaces* namespaces)
409 result.append(nodeNamePreservingCase(element));
410 if (!serializeAsHTMLDocument(element) && namespaces && shouldAddNamespaceElement(element, *namespaces))
411 appendNamespace(result, element.prefix(), element.namespaceURI(), *namespaces);
414 void MarkupAccumulator::appendCloseTag(StringBuilder& result, const Element& element)
416 if (shouldSelfClose(element)) {
417 if (element.isHTMLElement())
418 result.append(' '); // XHTML 1.0 <-> HTML compatibility.
424 static inline bool attributeIsInSerializedNamespace(const Attribute& attribute)
426 return attribute.namespaceURI() == XMLNames::xmlNamespaceURI
427 || attribute.namespaceURI() == XLinkNames::xlinkNamespaceURI
428 || attribute.namespaceURI() == XMLNSNames::xmlnsNamespaceURI;
431 void MarkupAccumulator::appendAttribute(StringBuilder& result, const Element& element, const Attribute& attribute, Namespaces* namespaces)
433 bool documentIsHTML = serializeAsHTMLDocument(element);
435 QualifiedName prefixedName = attribute.name();
436 if (documentIsHTML && !attributeIsInSerializedNamespace(attribute)) {
438 result.append(attribute.name().localName());
440 if (attribute.namespaceURI() == XLinkNames::xlinkNamespaceURI) {
441 if (!attribute.prefix())
442 prefixedName.setPrefix(xlinkAtom);
443 } else if (attribute.namespaceURI() == XMLNames::xmlNamespaceURI) {
444 if (!attribute.prefix())
445 prefixedName.setPrefix(xmlAtom);
446 } else if (attribute.namespaceURI() == XMLNSNames::xmlnsNamespaceURI) {
447 if (!attribute.prefix() && attribute.localName() != xmlnsAtom)
448 prefixedName.setPrefix(xmlnsAtom);
449 if (namespaces) { // Account for the namespace attribute we're about to append.
450 const AtomicString& lookupKey = (!attribute.prefix()) ? emptyAtom : attribute.localName();
451 namespaces->set(lookupKey, attribute.value());
453 } else if (namespaces && shouldAddNamespaceAttribute(attribute, element)) {
454 if (!attribute.prefix()) {
455 // This behavior is in process of being standardized. See crbug.com/248044 and https://www.w3.org/Bugs/Public/show_bug.cgi?id=24208
456 String prefixPrefix("ns", 2);
457 for (unsigned i = attribute.namespaceURI().impl()->existingHash(); ; ++i) {
458 AtomicString newPrefix(String(prefixPrefix + String::number(i)));
459 AtomicString foundURI = namespaces->get(newPrefix);
460 if (foundURI == attribute.namespaceURI() || foundURI == nullAtom) {
461 // We already generated a prefix for this namespace.
462 prefixedName.setPrefix(newPrefix);
467 ASSERT(prefixedName.prefix());
468 appendNamespace(result, prefixedName.prefix(), attribute.namespaceURI(), *namespaces);
471 result.append(prefixedName.toString());
476 if (element.isURLAttribute(attribute)) {
477 appendQuotedURLAttributeValue(result, element, attribute);
480 appendAttributeValue(result, attribute.value(), documentIsHTML);
485 void MarkupAccumulator::appendCDATASection(StringBuilder& result, const String& section)
487 // FIXME: CDATA content is not escaped, but XMLSerializer (and possibly other callers) should raise an exception if it includes "]]>".
488 result.appendLiteral("<![CDATA[");
489 result.append(section);
490 result.appendLiteral("]]>");
493 void MarkupAccumulator::appendStartMarkup(StringBuilder& result, Node& node, Namespaces* namespaces)
495 switch (node.nodeType()) {
496 case Node::TEXT_NODE:
497 appendText(result, toText(node));
499 case Node::COMMENT_NODE:
500 appendComment(result, toComment(node).data());
502 case Node::DOCUMENT_NODE:
503 appendXMLDeclaration(result, toDocument(node));
505 case Node::DOCUMENT_FRAGMENT_NODE:
507 case Node::DOCUMENT_TYPE_NODE:
508 appendDocumentType(result, toDocumentType(node));
510 case Node::PROCESSING_INSTRUCTION_NODE:
511 appendProcessingInstruction(result, toProcessingInstruction(node).target(), toProcessingInstruction(node).data());
513 case Node::ELEMENT_NODE:
514 appendElement(result, toElement(node), namespaces);
516 case Node::CDATA_SECTION_NODE:
517 appendCDATASection(result, toCDATASection(node).data());
519 case Node::ATTRIBUTE_NODE:
520 ASSERT_NOT_REACHED();
525 // Rules of self-closure
526 // 1. No elements in HTML documents use the self-closing syntax.
527 // 2. Elements w/ children never self-close because they use a separate end tag.
528 // 3. HTML elements which do not have a "forbidden" end tag will close with a separate end tag.
529 // 4. Other elements self-close.
530 bool MarkupAccumulator::shouldSelfClose(const Node& node)
532 if (serializeAsHTMLDocument(node))
534 if (node.hasChildren())
536 if (node.isHTMLElement() && !elementCannotHaveEndTag(node))
541 bool MarkupAccumulator::elementCannotHaveEndTag(const Node& node)
543 if (!node.isHTMLElement())
546 // FIXME: ieForbidsInsertHTML may not be the right function to call here
547 // ieForbidsInsertHTML is used to disallow setting innerHTML/outerHTML
548 // or createContextualFragment. It does not necessarily align with
549 // which elements should be serialized w/o end tags.
550 return toHTMLElement(node).ieForbidsInsertHTML();
553 void MarkupAccumulator::appendEndMarkup(StringBuilder& result, const Node& node)
555 if (!node.isElementNode() || shouldSelfClose(node) || (!node.hasChildren() && elementCannotHaveEndTag(node)))
558 result.appendLiteral("</");
559 result.append(nodeNamePreservingCase(toElement(node)));
563 bool MarkupAccumulator::serializeAsHTMLDocument(const Node& node) const
565 if (m_serializationType == ForcedXML)
567 return node.document().isHTMLDocument();