/*
 * Copyright (C) 2011 Google Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
32 #include "core/page/PageSerializer.h"
34 #include "core/HTMLNames.h"
35 #include "core/InputTypeNames.h"
36 #include "core/css/CSSFontFaceRule.h"
37 #include "core/css/CSSFontFaceSrcValue.h"
38 #include "core/css/CSSImageValue.h"
39 #include "core/css/CSSImportRule.h"
40 #include "core/css/CSSStyleDeclaration.h"
41 #include "core/css/CSSStyleRule.h"
42 #include "core/css/CSSValueList.h"
43 #include "core/css/StylePropertySet.h"
44 #include "core/css/StyleRule.h"
45 #include "core/css/StyleSheetContents.h"
46 #include "core/dom/Document.h"
47 #include "core/dom/Element.h"
48 #include "core/dom/Text.h"
49 #include "core/editing/MarkupAccumulator.h"
50 #include "core/fetch/FontResource.h"
51 #include "core/fetch/ImageResource.h"
52 #include "core/frame/LocalFrame.h"
53 #include "core/html/HTMLFrameOwnerElement.h"
54 #include "core/html/HTMLImageElement.h"
55 #include "core/html/HTMLInputElement.h"
56 #include "core/html/HTMLLinkElement.h"
57 #include "core/html/HTMLMetaElement.h"
58 #include "core/html/HTMLStyleElement.h"
59 #include "core/html/parser/HTMLParserIdioms.h"
60 #include "core/page/Page.h"
61 #include "core/rendering/RenderImage.h"
62 #include "core/rendering/style/StyleFetchedImage.h"
63 #include "core/rendering/style/StyleImage.h"
64 #include "platform/SerializedResource.h"
65 #include "platform/graphics/Image.h"
66 #include "wtf/text/CString.h"
67 #include "wtf/text/StringBuilder.h"
68 #include "wtf/text/TextEncoding.h"
69 #include "wtf/text/WTFString.h"
73 static bool isCharsetSpecifyingNode(const Node& node)
75 if (!isHTMLMetaElement(node))
78 const HTMLMetaElement& element = toHTMLMetaElement(node);
79 HTMLAttributeList attributeList;
80 AttributeCollection attributes = element.attributes();
81 for (const Attribute& attr : attributes) {
82 // FIXME: We should deal appropriately with the attribute if they have a namespace.
83 attributeList.append(std::make_pair(attr.name().localName(), attr.value().string()));
85 WTF::TextEncoding textEncoding = encodingFromMetaAttributes(attributeList);
86 return textEncoding.isValid();
89 static bool shouldIgnoreElement(const Element& element)
91 return isHTMLScriptElement(element) || isHTMLNoScriptElement(element) || isCharsetSpecifyingNode(element);
94 static const QualifiedName& frameOwnerURLAttributeName(const HTMLFrameOwnerElement& frameOwner)
96 // FIXME: We should support all frame owners including applets.
97 return isHTMLObjectElement(frameOwner) ? HTMLNames::dataAttr : HTMLNames::srcAttr;
100 class SerializerMarkupAccumulator final : public MarkupAccumulator {
102 SerializerMarkupAccumulator(PageSerializer*, const Document&, WillBeHeapVector<RawPtrWillBeMember<Node>>*);
103 virtual ~SerializerMarkupAccumulator();
106 virtual void appendText(StringBuilder& out, Text&) override;
107 virtual void appendElement(StringBuilder& out, Element&, Namespaces*) override;
108 virtual void appendCustomAttributes(StringBuilder& out, const Element&, Namespaces*) override;
109 virtual void appendEndTag(const Element&) override;
112 PageSerializer* m_serializer;
113 const Document& m_document;
116 SerializerMarkupAccumulator::SerializerMarkupAccumulator(PageSerializer* serializer, const Document& document, WillBeHeapVector<RawPtrWillBeMember<Node>>* nodes)
117 : MarkupAccumulator(nodes, ResolveAllURLs, nullptr)
118 , m_serializer(serializer)
119 , m_document(document)
123 SerializerMarkupAccumulator::~SerializerMarkupAccumulator()
127 void SerializerMarkupAccumulator::appendText(StringBuilder& out, Text& text)
129 Element* parent = text.parentElement();
130 if (parent && !shouldIgnoreElement(*parent))
131 MarkupAccumulator::appendText(out, text);
134 void SerializerMarkupAccumulator::appendElement(StringBuilder& out, Element& element, Namespaces* namespaces)
136 if (!shouldIgnoreElement(element))
137 MarkupAccumulator::appendElement(out, element, namespaces);
139 if (isHTMLHeadElement(element)) {
140 out.appendLiteral("<meta charset=\"");
141 out.append(m_document.charset());
142 out.appendLiteral("\">");
145 // FIXME: For object (plugins) tags and video tag we could replace them by an image of their current contents.
148 void SerializerMarkupAccumulator::appendCustomAttributes(StringBuilder& out, const Element& element, Namespaces* namespaces)
150 if (!element.isFrameOwnerElement())
153 const HTMLFrameOwnerElement& frameOwner = toHTMLFrameOwnerElement(element);
154 Frame* frame = frameOwner.contentFrame();
155 // FIXME: RemoteFrames not currently supported here.
156 if (!frame || !frame->isLocalFrame())
159 KURL url = toLocalFrame(frame)->document()->url();
160 if (url.isValid() && !url.protocolIsAbout())
163 // We need to give a fake location to blank frames so they can be referenced by the serialized frame.
164 url = m_serializer->urlForBlankFrame(toLocalFrame(frame));
165 appendAttribute(out, element, Attribute(frameOwnerURLAttributeName(frameOwner), AtomicString(url.string())), namespaces);
168 void SerializerMarkupAccumulator::appendEndTag(const Element& element)
170 if (!shouldIgnoreElement(element))
171 MarkupAccumulator::appendEndTag(element);
174 PageSerializer::PageSerializer(Vector<SerializedResource>* resources)
175 : m_resources(resources)
176 , m_blankFrameCounter(0)
180 void PageSerializer::serialize(Page* page)
182 serializeFrame(page->deprecatedLocalMainFrame());
185 void PageSerializer::serializeFrame(LocalFrame* frame)
187 ASSERT(frame->document());
188 Document& document = *frame->document();
189 KURL url = document.url();
190 // FIXME: This probably wants isAboutBlankURL? to exclude other about: urls (like about:srcdoc)?
191 if (!url.isValid() || url.protocolIsAbout()) {
192 // For blank frames we generate a fake URL so they can be referenced by their containing frame.
193 url = urlForBlankFrame(frame);
196 if (m_resourceURLs.contains(url)) {
197 // FIXME: We could have 2 frame with the same URL but which were dynamically changed and have now
198 // different content. So we should serialize both and somehow rename the frame src in the containing
203 WTF::TextEncoding textEncoding(document.charset());
204 if (!textEncoding.isValid()) {
205 // FIXME: iframes used as images trigger this. We should deal with them correctly.
209 WillBeHeapVector<RawPtrWillBeMember<Node>> serializedNodes;
210 SerializerMarkupAccumulator accumulator(this, document, &serializedNodes);
211 String text = accumulator.serializeNodes(document, IncludeNode);
212 CString frameHTML = textEncoding.normalizeAndEncode(text, WTF::EntitiesForUnencodables);
213 m_resources->append(SerializedResource(url, document.suggestedMIMEType(), SharedBuffer::create(frameHTML.data(), frameHTML.length())));
214 m_resourceURLs.add(url);
216 for (Node* node : serializedNodes) {
218 if (!node->isElementNode())
221 Element& element = toElement(*node);
222 // We have to process in-line style as it might contain some resources (typically background images).
223 if (element.isStyledElement())
224 retrieveResourcesForProperties(element.inlineStyle(), document);
226 if (isHTMLImageElement(element)) {
227 HTMLImageElement& imageElement = toHTMLImageElement(element);
228 KURL url = document.completeURL(imageElement.getAttribute(HTMLNames::srcAttr));
229 ImageResource* cachedImage = imageElement.cachedImage();
230 addImageToResources(cachedImage, imageElement.renderer(), url);
231 } else if (isHTMLInputElement(element)) {
232 HTMLInputElement& inputElement = toHTMLInputElement(element);
233 if (inputElement.type() == InputTypeNames::image && inputElement.hasImageLoader()) {
234 KURL url = inputElement.src();
235 ImageResource* cachedImage = inputElement.imageLoader()->image();
236 addImageToResources(cachedImage, inputElement.renderer(), url);
238 } else if (isHTMLLinkElement(element)) {
239 HTMLLinkElement& linkElement = toHTMLLinkElement(element);
240 if (CSSStyleSheet* sheet = linkElement.sheet()) {
241 KURL url = document.completeURL(linkElement.getAttribute(HTMLNames::hrefAttr));
242 serializeCSSStyleSheet(*sheet, url);
243 ASSERT(m_resourceURLs.contains(url));
245 } else if (isHTMLStyleElement(element)) {
246 HTMLStyleElement& styleElement = toHTMLStyleElement(element);
247 if (CSSStyleSheet* sheet = styleElement.sheet())
248 serializeCSSStyleSheet(*sheet, KURL());
252 for (Frame* childFrame = frame->tree().firstChild(); childFrame; childFrame = childFrame->tree().nextSibling()) {
253 if (childFrame->isLocalFrame())
254 serializeFrame(toLocalFrame(childFrame));
258 void PageSerializer::serializeCSSStyleSheet(CSSStyleSheet& styleSheet, const KURL& url)
260 StringBuilder cssText;
261 for (unsigned i = 0; i < styleSheet.length(); ++i) {
262 CSSRule* rule = styleSheet.item(i);
263 String itemText = rule->cssText();
264 if (!itemText.isEmpty()) {
265 cssText.append(itemText);
266 if (i < styleSheet.length() - 1)
267 cssText.appendLiteral("\n\n");
269 ASSERT(styleSheet.ownerDocument());
270 Document& document = *styleSheet.ownerDocument();
271 // Some rules have resources associated with them that we need to retrieve.
272 if (rule->type() == CSSRule::IMPORT_RULE) {
273 CSSImportRule* importRule = toCSSImportRule(rule);
274 KURL importURL = document.completeURL(importRule->href());
275 if (m_resourceURLs.contains(importURL))
277 if (importRule->styleSheet())
278 serializeCSSStyleSheet(*importRule->styleSheet(), importURL);
279 } else if (rule->type() == CSSRule::FONT_FACE_RULE) {
280 retrieveResourcesForProperties(&toCSSFontFaceRule(rule)->styleRule()->properties(), document);
281 } else if (rule->type() == CSSRule::STYLE_RULE) {
282 retrieveResourcesForProperties(&toCSSStyleRule(rule)->styleRule()->properties(), document);
286 if (url.isValid() && !m_resourceURLs.contains(url)) {
287 // FIXME: We should check whether a charset has been specified and if none was found add one.
288 WTF::TextEncoding textEncoding(styleSheet.contents()->charset());
289 ASSERT(textEncoding.isValid());
290 String textString = cssText.toString();
291 CString text = textEncoding.normalizeAndEncode(textString, WTF::EntitiesForUnencodables);
292 m_resources->append(SerializedResource(url, String("text/css"), SharedBuffer::create(text.data(), text.length())));
293 m_resourceURLs.add(url);
297 bool PageSerializer::shouldAddURL(const KURL& url)
299 return url.isValid() && !m_resourceURLs.contains(url) && !url.protocolIsData();
302 void PageSerializer::addToResources(Resource* resource, PassRefPtr<SharedBuffer> data, const KURL& url)
305 WTF_LOG_ERROR("No data for resource %s", url.string().utf8().data());
309 String mimeType = resource->response().mimeType();
310 m_resources->append(SerializedResource(url, mimeType, data));
311 m_resourceURLs.add(url);
314 void PageSerializer::addImageToResources(ImageResource* image, RenderObject* imageRenderer, const KURL& url)
316 if (!shouldAddURL(url))
319 if (!image || image->image() == Image::nullImage() || image->errorOccurred())
322 RefPtr<SharedBuffer> data = imageRenderer ? image->imageForRenderer(imageRenderer)->data() : nullptr;
324 data = image->image()->data();
326 addToResources(image, data, url);
329 void PageSerializer::addFontToResources(FontResource* font)
331 if (!font || !shouldAddURL(font->url()) || !font->isLoaded() || !font->resourceBuffer()) {
334 RefPtr<SharedBuffer> data(font->resourceBuffer());
336 addToResources(font, data, font->url());
339 void PageSerializer::retrieveResourcesForProperties(const StylePropertySet* styleDeclaration, Document& document)
341 if (!styleDeclaration)
344 // The background-image and list-style-image (for ul or ol) are the CSS properties
345 // that make use of images. We iterate to make sure we include any other
346 // image properties there might be.
347 unsigned propertyCount = styleDeclaration->propertyCount();
348 for (unsigned i = 0; i < propertyCount; ++i) {
349 RefPtrWillBeRawPtr<CSSValue> cssValue = styleDeclaration->propertyAt(i).value();
350 retrieveResourcesForCSSValue(cssValue.get(), document);
354 void PageSerializer::retrieveResourcesForCSSValue(CSSValue* cssValue, Document& document)
356 if (cssValue->isImageValue()) {
357 CSSImageValue* imageValue = toCSSImageValue(cssValue);
358 StyleImage* styleImage = imageValue->cachedOrPendingImage();
359 // Non cached-images are just place-holders and do not contain data.
360 if (!styleImage || !styleImage->isImageResource())
363 addImageToResources(styleImage->cachedImage(), nullptr, styleImage->cachedImage()->url());
364 } else if (cssValue->isFontFaceSrcValue()) {
365 CSSFontFaceSrcValue* fontFaceSrcValue = toCSSFontFaceSrcValue(cssValue);
366 if (fontFaceSrcValue->isLocal()) {
370 addFontToResources(fontFaceSrcValue->fetch(&document));
371 } else if (cssValue->isValueList()) {
372 CSSValueList* cssValueList = toCSSValueList(cssValue);
373 for (unsigned i = 0; i < cssValueList->length(); i++)
374 retrieveResourcesForCSSValue(cssValueList->item(i), document);
378 KURL PageSerializer::urlForBlankFrame(LocalFrame* frame)
380 HashMap<LocalFrame*, KURL>::iterator iter = m_blankFrameURLs.find(frame);
381 if (iter != m_blankFrameURLs.end())
383 String url = "wyciwyg://frame/" + String::number(m_blankFrameCounter++);
384 KURL fakeURL(ParsedURLString, url);
385 m_blankFrameURLs.add(frame, fakeURL);