2 * Copyright (C) 2011 Google Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
14 * * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 #include "core/page/PageSerializer.h"
34 #include "HTMLNames.h"
35 #include "core/css/CSSFontFaceRule.h"
36 #include "core/css/CSSFontFaceSrcValue.h"
37 #include "core/css/CSSImageValue.h"
38 #include "core/css/CSSImportRule.h"
39 #include "core/css/CSSStyleDeclaration.h"
40 #include "core/css/CSSStyleRule.h"
41 #include "core/css/CSSValueList.h"
42 #include "core/css/StylePropertySet.h"
43 #include "core/css/StyleRule.h"
44 #include "core/css/StyleSheetContents.h"
45 #include "core/dom/Document.h"
46 #include "core/dom/Element.h"
47 #include "core/dom/Text.h"
48 #include "core/editing/MarkupAccumulator.h"
49 #include "core/fetch/FontResource.h"
50 #include "core/fetch/ImageResource.h"
51 #include "core/frame/Frame.h"
52 #include "core/html/HTMLFrameOwnerElement.h"
53 #include "core/html/HTMLImageElement.h"
54 #include "core/html/HTMLInputElement.h"
55 #include "core/html/HTMLLinkElement.h"
56 #include "core/html/HTMLStyleElement.h"
57 #include "core/html/parser/HTMLParserIdioms.h"
58 #include "core/page/Page.h"
59 #include "core/rendering/RenderImage.h"
60 #include "core/rendering/style/StyleFetchedImage.h"
61 #include "core/rendering/style/StyleImage.h"
62 #include "platform/SerializedResource.h"
63 #include "platform/graphics/Image.h"
64 #include "wtf/text/CString.h"
65 #include "wtf/text/StringBuilder.h"
66 #include "wtf/text/TextEncoding.h"
67 #include "wtf/text/WTFString.h"
// Reports whether |node| declares a character encoding. The result is the
// validity of the encoding that encodingFromMetaAttributes() derives from the
// (local name, value) pairs of the node's attributes.
71 static bool isCharsetSpecifyingNode(const Node& node)
// Guard on node kind: toHTMLElement() below is only meaningful for HTML
// elements. NOTE(review): this guard and the <meta> guard further down
// presumably bail out with false - confirm.
73 if (!node.isHTMLElement())
76 const HTMLElement& element = toHTMLElement(node);
// Guard on tag name: only <meta> elements are inspected.
77 if (!element.hasTagName(HTMLNames::metaTag))
// Copy every attribute into the list form that encodingFromMetaAttributes()
// consumes. Only the local part of the attribute name is kept.
79 HTMLAttributeList attributes;
80 if (element.hasAttributes()) {
81 for (unsigned i = 0; i < element.attributeCount(); ++i) {
82 const Attribute* attribute = element.attributeItem(i);
83 // FIXME: We should deal appropriately with the attribute if they have a namespace.
84 attributes.append(std::make_pair(attribute->name().localName(), attribute->value().string()));
// The element counts as charset-specifying only if the derived encoding is valid.
87 WTF::TextEncoding textEncoding = encodingFromMetaAttributes(attributes);
88 return textEncoding.isValid();
// Returns true for elements the serializer leaves out of its output:
// <script>, <noscript>, and any element for which isCharsetSpecifyingNode()
// reports true.
91 static bool shouldIgnoreElement(const Element& element)
93 return element.hasTagName(HTMLNames::scriptTag) || element.hasTagName(HTMLNames::noscriptTag) || isCharsetSpecifyingNode(element);
// Selects the attribute that carries a frame owner's URL: the data attribute
// for <object>, the src attribute for every other frame owner.
96 static const QualifiedName& frameOwnerURLAttributeName(const HTMLFrameOwnerElement& frameOwner)
98 // FIXME: We should support all frame owners including applets.
99 return frameOwner.hasTagName(HTMLNames::objectTag) ? HTMLNames::dataAttr : HTMLNames::srcAttr;
// MarkupAccumulator specialization used by PageSerializer. It overrides the
// text, element, custom-attribute and end-tag hooks of the base class; the
// overrides are defined out of line.
102 class SerializerMarkupAccumulator FINAL : public MarkupAccumulator {
// Arguments: the owning serializer, the document being serialized, and a
// node vector that is handed to the MarkupAccumulator base.
104 SerializerMarkupAccumulator(PageSerializer*, const Document&, Vector<Node*>*);
105 virtual ~SerializerMarkupAccumulator();
// MarkupAccumulator hooks customized by this class.
108 virtual void appendText(StringBuilder& out, Text&) OVERRIDE;
109 virtual void appendElement(StringBuilder& out, Element&, Namespaces*) OVERRIDE;
110 virtual void appendCustomAttributes(StringBuilder& out, const Element&, Namespaces*) OVERRIDE;
111 virtual void appendEndTag(const Node&) OVERRIDE;
// Held as a raw pointer and a reference respectively.
// NOTE(review): presumably both must outlive this accumulator - confirm.
114 PageSerializer* m_serializer;
115 const Document& m_document;
// Passes |nodes| to the MarkupAccumulator base together with the
// ResolveAllURLs policy, and records the serializer and the document in the
// corresponding members.
118 SerializerMarkupAccumulator::SerializerMarkupAccumulator(PageSerializer* serializer, const Document& document, Vector<Node*>* nodes)
119 : MarkupAccumulator(nodes, ResolveAllURLs)
120 , m_serializer(serializer)
121 , m_document(document)
// Out-of-line definition of the virtual destructor declared in the class.
125 SerializerMarkupAccumulator::~SerializerMarkupAccumulator()
// Forwards a text node to the base implementation only when it has a parent
// element and shouldIgnoreElement() is false for that parent. Text without a
// parent element, or inside an ignored element, is not forwarded.
129 void SerializerMarkupAccumulator::appendText(StringBuilder& out, Text& text)
131 Element* parent = text.parentElement();
132 if (parent && !shouldIgnoreElement(*parent))
133 MarkupAccumulator::appendText(out, text);
// Writes the start tag of |element| through the base class unless the element
// is one that shouldIgnoreElement() rejects. For a <head> element it then
// begins a meta charset declaration that carries the document's charset.
136 void SerializerMarkupAccumulator::appendElement(StringBuilder& out, Element& element, Namespaces* namespaces)
138 if (!shouldIgnoreElement(element))
139 MarkupAccumulator::appendElement(out, element, namespaces);
// The <head> check is independent of the ignore check above.
141 if (element.hasTagName(HTMLNames::headTag)) {
142 out.append("<meta charset=\"");
143 out.append(m_document.charset());
147 // FIXME: For object (plugins) tags and video tag we could replace them by an image of their current contents.
// Adds a URL attribute to frame owner elements whose content document has no
// usable URL, so that the owner points at the placeholder URL the serializer
// assigns to that frame.
150 void SerializerMarkupAccumulator::appendCustomAttributes(StringBuilder& out, const Element& element, Namespaces* namespaces)
// Only frame owners are of interest.
// NOTE(review): presumably returns early for other elements - confirm.
152 if (!element.isFrameOwnerElement())
155 const HTMLFrameOwnerElement& frameOwner = toHTMLFrameOwnerElement(element);
156 Frame* frame = frameOwner.contentFrame();
// NOTE(review): |frame| is dereferenced on the next line - confirm that a
// null check precedes it.
160 KURL url = frame->document()->url();
// A valid, non-blank URL needs no replacement.
// NOTE(review): presumably returns early in that case - confirm.
161 if (url.isValid() && !url.isBlankURL())
164 // We need to give a fake location to blank frames so they can be referenced by the serialized frame.
165 url = m_serializer->urlForBlankFrame(frame);
// The attribute name depends on the kind of frame owner (see
// frameOwnerURLAttributeName()).
166 appendAttribute(out, element, Attribute(frameOwnerURLAttributeName(frameOwner), AtomicString(url.string())), namespaces);
// Forwards the end tag to the base class only for element nodes that
// shouldIgnoreElement() does not reject, mirroring the start-tag filtering in
// appendElement().
169 void SerializerMarkupAccumulator::appendEndTag(const Node& node)
171 if (node.isElementNode() && !shouldIgnoreElement(toElement(node)))
172 MarkupAccumulator::appendEndTag(node);
// Stores the caller-provided vector that serialized resources are appended
// to and starts the blank-frame counter at zero.
175 PageSerializer::PageSerializer(Vector<SerializedResource>* resources)
176 : m_resources(resources)
177 , m_blankFrameCounter(0)
// Entry point: serializes |page| by serializing its main frame.
181 void PageSerializer::serialize(Page* page)
183 serializeFrame(page->mainFrame());
// Serializes a single frame. The document's markup is encoded with the
// document's charset and appended to m_resources under the frame's URL. The
// serialized nodes are then scanned for inline styles, images, image buttons
// and style sheets whose resources must be saved as well, and finally each
// child frame is serialized.
186 void PageSerializer::serializeFrame(Frame* frame)
188 ASSERT(frame->document());
189 Document& document = *frame->document();
190 KURL url = document.url();
191 if (!url.isValid() || url.isBlankURL()) {
192 // For blank frames we generate a fake URL so they can be referenced by their containing frame.
193 url = urlForBlankFrame(frame);
// A URL already recorded in m_resourceURLs has been handled before.
// NOTE(review): presumably the function returns here - confirm.
196 if (m_resourceURLs.contains(url)) {
197 // FIXME: We could have 2 frame with the same URL but which were dynamically changed and have now
198 // different content. So we should serialize both and somehow rename the frame src in the containing
// The markup is encoded with the document's own charset.
// NOTE(review): an invalid encoding presumably aborts serialization of this
// frame - confirm.
203 WTF::TextEncoding textEncoding(document.charset());
204 if (!textEncoding.isValid()) {
205 // FIXME: iframes used as images trigger this. We should deal with them correctly.
// The accumulator receives |serializedNodes|, which the loop below scans
// for resources.
209 Vector<Node*> serializedNodes;
210 SerializerMarkupAccumulator accumulator(this, document, &serializedNodes);
211 String text = accumulator.serializeNodes(document, IncludeNode);
// Characters the encoding cannot represent are written as entities.
212 CString frameHTML = textEncoding.normalizeAndEncode(text, WTF::EntitiesForUnencodables);
213 m_resources->append(SerializedResource(url, document.suggestedMIMEType(), SharedBuffer::create(frameHTML.data(), frameHTML.length())));
214 m_resourceURLs.add(url);
// Second pass: collect the resources referenced by the serialized elements.
216 for (Vector<Node*>::iterator iter = serializedNodes.begin(); iter != serializedNodes.end(); ++iter) {
// Only element nodes can reference resources.
// NOTE(review): presumably other nodes are skipped - confirm.
219 if (!node.isElementNode())
222 Element& element = toElement(node);
223 // We have to process in-line style as it might contain some resources (typically background images).
224 if (element.isStyledElement())
225 retrieveResourcesForProperties(element.inlineStyle(), document);
// <img>: save the cached image under the completed src URL.
227 if (element.hasTagName(HTMLNames::imgTag)) {
228 HTMLImageElement& imageElement = toHTMLImageElement(element);
229 KURL url = document.completeURL(imageElement.getAttribute(HTMLNames::srcAttr));
230 ImageResource* cachedImage = imageElement.cachedImage();
231 addImageToResources(cachedImage, imageElement.renderer(), url);
// <input>: only image buttons that have an image loader carry an image.
232 } else if (element.hasTagName(HTMLNames::inputTag)) {
233 HTMLInputElement& inputElement = toHTMLInputElement(element);
234 if (inputElement.isImageButton() && inputElement.hasImageLoader()) {
235 KURL url = inputElement.src();
236 ImageResource* cachedImage = inputElement.imageLoader()->image();
237 addImageToResources(cachedImage, inputElement.renderer(), url);
// <link>: serialize the attached style sheet under the completed href URL.
// NOTE(review): serializeCSSStyleSheet() only records valid URLs, so the
// assertion below relies on the completed href being valid - confirm.
239 } else if (element.hasTagName(HTMLNames::linkTag)) {
240 HTMLLinkElement& linkElement = toHTMLLinkElement(element);
241 if (CSSStyleSheet* sheet = linkElement.sheet()) {
242 KURL url = document.completeURL(linkElement.getAttribute(HTMLNames::hrefAttr));
243 serializeCSSStyleSheet(sheet, url);
244 ASSERT(m_resourceURLs.contains(url));
// <style>: the sheet is passed with an empty URL, so its own text is not
// stored as a separate resource; it is processed for the resources its
// rules reference.
246 } else if (element.hasTagName(HTMLNames::styleTag)) {
247 HTMLStyleElement& styleElement = toHTMLStyleElement(element);
248 if (CSSStyleSheet* sheet = styleElement.sheet())
249 serializeCSSStyleSheet(sheet, KURL());
// Recurse into every direct child frame.
253 for (Frame* childFrame = frame->tree().firstChild(); childFrame; childFrame = childFrame->tree().nextSibling())
254 serializeFrame(childFrame);
// Walks the rules of |styleSheet|, concatenating their text and collecting
// the resources they reference (imported sheets, font faces, style rule
// properties). When |url| is valid and not yet recorded, the concatenated
// text is encoded with the sheet's charset and stored as a text/css resource.
257 void PageSerializer::serializeCSSStyleSheet(CSSStyleSheet* styleSheet, const KURL& url)
259 StringBuilder cssText;
260 for (unsigned i = 0; i < styleSheet->length(); ++i) {
261 CSSRule* rule = styleSheet->item(i);
262 String itemText = rule->cssText();
// Rules with empty text contribute nothing. A blank-line separator follows
// every non-empty rule except the one at the last index.
263 if (!itemText.isEmpty()) {
264 cssText.append(itemText);
265 if (i < styleSheet->length() - 1)
266 cssText.append("\n\n");
// The owner document is needed to resolve relative URLs and to fetch fonts.
// NOTE(review): neither statement depends on |rule|; hoisting them out of the
// loop would avoid repeating the assertion on every iteration.
268 ASSERT(styleSheet->ownerDocument());
269 Document& document = *styleSheet->ownerDocument();
270 // Some rules have resources associated with them that we need to retrieve.
271 if (rule->type() == CSSRule::IMPORT_RULE) {
272 CSSImportRule* importRule = toCSSImportRule(rule);
273 KURL importURL = document.completeURL(importRule->href());
// NOTE(review): an import that is already recorded is presumably skipped -
// confirm.
274 if (m_resourceURLs.contains(importURL))
// NOTE(review): importRule->styleSheet() is passed on without a visible null
// check and is dereferenced at the top of the recursive call. Confirm it
// cannot be null (for example when the imported sheet failed to load), or
// guard the call.
276 serializeCSSStyleSheet(importRule->styleSheet(), importURL);
277 } else if (rule->type() == CSSRule::FONT_FACE_RULE) {
278 retrieveResourcesForProperties(toCSSFontFaceRule(rule)->styleRule()->properties(), document);
279 } else if (rule->type() == CSSRule::STYLE_RULE) {
280 retrieveResourcesForProperties(toCSSStyleRule(rule)->styleRule()->properties(), document);
// Store the sheet text itself only for a valid URL that has not been recorded
// yet. Callers pass an empty KURL for sheets that have no URL of their own.
284 if (url.isValid() && !m_resourceURLs.contains(url)) {
285 // FIXME: We should check whether a charset has been specified and if none was found add one.
286 WTF::TextEncoding textEncoding(styleSheet->contents()->charset());
287 ASSERT(textEncoding.isValid());
288 String textString = cssText.toString();
289 CString text = textEncoding.normalizeAndEncode(textString, WTF::EntitiesForUnencodables);
290 m_resources->append(SerializedResource(url, String("text/css"), SharedBuffer::create(text.data(), text.length())));
291 m_resourceURLs.add(url);
// A URL qualifies for addition when it is valid, has not been recorded in
// m_resourceURLs yet, and does not use the data: protocol.
295 bool PageSerializer::shouldAddURL(const KURL& url)
297 return url.isValid() && !m_resourceURLs.contains(url) && !url.protocolIsData();
// Appends |data| to m_resources under |url|, using the MIME type from the
// resource's response, and records |url| in m_resourceURLs.
300 void PageSerializer::addToResources(Resource* resource, PassRefPtr<SharedBuffer> data, const KURL& url)
// NOTE(review): this error is presumably logged only when |data| is null, and
// followed by an early return - confirm.
303 WTF_LOG_ERROR("No data for resource %s", url.string().utf8().data());
307 String mimeType = resource->response().mimeType();
308 m_resources->append(SerializedResource(url, mimeType, data));
309 m_resourceURLs.add(url);
// Saves the data of |image| as a resource under |url|. |imageRenderer| may be
// null; when it is non-null the renderer-specific image supplies the data.
312 void PageSerializer::addImageToResources(ImageResource* image, RenderObject* imageRenderer, const KURL& url)
// NOTE(review): both guards below presumably return early - confirm.
314 if (!shouldAddURL(url))
// Missing resources and the shared null image have nothing to save.
317 if (!image || image->image() == Image::nullImage())
320 RefPtr<SharedBuffer> data = imageRenderer ? image->imageForRenderer(imageRenderer)->data() : 0;
// NOTE(review): presumably a fallback taken only when the lookup above
// produced no data - confirm the guarding condition.
322 data = image->image()->data();
324 addToResources(image, data, url);
// Saves a font's resource buffer under the font's own URL.
327 void PageSerializer::addFontToResources(FontResource* font)
// Tests for a null font, a URL that shouldAddURL() rejects, a font that has
// not finished loading, or a missing buffer.
// NOTE(review): presumably returns early in any of these cases - confirm.
329 if (!font || !shouldAddURL(font->url()) || !font->isLoaded() || !font->resourceBuffer()) {
332 RefPtr<SharedBuffer> data(font->resourceBuffer());
334 addToResources(font, data, font->url());
// Passes the value of every property in |styleDeclaration| to
// retrieveResourcesForCSSValue(), so that images and fonts referenced by
// those values are collected.
337 void PageSerializer::retrieveResourcesForProperties(const StylePropertySet* styleDeclaration, Document& document)
// A null declaration is tolerated by this check.
// NOTE(review): presumably returns early - confirm.
339 if (!styleDeclaration)
342 // The background-image and list-style-image (for ul or ol) are the CSS properties
343 // that make use of images. We iterate to make sure we include any other
344 // image properties there might be.
345 unsigned propertyCount = styleDeclaration->propertyCount();
346 for (unsigned i = 0; i < propertyCount; ++i) {
347 RefPtr<CSSValue> cssValue = styleDeclaration->propertyAt(i).value();
348 retrieveResourcesForCSSValue(cssValue.get(), document);
// Collects the resource behind a single CSS value: image values add their
// cached image, font-face src values add the fetched font, and value lists
// are processed item by item through recursion.
// NOTE(review): |cssValue| is dereferenced without a visible null check -
// confirm callers never pass null.
352 void PageSerializer::retrieveResourcesForCSSValue(CSSValue* cssValue, Document& document)
354 if (cssValue->isImageValue()) {
355 CSSImageValue* imageValue = toCSSImageValue(cssValue);
356 StyleImage* styleImage = imageValue->cachedOrPendingImage();
357 // Non cached-images are just place-holders and do not contain data.
// NOTE(review): presumably returns early here - confirm.
358 if (!styleImage || !styleImage->isImageResource())
// No renderer is supplied, and the resource is stored under the cached
// image's own URL.
361 addImageToResources(styleImage->cachedImage(), 0, styleImage->cachedImage()->url());
362 } else if (cssValue->isFontFaceSrcValue()) {
363 CSSFontFaceSrcValue* fontFaceSrcValue = toCSSFontFaceSrcValue(cssValue);
// NOTE(review): local sources are presumably skipped - confirm.
364 if (fontFaceSrcValue->isLocal()) {
368 addFontToResources(fontFaceSrcValue->fetch(&document));
369 } else if (cssValue->isValueList()) {
370 CSSValueList* cssValueList = toCSSValueList(cssValue);
371 for (unsigned i = 0; i < cssValueList->length(); i++)
372 retrieveResourcesForCSSValue(cssValueList->item(i), document);
// Maps a frame that has no usable URL to a placeholder URL. The mapping is
// kept in m_blankFrameURLs; a new placeholder is built from a fixed prefix
// followed by the current value of m_blankFrameCounter, which is then
// incremented.
376 KURL PageSerializer::urlForBlankFrame(Frame* frame)
378 HashMap<Frame*, KURL>::iterator iter = m_blankFrameURLs.find(frame);
// NOTE(review): a frame that already has an entry presumably gets that entry
// back - confirm.
379 if (iter != m_blankFrameURLs.end())
381 String url = "wyciwyg://frame/" + String::number(m_blankFrameCounter++);
382 KURL fakeURL(ParsedURLString, url);
// NOTE(review): the new URL is recorded for later lookups and presumably
// returned - confirm.
383 m_blankFrameURLs.add(frame, fakeURL);