2 * Copyright (C) 2011 Google Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
14 * * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 #include "core/page/PageSerializer.h"
34 #include "HTMLNames.h"
35 #include "core/css/CSSFontFaceRule.h"
36 #include "core/css/CSSFontFaceSrcValue.h"
37 #include "core/css/CSSImageValue.h"
38 #include "core/css/CSSImportRule.h"
39 #include "core/css/CSSStyleDeclaration.h"
40 #include "core/css/CSSStyleRule.h"
41 #include "core/css/CSSValueList.h"
42 #include "core/css/StylePropertySet.h"
43 #include "core/css/StyleRule.h"
44 #include "core/css/StyleSheetContents.h"
45 #include "core/dom/Document.h"
46 #include "core/dom/Element.h"
47 #include "core/dom/Text.h"
48 #include "core/editing/MarkupAccumulator.h"
49 #include "core/fetch/FontResource.h"
50 #include "core/fetch/ImageResource.h"
51 #include "core/frame/Frame.h"
52 #include "core/html/HTMLFrameOwnerElement.h"
53 #include "core/html/HTMLImageElement.h"
54 #include "core/html/HTMLInputElement.h"
55 #include "core/html/HTMLLinkElement.h"
56 #include "core/html/HTMLStyleElement.h"
57 #include "core/html/parser/HTMLParserIdioms.h"
58 #include "core/page/Page.h"
59 #include "core/rendering/RenderImage.h"
60 #include "core/rendering/style/StyleFetchedImage.h"
61 #include "core/rendering/style/StyleImage.h"
62 #include "platform/SerializedResource.h"
63 #include "platform/graphics/Image.h"
64 #include "wtf/text/CString.h"
65 #include "wtf/text/StringBuilder.h"
66 #include "wtf/text/TextEncoding.h"
67 #include "wtf/text/WTFString.h"
// Decides whether |node| is an HTML <meta> element whose attributes describe a
// usable character encoding. The element's attributes are gathered as
// (local name, value) pairs, handed to encodingFromMetaAttributes(), and the
// result is whether the returned TextEncoding is valid.
// NOTE(review): the statements guarded by the two early checks below are not
// visible in this excerpt; presumably they return false - confirm against the
// full file.
71 static bool isCharsetSpecifyingNode(Node* node)
// Only HTML elements are candidates.
73 if (!node->isHTMLElement())
76 HTMLElement* element = toHTMLElement(node);
// Only <meta> elements are candidates.
77 if (!element->hasTagName(HTMLNames::metaTag))
79 HTMLAttributeList attributes;
80 if (element->hasAttributes()) {
81 for (unsigned i = 0; i < element->attributeCount(); ++i) {
82 const Attribute* attribute = element->attributeItem(i);
83 // FIXME: We should deal appropriately with the attribute if they have a namespace.
// Only the local name is recorded, so namespace information is not carried over.
84 attributes.append(std::make_pair(attribute->name().localName(), attribute->value().string()));
87 WTF::TextEncoding textEncoding = encodingFromMetaAttributes(attributes);
88 return textEncoding.isValid();
91 static bool shouldIgnoreElement(Element* element)
93 return element->hasTagName(HTMLNames::scriptTag) || element->hasTagName(HTMLNames::noscriptTag) || isCharsetSpecifyingNode(element);
96 static const QualifiedName& frameOwnerURLAttributeName(const HTMLFrameOwnerElement& frameOwner)
98 // FIXME: We should support all frame owners including applets.
99 return frameOwner.hasTagName(HTMLNames::objectTag) ? HTMLNames::dataAttr : HTMLNames::srcAttr;
// MarkupAccumulator subclass that PageSerializer::serializeFrame() instantiates
// to turn a document into markup. It overrides the text, element, custom
// attribute and end tag hooks of the base class.
// NOTE(review): the access specifiers and the closing of the class are not
// visible in this excerpt.
102 class SerializerMarkupAccumulator FINAL : public MarkupAccumulator {
// Takes the owning serializer, the document being serialized, and the node
// vector that is forwarded to the MarkupAccumulator base.
104 SerializerMarkupAccumulator(PageSerializer*, Document*, Vector<Node*>*);
105 virtual ~SerializerMarkupAccumulator();
// Overrides of the MarkupAccumulator serialization hooks.
108 virtual void appendText(StringBuilder& out, Text*) OVERRIDE;
109 virtual void appendElement(StringBuilder& out, Element*, Namespaces*) OVERRIDE;
110 virtual void appendCustomAttributes(StringBuilder& out, Element*, Namespaces*) OVERRIDE;
111 virtual void appendEndTag(Node*) OVERRIDE;
// Serializer asked for fake URLs of blank frames (see appendCustomAttributes).
114 PageSerializer* m_serializer;
// Document whose charset is written into the generated <meta> tag (see appendElement).
115 Document* m_document;
118 SerializerMarkupAccumulator::SerializerMarkupAccumulator(PageSerializer* serializer, Document* document, Vector<Node*>* nodes)
119 : MarkupAccumulator(nodes, ResolveAllURLs)
120 , m_serializer(serializer)
121 , m_document(document)
125 SerializerMarkupAccumulator::~SerializerMarkupAccumulator()
129 void SerializerMarkupAccumulator::appendText(StringBuilder& out, Text* text)
131 Element* parent = text->parentElement();
132 if (parent && !shouldIgnoreElement(parent))
133 MarkupAccumulator::appendText(out, text);
// Emits the element through the base class unless shouldIgnoreElement()
// rejects it. For a <head> element it additionally starts writing a
// <meta charset="..."> declaration that carries the document's charset.
// NOTE(review): the statement that terminates the meta tag and the closing
// braces are not visible in this excerpt - confirm against the full file.
136 void SerializerMarkupAccumulator::appendElement(StringBuilder& out, Element* element, Namespaces* namespaces)
138 if (!shouldIgnoreElement(element))
139 MarkupAccumulator::appendElement(out, element, namespaces);
// The charset declaration is written right after the <head> element itself;
// meta elements that specify a charset are among those shouldIgnoreElement() filters out.
141 if (element->hasTagName(HTMLNames::headTag)) {
142 out.append("<meta charset=\"");
143 out.append(m_document->charset());
147 // FIXME: For object (plugins) tags and video tag we could replace them by an image of their current contents.
// For a frame owner element, appends a URL attribute (its name chosen by
// frameOwnerURLAttributeName()) whose value is the fake URL the serializer
// assigns to the element's content frame.
// NOTE(review): the statements guarded by the checks below are not visible in
// this excerpt; presumably each one returns early, so the attribute is only
// written for frames whose document URL is invalid or blank - confirm.
150 void SerializerMarkupAccumulator::appendCustomAttributes(StringBuilder& out, Element* element, Namespaces* namespaces)
152 if (!element->isFrameOwnerElement())
155 HTMLFrameOwnerElement* frameOwner = toHTMLFrameOwnerElement(element);
156 Frame* frame = frameOwner->contentFrame();
// NOTE(review): |frame| is dereferenced below; a null check is presumably among
// the lines missing from this excerpt - confirm.
160 KURL url = frame->document()->url();
161 if (url.isValid() && !url.isBlankURL())
164 // We need to give a fake location to blank frames so they can be referenced by the serialized frame.
165 url = m_serializer->urlForBlankFrame(frame);
166 appendAttribute(out, element, Attribute(frameOwnerURLAttributeName(*frameOwner), AtomicString(url.string())), namespaces);
169 void SerializerMarkupAccumulator::appendEndTag(Node* node)
171 if (node->isElementNode() && !shouldIgnoreElement(toElement(node)))
172 MarkupAccumulator::appendEndTag(node);
175 PageSerializer::PageSerializer(Vector<SerializedResource>* resources)
176 : m_resources(resources)
177 , m_blankFrameCounter(0)
181 void PageSerializer::serialize(Page* page)
183 serializeFrame(page->mainFrame());
// Serializes the document of |frame| into m_resources, then walks the nodes
// that were serialized to collect the resources they reference (inline style
// properties, <img> and image <input> images, <link> and <style> sheets), and
// finally visits the child frames of |frame|.
// NOTE(review): several short lines (the bodies of the early checks, the
// declaration of |node| inside the loop, closing braces) are not visible in
// this excerpt - confirm the exact control flow against the full file.
186 void PageSerializer::serializeFrame(Frame* frame)
188 Document* document = frame->document();
189 KURL url = document->url();
190 if (!url.isValid() || url.isBlankURL()) {
191 // For blank frames we generate a fake URL so they can be referenced by their containing frame.
192 url = urlForBlankFrame(frame);
// m_resourceURLs holds every URL already stored in m_resources; presumably a
// frame whose URL is already present is skipped here - confirm.
195 if (m_resourceURLs.contains(url)) {
196 // FIXME: We could have 2 frame with the same URL but which were dynamically changed and have now
197 // different content. So we should serialize both and somehow rename the frame src in the containing
// The markup is encoded with the document's own charset.
202 WTF::TextEncoding textEncoding(document->charset());
203 if (!textEncoding.isValid()) {
204 // FIXME: iframes used as images trigger this. We should deal with them correctly.
// |serializedNodes| is handed to the accumulator and iterated below to find
// resources referenced by the serialized nodes.
208 Vector<Node*> serializedNodes;
209 SerializerMarkupAccumulator accumulator(this, document, &serializedNodes);
210 String text = accumulator.serializeNodes(document, IncludeNode);
211 CString frameHTML = textEncoding.normalizeAndEncode(text, WTF::EntitiesForUnencodables);
212 m_resources->append(SerializedResource(url, document->suggestedMIMEType(), SharedBuffer::create(frameHTML.data(), frameHTML.length())));
213 m_resourceURLs.add(url);
215 for (Vector<Node*>::iterator iter = serializedNodes.begin(); iter != serializedNodes.end(); ++iter) {
217 if (!node->isElementNode())
220 Element* element = toElement(node);
221 // We have to process in-line style as it might contain some resources (typically background images).
222 if (element->isStyledElement())
223 retrieveResourcesForProperties(element->inlineStyle(), document);
// <img>: the src attribute is resolved against the document and the element's
// cached image is stored under that URL. This |url| is a new local that
// shadows the frame URL declared above.
225 if (element->hasTagName(HTMLNames::imgTag)) {
226 HTMLImageElement* imageElement = toHTMLImageElement(element);
227 KURL url = document->completeURL(imageElement->getAttribute(HTMLNames::srcAttr));
228 ImageResource* cachedImage = imageElement->cachedImage();
229 addImageToResources(cachedImage, imageElement->renderer(), url);
// <input>: only image buttons that have an image loader contribute an image.
230 } else if (element->hasTagName(HTMLNames::inputTag)) {
231 HTMLInputElement* inputElement = toHTMLInputElement(element);
232 if (inputElement->isImageButton() && inputElement->hasImageLoader()) {
233 KURL url = inputElement->src();
234 ImageResource* cachedImage = inputElement->imageLoader()->image();
235 addImageToResources(cachedImage, inputElement->renderer(), url);
// <link>: the attached style sheet, if any, is serialized under the resolved href.
237 } else if (element->hasTagName(HTMLNames::linkTag)) {
238 HTMLLinkElement* linkElement = toHTMLLinkElement(element);
239 if (CSSStyleSheet* sheet = linkElement->sheet()) {
240 KURL url = document->completeURL(linkElement->getAttribute(HTMLNames::hrefAttr));
241 serializeCSSStyleSheet(sheet, url);
// NOTE(review): serializeCSSStyleSheet() only records |url| when url.isValid();
// if the resolved href can be invalid this assertion would fail - confirm.
242 ASSERT(m_resourceURLs.contains(url));
// <style>: an empty KURL is passed, so serializeCSSStyleSheet() stores no
// resource for the sheet itself and only processes its rules.
244 } else if (element->hasTagName(HTMLNames::styleTag)) {
245 HTMLStyleElement* styleElement = toHTMLStyleElement(element);
246 if (CSSStyleSheet* sheet = styleElement->sheet())
247 serializeCSSStyleSheet(sheet, KURL());
// Recurse into every direct child frame.
251 for (Frame* childFrame = frame->tree().firstChild(); childFrame; childFrame = childFrame->tree().nextSibling())
252 serializeFrame(childFrame);
// Concatenates the cssText of every rule in |styleSheet|, collects the
// resources the rules reference (imported sheets, @font-face and style rule
// properties), and, when |url| is valid and not yet recorded, stores the
// encoded text in m_resources as a "text/css" resource.
// NOTE(review): some short lines (closing braces, the statement that follows
// the import-URL check) are not visible in this excerpt - confirm against the
// full file.
255 void PageSerializer::serializeCSSStyleSheet(CSSStyleSheet* styleSheet, const KURL& url)
257 StringBuilder cssText;
258 for (unsigned i = 0; i < styleSheet->length(); ++i) {
259 CSSRule* rule = styleSheet->item(i);
260 String itemText = rule->cssText();
261 if (!itemText.isEmpty()) {
262 cssText.append(itemText);
// Rules are separated by a blank line; nothing is appended after the last rule.
263 if (i < styleSheet->length() - 1)
264 cssText.append("\n\n");
// NOTE(review): |document| is dereferenced below for import rules without a
// visible null check - confirm ownerDocument() cannot be null here.
266 Document* document = styleSheet->ownerDocument();
267 // Some rules have resources associated with them that we need to retrieve.
268 if (rule->type() == CSSRule::IMPORT_RULE) {
269 CSSImportRule* importRule = toCSSImportRule(rule);
270 KURL importURL = document->completeURL(importRule->href());
// Presumably an import whose URL is already recorded is skipped - the guarded
// statement is not visible in this excerpt.
271 if (m_resourceURLs.contains(importURL))
// NOTE(review): importRule->styleSheet() is passed on without a visible null
// check, and this function dereferences |styleSheet| immediately - confirm the
// imported sheet cannot be null here.
273 serializeCSSStyleSheet(importRule->styleSheet(), importURL);
274 } else if (rule->type() == CSSRule::FONT_FACE_RULE) {
275 retrieveResourcesForProperties(toCSSFontFaceRule(rule)->styleRule()->properties(), document);
276 } else if (rule->type() == CSSRule::STYLE_RULE) {
277 retrieveResourcesForProperties(toCSSStyleRule(rule)->styleRule()->properties(), document);
// An invalid |url| means only the rules are processed and no resource is
// stored for the sheet itself.
281 if (url.isValid() && !m_resourceURLs.contains(url)) {
282 // FIXME: We should check whether a charset has been specified and if none was found add one.
283 WTF::TextEncoding textEncoding(styleSheet->contents()->charset());
284 ASSERT(textEncoding.isValid());
285 String textString = cssText.toString();
286 CString text = textEncoding.normalizeAndEncode(textString, WTF::EntitiesForUnencodables);
287 m_resources->append(SerializedResource(url, String("text/css"), SharedBuffer::create(text.data(), text.length())));
288 m_resourceURLs.add(url);
292 bool PageSerializer::shouldAddURL(const KURL& url)
294 return url.isValid() && !m_resourceURLs.contains(url) && !url.protocolIsData();
// Appends a SerializedResource made of |url|, the MIME type of the resource's
// response and |data| to m_resources, and records |url| in m_resourceURLs.
// NOTE(review): the condition guarding the error log below is not visible in
// this excerpt; presumably it fires (and the function bails out) when |data|
// is null - confirm against the full file.
297 void PageSerializer::addToResources(Resource* resource, PassRefPtr<SharedBuffer> data, const KURL& url)
300 WTF_LOG_ERROR("No data for resource %s", url.string().utf8().data());
304 String mimeType = resource->response().mimeType();
305 m_resources->append(SerializedResource(url, mimeType, data));
306 m_resourceURLs.add(url);
// Stores the data of |image| under |url| via addToResources(). When a renderer
// is supplied the data is taken from image->imageForRenderer(); otherwise the
// initial value is null.
// NOTE(review): the statements guarded by the two checks below and the
// condition in front of the fallback assignment are not visible in this
// excerpt; presumably the checks return early and the fallback applies when
// |data| is still null - confirm against the full file.
309 void PageSerializer::addImageToResources(ImageResource* image, RenderObject* imageRenderer, const KURL& url)
311 if (!shouldAddURL(url))
// Missing images and the null image carry nothing to store.
314 if (!image || image->image() == Image::nullImage())
317 RefPtr<SharedBuffer> data = imageRenderer ? image->imageForRenderer(imageRenderer)->data() : 0;
319 data = image->image()->data();
321 addToResources(image, data, url);
// Stores the resource buffer of |font| under the font's own URL via
// addToResources().
// NOTE(review): the statement guarded by the check below is not visible in
// this excerpt; presumably it returns early - confirm against the full file.
324 void PageSerializer::addFontToResources(FontResource* font)
// Rejects a null font, a URL that shouldAddURL() refuses, a font that is not
// loaded, and a font without a resource buffer.
326 if (!font || !shouldAddURL(font->url()) || !font->isLoaded() || !font->resourceBuffer()) {
329 RefPtr<SharedBuffer> data(font->resourceBuffer());
331 addToResources(font, data, font->url());
// Passes the value of every property in |styleDeclaration| to
// retrieveResourcesForCSSValue() so that referenced resources get collected.
// NOTE(review): the statement guarded by the null check below is not visible
// in this excerpt; presumably it returns early - confirm against the full file.
334 void PageSerializer::retrieveResourcesForProperties(const StylePropertySet* styleDeclaration, Document* document)
336 if (!styleDeclaration)
339 // The background-image and list-style-image (for ul or ol) are the CSS properties
340 // that make use of images. We iterate to make sure we include any other
341 // image properties there might be.
342 unsigned propertyCount = styleDeclaration->propertyCount();
343 for (unsigned i = 0; i < propertyCount; ++i) {
// The RefPtr holds a reference to the value for the duration of the call.
344 RefPtr<CSSValue> cssValue = styleDeclaration->propertyAt(i).value();
345 retrieveResourcesForCSSValue(cssValue.get(), document);
// Collects the resource referenced by |cssValue|: image values go to
// addImageToResources(), font-face src values are fetched and go to
// addFontToResources(), and value lists are processed item by item through a
// recursive call.
// NOTE(review): the statements guarded by the placeholder-image check and the
// isLocal() check are not visible in this excerpt; presumably both return
// early - confirm against the full file.
349 void PageSerializer::retrieveResourcesForCSSValue(CSSValue* cssValue, Document* document)
351 if (cssValue->isImageValue()) {
352 CSSImageValue* imageValue = toCSSImageValue(cssValue);
353 StyleImage* styleImage = imageValue->cachedOrPendingImage();
354 // Non cached-images are just place-holders and do not contain data.
355 if (!styleImage || !styleImage->isImageResource())
// No renderer is passed (0); the image is stored under the cached image's own URL.
358 addImageToResources(styleImage->cachedImage(), 0, styleImage->cachedImage()->url());
359 } else if (cssValue->isFontFaceSrcValue()) {
360 CSSFontFaceSrcValue* fontFaceSrcValue = toCSSFontFaceSrcValue(cssValue);
361 if (fontFaceSrcValue->isLocal()) {
365 addFontToResources(fontFaceSrcValue->fetch(document));
366 } else if (cssValue->isValueList()) {
367 CSSValueList* cssValueList = toCSSValueList(cssValue);
368 for (unsigned i = 0; i < cssValueList->length(); i++)
369 retrieveResourcesForCSSValue(cssValueList->item(i), document);
// Provides the fake URL associated with |frame|. The frame is first looked up
// in m_blankFrameURLs; when it is absent, a "wyciwyg://frame/<n>" URL is built
// from m_blankFrameCounter (which is then incremented) and cached in the map.
// NOTE(review): the return statements and the end of this function are not
// visible in this excerpt; presumably the cached or the freshly built URL is
// returned - confirm against the full file.
373 KURL PageSerializer::urlForBlankFrame(Frame* frame)
375 HashMap<Frame*, KURL>::iterator iter = m_blankFrameURLs.find(frame);
376 if (iter != m_blankFrameURLs.end())
378 String url = "wyciwyg://frame/" + String::number(m_blankFrameCounter++);
379 KURL fakeURL(ParsedURLString, url);
380 m_blankFrameURLs.add(frame, fakeURL);