2 * Copyright (C) 2009 Google Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
14 * * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 #ifndef WebPageSerializerImpl_h
32 #define WebPageSerializerImpl_h
34 #include "wtf/Forward.h"
35 #include "wtf/HashMap.h"
36 #include "wtf/Vector.h"
37 #include "wtf/text/StringBuilder.h"
38 #include "wtf/text/StringHash.h"
39 #include "wtf/text/WTFString.h"
41 #include "public/platform/WebString.h"
42 #include "public/platform/WebURL.h"
43 #include "public/web/WebPageSerializer.h"
44 #include "public/web/WebPageSerializerClient.h"
45 #include "web/WebEntities.h"
56 class WebLocalFrameImpl;
58 // Gets HTML data by serializing all frames of the current page, using lists
59 // which contain all resource links that have a local copy. Those local copies
60 // are the saved auxiliary files, including all sub frames and resources.
61 // This class finds all frames and serializes them to HTML data.
62 // We have a data buffer for temporarily saving generated HTML data. We will
63 // sequentially call WebViewDelegate::SendSerializedHtmlData once the data
64 // buffer is full. See comments of WebViewDelegate::SendSerializedHtmlData
65 // for more information.
66 class WebPageSerializerImpl {
68 // Performs the serialization. Returns false if no available frame has been
69 // serialized; otherwise returns true.
72 // The parameter frame specifies which frame needs to be serialized.
73 // The parameter recursive_serialization specifies whether we need to
74 // serialize all sub frames of the specified frame or not.
75 // The parameter client specifies the pointer to the interface
76 // WebPageSerializerClient, which provides a sink that can receive the
77 // individual chunks of data to be saved.
78 // The parameter links contains the original URLs of all saved links.
79 // The parameter localPaths contains the corresponding local file paths of all
80 // saved links, which match the entries of links one by one.
81 // The parameter localDirectoryName is the relative path of the directory which
82 // contains all saved auxiliary files, including all sub frames and resources.
83 WebPageSerializerImpl(WebFrame* frame,
85 WebPageSerializerClient* client,
86 const WebVector<WebURL>& links,
87 const WebVector<WebString>& localPaths,
88 const WebString& localDirectoryName);
91 // The specified frame which needs to be serialized.
92 WebLocalFrameImpl* m_specifiedWebLocalFrameImpl;
93 // Pointer to the WebPageSerializerClient.
94 WebPageSerializerClient* m_client;
95 // This hash map is used to map the resource URL of an original link to its local path.
97 typedef HashMap<WTF::String, WTF::String> LinkLocalPathMap;
98 // m_localLinks includes all pairs of local resource path and corresponding original link.
100 LinkLocalPathMap m_localLinks;
101 // Data buffer for saving the result of serialized DOM data.
102 StringBuilder m_dataBuffer;
103 // Passing true to m_recursiveSerialization indicates we will serialize not
104 // only the specified frame but also all sub-frames in the specified frame.
105 // Otherwise we only serialize the specified frame, excluding all sub-frames.
106 bool m_recursiveSerialization;
107 // Flag indicating whether we have collected all frames which need to be
108 // serialized or not.
109 bool m_framesCollected;
110 // Local directory name of all local resource files.
111 WTF::String m_localDirectoryName;
112 // Vector for saving all frames which need to be serialized.
113 Vector<WebLocalFrameImpl*> m_frames;
115 // Web entities conversion maps.
116 WebEntities m_htmlEntities;
117 WebEntities m_xmlEntities;
119 struct SerializeDomParam {
121 const WTF::TextEncoding& textEncoding;
123 const WTF::String& directoryName;
124 bool isHTMLDocument; // document.isHTMLDocument()
125 bool haveSeenDocType;
126 bool haveAddedCharsetDeclaration;
127 // This meta element needs to be skipped when serializing the DOM.
128 const Element* skipMetaElement;
129 // Flag indicates we are in script or style tag.
130 bool isInScriptOrStyleTag;
131 bool haveAddedXMLProcessingDirective;
132 // Flag indicating whether we have added additional contents before the end tag.
133 // This flag will be re-assigned in each call of function
134 // postActionAfterSerializeOpenTag and it could be changed in function
135 // preActionBeforeSerializeEndTag if the function adds new contents into
136 // the serialization stream.
137 bool haveAddedContentsBeforeEnd;
139 SerializeDomParam(const KURL&, const WTF::TextEncoding&, Document*, const WTF::String& directoryName);
142 // Collects all target frames which need to be serialized (presumably into m_frames; confirm in the implementation).
143 void collectTargetFrames();
144 // Before we begin serializing the open tag of an element, we give the target
145 // element a chance to do some preparatory work and add some additional data.
146 WTF::String preActionBeforeSerializeOpenTag(const Element*,
147 SerializeDomParam* param,
149 // After we finish serializing the open tag of an element, we give the target
150 // element a chance to do some follow-up work and add some additional data.
151 WTF::String postActionAfterSerializeOpenTag(const Element*,
152 SerializeDomParam* param);
153 // Before we begin serializing the end tag of an element, we give the target
154 // element a chance to do some preparatory work and add some additional data.
155 WTF::String preActionBeforeSerializeEndTag(const Element*,
156 SerializeDomParam* param,
158 // After we finish serializing the end tag of an element, we give the target
159 // element a chance to do some follow-up work and add some additional data.
160 WTF::String postActionAfterSerializeEndTag(const Element*,
161 SerializeDomParam* param);
162 // Saves the generated HTML content to the data buffer.
163 void saveHTMLContentToBuffer(const WTF::String& content,
164 SerializeDomParam* param);
171 // Flushes the content buffer by encoding and sending the content to the
172 // WebPageSerializerClient. Content is not flushed if the buffer is not full
173 // unless force is 1.
174 void encodeAndFlushBuffer(WebPageSerializerClient::PageSerializationStatus status,
175 SerializeDomParam* param,
177 // Serializes the open tag of a specified element.
178 void openTagToString(Element*,
179 SerializeDomParam* param);
180 // Serializes the end tag of a specified element.
181 void endTagToString(Element*,
182 SerializeDomParam* param);
183 // Builds the content for a specified node.
184 void buildContentForNode(Node*,
185 SerializeDomParam* param);