2 * Copyright (C) 2008 Apple Inc. All Rights Reserved.
3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
4 * Copyright (C) 2010 Google Inc. All Rights Reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include "core/html/parser/HTMLPreloadScanner.h"
31 #include "HTMLNames.h"
32 #include "InputTypeNames.h"
33 #include "RuntimeEnabledFeatures.h"
34 #include "core/css/MediaList.h"
35 #include "core/css/MediaQueryEvaluator.h"
36 #include "core/css/MediaValues.h"
37 #include "core/css/parser/SizesAttributeParser.h"
38 #include "core/html/LinkRelAttribute.h"
39 #include "core/html/parser/HTMLParserIdioms.h"
40 #include "core/html/parser/HTMLSrcsetParser.h"
41 #include "core/html/parser/HTMLTokenizer.h"
42 #include "platform/TraceEvent.h"
43 #include "wtf/MainThread.h"
47 using namespace HTMLNames;
// Returns true when |impl| is the very same StringImpl object that backs
// |qName|'s local name. This compares pointers, not characters.
49 static bool match(const StringImpl* impl, const QualifiedName& qName)
51 return impl == qName.localName().impl();
// AtomicString overload: compares |name| with |qName|'s local name.
// Asserts that it is being called on the main thread.
54 static bool match(const AtomicString& name, const QualifiedName& qName)
56 ASSERT(isMainThread());
57 return qName.localName() == name;
// String overload: forwards to threadSafeMatch() instead of comparing directly.
// Unlike the AtomicString overload, this one contains no main-thread assertion.
60 static bool match(const String& name, const QualifiedName& qName)
62 return threadSafeMatch(name, qName);
// Turns the raw tag-name characters of a token into a StringImpl pointer that
// can be handed to the pointer-comparing match() overload.
65 static const StringImpl* tagImplFor(const HTMLToken::DataVector& data)
// Build an AtomicString from the characters and look at its backing impl.
67 AtomicString tagName(data);
68 const StringImpl* result = tagName.impl();
// NOTE(review): the statements following this isStatic() check are not visible
// here (the embedded numbering jumps from 69 to 74). Presumably only static
// impls are handed back to the caller -- confirm against the complete file.
69 if (result->isStatic())
// String overload of tagImplFor(): inspects the impl that already backs
// |tagName| rather than creating an AtomicString first.
74 static const StringImpl* tagImplFor(const String& tagName)
76 const StringImpl* result = tagName.impl();
// NOTE(review): the statements following this isStatic() check are not visible
// here (the embedded numbering jumps from 77 to 82). Presumably only static
// impls are handed back to the caller -- confirm against the complete file.
77 if (result->isStatic())
// Returns the local name of the tag identified by |tagImpl| when it is one of
// img, input, link or script. The result is passed to PreloadRequest::create()
// as the request's initiator.
// NOTE(review): what happens for any other tag is not visible here (lines
// after 92 are missing from this extract) -- confirm the fall-through value.
82 static String initiatorFor(const StringImpl* tagImpl)
85 if (match(tagImpl, imgTag))
86 return imgTag.localName();
87 if (match(tagImpl, inputTag))
88 return inputTag.localName();
89 if (match(tagImpl, linkTag))
90 return linkTag.localName();
91 if (match(tagImpl, scriptTag))
92 return scriptTag.localName();
// Evaluates the value of a media attribute against |mediaValues|.
// Returns the result of MediaQueryEvaluator::eval() for the parsed query set.
97 static bool mediaAttributeMatches(const MediaValues& mediaValues, const String& attributeValue)
// The query set is built with the createOffMainThread() factory.
99 RefPtrWillBeRawPtr<MediaQuerySet> mediaQueries = MediaQuerySet::createOffMainThread(attributeValue);
// The evaluator is always constructed with the "screen" media type.
100 MediaQueryEvaluator mediaQueryEvaluator("screen", mediaValues);
101 return mediaQueryEvaluator.eval(mediaQueries.get());
// Gathers, from the attributes of one start tag, what is needed to decide
// whether a preload request should be issued for that tag and to build it.
// Only img, input, link and script tags are examined by the attribute logic.
// NOTE(review): several lines of this class (braces, access specifiers, some
// initializers, members and early returns) are not visible in this extract;
// the embedded line numbers show the gaps.
104 class TokenPreloadScanner::StartTagScanner {
// |tagImpl| identifies the tag being scanned. |mediaValues| is stored in
// m_mediaValues and later used for media-query, srcset and sizes evaluation.
106 StartTagScanner(const StringImpl* tagImpl, PassRefPtr<MediaValues> mediaValues)
108 , m_linkIsStyleSheet(false)
// Starts out true, so a tag without a media attribute counts as matching.
109 , m_matchedMediaAttribute(true)
110 , m_inputIsImage(false)
112 , m_sourceSizeSet(false)
113 , m_isCORSEnabled(false)
114 , m_allowCredentials(DoNotAllowStoredCredentials)
115 , m_mediaValues(mediaValues)
// Tags other than img, input, link and script are singled out here.
// NOTE(review): the statement guarded by this condition is not visible
// (line 121 is missing) -- presumably an early exit; confirm.
117 if (!match(m_tagImpl, imgTag)
118 && !match(m_tagImpl, inputTag)
119 && !match(m_tagImpl, linkTag)
120 && !match(m_tagImpl, scriptTag))
// With the picture-sizes runtime feature enabled, seed the source size with
// the effective size computed for a null String (no sizes attribute seen yet).
122 if (RuntimeEnabledFeatures::pictureSizesEnabled())
123 m_imgSourceSize = SizesAttributeParser::findEffectiveSize(String(), m_mediaValues);
// Passed to setUrlToLoad(), which checks for DisallowURLReplacement when a
// URL has already been recorded.
126 enum URLReplacement {
128 DisallowURLReplacement
// Attribute walk for HTMLToken attribute lists. Asserts main-thread use and
// converts each name to an AtomicString and each value to a String (8-bit
// when possible) before handing the pair to processAttribute().
131 void processAttributes(const HTMLToken::AttributeList& attributes)
133 ASSERT(isMainThread());
136 for (HTMLToken::AttributeList::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter) {
137 AtomicString attributeName(iter->name);
138 String attributeValue = StringImpl::create8BitIfPossible(iter->value);
139 processAttribute(attributeName, attributeValue);
// Attribute walk for CompactHTMLToken attributes. Names and values are
// forwarded to processAttribute() as they are, with no conversion.
143 void processAttributes(const Vector<CompactHTMLToken::Attribute>& attributes)
147 for (Vector<CompactHTMLToken::Attribute>::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter)
148 processAttribute(iter->name, iter->value);
// Builds the PreloadRequest for the scanned tag from the collected state.
// |predictedBaseURL| is passed through to the request; |source| supplies the
// current line and column used as the request's text position.
151 PassOwnPtr<PreloadRequest> createPreloadRequest(const KURL& predictedBaseURL, const SegmentedString& source)
// Nothing to preload, or a media attribute that did not match.
// NOTE(review): the guarded statement (line 154) is not visible -- presumably
// an early return of a null request; confirm.
153 if (!shouldPreload() || !m_matchedMediaAttribute)
156 TRACE_EVENT_INSTANT1("net", "PreloadRequest", "url", m_urlToLoad.ascii());
157 TextPosition position = TextPosition(source.currentLine(), source.currentColumn());
158 OwnPtr<PreloadRequest> request = PreloadRequest::create(initiatorFor(m_tagImpl), position, m_urlToLoad, predictedBaseURL, resourceType());
// NOTE(review): line 159 is not visible; this call may be conditional on
// isCORSEnabled() -- confirm against the complete file.
160 request->setCrossOriginEnabled(allowStoredCredentials());
161 request->setCharset(charset());
162 return request.release();
// Records the effect of a single attribute. NameType is whatever name type
// the calling processAttributes() overload supplies; it only needs to work
// with one of the match() overloads.
166 template<typename NameType>
167 void processAttribute(const NameType& attributeName, const String& attributeValue)
// charset is remembered regardless of which tag is being scanned.
169 if (match(attributeName, charsetAttr))
170 m_charset = attributeValue;
// <script>: src supplies the URL, crossorigin the CORS settings.
172 if (match(m_tagImpl, scriptTag)) {
173 if (match(attributeName, srcAttr))
174 setUrlToLoad(attributeValue, DisallowURLReplacement);
175 else if (match(attributeName, crossoriginAttr))
176 setCrossOriginAllowed(attributeValue);
// <img>: src, srcset and sizes together determine the URL. The URL is
// re-selected (AllowURLReplacement) each time one of them is processed.
177 } else if (match(m_tagImpl, imgTag)) {
// Only the first src attribute is recorded (m_imgSrcUrl is still null).
178 if (match(attributeName, srcAttr) && m_imgSrcUrl.isNull()) {
179 m_imgSrcUrl = attributeValue;
180 setUrlToLoad(bestFitSourceForImageAttributes(m_mediaValues->devicePixelRatio(), m_imgSourceSize, attributeValue, m_srcsetImageCandidate), AllowURLReplacement);
181 } else if (match(attributeName, crossoriginAttr)) {
182 setCrossOriginAllowed(attributeValue);
// srcset is only processed while no srcset candidate has been selected.
// The raw value is kept so it can be re-evaluated if sizes arrives later.
183 } else if (match(attributeName, srcsetAttr) && m_srcsetImageCandidate.isEmpty()) {
184 m_imgSrcsetAttributeValue = attributeValue;
185 m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_imgSourceSize, attributeValue);
186 setUrlToLoad(bestFitSourceForImageAttributes(m_mediaValues->devicePixelRatio(), m_imgSourceSize, m_imgSrcUrl, m_srcsetImageCandidate), AllowURLReplacement);
// sizes is honored once, and only when the picture-sizes feature is enabled.
187 } else if (RuntimeEnabledFeatures::pictureSizesEnabled() && match(attributeName, sizesAttr) && !m_sourceSizeSet) {
188 m_imgSourceSize = SizesAttributeParser::findEffectiveSize(attributeValue, m_mediaValues);
189 m_sourceSizeSet = true;
// A srcset candidate chosen before sizes was seen used the old source size,
// so redo the srcset selection and the URL choice with the new size.
190 if (!m_srcsetImageCandidate.isEmpty()) {
191 m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_imgSourceSize, m_imgSrcsetAttributeValue);
192 setUrlToLoad(bestFitSourceForImageAttributes(m_mediaValues->devicePixelRatio(), m_imgSourceSize, m_imgSrcUrl, m_srcsetImageCandidate), AllowURLReplacement);
// <link>: href supplies the URL; rel, media and crossorigin refine the request.
195 } else if (match(m_tagImpl, linkTag)) {
196 if (match(attributeName, hrefAttr))
197 setUrlToLoad(attributeValue, DisallowURLReplacement);
198 else if (match(attributeName, relAttr))
199 m_linkIsStyleSheet = relAttributeIsStyleSheet(attributeValue);
200 else if (match(attributeName, mediaAttr))
201 m_matchedMediaAttribute = mediaAttributeMatches(*m_mediaValues, attributeValue);
202 else if (match(attributeName, crossoriginAttr))
203 setCrossOriginAllowed(attributeValue);
// <input>: src supplies the URL; type decides whether it is an image input.
204 } else if (match(m_tagImpl, inputTag)) {
205 if (match(attributeName, srcAttr))
206 setUrlToLoad(attributeValue, DisallowURLReplacement);
207 else if (match(attributeName, typeAttr))
208 m_inputIsImage = equalIgnoringCase(attributeValue, InputTypeNames::image);
// True when the rel value denotes a style sheet that is not an alternate,
// carries no icon type (InvalidIcon) and is not a DNS prefetch.
212 static bool relAttributeIsStyleSheet(const String& attributeValue)
214 LinkRelAttribute rel(attributeValue);
215 return rel.isStyleSheet() && !rel.isAlternate() && rel.iconType() == InvalidIcon && !rel.isDNSPrefetch();
// Records |value| as the URL to preload, subject to |replacement|.
218 void setUrlToLoad(const String& value, URLReplacement replacement)
220 // We only respect the first src/href, per HTML5:
221 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#attribute-name-state
// NOTE(review): the guarded statement (line 223) is not visible -- presumably
// an early return that keeps the URL already recorded; confirm.
222 if (replacement == DisallowURLReplacement && !m_urlToLoad.isEmpty())
// Leading and trailing HTML whitespace is removed from the attribute value.
// NOTE(review): the lines that consume |url| (225-227) are not visible here.
224 String url = stripLeadingAndTrailingHTMLSpaces(value);
// Charset to attach to the request. For img tags the empty string is returned.
// NOTE(review): the return for other tags (line 235) is not visible --
// presumably m_charset; confirm.
230 const String& charset() const
232 // FIXME: Its not clear that this if is needed, the loader probably ignores charset for image requests anyway.
233 if (match(m_tagImpl, imgTag))
234 return emptyString();
// Maps the scanned tag to the Resource::Type of the request: Script for
// script, Image for img and for image inputs, CSSStyleSheet for style sheet
// links. Any other combination hits ASSERT_NOT_REACHED() and yields Raw.
238 Resource::Type resourceType() const
240 if (match(m_tagImpl, scriptTag))
241 return Resource::Script;
242 if (match(m_tagImpl, imgTag) || (match(m_tagImpl, inputTag) && m_inputIsImage))
243 return Resource::Image;
244 if (match(m_tagImpl, linkTag) && m_linkIsStyleSheet)
245 return Resource::CSSStyleSheet;
246 ASSERT_NOT_REACHED();
247 return Resource::Raw;
// Checks the conditions under which a request makes no sense: no URL, a link
// that is not a style sheet, or an input that is not an image input.
// NOTE(review): the statements guarded by these conditions and the final
// return are not visible here -- presumably false/false/false/true; confirm.
250 bool shouldPreload() const
252 if (m_urlToLoad.isEmpty())
254 if (match(m_tagImpl, linkTag) && !m_linkIsStyleSheet)
256 if (match(m_tagImpl, inputTag) && !m_inputIsImage)
// True once a crossorigin attribute has been processed for this tag.
261 bool isCORSEnabled() const
263 return m_isCORSEnabled;
// Credentials policy derived from the crossorigin attribute value.
266 StoredCredentials allowStoredCredentials() const
268 return m_allowCredentials;
// Handles a crossorigin attribute: marks CORS as enabled and selects the
// credentials policy. The comparison with "use-credentials" ignores case and
// surrounding HTML whitespace; a null value never selects stored credentials.
271 void setCrossOriginAllowed(const String& corsSetting)
273 m_isCORSEnabled = true;
274 if (!corsSetting.isNull() && equalIgnoringCase(stripLeadingAndTrailingHTMLSpaces(corsSetting), "use-credentials"))
275 m_allowCredentials = AllowStoredCredentials;
// NOTE(review): line 276 is not visible; presumably an `else` precedes the
// assignment below -- confirm against the complete file.
277 m_allowCredentials = DoNotAllowStoredCredentials;
// Identifies the tag being scanned; compared by pointer via match().
280 const StringImpl* m_tagImpl;
// Candidate currently selected from the srcset attribute, if any.
282 ImageCandidate m_srcsetImageCandidate;
// Result of relAttributeIsStyleSheet() for the link's rel attribute.
284 bool m_linkIsStyleSheet;
// Result of evaluating the link's media attribute; true until one is seen.
285 bool m_matchedMediaAttribute;
// Raw srcset value, kept so it can be re-evaluated when sizes is processed.
288 String m_imgSrcsetAttributeValue;
// Effective source size passed to the srcset/src selection helpers.
289 unsigned m_imgSourceSize;
// Set once a sizes attribute has been processed.
290 bool m_sourceSizeSet;
// Set once a crossorigin attribute has been processed.
291 bool m_isCORSEnabled;
292 StoredCredentials m_allowCredentials;
// Media values used for media-query, srcset and sizes evaluation.
293 RefPtr<MediaValues> m_mediaValues;
// Stores the document URL and the media values for later use while scanning.
// NOTE(review): two initializers (lines 298-299) are not visible in this extract.
296 TokenPreloadScanner::TokenPreloadScanner(const KURL& documentURL, PassRefPtr<MediaValues> mediaValues)
297 : m_documentURL(documentURL)
300 , m_mediaValues(mediaValues)
// Destructor; no body statements are visible in this extract.
304 TokenPreloadScanner::~TokenPreloadScanner()
// Saves the scanner's current state so that rewindTo() can restore it later.
// NOTE(review): the return statement is not visible here -- presumably
// |checkpoint| is returned; confirm.
308 TokenPreloadScannerCheckpoint TokenPreloadScanner::createCheckpoint()
// The new checkpoint's identifier is its index in m_checkpoints, i.e. the
// vector's size before the append below.
310 TokenPreloadScannerCheckpoint checkpoint = m_checkpoints.size();
// Captured state: predicted base URL, the in-<style> flag and the template count.
311 m_checkpoints.append(Checkpoint(m_predictedBaseElementURL, m_inStyle, m_templateCount));
// Restores the state captured by createCheckpoint() for |checkpointIndex|.
315 void TokenPreloadScanner::rewindTo(TokenPreloadScannerCheckpoint checkpointIndex)
317 ASSERT(checkpointIndex < m_checkpoints.size()); // If this ASSERT fires, checkpointIndex is invalid.
318 const Checkpoint& checkpoint = m_checkpoints[checkpointIndex];
319 m_predictedBaseElementURL = checkpoint.predictedBaseElementURL;
320 m_inStyle = checkpoint.inStyle;
321 m_templateCount = checkpoint.templateCount;
// The CSS scanner's state is not part of a checkpoint; it is simply reset.
322 m_cssScanner.reset();
// All checkpoints are dropped, not just those after |checkpointIndex|, so
// every index handed out earlier fails the ASSERT above from now on.
323 m_checkpoints.clear();
// HTMLToken entry point: forwards to the shared scanCommon() template.
326 void TokenPreloadScanner::scan(const HTMLToken& token, const SegmentedString& source, PreloadRequestStream& requests)
328 scanCommon(token, source, requests);
// CompactHTMLToken entry point: forwards to the shared scanCommon() template.
331 void TokenPreloadScanner::scan(const CompactHTMLToken& token, const SegmentedString& source, PreloadRequestStream& requests)
333 scanCommon(token, source, requests);
// Shared implementation behind both scan() overloads. Examines one token and
// appends any resulting preload request to |requests|.
// NOTE(review): many lines of the switch body (guards, counters, breaks and
// returns) are not visible in this extract; the embedded line numbers show
// the gaps, and the hedged notes below mark where behavior is assumed.
336 template<typename Token>
337 void TokenPreloadScanner::scanCommon(const Token& token, const SegmentedString& source, PreloadRequestStream& requests)
339 switch (token.type()) {
// Character data is handed to the CSS scanner.
// NOTE(review): lines 341-342 are not visible; presumably this only happens
// while inside a <style> element -- confirm.
340 case HTMLToken::Character: {
343 m_cssScanner.scan(token.data(), source, requests);
// End tags: </template> and </style> get special handling.
346 case HTMLToken::EndTag: {
347 const StringImpl* tagImpl = tagImplFor(token.data());
// NOTE(review): body not visible; presumably adjusts m_templateCount -- confirm.
348 if (match(tagImpl, templateTag)) {
// Leaving a style element resets the CSS scanner.
353 if (match(tagImpl, styleTag)) {
355 m_cssScanner.reset();
// Start tags: template, style and base are handled specially; everything
// else goes through a StartTagScanner.
360 case HTMLToken::StartTag: {
363 const StringImpl* tagImpl = tagImplFor(token.data());
// NOTE(review): body not visible; presumably adjusts m_templateCount -- confirm.
364 if (match(tagImpl, templateTag)) {
// NOTE(review): body not visible; presumably sets m_inStyle -- confirm.
368 if (match(tagImpl, styleTag)) {
372 if (match(tagImpl, baseTag)) {
373 // The first <base> element is the one that wins.
374 if (!m_predictedBaseElementURL.isEmpty())
376 updatePredictedBaseURL(token);
// Collect the tag's attributes and, if they describe something preloadable,
// queue the request.
380 StartTagScanner scanner(tagImpl, m_mediaValues);
381 scanner.processAttributes(token.attributes());
382 OwnPtr<PreloadRequest> request = scanner.createPreloadRequest(m_predictedBaseElementURL, source);
// NOTE(review): line 383 is not visible; presumably a null check on
// |request| guards this append -- confirm.
384 requests.append(request.release());
// Derives the predicted base URL from a <base> start tag. Must only be called
// while no base URL has been predicted yet (asserted below).
393 template<typename Token>
394 void TokenPreloadScanner::updatePredictedBaseURL(const Token& token)
396 ASSERT(m_predictedBaseElementURL.isEmpty());
// Without an href attribute the prediction is left untouched. Otherwise the
// href, stripped of surrounding HTML whitespace, is resolved against the
// document URL and a copy() of the resulting KURL is stored.
397 if (const typename Token::Attribute* hrefAttribute = token.getAttributeItem(hrefAttr))
398 m_predictedBaseElementURL = KURL(m_documentURL, stripLeadingAndTrailingHTMLSpaces(hrefAttribute->value)).copy();
// Sets up the token scanner with the document URL and media values, and
// creates an HTMLTokenizer configured with |options|.
401 HTMLPreloadScanner::HTMLPreloadScanner(const HTMLParserOptions& options, const KURL& documentURL, PassRefPtr<MediaValues> mediaValues)
402 : m_scanner(documentURL, mediaValues)
403 , m_tokenizer(HTMLTokenizer::create(options))
// Destructor; no body statements are visible in this extract.
407 HTMLPreloadScanner::~HTMLPreloadScanner()
// Appends |source| to the input buffered for a later scan() call.
411 void HTMLPreloadScanner::appendToEnd(const SegmentedString& source)
413 m_source.append(source);
// Tokenizes the buffered source, collects preload requests for the tokens and
// hands the collected requests to |preloader|. A non-empty
// |startingBaseElementURL| seeds the scanner's predicted base URL.
416 void HTMLPreloadScanner::scan(HTMLResourcePreloader* preloader, const KURL& startingBaseElementURL)
418 ASSERT(isMainThread()); // HTMLTokenizer::updateStateFor only works on the main thread.
420 TRACE_EVENT1("webkit", "HTMLPreloadScanner::scan", "source_length", m_source.length());
422 // When we start scanning, our best prediction of the baseElementURL is the real one!
423 if (!startingBaseElementURL.isEmpty())
424 m_scanner.setPredictedBaseElementURL(startingBaseElementURL);
426 PreloadRequestStream requests;
// Pull tokens until the tokenizer cannot produce another one from m_source.
428 while (m_tokenizer->nextToken(m_source, m_token)) {
// For start tags, the tokenizer's state is updated from the tag name before
// the token is scanned.
429 if (m_token.type() == HTMLToken::StartTag)
430 m_tokenizer->updateStateFor(attemptStaticStringCreation(m_token.name(), Likely8Bit));
431 m_scanner.scan(m_token, m_source, requests);
// NOTE(review): the end of the loop body (line 432) is not visible here;
// presumably m_token is cleared before the next iteration -- confirm.
435 preloader->takeAndPreload(requests);