2 * Copyright (C) 2008 Apple Inc. All Rights Reserved.
3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
4 * Copyright (C) 2010 Google Inc. All Rights Reserved.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include "core/html/parser/HTMLPreloadScanner.h"
31 #include "core/HTMLNames.h"
32 #include "core/InputTypeNames.h"
33 #include "core/css/MediaList.h"
34 #include "core/css/MediaQueryEvaluator.h"
35 #include "core/css/MediaValues.h"
36 #include "core/css/parser/SizesAttributeParser.h"
37 #include "core/html/LinkRelAttribute.h"
38 #include "core/html/parser/HTMLParserIdioms.h"
39 #include "core/html/parser/HTMLSrcsetParser.h"
40 #include "core/html/parser/HTMLTokenizer.h"
41 #include "platform/RuntimeEnabledFeatures.h"
42 #include "platform/TraceEvent.h"
43 #include "wtf/MainThread.h"
47 using namespace HTMLNames;
49 static bool match(const StringImpl* impl, const QualifiedName& qName)
51 return impl == qName.localName().impl();
54 static bool match(const AtomicString& name, const QualifiedName& qName)
56 ASSERT(isMainThread());
57 return qName.localName() == name;
60 static bool match(const String& name, const QualifiedName& qName)
62 return threadSafeMatch(name, qName);
// Builds an AtomicString from the token's raw tag-name characters and
// inspects the StringImpl behind it, checking whether that impl is static.
// NOTE(review): the statement guarded by the isStatic() check and the final
// return are not visible in this view of the file — confirm what is returned
// for static and non-static impls against the full source.
65 static const StringImpl* tagImplFor(const HTMLToken::DataVector& data)
67 AtomicString tagName(data);
68 const StringImpl* result = tagName.impl();
69 if (result->isStatic())
// String overload: takes the StringImpl directly from |tagName| (no
// AtomicString is created here) and checks whether it is static.
// NOTE(review): the statement guarded by the isStatic() check and the final
// return are not visible in this view of the file — confirm against the full
// source.
74 static const StringImpl* tagImplFor(const String& tagName)
76 const StringImpl* result = tagName.impl();
77 if (result->isStatic())
// Maps a tag StringImpl to the initiator name passed to PreloadRequest: the
// local name of the img, input, link or script tag, whichever matches.
// NOTE(review): lines before the first check and after the last one are
// missing from this view, so the behaviour for any other tag is not shown
// here — confirm against the full source.
82 static String initiatorFor(const StringImpl* tagImpl)
85 if (match(tagImpl, imgTag))
86 return imgTag.localName();
87 if (match(tagImpl, inputTag))
88 return inputTag.localName();
89 if (match(tagImpl, linkTag))
90 return linkTag.localName();
91 if (match(tagImpl, scriptTag))
92 return scriptTag.localName();
97 static bool mediaAttributeMatches(const MediaValues& mediaValues, const String& attributeValue)
99 RefPtrWillBeRawPtr<MediaQuerySet> mediaQueries = MediaQuerySet::createOffMainThread(attributeValue);
100 MediaQueryEvaluator mediaQueryEvaluator("screen", mediaValues);
101 return mediaQueryEvaluator.eval(mediaQueries.get());
// Collects preload-relevant state for a single start tag. Callers feed the
// tag's attributes through processAttributes(), may reconcile a <picture>
// source URL via handlePictureSourceURL(), and then call
// createPreloadRequest() to obtain a PreloadRequest built from that state.
// NOTE(review): this view of the class is missing lines (access specifiers,
// braces, several statements and some member declarations) — confirm the
// details flagged below against the full source.
104 class TokenPreloadScanner::StartTagScanner {
// Initializes the per-tag flags and keeps a reference to |mediaValues|.
106 StartTagScanner(const StringImpl* tagImpl, PassRefPtr<MediaValues> mediaValues)
108 , m_linkIsStyleSheet(false)
// Starts out true, so a tag with no media attribute is treated as matching.
109 , m_matchedMediaAttribute(true)
110 , m_inputIsImage(false)
112 , m_sourceSizeSet(false)
113 , m_isCORSEnabled(false)
114 , m_allowCredentials(DoNotAllowStoredCredentials)
115 , m_mediaValues(mediaValues)
// For <img> and <source>, and only when the pictureSizes runtime feature is
// enabled, seed m_sourceSize with the effective size computed from a
// default-constructed String (i.e. before any sizes attribute is seen).
117 if (match(m_tagImpl, imgTag)
118 || match(m_tagImpl, sourceTag)) {
119 if (RuntimeEnabledFeatures::pictureSizesEnabled())
120 m_sourceSize = SizesAttributeParser::findEffectiveSize(String(), m_mediaValues);
// This condition holds when the tag is none of <input>, <link>, <script>.
// NOTE(review): the statement it guards is not visible in this view.
123 if ( !match(m_tagImpl, inputTag)
124 && !match(m_tagImpl, linkTag)
125 && !match(m_tagImpl, scriptTag))
// Passed to setUrlToLoad() to say whether an already-recorded URL may be
// overwritten. AllowURLReplacement is used elsewhere in this class, but its
// enumerator line is not visible in this view.
129 enum URLReplacement {
131 DisallowURLReplacement
// HTMLToken flavour: asserts main-thread use, converts each attribute name to
// an AtomicString and each value to a String (8-bit when possible), then
// forwards the pair to processAttribute().
134 void processAttributes(const HTMLToken::AttributeList& attributes)
136 ASSERT(isMainThread());
139 for (HTMLToken::AttributeList::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter) {
140 AtomicString attributeName(iter->name);
141 String attributeValue = StringImpl::create8BitIfPossible(iter->value);
142 processAttribute(attributeName, attributeValue);
// CompactHTMLToken flavour: forwards each attribute's name and value to
// processAttribute() as-is. Unlike the overload above, no main-thread
// assertion is visible here.
146 void processAttributes(const Vector<CompactHTMLToken::Attribute>& attributes)
150 for (Vector<CompactHTMLToken::Attribute>::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter)
151 processAttribute(iter->name, iter->value);
// Reconciles this tag with the URL remembered for the enclosing <picture>:
//  - <source> whose media attribute matched, while |sourceURL| is still
//    empty: writes the chosen srcset candidate into |sourceURL|.
//  - <img> with a non-empty |sourceURL|: adopts it as the URL to load,
//    replacing any URL picked from the img's own attributes.
154 void handlePictureSourceURL(String& sourceURL)
156 if (match(m_tagImpl, sourceTag) && m_matchedMediaAttribute && sourceURL.isEmpty())
157 sourceURL = m_srcsetImageCandidate.toString();
158 else if (match(m_tagImpl, imgTag) && !sourceURL.isEmpty())
159 setUrlToLoad(sourceURL, AllowURLReplacement);
// Builds the PreloadRequest for this tag. |predictedBaseURL| is passed through
// to PreloadRequest::create(); |source| supplies the current line and column
// that become the request's text position.
162 PassOwnPtr<PreloadRequest> createPreloadRequest(const KURL& predictedBaseURL, const SegmentedString& source)
// NOTE(review): the statement guarded by this check is not visible in this
// view; presumably no request is produced when shouldPreload() is false or
// the media attribute did not match — confirm.
164 if (!shouldPreload() || !m_matchedMediaAttribute)
167 TRACE_EVENT_INSTANT1("net", "PreloadRequest", "url", m_urlToLoad.ascii());
168 TextPosition position = TextPosition(source.currentLine(), source.currentColumn());
169 OwnPtr<PreloadRequest> request = PreloadRequest::create(initiatorFor(m_tagImpl), position, m_urlToLoad, predictedBaseURL, resourceType());
// NOTE(review): the line immediately before this call is absent from this
// view, so the call may be conditional — confirm.
171 request->setCrossOriginEnabled(allowStoredCredentials());
172 request->setCharset(charset());
173 return request.release();
// <script>: src supplies the URL (an already-recorded URL is not replaced);
// crossorigin updates the CORS state.
177 template<typename NameType>
178 void processScriptAttribute(const NameType& attributeName, const String& attributeValue)
180 // FIXME - Don't set crossorigin multiple times.
181 if (match(attributeName, srcAttr))
182 setUrlToLoad(attributeValue, DisallowURLReplacement);
183 else if (match(attributeName, crossoriginAttr))
184 setCrossOriginAllowed(attributeValue);
// <img>: src, srcset and sizes may arrive in any order, so each branch
// recomputes the URL to load from whatever has been seen so far.
187 template<typename NameType>
188 void processImgAttribute(const NameType& attributeName, const String& attributeValue)
// Only the first src is recorded (m_imgSrcUrl is still null).
190 if (match(attributeName, srcAttr) && m_imgSrcUrl.isNull()) {
191 m_imgSrcUrl = attributeValue;
192 setUrlToLoad(bestFitSourceForImageAttributes(m_mediaValues->devicePixelRatio(), m_sourceSize, attributeValue, m_srcsetImageCandidate), AllowURLReplacement);
193 } else if (match(attributeName, crossoriginAttr)) {
194 setCrossOriginAllowed(attributeValue);
// srcset is handled only while no candidate has been chosen yet. The raw
// attribute value is kept so the candidate can be recomputed if a sizes
// attribute shows up afterwards.
195 } else if (match(attributeName, srcsetAttr) && m_srcsetImageCandidate.isEmpty()) {
196 m_srcsetAttributeValue = attributeValue;
197 m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_sourceSize, attributeValue);
198 setUrlToLoad(bestFitSourceForImageAttributes(m_mediaValues->devicePixelRatio(), m_sourceSize, m_imgSrcUrl, m_srcsetImageCandidate), AllowURLReplacement);
// sizes is feature-gated and only the first one counts (m_sourceSizeSet).
// If srcset was already processed, redo the candidate selection with the
// new source size.
199 } else if (RuntimeEnabledFeatures::pictureSizesEnabled() && match(attributeName, sizesAttr) && !m_sourceSizeSet) {
200 m_sourceSize = SizesAttributeParser::findEffectiveSize(attributeValue, m_mediaValues);
201 m_sourceSizeSet = true;
202 if (!m_srcsetImageCandidate.isEmpty()) {
203 m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_sourceSize, m_srcsetAttributeValue);
204 setUrlToLoad(bestFitSourceForImageAttributes(m_mediaValues->devicePixelRatio(), m_sourceSize, m_imgSrcUrl, m_srcsetImageCandidate), AllowURLReplacement);
// <link>: href supplies the URL, rel decides whether this is a stylesheet
// link, media is evaluated against m_mediaValues, crossorigin updates the
// CORS state.
209 template<typename NameType>
210 void processLinkAttribute(const NameType& attributeName, const String& attributeValue)
212 // FIXME - Don't set rel/media/crossorigin multiple times.
213 if (match(attributeName, hrefAttr))
214 setUrlToLoad(attributeValue, DisallowURLReplacement);
215 else if (match(attributeName, relAttr))
216 m_linkIsStyleSheet = relAttributeIsStyleSheet(attributeValue);
217 else if (match(attributeName, mediaAttr))
218 m_matchedMediaAttribute = mediaAttributeMatches(*m_mediaValues, attributeValue);
219 else if (match(attributeName, crossoriginAttr))
220 setCrossOriginAllowed(attributeValue);
// <input>: src supplies the URL; type records whether this is an image input
// (case-insensitive comparison with InputTypeNames::image).
223 template<typename NameType>
224 void processInputAttribute(const NameType& attributeName, const String& attributeValue)
226 // FIXME - Don't set type multiple times.
227 if (match(attributeName, srcAttr))
228 setUrlToLoad(attributeValue, DisallowURLReplacement);
229 else if (match(attributeName, typeAttr))
230 m_inputIsImage = equalIgnoringCase(attributeValue, InputTypeNames::image);
// <source>: mirrors the srcset/sizes handling of <img> but only records the
// chosen candidate (no setUrlToLoad call here); media is evaluated against
// m_mediaValues.
233 template<typename NameType>
234 void processSourceAttribute(const NameType& attributeName, const String& attributeValue)
// NOTE(review): the statement guarded by this feature check is not visible
// in this view; presumably the attribute is ignored when the picture feature
// is disabled — confirm.
236 if (!RuntimeEnabledFeatures::pictureEnabled())
238 if (match(attributeName, srcsetAttr) && m_srcsetImageCandidate.isEmpty()) {
239 m_srcsetAttributeValue = attributeValue;
240 m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_sourceSize, attributeValue);
241 } else if (match(attributeName, sizesAttr) && !m_sourceSizeSet) {
242 m_sourceSize = SizesAttributeParser::findEffectiveSize(attributeValue, m_mediaValues);
243 m_sourceSizeSet = true;
244 if (!m_srcsetImageCandidate.isEmpty()) {
245 m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_sourceSize, m_srcsetAttributeValue);
247 } else if (match(attributeName, mediaAttr)) {
248 // FIXME - Don't match media multiple times.
249 m_matchedMediaAttribute = mediaAttributeMatches(*m_mediaValues, attributeValue);
// Common entry point for one attribute: charset is recorded for every tag,
// then the attribute is dispatched to the handler for the current tag.
254 template<typename NameType>
255 void processAttribute(const NameType& attributeName, const String& attributeValue)
257 if (match(attributeName, charsetAttr))
258 m_charset = attributeValue;
260 if (match(m_tagImpl, scriptTag))
261 processScriptAttribute(attributeName, attributeValue);
262 else if (match(m_tagImpl, imgTag))
263 processImgAttribute(attributeName, attributeValue);
264 else if (match(m_tagImpl, linkTag))
265 processLinkAttribute(attributeName, attributeValue);
266 else if (match(m_tagImpl, inputTag))
267 processInputAttribute(attributeName, attributeValue);
268 else if (match(m_tagImpl, sourceTag))
269 processSourceAttribute(attributeName, attributeValue);
// True when the rel value denotes a stylesheet that is not an alternate, has
// no icon type, and is not a DNS-prefetch hint.
272 static bool relAttributeIsStyleSheet(const String& attributeValue)
274 LinkRelAttribute rel(attributeValue);
275 return rel.isStyleSheet() && !rel.isAlternate() && rel.iconType() == InvalidIcon && !rel.isDNSPrefetch();
// Records |value| as the URL to load, honouring |replacement|.
// NOTE(review): the statement guarded by the replacement check and the lines
// after the whitespace stripping are not visible in this view — confirm how
// the stripped URL is stored.
278 void setUrlToLoad(const String& value, URLReplacement replacement)
280 // We only respect the first src/href, per HTML5:
281 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#attribute-name-state
282 if (replacement == DisallowURLReplacement && !m_urlToLoad.isEmpty())
284 String url = stripLeadingAndTrailingHTMLSpaces(value);
// Charset to attach to the request; <img> always gets the empty string.
// NOTE(review): the return for non-img tags is not visible in this view.
290 const String& charset() const
292 // FIXME: Its not clear that this if is needed, the loader probably ignores charset for image requests anyway.
293 if (match(m_tagImpl, imgTag))
294 return emptyString();
// Maps the tag (plus the input/link flags) to a Resource::Type. Any other
// combination hits ASSERT_NOT_REACHED() and then returns Resource::Raw.
298 Resource::Type resourceType() const
300 if (match(m_tagImpl, scriptTag))
301 return Resource::Script;
302 if (match(m_tagImpl, imgTag) || (match(m_tagImpl, inputTag) && m_inputIsImage))
303 return Resource::Image;
304 if (match(m_tagImpl, linkTag) && m_linkIsStyleSheet)
305 return Resource::CSSStyleSheet;
306 ASSERT_NOT_REACHED();
307 return Resource::Raw;
// Decides whether the collected state warrants a preload. The checks cover:
// no URL recorded, a <link> that is not a stylesheet, and an <input> that is
// not an image.
// NOTE(review): the statements guarded by these checks and the final return
// are not visible in this view — confirm the returned values.
310 bool shouldPreload() const
312 if (m_urlToLoad.isEmpty())
314 if (match(m_tagImpl, linkTag) && !m_linkIsStyleSheet)
316 if (match(m_tagImpl, inputTag) && !m_inputIsImage)
// Whether a crossorigin attribute was seen (set by setCrossOriginAllowed()).
321 bool isCORSEnabled() const
323 return m_isCORSEnabled;
// Credentials mode chosen by setCrossOriginAllowed().
326 StoredCredentials allowStoredCredentials() const
328 return m_allowCredentials;
// Marks the tag as CORS-enabled. A non-null value equal to "use-credentials"
// (case-insensitive, after stripping HTML whitespace) selects
// AllowStoredCredentials.
// NOTE(review): the line between the two assignments below is not visible in
// this view (presumably an `else`) — confirm.
331 void setCrossOriginAllowed(const String& corsSetting)
333 m_isCORSEnabled = true;
334 if (!corsSetting.isNull() && equalIgnoringCase(stripLeadingAndTrailingHTMLSpaces(corsSetting), "use-credentials"))
335 m_allowCredentials = AllowStoredCredentials;
337 m_allowCredentials = DoNotAllowStoredCredentials;
// State. NOTE(review): m_urlToLoad, m_charset, m_imgSrcUrl and m_inputIsImage
// are used above but their declarations are not visible in this view.
// Tag being scanned; compared against known tags via match().
340 const StringImpl* m_tagImpl;
// Candidate selected from the srcset attribute (empty until one is chosen).
342 ImageCandidate m_srcsetImageCandidate;
344 bool m_linkIsStyleSheet;
345 bool m_matchedMediaAttribute;
// Raw srcset value, kept so the candidate can be recomputed after sizes.
348 String m_srcsetAttributeValue;
// Effective size from SizesAttributeParser; m_sourceSizeSet records whether a
// sizes attribute has already been applied.
349 unsigned m_sourceSize;
350 bool m_sourceSizeSet;
351 bool m_isCORSEnabled;
352 StoredCredentials m_allowCredentials;
353 RefPtr<MediaValues> m_mediaValues;
// Stores the document URL and the media values used for later scanning.
// NOTE(review): the initializers that sit between m_documentURL and
// m_mediaValues in the full file are not visible in this view.
356 TokenPreloadScanner::TokenPreloadScanner(const KURL& documentURL, PassRefPtr<MediaValues> mediaValues)
357 : m_documentURL(documentURL)
361 , m_mediaValues(mediaValues)
365 TokenPreloadScanner::~TokenPreloadScanner()
// Snapshots the scanner state (predicted base URL, in-style flag, template
// depth) by appending a Checkpoint. The checkpoint handle is the index the new
// entry occupies, taken from the vector's size before the append.
// NOTE(review): the return statement is not visible in this view; presumably
// |checkpoint| is returned — confirm.
369 TokenPreloadScannerCheckpoint TokenPreloadScanner::createCheckpoint()
371 TokenPreloadScannerCheckpoint checkpoint = m_checkpoints.size();
372 m_checkpoints.append(Checkpoint(m_predictedBaseElementURL, m_inStyle, m_templateCount));
376 void TokenPreloadScanner::rewindTo(TokenPreloadScannerCheckpoint checkpointIndex)
378 ASSERT(checkpointIndex < m_checkpoints.size()); // If this ASSERT fires, checkpointIndex is invalid.
379 const Checkpoint& checkpoint = m_checkpoints[checkpointIndex];
380 m_predictedBaseElementURL = checkpoint.predictedBaseElementURL;
381 m_inStyle = checkpoint.inStyle;
382 m_templateCount = checkpoint.templateCount;
383 m_cssScanner.reset();
384 m_checkpoints.clear();
387 void TokenPreloadScanner::scan(const HTMLToken& token, const SegmentedString& source, PreloadRequestStream& requests)
389 scanCommon(token, source, requests);
392 void TokenPreloadScanner::scan(const CompactHTMLToken& token, const SegmentedString& source, PreloadRequestStream& requests)
394 scanCommon(token, source, requests);
// Shared implementation behind both scan() overloads. Dispatches on the token
// type: character data is handed to the CSS scanner, end tags are checked
// against template/style/picture, and start tags either update scanner state
// (template, style, base, picture) or are run through a StartTagScanner whose
// resulting PreloadRequest is appended to |requests|.
// NOTE(review): many statements of this function (guards, state updates,
// breaks/returns, closing braces) are not visible in this view — the comments
// below describe only the visible lines.
397 template<typename Token>
398 void TokenPreloadScanner::scanCommon(const Token& token, const SegmentedString& source, PreloadRequestStream& requests)
400 switch (token.type()) {
401 case HTMLToken::Character: {
// NOTE(review): the lines preceding this call are not visible, so the call
// may be conditional — confirm.
404 m_cssScanner.scan(token.data(), source, requests);
407 case HTMLToken::EndTag: {
408 const StringImpl* tagImpl = tagImplFor(token.data());
// NOTE(review): body of the </template> branch is not visible here.
409 if (match(tagImpl, templateTag)) {
// </style>: the CSS scanner is reset (other statements in this branch are
// not visible here).
414 if (match(tagImpl, styleTag)) {
416 m_cssScanner.reset();
// NOTE(review): the statement guarded by the </picture> check is not visible.
420 if (match(tagImpl, pictureTag))
424 case HTMLToken::StartTag: {
427 const StringImpl* tagImpl = tagImplFor(token.data());
// NOTE(review): bodies of the <template> and <style> branches are not
// visible here.
428 if (match(tagImpl, templateTag)) {
432 if (match(tagImpl, styleTag)) {
436 if (match(tagImpl, baseTag)) {
437 // The first <base> element is the one that wins.
// NOTE(review): the statement guarded by this check is not visible; given
// the comment above and the ASSERT in updatePredictedBaseURL(), presumably
// later <base> tags are skipped — confirm.
438 if (!m_predictedBaseElementURL.isEmpty())
440 updatePredictedBaseURL(token);
// <picture> (feature-gated): forget any source URL remembered from a
// previous picture element.
443 if (RuntimeEnabledFeatures::pictureEnabled() && (match(tagImpl, pictureTag))) {
445 m_pictureSourceURL = String();
// Any other start tag: collect its attributes and try to build a request.
449 StartTagScanner scanner(tagImpl, m_mediaValues);
450 scanner.processAttributes(token.attributes());
// NOTE(review): a line before this call is not visible, so the call may be
// conditional — confirm.
452 scanner.handlePictureSourceURL(m_pictureSourceURL);
453 OwnPtr<PreloadRequest> request = scanner.createPreloadRequest(m_predictedBaseElementURL, source);
// NOTE(review): a line before this append is not visible; presumably it
// checks that a request was actually created — confirm.
455 requests.append(request.release());
464 template<typename Token>
465 void TokenPreloadScanner::updatePredictedBaseURL(const Token& token)
467 ASSERT(m_predictedBaseElementURL.isEmpty());
468 if (const typename Token::Attribute* hrefAttribute = token.getAttributeItem(hrefAttr))
469 m_predictedBaseElementURL = KURL(m_documentURL, stripLeadingAndTrailingHTMLSpaces(hrefAttribute->value)).copy();
472 HTMLPreloadScanner::HTMLPreloadScanner(const HTMLParserOptions& options, const KURL& documentURL, PassRefPtr<MediaValues> mediaValues)
473 : m_scanner(documentURL, mediaValues)
474 , m_tokenizer(HTMLTokenizer::create(options))
478 HTMLPreloadScanner::~HTMLPreloadScanner()
482 void HTMLPreloadScanner::appendToEnd(const SegmentedString& source)
484 m_source.append(source);
// Tokenizes the buffered source and feeds every token to the token-level
// scanner, collecting preload requests, then hands the collected requests to
// |preloader|. A non-empty |startingBaseElementURL| is used as the scanner's
// predicted base URL before scanning begins.
487 void HTMLPreloadScanner::scan(HTMLResourcePreloader* preloader, const KURL& startingBaseElementURL)
489 ASSERT(isMainThread()); // HTMLTokenizer::updateStateFor only works on the main thread.
491 TRACE_EVENT1("webkit", "HTMLPreloadScanner::scan", "source_length", m_source.length());
493 // When we start scanning, our best prediction of the baseElementURL is the real one!
494 if (!startingBaseElementURL.isEmpty())
495 m_scanner.setPredictedBaseElementURL(startingBaseElementURL);
497 PreloadRequestStream requests;
499 while (m_tokenizer->nextToken(m_source, m_token)) {
// For start tags, let the tokenizer adjust its state based on the tag name
// before the token is scanned.
500 if (m_token.type() == HTMLToken::StartTag)
501 m_tokenizer->updateStateFor(attemptStaticStringCreation(m_token.name(), Likely8Bit));
502 m_scanner.scan(m_token, m_source, requests);
// NOTE(review): the lines between the scan call above and the call below are
// not visible in this view; the gap is larger than a closing brace plus a
// blank line, so a loop-body statement (presumably resetting m_token for the
// next iteration) appears to be missing — confirm against the full source.
506 preloader->takeAndPreload(requests);