1 # Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
8 from HTMLParser import HTMLParser
10 from tvcm import module
11 from tvcm import strip_js_comments
12 from tvcm import html_generation_controller
# Makes the BeautifulSoup copy under third_party/ importable by putting its
# directory at the front of sys.path.
14 def _InitBeautifulSoup():
# tvcm_path is the absolute path of the directory one level above the
# directory that contains this file.
15 tvcm_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
16 bs_path = os.path.join(tvcm_path, 'third_party', 'beautifulsoup')
# NOTE(review): the statement guarded by this check is not visible in this
# excerpt; presumably it returns early so the path is not inserted twice --
# confirm.
17 if bs_path in sys.path:
# Index 0 means this directory is searched before any other sys.path entry.
19 sys.path.insert(0, bs_path)
25 class InlineScript(object):
# Sets up an InlineScript around `soup`.
# NOTE(review): the line between the signature and the assignments below is
# not visible in this excerpt; other members read self._soup, so presumably
# `soup` is stored there -- confirm.
26 def __init__(self, soup):
# Both attributes start as None and are assigned later by the members that
# compute them (stripped_contents and the open-tags code further down).
28 self._stripped_contents = None
29 self._open_tags = None
33 return str(self._soup.string)
# Returns the result of strip_js_comments.StripJSComments(...), computing it
# on first use and storing it in self._stripped_contents.
# NOTE(review): the argument passed to StripJSComments is on a line that is
# not visible in this excerpt.
36 def stripped_contents(self):
# NOTE(review): this is a truthiness test, so a falsy stored result (for
# example an empty string) is recomputed on every call; `is None` would cache
# it -- confirm whether that matters for callers.
37 if not self._stripped_contents:
38 self._stripped_contents = strip_js_comments.StripJSComments(
40 return self._stripped_contents
45 return self._open_tags
47 cur = self._soup.parent
49 if isinstance(cur, BeautifulSoup.BeautifulSoup):
52 open_tags.append(_Tag(cur.name, cur.attrs))
56 assert open_tags[-1].tag == 'script'
59 self._open_tags = open_tags
60 return self._open_tags
64 if not isinstance(x, BeautifulSoup.Declaration):
66 return x == 'DOCTYPE html' or x == 'DOCTYPE HTML'
68 class HTMLModuleParserResults(object):
def __init__(self, html):
  """Parses html with polymer_soup.PolymerSoup and keeps the parsed document.

  Args:
    html: The markup handed to polymer_soup.PolymerSoup.
  """
  parsed_document = polymer_soup.PolymerSoup(html)
  self._soup = parsed_document
  # Assigned by inline_scripts the first time it builds its list.
  self._inline_scripts = None
75 decls = [x for x in self._soup.contents
77 return len(decls) == 1
80 def scripts_external(self):
81 tags = self._soup.findAll('script', src=True)
82 return [t['src'] for t in tags]
85 def inline_scripts(self):
86 if not self._inline_scripts:
87 tags = self._soup.findAll('script', src=None)
88 self._inline_scripts = [InlineScript(t.string) for t in tags]
89 return self._inline_scripts
93 tags = self._soup.findAll('link', rel='import')
94 return [t['href'] for t in tags]
97 def stylesheets(self):
98 tags = self._soup.findAll('link', rel='stylesheet')
99 return [t['href'] for t in tags]
102 def inline_stylesheets(self):
103 tags = self._soup.findAll('style')
104 return [str(t.string) for t in tags]
106 def YieldHTMLInPieces(self, controller):
107 yield self.GenerateHTML(controller)
# Builds output HTML from a copy of the parsed document, asking `controller`
# for the markup that replaces inline <style> contents and stylesheet links.
# NOTE(review): several loop bodies, branch conditions and the return
# statement are not visible in this excerpt; the comments below describe only
# the visible statements.
109 def GenerateHTML(self, controller):
# Re-parse the serialized document so the edits below act on a separate tree
# rather than on self._soup.
110 soup = polymer_soup.PolymerSoup(str(self._soup))
# Walk the top-level nodes looking for declaration nodes; what happens to a
# match is not visible here.
113 for x in soup.contents:
114 if isinstance(x, BeautifulSoup.Declaration):
# Collect the <link rel="import"> tags; their handling is not visible here.
119 imports = soup.findAll('link', rel='import')
123 # Remove all script links
124 scripts_external = soup.findAll('script', src=True)
125 for script in scripts_external:
128 # Remove all inline script
# The name scripts_external is reused here, but this query (src=None) selects
# the inline scripts.
129 scripts_external = soup.findAll('script', src=None)
130 for script in scripts_external:
133 # Process all inline styles
134 inline_styles = soup.findAll('style')
135 for style in inline_styles:
# The controller receives the style tag's text and returns the replacement.
136 html = controller.GetHTMLForInlineStylesheet(str(style.string))
# Build a new <style> tag holding the controller's text and swap it in for the
# original tag. NOTE(review): the condition guarding this is not visible.
138 ns = BeautifulSoup.Tag(soup, 'style')
139 ns.append(BeautifulSoup.NavigableString(html))
140 style.replaceWith(ns)
144 # Rewrite all external stylesheet hrefs or remove, as needed
145 stylesheet_links = soup.findAll('link', rel='stylesheet')
146 for stylesheet_link in stylesheet_links:
# The controller receives the link's href and returns replacement markup.
147 html = controller.GetHTMLForStylesheetHRef(stylesheet_link['href'])
# Parse the returned markup and substitute its first element for the link.
149 tmp = polymer_soup.PolymerSoup(html).findChildren()
151 stylesheet_link.replaceWith(tmp[0])
# NOTE(review): presumably the branch taken when the controller returns
# nothing, in which case the link is taken out of the tree -- the branch
# condition is not visible; confirm.
153 stylesheet_link.extract()
def html_contents_without_links_and_script(self):
  """Returns GenerateHTML's output for a newly constructed HTMLGenerationController."""
  default_controller = html_generation_controller.HTMLGenerationController()
  return self.GenerateHTML(default_controller)
163 def __init__(self, tag, attrs):
168 attr_string = ' '.join(['%s="%s"' % (x[0], x[1]) for x in self.attrs])
169 return '<%s %s>' % (self.tag, attr_string)
# Entry point that turns an HTML string into an HTMLModuleParserResults.
171 class HTMLModuleParser():
# Parse(html) raises Exception when html contains the literal text
# '< /script>'; otherwise it returns HTMLModuleParserResults(html).
# NOTE(review): the lines between the signature and the check below are not
# visible in this excerpt.
172 def Parse(self, html):
176 if html.find('< /script>') != -1:
# NOTE(review): '\/' is not a recognized escape sequence, so the backslash is
# kept and the message reads <\/script>; newer Python 3 releases warn about
# such escapes -- consider a raw string or '\\/' if this file is ported.
177 raise Exception('Escape script tags with <\/script>')
179 return HTMLModuleParserResults(html)