1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 # Use of this source code is governed by a BSD-style license that can be
3 # found in the LICENSE file.
5 from HTMLParser import HTMLParser
10 from docs_server_utils import FormatKey
11 from file_system import FileNotFoundError
12 from third_party.handlebar import Handlebar
14 # TODO(kalman): rename this HTMLDataSource or other, then have separate intro
15 # article data sources created as instances of it.
17 _H1_REGEX = re.compile('<h1[^>.]*?>.*?</h1>', flags=re.DOTALL)
19 class _IntroParser(HTMLParser):
20 ''' An HTML parser which will parse table of contents and page title info out
24 HTMLParser.__init__(self)
26 self.page_title = None
27 self._recent_tag = None
28 self._current_heading = {}
30 def handle_starttag(self, tag, attrs):
32 if tag not in ['h1', 'h2', 'h3']:
34 if tag != 'h1' or self.page_title is None:
35 self._recent_tag = tag
40 self._current_heading = { 'link': id_, 'subheadings': [], 'title': '' }
41 self.toc.append(self._current_heading)
43 self._current_heading = { 'link': id_, 'title': '' }
44 self.toc[-1]['subheadings'].append(self._current_heading)
46 def handle_endtag(self, tag):
47 if tag in ['h1', 'h2', 'h3']:
48 self._recent_tag = None
50 def handle_data(self, data):
51 if self._recent_tag is None:
53 if self._recent_tag == 'h1':
54 if self.page_title is None:
55 self.page_title = data
57 self.page_title += data
58 elif self._recent_tag in ['h2', 'h3']:
59 self._current_heading['title'] += data
class IntroDataSource(object):
  '''This class fetches the intros for a given API. From this intro, a table
  of contents dictionary is created, which contains the headings in the intro.
  '''
  class Factory(object):
    '''Builds IntroDataSource instances which share one cache and one list of
    base paths.
    '''
    def __init__(self,
                 compiled_fs_factory,
                 file_system,
                 ref_resolver_factory,
                 base_paths):
      # NOTE(review): presumably the compiled file system calls
      # _MakeIntroDict(path, contents) to populate the cache - confirm against
      # the compiled file system implementation.
      self._cache = compiled_fs_factory.Create(file_system,
                                               self._MakeIntroDict,
                                               IntroDataSource)
      self._ref_resolver = ref_resolver_factory.Create()
      self._base_paths = base_paths

    def _MakeIntroDict(self, intro_path, intro):
      '''Builds the data for one intro.

      |intro_path| is the path of the intro file and |intro| is its content.
      Returns a dict with:
        'intro': a Handlebar template of the intro with links resolved and its
            first <h1> removed.
        'title': the page title, taken from the apps rendering.
        'apps_toc', 'extensions_toc': the table of contents of the intro when
            rendered with 'is_apps' True and False respectively.
      '''
      # Guess the name of the API from the path to the intro.
      api_name = os.path.splitext(intro_path.split('/')[-1])[0]
      intro_with_links = self._ref_resolver.ResolveAllLinks(intro,
                                                            namespace=api_name)

      # TODO(kalman): Do $ref replacement after rendering the template, not
      # before, so that (a) $ref links can contain template annotations, and (b)
      # we can use CompiledFileSystem.ForTemplates to create the templates and
      # save ourselves some effort.
      # TODO(cduvall): Use the normal template rendering system, so we can check
      # errors.
      def parse_rendering(is_apps):
        # Renders the intro for one platform and parses the headings out of it.
        parser = _IntroParser()
        parser.feed(Handlebar(intro_with_links).render(
            { 'is_apps': is_apps }).text)
        return parser

      apps_parser = parse_rendering(True)
      extensions_parser = parse_rendering(False)

      if extensions_parser.page_title != apps_parser.page_title:
        # The arguments are given in the order the message labels them.
        logging.error(
            'Title differs for apps and extensions: Apps: %s, Extensions: %s.' %
                (apps_parser.page_title, extensions_parser.page_title))

      # The templates will render the heading themselves, so remove it from the
      # HTML content.
      intro_with_links = re.sub(_H1_REGEX, '', intro_with_links, count=1)
      return {
        'intro': Handlebar(intro_with_links),
        'title': apps_parser.page_title,
        'apps_toc': apps_parser.toc,
        'extensions_toc': extensions_parser.toc,
      }

    def Create(self):
      '''Returns a new IntroDataSource using this factory's cache and base
      paths.
      '''
      return IntroDataSource(self._cache, self._base_paths)

  def __init__(self, cache, base_paths):
    self._cache = cache
    self._base_paths = base_paths

  def get(self, key):
    '''Returns the cached intro data for |key|, trying each base path in order
    and returning the first one found.

    If the file is not found under any base path, the FileNotFoundError from
    the first base path is raised.
    '''
    path = FormatKey(key)

    def get_from_base_path(base_path):
      return self._cache.GetFromFile('%s/%s' % (base_path, path)).Get()

    for base_path in self._base_paths:
      try:
        return get_from_base_path(base_path)
      except FileNotFoundError:
        continue
    # Not found. Do the first operation again so that we get a stack trace - we
    # know that it'll fail.
    get_from_base_path(self._base_paths[0])
    # Only reachable if the retried lookup unexpectedly succeeded.
    raise AssertionError()