src/chrome/common/extensions/docs/server2/intro_data_source.py

   1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 # Use of this source code is governed by a BSD-style license that can be
   3 # found in the LICENSE file.
   4
   5 from HTMLParser import HTMLParser
   6 import logging
   7 import os
   8 import re
   9
  10 from docs_server_utils import FormatKey
  11 from file_system import FileNotFoundError
  12 from third_party.handlebar import Handlebar
  13
  14 # TODO(kalman): rename this HTMLDataSource or other, then have separate intro
  15 # article data sources created as instances of it.
  16
  17 _H1_REGEX = re.compile('<h1[^>.]*?>.*?</h1>', flags=re.DOTALL)
  18
  19 class _IntroParser(HTMLParser):
  20   ''' An HTML parser which will parse table of contents and page title info out
  21   of an intro.
  22   '''
  23   def __init__(self):
  24     HTMLParser.__init__(self)
  25     self.toc = []
  26     self.page_title = None
  27     self._recent_tag = None
  28     self._current_heading = {}
  29
  30   def handle_starttag(self, tag, attrs):
  31     id_ = ''
  32     if tag not in ['h1', 'h2', 'h3']:
  33       return
  34     if tag != 'h1' or self.page_title is None:
  35       self._recent_tag = tag
  36     for attr in attrs:
  37       if attr[0] == 'id':
  38         id_ = attr[1]
  39     if tag == 'h2':
  40       self._current_heading = { 'link': id_, 'subheadings': [], 'title': '' }
  41       self.toc.append(self._current_heading)
  42     elif tag == 'h3':
  43       self._current_heading = { 'link': id_, 'title': '' }
  44       self.toc[-1]['subheadings'].append(self._current_heading)
  45
  46   def handle_endtag(self, tag):
  47     if tag in ['h1', 'h2', 'h3']:
  48       self._recent_tag = None
  49
  50   def handle_data(self, data):
  51     if self._recent_tag is None:
  52       return
  53     if self._recent_tag == 'h1':
  54       if self.page_title is None:
  55         self.page_title = data
  56       else:
  57         self.page_title += data
  58     elif self._recent_tag in ['h2', 'h3']:
  59       self._current_heading['title'] += data
  60
  61 class IntroDataSource(object):
  62   '''This class fetches the intros for a given API. From this intro, a table
  63   of contents dictionary is created, which contains the headings in the intro.
  64   '''
  65   class Factory(object):
  66     def __init__(self,
  67                  compiled_fs_factory,
  68                  file_system,
  69                  ref_resolver_factory,
  70                  base_paths):
  71       self._cache = compiled_fs_factory.Create(file_system,
  72                                                self._MakeIntroDict,
  73                                                IntroDataSource)
  74       self._ref_resolver = ref_resolver_factory.Create()
  75       self._base_paths = base_paths
  76
  77     def _MakeIntroDict(self, intro_path, intro):
  78       # Guess the name of the API from the path to the intro.
  79       api_name = os.path.splitext(intro_path.split('/')[-1])[0]
  80       intro_with_links = self._ref_resolver.ResolveAllLinks(intro,
  81                                                             namespace=api_name)
  82       # TODO(kalman): Do $ref replacement after rendering the template, not
  83       # before, so that (a) $ref links can contain template annotations, and (b)
  84       # we can use CompiledFileSystem.ForTemplates to create the templates and
  85       # save ourselves some effort.
  86       apps_parser = _IntroParser()
  87       apps_parser.feed(Handlebar(intro_with_links).render(
  88           { 'is_apps': True }).text)
  89       extensions_parser = _IntroParser()
  90       extensions_parser.feed(Handlebar(intro_with_links).render(
  91           { 'is_apps': False }).text)
  92       # TODO(cduvall): Use the normal template rendering system, so we can check
  93       # errors.
  94       if extensions_parser.page_title != apps_parser.page_title:
  95         logging.error(
  96             'Title differs for apps and extensions: Apps: %s, Extensions: %s.' %
  97                 (extensions_parser.page_title, apps_parser.page_title))
  98       # The templates will render the heading themselves, so remove it from the
  99       # HTML content.
 100       intro_with_links = re.sub(_H1_REGEX, '', intro_with_links, count=1)
 101       return {
 102         'intro': Handlebar(intro_with_links),
 103         'title': apps_parser.page_title,
 104         'apps_toc': apps_parser.toc,
 105         'extensions_toc': extensions_parser.toc,
 106       }
 107
 108     def Create(self):
 109       return IntroDataSource(self._cache, self._base_paths)
 110
 111   def __init__(self, cache, base_paths):
 112     self._cache = cache
 113     self._base_paths = base_paths
 114
 115   def get(self, key):
 116     path = FormatKey(key)
 117     def get_from_base_path(base_path):
 118       return self._cache.GetFromFile('%s/%s' % (base_path, path)).Get()
 119     for base_path in self._base_paths:
 120       try:
 121         return get_from_base_path(base_path)
 122       except FileNotFoundError:
 123         continue
 124     # Not found. Do the first operation again so that we get a stack trace - we
 125     # know that it'll fail.
 126     get_from_base_path(self._base_paths[0])
 127     raise AssertionError()