doc/tools/buildindex.py

   1 #! /usr/bin/env python
   2
   3 __version__ = '$Revision: 1.1.1.1 $'
   4
   5 import os
   6 import re
   7 import string
   8 import sys
   9
  10
  11 class Node:
  12     __rmjunk = re.compile("<#\d+#>")
  13
  14     continuation = 0
  15
  16     def __init__(self, link, str, seqno):
  17         self.links = [link]
  18         self.seqno = seqno
  19         # remove <#\d+#> left in by moving the data out of LaTeX2HTML
  20         str = self.__rmjunk.sub('', str)
  21         # build up the text
  22         self.text = split_entry_text(str)
  23         self.key = split_entry_key(str)
  24
  25     def __cmp__(self, other):
  26         """Comparison operator includes sequence number, for use with
  27         list.sort()."""
  28         return self.cmp_entry(other) or cmp(self.seqno, other.seqno)
  29
  30     def cmp_entry(self, other):
  31         """Comparison 'operator' that ignores sequence number."""
  32         c = 0
  33         for i in range(min(len(self.key), len(other.key))):
  34             c = (cmp_part(self.key[i], other.key[i])
  35                  or cmp_part(self.text[i], other.text[i]))
  36             if c:
  37                 break
  38         return c or cmp(self.key, other.key) or cmp(self.text, other.text)
  39
  40     def __repr__(self):
  41         return "<Node for %s (%s)>" % (string.join(self.text, '!'), self.seqno)
  42
  43     def __str__(self):
  44         return string.join(self.key, '!')
  45
  46     def dump(self):
  47         return "%s\1%s###%s\n" \
  48                % (string.join(self.links, "\1"),
  49                   string.join(self.text, '!'),
  50                   self.seqno)
  51
  52
  53 def cmp_part(s1, s2):
  54     result = cmp(s1, s2)
  55     if result == 0:
  56         return 0
  57     l1 = string.lower(s1)
  58     l2 = string.lower(s2)
  59     minlen = min(len(s1), len(s2))
  60     if len(s1) < len(s2) and l1 == l2[:len(s1)]:
  61         result = -1
  62     elif len(s2) < len(s1) and l2 == l1[:len(s2)]:
  63         result = 1
  64     else:
  65         result = cmp(l1, l2) or cmp(s1, s2)
  66     return result
  67
  68
  69 def split_entry(str, which):
  70     stuff = []
  71     parts = string.split(str, '!')
  72     parts = map(string.split, parts, ['@'] * len(parts))
  73     for entry in parts:
  74         if len(entry) != 1:
  75             key = entry[which]
  76         else:
  77             key = entry[0]
  78         stuff.append(key)
  79     return stuff
  80
  81
  82 _rmtt = re.compile(r"""(.*)<tt(?: class=['"][a-z0-9]+["'])?>(.*)</tt>(.*)$""",
  83                    re.IGNORECASE)
  84 _rmparens = re.compile(r"\(\)")
  85
  86 def split_entry_key(str):
  87     parts = split_entry(str, 1)
  88     for i in range(len(parts)):
  89         m = _rmtt.match(parts[i])
  90         if m:
  91             parts[i] = string.join(m.group(1, 2, 3), '')
  92         else:
  93             parts[i] = string.lower(parts[i])
  94         # remove '()' from the key:
  95         parts[i] = _rmparens.sub('', parts[i])
  96     return map(trim_ignored_letters, parts)
  97
  98
  99 def split_entry_text(str):
 100     if '<' in str:
 101         m = _rmtt.match(str)
 102         if m:
 103             str = string.join(m.group(1, 2, 3), '')
 104     return split_entry(str, 1)
 105
 106
 107 def load(fp):
 108     nodes = []
 109     rx = re.compile("(.*)\1(.*)###(.*)$")
 110     while 1:
 111         line = fp.readline()
 112         if not line:
 113             break
 114         m = rx.match(line)
 115         if m:
 116             link, str, seqno = m.group(1, 2, 3)
 117             nodes.append(Node(link, str, seqno))
 118     return nodes
 119
 120
 121 def trim_ignored_letters(s):
 122     # ignore $ to keep environment variables with the
 123     # leading letter from the name
 124     s = string.lower(s)
 125     if s[0] == "$":
 126         return s[1:]
 127     else:
 128         return s
 129
 130 def get_first_letter(s):
 131     return string.lower(trim_ignored_letters(s)[0])
 132
 133
 134 def split_letters(nodes):
 135     letter_groups = []
 136     if nodes:
 137         group = []
 138         append = group.append
 139         letter = get_first_letter(nodes[0].text[0])
 140         letter_groups.append((letter, group))
 141         for node in nodes:
 142             nletter = get_first_letter(node.text[0])
 143             if letter != nletter:
 144                 letter = nletter
 145                 group = []
 146                 letter_groups.append((letter, group))
 147                 append = group.append
 148             append(node)
 149     return letter_groups
 150
 151
 152 # need a function to separate the nodes into columns...
 153 def split_columns(nodes, columns=1):
 154     if columns <= 1:
 155         return [nodes]
 156     # This is a rough height; we may have to increase to avoid breaks before
 157     # a subitem.
 158     colheight = len(nodes) / columns
 159     numlong = len(nodes) % columns
 160     if numlong:
 161         colheight = colheight + 1
 162     else:
 163         numlong = columns
 164     cols = []
 165     for i in range(numlong):
 166         start = i * colheight
 167         end = start + colheight
 168         cols.append(nodes[start:end])
 169     del nodes[:end]
 170     colheight = colheight - 1
 171     try:
 172         numshort = len(nodes) / colheight
 173     except ZeroDivisionError:
 174         cols = cols + (columns - len(cols)) * [[]]
 175     else:
 176         for i in range(numshort):
 177             start = i * colheight
 178             end = start + colheight
 179             cols.append(nodes[start:end])
 180     #
 181     # If items continue across columns, make sure they are marked
 182     # as continuations so the user knows to look at the previous column.
 183     #
 184     for i in range(len(cols) - 1):
 185         try:
 186             prev = cols[i][-1]
 187             next = cols[i + 1][0]
 188         except IndexError:
 189             return cols
 190         else:
 191             n = min(len(prev.key), len(next.key))
 192             for j in range(n):
 193                 if prev.key[j] != next.key[j]:
 194                     break
 195                 next.continuation = j + 1
 196     return cols
 197
 198
 199 DL_LEVEL_INDENT = "  "
 200
 201 def format_column(nodes):
 202     strings = ["<dl compact>"]
 203     append = strings.append
 204     level = 0
 205     previous = []
 206     for node in nodes:
 207         current = node.text
 208         count = 0
 209         for i in range(min(len(current), len(previous))):
 210             if previous[i] != current[i]:
 211                 break
 212             count = i + 1
 213         if count > level:
 214             append("<dl compact>" * (count - level) + "\n")
 215             level = count
 216         elif level > count:
 217             append("\n")
 218             append(level * DL_LEVEL_INDENT)
 219             append("</dl>" * (level - count))
 220             level = count
 221         # else: level == count
 222         for i in range(count, len(current) - 1):
 223             term = node.text[i]
 224             level = level + 1
 225             if node.continuation > i:
 226                 extra = " (continued)"
 227             else:
 228                 extra = ""
 229             append("\n<dt>%s%s\n<dd>\n%s<dl compact>"
 230                    % (term, extra, level * DL_LEVEL_INDENT))
 231         append("\n%s<dt>%s%s</a>"
 232                % (level * DL_LEVEL_INDENT, node.links[0], node.text[-1]))
 233         for link in node.links[1:]:
 234             append(",\n%s    %s[Link]</a>" % (level * DL_LEVEL_INDENT, link))
 235         previous = current
 236     append("\n")
 237     append("</dl>" * (level + 1))
 238     return string.join(strings, '')
 239
 240
 241 def format_nodes(nodes, columns=1):
 242     strings = []
 243     append = strings.append
 244     if columns > 1:
 245         colnos = range(columns)
 246         colheight = len(nodes) / columns
 247         if len(nodes) % columns:
 248             colheight = colheight + 1
 249         colwidth = 100 / columns
 250         append('<table width="100%"><tr valign="top">')
 251         for col in split_columns(nodes, columns):
 252             append('<td width="%d%%">\n' % colwidth)
 253             append(format_column(col))
 254             append("\n</td>")
 255         append("\n</tr></table>")
 256     else:
 257         append(format_column(nodes))
 258     append("\n<p>\n")
 259     return string.join(strings, '')
 260
 261
 262 def format_letter(letter):
 263     if letter == '.':
 264         lettername = ". (dot)"
 265     elif letter == '_':
 266         lettername = "_ (underscore)"
 267     else:
 268         lettername = string.upper(letter)
 269     return "\n<hr>\n<h2><a name=\"letter-%s\">%s</a></h2>\n\n" \
 270            % (letter, lettername)
 271
 272
 273 def format_html_letters(nodes, columns=1):
 274     letter_groups = split_letters(nodes)
 275     items = []
 276     for letter, nodes in letter_groups:
 277         s = "<b><a href=\"#letter-%s\">%s</a></b>" % (letter, letter)
 278         items.append(s)
 279     s = ["<hr><center>\n%s</center>\n" % string.join(items, " |\n")]
 280     for letter, nodes in letter_groups:
 281         s.append(format_letter(letter))
 282         s.append(format_nodes(nodes, columns))
 283     return string.join(s, '')
 284
 285 def format_html(nodes, columns):
 286     return format_nodes(nodes, columns)
 287
 288
 289 def collapse(nodes):
 290     """Collapse sequences of nodes with matching keys into a single node.
 291     Destructive."""
 292     if len(nodes) < 2:
 293         return
 294     prev = nodes[0]
 295     i = 1
 296     while i < len(nodes):
 297         node = nodes[i]
 298         if not node.cmp_entry(prev):
 299             prev.links.append(node.links[0])
 300             del nodes[i]
 301         else:
 302             i = i + 1
 303             prev = node
 304
 305
 306 def dump(nodes, fp):
 307     for node in nodes:
 308         fp.write(node.dump())
 309
 310
 311 def process_nodes(nodes, columns, letters):
 312     nodes.sort()
 313     collapse(nodes)
 314     if letters:
 315         return format_html_letters(nodes, columns)
 316     else:
 317         return format_html(nodes, columns)
 318
 319
 320 def main():
 321     import getopt
 322     ifn = "-"
 323     ofn = "-"
 324     columns = 1
 325     letters = 0
 326     opts, args = getopt.getopt(sys.argv[1:], "c:lo:",
 327                                ["columns=", "letters", "output="])
 328     for opt, val in opts:
 329         if opt in ("-o", "--output"):
 330             ofn = val
 331         elif opt in ("-c", "--columns"):
 332             columns = string.atoi(val)
 333         elif opt in ("-l", "--letters"):
 334             letters = 1
 335     if not args:
 336         args = [ifn]
 337     nodes = []
 338     for fn in args:
 339         nodes = nodes + load(open(fn))
 340     num_nodes = len(nodes)
 341     html = process_nodes(nodes, columns, letters)
 342     program = os.path.basename(sys.argv[0])
 343     if ofn == "-":
 344         sys.stdout.write(html)
 345         sys.stderr.write("\n%s: %d index nodes" % (program, num_nodes))
 346     else:
 347         open(ofn, "w").write(html)
 348         print
 349         print "%s: %d index nodes" % (program, num_nodes)
 350
 351
 352 if __name__ == "__main__":
 353     main()