Initial import to Tizen
[profile/ivi/python-pyOpenSSL.git] / doc / tools / sgmlconv / latex2esis.py
1 #! /usr/bin/env python
2
3 """Generate ESIS events based on a LaTeX source document and
4 configuration data.
5
6 The conversion is not strong enough to work with arbitrary LaTeX
7 documents; it has only been designed to work with the highly stylized
8 markup used in the standard Python documentation.  A lot of
9 information about specific markup is encoded in the control table
10 passed to the convert() function; changing this table can allow this
11 tool to support additional LaTeX markups.
12
13 The format of the table is largely undocumented; see the commented
14 headers where the table is specified in main().  There is no provision 
15 to load an alternate table from an external file.
16 """
17
18 import errno
19 import getopt
20 import os
21 import re
22 import string
23 import sys
24 import UserList
25 import xml.sax.saxutils
26
27 from types import ListType, StringType, TupleType
28
29 try:
30     from xml.parsers.xmllib import XMLParser
31 except ImportError:
32     from xmllib import XMLParser
33
34
35 from esistools import encode
36
37
# Debug level.  main() adds one for every -D/--debug option; any non-zero
# value makes dbgmsg() write to stderr and makes new_stack() return a
# _Stack instead of a plain list.
DEBUG = 0
39
40
class LaTeXFormatError(Exception):
    """Raised when the LaTeX input cannot be converted."""
43
44
class LaTeXStackError(LaTeXFormatError):
    """Raised when an environment close does not match the open elements.

    Attributes:
        found -- the environment name that was being closed
        stack -- a copy of the element stack at the time of the error
    """

    def __init__(self, found, stack):
        # Build the message first, from the stack exactly as passed in.
        message = ("environment close for %s doesn't match;\n  stack = %s"
                   % (found, stack))
        self.found = found
        # Keep a snapshot so later changes to the live stack do not show up.
        self.stack = stack[:]
        LaTeXFormatError.__init__(self, message)
52
53 \f
54 _begin_env_rx = re.compile(r"[\\]begin{([^}]*)}")
55 _end_env_rx = re.compile(r"[\\]end{([^}]*)}")
56 _begin_macro_rx = re.compile(r"[\\]([a-zA-Z]+[*]?) ?({|\s*\n?)")
57 _comment_rx = re.compile("%+ ?(.*)\n[ \t]*")
58 _text_rx = re.compile(r"[^]~%\\{}]+")
59 _optional_rx = re.compile(r"\s*[[]([^]]*)[]]")
60 # _parameter_rx is this complicated to allow {...} inside a parameter;
61 # this is useful to match tabular layout specifications like {c|p{24pt}}
62 _parameter_rx = re.compile("[ \n]*{(([^{}}]|{[^}]*})*)}")
63 _token_rx = re.compile(r"[a-zA-Z][a-zA-Z0-9.-]*$")
64 _start_group_rx = re.compile("[ \n]*{")
65 _start_optional_rx = re.compile("[ \n]*[[]")
66
67
68 ESCAPED_CHARS = "$%#^ {}&~"
69
70
def dbgmsg(msg):
    """Write *msg* plus a newline to stderr, but only when DEBUG is set."""
    if not DEBUG:
        return
    sys.stderr.write(msg + "\n")
74
def pushing(name, point, depth):
    """Emit a debug message for element *name* being pushed at *point*.

    *depth* is accepted but not used.
    """
    message = "pushing <%s> at %s" % (name, point)
    dbgmsg(message)
77
def popping(name, point, depth):
    """Emit a debug message for element *name* being popped at *point*.

    *depth* is accepted but not used.
    """
    message = "popping </%s> at %s" % (name, point)
    dbgmsg(message)
80
81
82 class _Stack(UserList.UserList):
83     def append(self, entry):
84         if type(entry) is not StringType:
85             raise LaTeXFormatError("cannot push non-string on stack: "
86                                    + `entry`)
87         #dbgmsg("%s<%s>" % (" "*len(self.data), entry))
88         self.data.append(entry)
89
90     def pop(self, index=-1):
91         entry = self.data[index]
92         del self.data[index]
93         #dbgmsg("%s</%s>" % (" "*len(self.data), entry))
94
95     def __delitem__(self, index):
96         entry = self.data[index]
97         del self.data[index]
98         #dbgmsg("%s</%s>" % (" "*len(self.data), entry))
99
100
def new_stack():
    """Return an empty element stack.

    A plain list is used normally; with DEBUG set, a _Stack is returned
    instead.
    """
    if not DEBUG:
        return []
    return _Stack()
105
106 \f
class Conversion:
    """Convert one LaTeX document into a stream of event lines.

    The whole input is read up front; convert() then walks through it and
    writes lines such as "(name", ")name", "-text", "Aname TYPE value",
    "&name" and "e" to the output file.  How each macro or environment is
    handled is looked up in *table*, a dictionary mapping LaTeX names to
    TableEntry objects; names that are missing get a default entry added
    on first use.
    """

    def __init__(self, ifp, ofp, table):
        """Read all of *ifp* and remember *ofp* and *table*.

        ifp   -- readable file object holding the LaTeX source
        ofp   -- writable file object that receives the output
        table -- dictionary of TableEntry objects, keyed by LaTeX name;
                 it is modified when unknown names are encountered
        """
        self.write = ofp.write
        self.ofp = ofp
        self.table = table
        # Trailing whitespace is removed from every line and the lines are
        # joined back with "\n", so the text has no trailing newline.
        self.line = "\n".join([text.rstrip() for text in ifp.readlines()])
        # NOTE(review): not read anywhere in the visible code.
        self.preamble = 1

    def convert(self):
        """Convert the complete input."""
        self.subconvert()

    def subconvert(self, endchar=None, depth=0):
        """Convert input starting at self.line.

        Parses content, including sub-structures, until the character
        'endchar' is found (with no open structures), or until the end
        of the input data if endchar is None.

        When 'endchar' is found, the remaining input (still starting with
        that character) is stored in self.line and returned.  When the
        input runs out, None is returned.  'depth' is only passed along
        to nested calls.

        Raises LaTeXStackError for a mismatched \\end{...} and
        LaTeXFormatError for markup that cannot be handled or for
        elements left open at the end of the input.
        """
        stack = new_stack()
        line = self.line
        while line:
            if line[0] == endchar and not stack:
                self.line = line
                return line
            m = _comment_rx.match(line)
            if m:
                text = m.group(1)
                if text:
                    self.write("(COMMENT\n- %s \n)COMMENT\n-\\n\n"
                               % encode(text))
                line = line[m.end():]
                continue
            m = _begin_env_rx.match(line)
            if m:
                name = m.group(1)
                # Called for its side effects: it registers a default
                # entry for an unknown environment and rejects names that
                # are defined as macros.
                self.get_env_entry(name)
                # re-write to use the macro handler
                line = r"\%s %s" % (name, line[m.end():])
                continue
            m = _end_env_rx.match(line)
            if m:
                # end of environment
                envname = m.group(1)
                entry = self.get_entry(envname)
                # Implicitly close elements this environment's end closes.
                while stack and envname != stack[-1] \
                      and stack[-1] in entry.endcloses:
                    self.write(")%s\n" % stack.pop())
                if stack and envname == stack[-1]:
                    self.write(")%s\n" % entry.outputname)
                    del stack[-1]
                else:
                    raise LaTeXStackError(envname, stack)
                line = line[m.end():]
                continue
            m = _begin_macro_rx.match(line)
            if m:
                # start of macro
                macroname = m.group(1)
                if macroname == "c":
                    # Ugh!  This is a combining character...
                    endpos = m.end()
                    # Slicing (not indexing) so that input ending right
                    # after "\c" is reported by combining_char() instead
                    # of raising IndexError here.
                    self.combining_char("c", line[endpos:endpos + 1])
                    line = line[endpos + 1:]
                    continue
                entry = self.get_entry(macroname)
                if entry.verbatim:
                    # magic case!  Everything up to the matching \end is
                    # copied through as data without further parsing.
                    endtag = "\\end{%s}" % macroname
                    pos = line.find(endtag)
                    if pos < 0:
                        # A "not found" result of -1 must not be used as
                        # a slice position.
                        raise LaTeXFormatError(
                            "missing %s for verbatim environment" % endtag)
                    text = line[m.end(1):pos]
                    stack.append(entry.name)
                    self.write("(%s\n" % entry.outputname)
                    self.write("-%s\n" % encode(text))
                    self.write(")%s\n" % entry.outputname)
                    stack.pop()
                    line = line[pos + len(endtag):]
                    continue
                # Close any open elements that this macro implicitly ends.
                while stack and stack[-1] in entry.closes:
                    top = stack.pop()
                    topentry = self.get_entry(top)
                    if topentry.outputname:
                        self.write(")%s\n-\\n\n" % topentry.outputname)
                #
                if entry.outputname and entry.empty:
                    self.write("e\n")
                #
                params, optional, empty, environ = self.start_macro(macroname)
                # rip off the macroname; when parameters follow (or the
                # macro is empty) whatever came after the name is kept for
                # the code below, otherwise the whole match is dropped.
                if params or empty:
                    line = line[m.end(1):]
                else:
                    line = line[m.end():]
                opened = 0
                implied_content = 0

                # handle attribute mappings here:
                for pentry in params:
                    if pentry.type == "attribute":
                        if pentry.optional:
                            m = _optional_rx.match(line)
                            if m and entry.outputname:
                                line = line[m.end():]
                                self.dump_attr(pentry, m.group(1))
                        elif pentry.text and entry.outputname:
                            # value supplied by conversion spec:
                            self.dump_attr(pentry, pentry.text)
                        else:
                            m = _parameter_rx.match(line)
                            if not m:
                                raise LaTeXFormatError(
                                    "could not extract parameter %s for %s: %s"
                                    % (pentry.name, macroname,
                                       repr(line[:100])))
                            if entry.outputname:
                                self.dump_attr(pentry, m.group(1))
                            line = line[m.end():]
                    elif pentry.type == "child":
                        if pentry.optional:
                            m = _optional_rx.match(line)
                            if m:
                                line = line[m.end():]
                                if entry.outputname and not opened:
                                    opened = 1
                                    self.write("(%s\n" % entry.outputname)
                                    stack.append(macroname)
                                stack.append(pentry.name)
                                self.write("(%s\n" % pentry.name)
                                self.write("-%s\n" % encode(m.group(1)))
                                self.write(")%s\n" % pentry.name)
                                stack.pop()
                        else:
                            if entry.outputname and not opened:
                                opened = 1
                                self.write("(%s\n" % entry.outputname)
                                stack.append(entry.name)
                            self.write("(%s\n" % pentry.name)
                            stack.append(pentry.name)
                            # Drop the group's first character and convert
                            # its contents with a nested call.
                            self.line = skip_white(line)[1:]
                            rest = self.subconvert(
                                "}", len(stack) + depth + 1)
                            if not rest:
                                # The nested call ran out of input before
                                # finding the closing brace.
                                raise LaTeXFormatError(
                                    "unterminated group for child %s of %s"
                                    % (pentry.name, macroname))
                            line = rest[1:]
                            self.write(")%s\n" % stack.pop())
                    elif pentry.type == "content":
                        if pentry.implied:
                            implied_content = 1
                        else:
                            if entry.outputname and not opened:
                                opened = 1
                                self.write("(%s\n" % entry.outputname)
                                stack.append(entry.name)
                            line = skip_white(line)
                            # line[:1] is "" at end of input, so this also
                            # covers a macro that is the last thing there.
                            if line[:1] != "{":
                                raise LaTeXFormatError(
                                    "missing content for " + macroname)
                            self.line = line[1:]
                            # None (input exhausted) becomes "" so that the
                            # remaining parameters still see a string.
                            line = self.subconvert(
                                "}", len(stack) + depth + 1) or ""
                            if line and line[0] == "}":
                                line = line[1:]
                    elif pentry.type == "text" and pentry.text:
                        if entry.outputname and not opened:
                            opened = 1
                            stack.append(entry.name)
                            self.write("(%s\n" % entry.outputname)
                        self.write("-%s\n" % encode(pentry.text))
                    elif pentry.type == "entityref":
                        self.write("&%s\n" % pentry.name)
                if entry.outputname:
                    if not opened:
                        self.write("(%s\n" % entry.outputname)
                        stack.append(entry.name)
                    if not implied_content:
                        self.write(")%s\n" % entry.outputname)
                        stack.pop()
                continue
            if line[0] == "}":
                # end of macro or group
                if not stack:
                    raise LaTeXFormatError(
                        "unmatched '}': %s" % repr(line[:100]))
                macroname = stack[-1]
                if macroname:
                    conversion = self.table[macroname]
                    if conversion.outputname:
                        # otherwise, it was just a bare group
                        self.write(")%s\n" % conversion.outputname)
                del stack[-1]
                line = line[1:]
                continue
            if line[0] == "~":
                # don't worry about the "tie" aspect of this command
                line = line[1:]
                self.write("- \n")
                continue
            if line[0] == "{":
                # A bare group is tracked with an empty name.
                stack.append("")
                line = line[1:]
                continue
            if line[0] == "\\" and len(line) > 1 \
               and line[1] in ESCAPED_CHARS:
                self.write("-%s\n" % encode(line[1]))
                line = line[2:]
                continue
            if line[:2] == r"\\":
                self.write("(BREAK\n)BREAK\n")
                line = line[2:]
                continue
            if line[:2] == r"\_":
                # Re-scan with a plain underscore in place of "\_".
                line = "_" + line[2:]
                continue
            if line[:2] in (r"\'", r'\"'):
                # combining characters...
                self.combining_char(line[1], line[2:3])
                line = line[3:]
                continue
            m = _text_rx.match(line)
            if m:
                text = encode(m.group())
                self.write("-%s\n" % text)
                line = line[m.end():]
                continue
            # special case because of \item[]
            # XXX can we axe this???
            if line[0] == "]":
                self.write("-]\n")
                line = line[1:]
                continue
            # avoid infinite loops
            extra = ""
            if len(line) > 100:
                extra = "..."
            raise LaTeXFormatError("could not identify markup: %s%s"
                                   % (repr(line[:100]), extra))
        # Out of input: elements whose entry has a "closes" list may be
        # closed implicitly; anything else left open is an error.
        while stack:
            entry = self.get_entry(stack[-1])
            if entry.closes:
                self.write(")%s\n-%s\n" % (entry.outputname, encode("\n")))
                del stack[-1]
            else:
                break
        if stack:
            raise LaTeXFormatError("elements remain on stack: "
                                   + ", ".join(stack))
        # otherwise we just ran out of input here...
        return None

    # This is a really limited table of combinations, but it will have
    # to do for now.  Keys are (prefix, character) pairs; values are the
    # numbers written by combining_char().
    _combinations = {
        ("c", "c"): 0x00E7,
        ("'", "e"): 0x00E9,
        ('"', "o"): 0x00F6,
        }

    def combining_char(self, prefix, char):
        """Write the character reference for accent *prefix* on *char*.

        Raises LaTeXFormatError when the pair is not in _combinations.
        """
        try:
            ordinal = self._combinations[(prefix, char)]
        except KeyError:
            raise LaTeXFormatError(
                "unsupported combining character: \\%s%s" % (prefix, char))
        self.write("-\\%%%d;\n" % ordinal)

    def start_macro(self, name):
        """Return (parameters, optional, empty, environment) for *name*.

        'optional' is the optional flag of the first parameter, or the
        (empty) parameter list itself when there are no parameters.
        """
        conversion = self.get_entry(name)
        parameters = conversion.parameters
        optional = parameters and parameters[0].optional
        return parameters, optional, conversion.empty, conversion.environment

    def get_entry(self, name):
        """Return the table entry for *name*, creating a default if needed.

        The default is a macro with a single "content" parameter; it is
        stored in the table so later lookups return the same object.
        """
        entry = self.table.get(name)
        if entry is None:
            dbgmsg("get_entry(%s) failing; building default entry!"
                   % repr(name))
            # not defined; build a default entry:
            entry = TableEntry(name)
            entry.has_content = 1
            entry.parameters.append(Parameter("content"))
            self.table[name] = entry
        return entry

    def get_env_entry(self, name):
        """Return the table entry for environment *name*.

        An unknown name gets a default environment entry with implied
        content.  Raises LaTeXFormatError when *name* is defined in the
        table as a macro.
        """
        entry = self.table.get(name)
        if entry is None:
            # not defined; build a default entry:
            entry = TableEntry(name, 1)
            entry.has_content = 1
            entry.parameters.append(Parameter("content"))
            entry.parameters[-1].implied = 1
            self.table[name] = entry
        elif not entry.environment:
            raise LaTeXFormatError(
                name + " is defined as a macro; expected environment")
        return entry

    def dump_attr(self, pentry, value):
        """Write an attribute line for parameter *pentry* with *value*.

        Nothing is written when the parameter has no name or the value is
        empty.  The declared type is TOKEN when the whole value matches
        _token_rx and CDATA otherwise.
        """
        if not (pentry.name and value):
            return
        if _token_rx.match(value):
            dtype = "TOKEN"
        else:
            dtype = "CDATA"
        self.write("A%s %s %s\n" % (pentry.name, dtype, encode(value)))
401
402
def convert(ifp, ofp, table):
    """Convert the LaTeX read from *ifp*, writing the result to *ofp*.

    *table* is the conversion table (see load_table()).  An IOError
    whose errno is EPIPE is swallowed; every other IOError is re-raised.
    """
    c = Conversion(ifp, ofp, table)
    try:
        c.convert()
    except IOError:
        # Look at the errno attribute rather than unpacking the exception
        # into (err, msg): unpacking fails with ValueError for an IOError
        # that does not carry exactly two arguments, hiding the real error.
        err = sys.exc_info()[1]
        if getattr(err, "errno", None) != errno.EPIPE:
            raise
410
411
def skip_white(line):
    """Return *line* without leading whitespace and "%" characters.

    While the first character is a space, "%", newline, tab or carriage
    return, that character and any whitespace after it are removed.
    Only the "%" character itself is dropped; any text after it is kept.
    """
    while line and line[0] in " %\n\t\r":
        # str method instead of the deprecated string.lstrip() function.
        line = line[1:].lstrip()
    return line
416
417
418
class TableEntry:
    """Conversion settings for one LaTeX macro or environment."""

    def __init__(self, name, environment=0):
        # The output name starts out equal to the LaTeX name.
        self.name = self.outputname = name
        self.environment = environment
        # Macros start out "empty"; environments do not.
        self.empty = not environment
        self.has_content = self.verbatim = self.auto_close = 0
        # Three separate lists, one per attribute.
        self.parameters, self.closes, self.endcloses = [], [], []
431
class Parameter:
    """One parameter of a TableEntry.

    The code in this file uses the type strings "attribute", "child",
    "content", "text" and "entityref".
    """

    def __init__(self, type, name=None, optional=0):
        self.type, self.name, self.optional = type, name, optional
        self.implied = 0
        self.text = ''
439
440
class TableParser(XMLParser):
    """XML parser that builds the conversion table.

    <environment> and <macro> elements each produce a TableEntry; the
    <attribute>, <entityref>, <child>, <content> and <text> elements
    handled below add Parameter objects to the entry being built.
    """

    def __init__(self, table=None):
        """Start with *table* (a dictionary), or an empty one if None."""
        if table is None:
            table = {}
        self.__table = table
        self.__current = None
        self.__buffer = ''
        XMLParser.__init__(self)

    def get_table(self):
        """Return the table, completing environment entries first.

        Every environment that has no content parameter yet gets an
        implied "content" parameter appended.
        """
        for entry in self.__table.values():
            if entry.environment and not entry.has_content:
                content = Parameter("content")
                content.implied = 1
                entry.parameters.append(content)
                entry.has_content = 1
        return self.__table

    def start_environment(self, attrs):
        name = attrs["name"]
        entry = TableEntry(name, environment=1)
        entry.verbatim = attrs.get("verbatim") == "yes"
        # A new entry's outputname already equals its name, so the name
        # serves as the fallback when no "outputname" attribute is given.
        entry.outputname = attrs.get("outputname", name)
        entry.endcloses = attrs.get("endcloses", "").split()
        self.__current = entry

    def end_environment(self):
        self.end_macro()

    def start_macro(self, attrs):
        name = attrs["name"]
        entry = TableEntry(name)
        entry.closes = attrs.get("closes", "").split()
        entry.outputname = attrs.get("outputname", name)
        self.__current = entry

    def end_macro(self):
        # Store the finished entry under its LaTeX name.
        self.__table[self.__current.name] = self.__current
        self.__current = None

    def start_attribute(self, attrs):
        optional = attrs.get("optional") == "yes"
        # A missing or empty name is stored as None.
        name = attrs.get("name") or None
        self.__current.parameters.append(
            Parameter("attribute", name, optional=optional))
        self.__buffer = ''

    def end_attribute(self):
        # Character data collected inside the element becomes the
        # parameter's text.
        self.__current.parameters[-1].text = self.__buffer

    def start_entityref(self, attrs):
        self.__current.parameters.append(
            Parameter("entityref", attrs["name"]))

    def start_child(self, attrs):
        child = Parameter("child", attrs["name"],
                          attrs.get("optional") == "yes")
        self.__current.parameters.append(child)
        self.__current.empty = 0

    def start_content(self, attrs):
        content = Parameter("content")
        content.implied = attrs.get("implied") == "yes"
        # Content of an environment is always implied.
        if self.__current.environment:
            content.implied = 1
        self.__current.parameters.append(content)
        self.__current.has_content = 1
        self.__current.empty = 0

    def start_text(self, attrs):
        self.__current.empty = 0
        self.__buffer = ''

    def end_text(self):
        text = Parameter("text")
        text.text = self.__buffer
        self.__current.parameters.append(text)

    def handle_data(self, data):
        self.__buffer = self.__buffer + data
521
522
def load_table(fp, table=None):
    """Parse the XML read from *fp* and return the conversion table.

    When *table* is given, the parsed entries are added to it.  *fp* is
    read completely but not closed.
    """
    table_parser = TableParser(table=table)
    xml_text = fp.read()
    table_parser.feed(xml_text)
    table_parser.close()
    return table_parser.get_table()
528
529
def main():
    """Command-line entry point.

    Usage: latex2esis.py [-D|--debug] [infile [outfile]]

    With no arguments, input comes from stdin and output goes to stdout;
    with one, input comes from the named file; with two, output goes to
    the second named file.  More than two arguments prints a usage
    message to stderr and exits with status 2.  The conversion table is
    loaded from 'conversion.xml' in the directory sys.path[0].
    """
    global DEBUG
    #
    opts, args = getopt.getopt(sys.argv[1:], "D", ["debug"])
    for opt, arg in opts:
        if opt in ("-D", "--debug"):
            DEBUG = DEBUG + 1
    if len(args) > 2:
        sys.stderr.write("usage: %s [-D|--debug] [infile [outfile]]\n"
                         % os.path.basename(sys.argv[0]))
        sys.exit(2)
    ifp = sys.stdin
    ofp = sys.stdout
    if args:
        # args is a list; the input file name is its first element.
        ifp = open(args[0])
    if len(args) == 2:
        ofp = open(args[1], "w")

    table_fp = open(os.path.join(sys.path[0], 'conversion.xml'))
    try:
        table = load_table(table_fp)
    finally:
        table_fp.close()
    convert(ifp, ofp, table)
552
553
# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    main()