3 """Convert ESIS events to SGML or XML markup.
5 This is limited, but seems sufficient for the ESIS generated by the
6 latex2esis.py script when run over the Python documentation.
9 # This should have an explicit option to indicate whether the *INPUT* was
10 # generated from an SGML or an XML application.
18 from xml.sax.saxutils import escape
# Path of the data file that dump_empty_element_names() reads (when it
# already exists) and then rewrites.  The path is relative, so it is resolved
# against the current working directory of the process.
23 EMPTIES_FILENAME = "../sgml/empties.dat"
# Takes a name (`sgmlgi`) and a mapping table (`map`).  The name is first
# passed through the module-level _normalize_case hook; the driver code
# rebinds that hook to string.lower when name maps are in use.
# NOTE(review): what is done with the normalized key is on lines that are not
# part of this excerpt -- presumably a lookup in `map`; confirm.
33 def map_gi(sgmlgi, map):
34 uncased = _normalize_case(sgmlgi)
# Takes the same two parameters as map_gi().
# NOTE(review): the body is not part of this excerpt; the name suggests a
# variant that performs no mapping -- confirm.
41 def null_map_gi(sgmlgi, map):
# Format attribute (name, value) pairs as markup text and return the pieces
# joined by single spaces (the default separator of string.join).
#   attrs -- iterated as (name, value) pairs by the loop below
#   xml   -- flag, default 0
# NOTE(review): the branch headers that choose between the forms below are on
# lines that are not part of this excerpt; presumably `xml` selects the
# always-quoted form -- confirm.
45 def format_attrs(attrs, xml=0):
50 for name, value in attrs:
# Quoted form: name="value", with the value passed through escape().
52 append('%s="%s"' % (name, escape(value)))
54 # this is a little bogus, but should do for now
# The value is identical to the attribute name and is a valid name token.
55 if name == value and isnmtoken(value):
# The value is the attribute name prefixed with "no".
58 if value == "no" + name:
# Unquoted form: name=value.
61 append("%s=%s" % (name, value))
# Quoted form again, value escaped.
63 append('%s="%s"' % (name, escape(value)))
66 return string.join(parts)
# Case-insensitive pattern: one ASCII letter followed by any run of letters,
# digits, "-", "." and "_", up to the end of the string ("$" also matches just
# before a trailing newline).
69 _nmtoken_rx = re.compile("[a-z][-._a-z0-9]*$", re.IGNORECASE)
# True when `s` matches the pattern from its first character onwards.
# NOTE(review): the def line this return belongs to is not part of this
# excerpt; format_attrs() calls isnmtoken(), presumably this function.
71 return _nmtoken_rx.match(s) is not None
# Same as _nmtoken_rx except that the first character may also be a digit.
73 _token_rx = re.compile("[a-z0-9][-._a-z0-9]*$", re.IGNORECASE)
# True when `s` matches the pattern from its first character onwards.
# NOTE(review): the def line this return belongs to is not part of this
# excerpt.
75 return _token_rx.match(s) is not None
# Write markup for a stream of ESIS events to `ofp`.
#   ifp       -- NOTE(review): presumably the ESIS input stream; the reads are
#                on lines that are not part of this excerpt
#   ofp       -- object with a write() method that receives the markup
#   xml       -- flag, default 0; when true, known-empty elements are written
#                in the self-closing <name/> form
#   autoclose -- container of element names, tested with `in` on the end-tag
#                path
#   verbatims -- container of element names, tested with `in` to set the
#                `inverbatim` flag
# Raises RuntimeError for an event type that is not recognized.
# NOTE(review): the loop and the event-type dispatch headers are not part of
# this excerpt, so the grouping of the statements below is only partly visible.
78 def convert(ifp, ofp, xml=0, autoclose=(), verbatims=()):
# Detect a trailing newline on the event data.
94 if data and data[-1] == "\n":
97 data = esistools.decode(data)
# Runs of three hyphens become an em dash.
# NOTE(review): the replacement is a literal U+2014 character, which cannot
# be represented in ISO 8859-1, the encoding named in the XML declaration
# that the driver code writes; the entity reference for an em dash may have
# been intended -- confirm against the upstream file.
100 data = string.replace(data, "---", "—")
# "COMMENT" is singled out before the element name is mapped.
107 if data == "COMMENT":
# Map the element name through the element-name table.
110 data = map_gi(data, _elem_map)
# Start tag: self-closing form for a known-empty element in XML mode,
# otherwise a plain start tag; both include the formatted attributes.
111 if knownempty and xml:
112 ofp.write("<%s%s/>" % (data, format_attrs(attrs, xml)))
114 ofp.write("<%s%s>" % (data, format_attrs(attrs, xml)))
115 if knownempty and data not in knownempties:
116 # accumulate knowledge!
117 knownempties.append(data)
120 lastempty = knownempty
# Remember whether the element just opened is one of the verbatim elements.
122 inverbatim = data in verbatims
# End-tag side: the same "COMMENT" check and name mapping as above.
124 if data == "COMMENT":
127 data = map_gi(data, _elem_map)
130 ofp.write("</%s>" % data)
# NOTE(review): presumably the non-XML path -- elements recorded as empty
# get no end tag here, and `autoclose` / `lastopened` select among the
# remaining forms; the statements for those cases are not shown.
131 elif data not in knownempties:
132 if data in autoclose:
134 elif lastopened == data:
137 ofp.write("</%s>" % data)
# Attribute data: split on single spaces into at most three fields; the
# third field is decoded and stored under the mapped attribute name.
# Note that this rebinds the name `type`, which is also the name used in the
# error message further down.
142 name, type, value = string.split(data, " ", 2)
143 name = map_gi(name, _attr_map)
144 attrs[name] = esistools.decode(value)
# Entity reference, written as &name;.
148 ofp.write("&%s;" % data)
151 raise RuntimeError, "unrecognized ESIS event type: '%s'" % type
# Hand the collected known-empty element names to the dump helper.
# NOTE(review): the condition guarding this call, if any, is not shown.
154 dump_empty_element_names(knownempties)
# Rewrite EMPTIES_FILENAME from a list of element names.
#   knownempties -- list of element names; note that an empty string is
#                   appended to it below, so the caller's list is modified
# The existing file, if there is one, is read first and each line is stripped
# of surrounding whitespace; the file is then reopened for writing and
# `gilist` is written with one entry per line.
# NOTE(review): the loop bodies, the construction of `gilist` and any close()
# calls are on lines that are not part of this excerpt -- presumably the old
# and new names are merged; confirm.
157 def dump_empty_element_names(knownempties):
159 for gi in knownempties:
161 knownempties.append("")
# Only read the file when it already exists.
162 if os.path.isfile(EMPTIES_FILENAME):
163 fp = open(EMPTIES_FILENAME)
168 gi = string.strip(line)
# Reopen for writing; this truncates any existing content.
171 fp = open(EMPTIES_FILENAME, "w")
174 fp.write(string.join(gilist, "\n"))
# Process a list of names for the mapping table `map`.
#   map      -- mapping table
#   names    -- a single comma-separated string; it is split on "," here
#   fromsgml -- flag, default 1
# A lower-cased copy of each name is computed.
# NOTE(review): the statements that store into `map` and that use `fromsgml`
# are on lines that are not part of this excerpt.
179 def update_gi_map(map, names, fromsgml=1):
180 for name in string.split(names, ","):
182 uncased = string.lower(name)
192 autoclose = AUTOCLOSE
198 verbatims = ('verbatim', 'interactive-session')
199 opts, args = getopt.getopt(sys.argv[1:], "adesx",
200 ["autoclose=", "declare", "sgml", "xml",
201 "elements-map=", "attributes-map",
203 for opt, arg in opts:
204 if opt in ("-d", "--declare"):
209 elif opt in ("-s", "--sgml"):
211 elif opt in ("-x", "--xml"):
213 elif opt in ("-a", "--autoclose"):
214 autoclose = string.split(arg, ",")
215 elif opt == "--elements-map":
216 elem_names = ("%s,%s" % (elem_names, arg))[1:]
217 elif opt == "--attributes-map":
218 attr_names = ("%s,%s" % (attr_names, arg))[1:]
219 elif opt == "--values-map":
220 value_names = ("%s,%s" % (value_names, arg))[1:]
222 # open input streams:
232 ofp = open(args[1], "w")
237 # setup the name maps:
239 if elem_names or attr_names or value_names:
240 # assume the origin was SGML; ignore case of the names from the ESIS
241 # stream but set up conversion tables to get the case right on output
242 global _normalize_case
243 _normalize_case = string.lower
244 update_gi_map(_elem_map, string.split(elem_names, ","))
245 update_gi_map(_attr_map, string.split(attr_names, ","))
246 update_gi_map(_values_map, string.split(value_names, ","))
251 # run the conversion:
255 opf.write('<?xml version="1.0" encoding="iso8859-1"?>\n')
256 convert(ifp, ofp, xml=xml, autoclose=autoclose, verbatims=verbatims)
257 except IOError, (err, msg):
258 if err != errno.EPIPE:
# Script entry guard: true only when the file is run directly rather than
# imported.  The statement it guards is not part of this excerpt.
262 if __name__ == "__main__":