2 # -*- Mode: Python; py-indent-offset: 4 -*-
4 # Toby D. Reeves <toby@max.rl.plh.af.mil>
6 # Modified by James Henstridge <james@daa.com.au> to output stuff in
7 # Havoc's new defs format. Info on this format can be seen at:
8 # http://mail.gnome.org/archives/gtk-devel-list/2000-January/msg00070.html
9 # Updated to be PEP-8 compatible and refactored to use OOP
11 # Scan the given public .h files of a GTK module (or module using
12 # GTK object conventions) and generates a set of scheme defs.
# h2def searches through a header file looking for function prototypes and
# generates a scheme style definition for each prototype.
16 # Basically the operation of h2def is:
18 # - read each .h file into a buffer which is scrubbed of extraneous data
# - find all object definitions:
20 # - find all structures that may represent a GtkObject
21 # - find all structures that might represent a class
22 # - find all structures that may represent a GtkObject subclass
23 # - find all structures that might represent a class/Iface inherited from
# - find all enum definitions
26 # - write out the defs
28 # The command line options are:
30 # -s --separate Create separate files for objects and function/method defs
31 # using the given name as the base name (optional). If this
32 # is not specified the combined object and function defs
33 # will be output to sys.stdout.
# -f --defsfilter Extract defs from the given file and use them to filter the
# output defs, that is, don't output defs that are already defined
# in the defsfile. More than one defsfile may be specified.
37 # -m --modulename The prefix to be stripped from the front of function names
38 # for the given module
39 # -n --namespace The module or namespace name to be used, for example
40 # WebKit where h2def is unable to detect the module name
41 # automatically. it also sets the gtype-id prefix.
42 # --onlyenums Only produce defs for enums and flags
43 # --onlyobjdefs Only produce defs for objects
48 # python h2def.py /usr/local/include/pango-1.0/pango/*.h >/tmp/pango.defs
50 # - Outputs all defs for the pango module.
52 # python h2def.py -m gdk -s /tmp/gdk-2.10 \
53 # -f /usr/tmp/pygtk/gtk/gdk-base.defs \
54 # /usr/local/include/gtk-2.0/gdk/*.h \
55 # /usr/local/include/gtk-2.0/gdk-pixbuf/*.h
57 # - Outputs the gdk module defs that are not contained in the defs file
58 # /usr/tmp/pygtk/gtk/gdk-base.defs. Two output files are created:
59 # /tmp/gdk-2.10-types.defs and /tmp/gdk-2.10.defs.
# python h2def.py -n WebKit /usr/include/webkit-1.0/webkit/*.h \
64 # - Outputs all the defs for webkit module, setting the module name to WebKit
65 # and the gtype-id prefix to WEBKIT_ which can't be detected automatically.
76 # ------------------ Create typecodes from typenames ---------
_upperstr_pat1 = re.compile(r'([^A-Z])([A-Z])')
_upperstr_pat2 = re.compile(r'([A-Z][A-Z])([A-Z][0-9a-z])')
_upperstr_pat3 = re.compile(r'^([A-Z])([A-Z])')

def to_upper_str(name):
    """Convert a CamelCase typename to its upper-case, underscored form.

    This is used to form the type conversion macros and enum/flag names,
    e.g. ``GtkWidget`` -> ``GTK_WIDGET`` and ``GObject`` -> ``G_OBJECT``.

    name -- the typename to convert.
    Returns the converted name as a string.
    """
    # lower-case/digit followed by upper-case starts a new word: GtkWidget
    name = _upperstr_pat1.sub(r'\1_\2', name)
    # the last capital of an acronym starts the next word: GLContext
    name = _upperstr_pat2.sub(r'\1_\2', name)
    # a single-letter prefix is a word of its own: GObject
    name = _upperstr_pat3.sub(r'\1_\2', name, count=1)
    # str.upper() instead of the deprecated string.upper(); works on
    # Python 2 and Python 3 alike.
    return name.upper()
def typecode(typename, namespace=None):
    """Create a typecode (eg. GTK_TYPE_WIDGET) from a typename.

    typename  -- the C type name, e.g. ``GtkWidget``.
    namespace -- optional module name; when given, its length is sliced
                 off the front of typename and its upper-cased form is
                 used as the typecode prefix (needed when the prefix
                 cannot be derived from the capitalisation alone).
    Returns the typecode string.
    """
    if namespace:
        upper_name = (namespace.upper() + '_' +
                      to_upper_str(typename[len(namespace):]))
    else:
        upper_name = to_upper_str(typename)
    # Only the first underscore (the one after the module prefix) becomes
    # '_TYPE_'.
    return upper_name.replace('_', '_TYPE_', 1)
99 # ------------------ Find object definitions -----------------
def strip_comments(buf):
    """Return buf with every C block comment (``/* ... */``) removed.

    An unterminated comment swallows the rest of the buffer.
    """
    parts = []
    lastpos = 0
    while True:
        pos = buf.find('/*', lastpos)
        if pos < 0:
            # no further comment: keep the tail and stop
            parts.append(buf[lastpos:])
            break
        parts.append(buf[lastpos:pos])
        # Look for the terminator after the two-character opener so that
        # the '*' of '/*' cannot double as the '*' of '*/' (as in "/*/").
        pos = buf.find('*/', pos + 2)
        if pos < 0:
            break
        lastpos = pos + 2
    return ''.join(parts)
def strip_dll_api(buf):
    """Return buf with DLL export macros (for example ``WEBKIT_API``) removed.

    Every run of upper-case letters ending in ``_API`` and followed by a
    single space is deleted, together with that space.
    """
    pat = re.compile("[A-Z]*_API ")
    # Callers assign the result, so the stripped buffer must be returned.
    return pat.sub("", buf)
obj_name_pat = "[A-Z][a-z]*[A-Z][A-Za-z0-9]*"

split_prefix_pat = re.compile('([A-Z]+[a-z]*)([A-Za-z0-9]+)')

def find_obj_defs(buf, objdefs=None):
    """
    Try to find object definitions in header files.

    buf     -- contents of a C header file.
    objdefs -- optional list that receives the (name, parent) tuples
               found; it is extended in place.  A fresh list is used
               when omitted (the old shared ``[]`` default leaked
               results from one call into the next).
    Returns the list that was extended.
    """
    if objdefs is None:
        objdefs = []

    # filter out comments from buffer.
    buf = strip_comments(buf)

    # filter out dll api
    buf = strip_dll_api(buf)

    maybeobjdefs = set()  # contains all possible objects from file

    # first find all structures that look like they may represent a GtkObject
    pat = re.compile(r"struct\s+_(" + obj_name_pat + r")\s*{\s*" +
                     r"(" + obj_name_pat + r")\s+", re.MULTILINE)
    for m in pat.finditer(buf):
        maybeobjdefs.add((m.group(1), m.group(2)))

    # handle typedef struct { ... } style struct defs.
    pat = re.compile(r"typedef struct\s+[_\w]*\s*{\s*" +
                     r"(" + obj_name_pat + r")\s+[^}]*}\s*" +
                     r"(" + obj_name_pat + r")\s*;", re.MULTILINE)
    for m in pat.finditer(buf):
        maybeobjdefs.add((m.group(2), m.group(1)))

    # now find all structures that look like they might represent a class:
    pat = re.compile(r"struct\s+_(" + obj_name_pat + r")Class\s*{\s*" +
                     r"(" + obj_name_pat + r")Class\s+", re.MULTILINE)
    for m in pat.finditer(buf):
        t = (m.group(1), m.group(2))
        # if we find an object structure together with a corresponding
        # class structure, then we have probably found a GtkObject subclass.
        if t in maybeobjdefs:
            objdefs.append(t)

    pat = re.compile(r"typedef struct\s+[_\w]*\s*{\s*" +
                     r"(" + obj_name_pat + r")Class\s+[^}]*}\s*" +
                     r"(" + obj_name_pat + r")Class\s*;", re.MULTILINE)
    for m in pat.finditer(buf):
        t = (m.group(2), m.group(1))
        # if we find an object structure together with a corresponding
        # class structure, then we have probably found a GtkObject subclass.
        if t in maybeobjdefs:
            objdefs.append(t)

    # now find all structures that look like they might represent
    # a class, then an Iface, inherited from GTypeInterface:
    for suffix in ('Class', 'Iface'):
        pat = re.compile(r"struct\s+_(" + obj_name_pat + r")" + suffix +
                         r"\s*{\s*" + r"GTypeInterface\s+", re.MULTILINE)
        for m in pat.finditer(buf):
            t2 = (m.group(1) + suffix, 'GTypeInterface')
            if t2 in maybeobjdefs:
                # NOTE(review): the statement that builds the recorded
                # tuple is missing from this copy of the file; an
                # interface is assumed to be recorded with an empty
                # parent -- confirm against the upstream source.
                objdefs.append((m.group(1), ''))

    return objdefs
def sort_obj_defs(objdefs):
    """Order objdefs in place so every parent precedes its subclasses.

    objdefs -- list of (klass, parent) tuples.
    Returns the same list object, reordered.

    NOTE(review): assumes the parent links contain no cycle; a cycle
    would keep moving entries forever.
    """
    objdefs.sort()  # not strictly needed, but looks nice
    pos = 0
    while pos < len(objdefs):
        parent = objdefs[pos][1]
        for i in range(pos + 1, len(objdefs)):
            # parent below subclass ... reorder
            if objdefs[i][0] == parent:
                # Move the subclass to just after its parent, then look
                # at whatever entry slid into this position.
                objdefs.insert(i + 1, objdefs[pos])
                del objdefs[pos]
                break
        else:
            # parent is not further down the list: this entry is settled
            pos += 1
    return objdefs
237 # ------------------ Find enum definitions -----------------
def find_enum_defs(buf, enums=None):
    """Find ``enum { ... } Name`` definitions in a C header.

    buf   -- contents of a C header file.
    enums -- optional list that receives one (name, isflags, entries)
             tuple per enum; it is extended in place.  A fresh list is
             used when omitted.
    Returns the list that was extended.

    isflags is true when the enum body contains ``<<``; entries holds
    the first word of every comma-separated value.
    """
    if enums is None:
        enums = []

    # strip comments
    # bulk comments
    buf = strip_comments(buf)

    # strip dll api macros
    buf = strip_dll_api(buf)

    # strip # directives
    pat = re.compile(r"""^[#].*?$""", re.MULTILINE)
    buf = pat.sub('', buf)

    # join everything onto one line so an enum body never spans lines
    buf = re.sub('\n', ' ', buf)

    enum_pat = re.compile(r'enum\s*{([^}]*)}\s*([A-Z][A-Za-z]*)(\s|;)')
    splitter = re.compile(r'\s*,\s', re.MULTILINE)
    for m in enum_pat.finditer(buf):
        vals = m.group(1)
        name = m.group(2)
        isflags = '<<' in vals
        entries = [val.split()[0]
                   for val in splitter.split(vals) if val.strip()]
        # GdkCursorType is deliberately left out of the output
        if name != 'GdkCursorType':
            enums.append((name, isflags, entries))
    return enums
272 # ------------------ Find function definitions -----------------
def clean_func(buf):
    """
    Ideally would make buf have a single prototype on each line.
    Actually just cuts out a good deal of junk, but leaves lines
    where a regex can figure prototypes out.

    buf -- contents of a C header file.
    Returns the cleaned buffer, statements separated by newlines.
    """
    # bulk comments
    buf = strip_comments(buf)

    # dll api
    buf = strip_dll_api(buf)

    # compact continued lines
    pat = re.compile(r"""\\\n""", re.MULTILINE)
    buf = pat.sub('', buf)

    # Preprocess directives
    pat = re.compile(r"""^[#].*?$""", re.MULTILINE)
    buf = pat.sub('', buf)

    # typedefs, structs, and enums
    pat = re.compile(r"""^(typedef|struct|enum)(\s|.|\n)*?;\s*""",
                     re.MULTILINE)
    buf = pat.sub('', buf)

    # strip DECLS macros
    pat = re.compile(r"""G_BEGIN_DECLS|BEGIN_LIBGTOP_DECLS""", re.MULTILINE)
    buf = pat.sub('', buf)

    # extern "C"
    pat = re.compile(r"""^\s*(extern)\s+\"C\"\s+{""", re.MULTILINE)
    buf = pat.sub('', buf)

    # multiple whitespace
    pat = re.compile(r"""\s+""", re.MULTILINE)
    buf = pat.sub(' ', buf)

    # clean up line ends: one statement per line
    pat = re.compile(r""";\s*""", re.MULTILINE)
    buf = pat.sub('\n', buf)
    # The whitespace collapse above can leave one leading blank, which
    # would stop an anchored prototype match on the first line.
    buf = buf.lstrip()

    # associate *, &, and [] with type instead of variable
    pat = re.compile(r' \s* ([*|&]+) \s* (\w+)', re.VERBOSE)
    buf = pat.sub(r'\1 \2', buf)
    pat = re.compile(r'\s+ (\w+) \[ \s* \]', re.VERBOSE)
    buf = pat.sub(r'[] \1', buf)

    # make return types that are const work.
    buf = re.sub(r'\s*\*\s*G_CONST_RETURN\s*\*\s*', '** ', buf)
    buf = buf.replace('G_CONST_RETURN ', 'const-')
    buf = buf.replace('const ', 'const-')

    # strip GSEAL macros from the middle of function declarations:
    pat = re.compile(r"""GSEAL""", re.VERBOSE)
    buf = pat.sub('', buf)

    return buf
# Matches one cleaned-up C prototype: return type, function name and the
# text between the parentheses.  Verbose mode: whitespace and '#' comments
# inside the pattern are ignored.
proto_pat = re.compile(r"""
(?P<ret>(-|\w|\&|\*)+\s*)  # return type
\s+                        # skip whitespace
(?P<func>\w+)\s*[(]        # match the function name until the opening (
\s*(?P<args>.*?)\s*[)]     # group the function arguments
""", re.IGNORECASE|re.VERBOSE)

# Raw strings throughout: '\s', '\*' and '\w' are not valid string escapes
# and newer Python versions warn about them in ordinary literals.

# splits an argument list on commas
arg_split_pat = re.compile(r"\s*,\s*")

# leading type of an argument; group 2 is the bare type name
get_type_pat = re.compile(r'(const-)?([A-Za-z0-9]+)\*?\s+')
# a type that ends in '*'
pointer_pat = re.compile(r'.*\*$')
# a function name ending in '_new'; group 1 is what precedes it
func_new_pat = re.compile(r'(\w+)_new$')
def __init__(self, fp=None, prefix=None, ns=None, verbose=False,
             defsfilter=None):
    """Set up a defs writer.

    fp         -- file object the function/method defs are written to;
                  sys.stdout when omitted.
    prefix     -- module prefix to strip from the front of function names.
    ns         -- module/namespace name to use instead of auto-detection.
    verbose    -- stored as self.verbose.
    defsfilter -- name of an existing defs file; every function, method,
                  object, boxed type, interface and enum it defines is
                  remembered by C name in the filter tables.

    NOTE(review): the attribute initialisation was missing from this copy
    of the file and is reconstructed from how the attributes are used by
    the other methods -- confirm against the upstream source.
    """
    if fp is None:
        fp = sys.stdout

    self.fp = fp
    self.prefix = prefix
    self.namespace = ns
    self.verbose = verbose

    # filter tables, keyed by C name
    self._enums = {}
    self._objects = {}
    self._functions = {}
    if defsfilter:
        parser = defsparser.DefsParser(defsfilter)
        parser.startParsing()
        # dict.values() is a view on Python 3 and cannot be added to a
        # list directly, hence the explicit list().
        for func in parser.functions + list(parser.methods.values()):
            self._functions[func.c_name] = func
        for obj in parser.objects + parser.boxes + parser.interfaces:
            self._objects[obj.c_name] = obj
        for obj in parser.enums:
            self._enums[obj.c_name] = obj
def write_def(self, deffile):
    """Write the function and method defs found in one header file.

    deffile -- path of the C header to read.

    A ';; From <basename>' banner is written to self.fp first, then the
    header text is handed to self._define_func.
    """
    # Close the header as soon as it has been read instead of leaving
    # the handle to the garbage collector.
    with open(deffile) as header:
        buf = header.read()

    self.fp.write('\n;; From %s\n\n' % os.path.basename(deffile))
    self._define_func(buf)
# Write one (define-enum ...) or (define-flags ...) stanza to fp for every
# (cname, isflags, entries) tuple in enums.
# NOTE(review): several statements of this method are missing from this copy
# of the file (fp defaulting, the filter check, the if/else lines and the loop
# headers), so the conditions that select between the paired lines below are
# not visible here.
378 def write_enum_defs(self, enums, fp=None):
382 fp.write(';; Enumerations and flags ...\n\n')
# Translation table: upper-case letters -> lower case and '_' -> '-'.
# string.maketrans, string.uppercase and string.lowercase are Python 2 only.
383 trans = string.maketrans(string.uppercase + '_',
384 string.lowercase + '-')
386 for cname, isflags, entries in enums:
# Namespace path: the module is the configured namespace and the short name
# is the C name with len(namespace) leading characters removed.
393 module = self.namespace
394 name = cname[len(self.namespace):]
# Alternative path: match the C name against split_prefix_pat
# (presumably module and name come from its two groups -- TODO confirm).
396 m = split_prefix_pat.match(cname)
# Stanza header: flags and plain enums differ only in the keyword.
401 fp.write('(define-flags ' + name + '\n')
403 fp.write('(define-enum ' + name + '\n')
405 fp.write(' (in-module "' + module + '")\n')
406 fp.write(' (c-name "' + cname + '")\n')
407 fp.write(' (gtype-id "' + typecode(cname, self.namespace) + '")\n')
410 # shorten prefix til we get a match ...
411 # and handle GDK_FONT_FONT, GDK_FONT_FONTSET case
# The loop keeps going while prefix does not end in '_', is not a prefix of
# ent, or is at least as long as ent.
412 while ((len(prefix) and prefix[-1] != '_') or ent[:len(prefix)] != prefix
413 or len(prefix) >= len(ent)):
415 prefix_len = len(prefix)
416 fp.write(' (values\n')
# Each value is written as a ("short-name" "C_NAME") pair; the short name is
# the C name without its first prefix_len characters, run through trans.
418 fp.write(' \'("%s" "%s")\n' %
419 (string.translate(ent[prefix_len:], trans), ent))
# Write one (define-object ...) stanza to fp for every (klass, parent) tuple
# in objdefs, after a scheme-mode banner.
# NOTE(review): several statements of this method are missing from this copy
# of the file (fp defaulting, the filter check and the if/else lines), so the
# conditions guarding the lines below are not visible here.
423 def write_obj_defs(self, objdefs, fp=None):
427 fp.write(';; -*- scheme -*-\n')
428 fp.write('; object definitions ...\n')
# self._objects holds the objects known from the defs filter, keyed by C name.
430 filter = self._objects
431 for klass, parent in objdefs:
# Namespace path: the short name is the class name with len(namespace)
# leading characters removed and the module is the namespace itself.
436 cname = klass[len(self.namespace):]
437 cmodule = self.namespace
# Alternative path: match the class name against split_prefix_pat
# (presumably module and name come from its two groups -- TODO confirm).
439 m = split_prefix_pat.match(klass)
445 fp.write('(define-object ' + cname + '\n')
447 fp.write(' (in-module "' + cmodule + '")\n')
449 fp.write(' (parent "' + parent + '")\n')
450 fp.write(' (c-name "' + klass + '")\n')
451 fp.write(' (gtype-id "' + typecode(klass, self.namespace) + '")\n')
452 # should do something about accessible fields
# Clean a header buffer, then try to parse each resulting line as a C
# prototype and write a def for every match via self._write_func.
# NOTE(review): the loop header, the filter check and the statement that
# binds `ret` are missing from this copy of the file.
455 def _define_func(self, buf):
456 buf = clean_func(buf)
# one candidate prototype per line
457 buf = string.split(buf,'\n')
# self._functions holds the functions known from the defs filter, keyed by
# C name.
458 filter = self._functions
462 m = proto_pat.match(p)
# lines that do not look like a prototype are reported on stderr
465 sys.stderr.write('No match:|%s|\n' % p)
467 func = m.group('func')
474 args = m.group('args')
475 args = arg_split_pat.split(args)
476 for i in range(len(args)):
# Turn the first (spaces - 1) blanks of an argument into '-', leaving only
# the last blank as a separator.
# NOTE(review): a guard on `spaces` is presumably missing just above the
# replace -- confirm.
477 spaces = string.count(args[i], ' ')
479 args[i] = string.replace(args[i], ' ', '-', spaces - 1)
481 self._write_func(func, ret, args)
# Write the def for one prototype: hand it to self._write_method when the
# function name starts with the type of its first argument, otherwise emit a
# (define-function ...) stanza, marking apparent constructors.
# NOTE(review): many statements of this method are missing from this copy of
# the file (the bindings of `obj`, `l` and `fname`, several if/else lines and
# most of the constructor-name logic).
483 def _write_func(self, name, ret, args):
485 # methods must have at least one argument
# Compare with the underscores removed, so gtk_label_* can be checked
# against the lower-cased type name.
486 munged_name = name.replace('_', '')
487 m = get_type_pat.match(args[0])
490 if munged_name[:len(obj)] == obj.lower():
491 self._write_method(obj, name, ret, args)
# true when the name starts with the module prefix followed by '_'
496 if name[:l] == self.prefix and name[l] == '_':
503 # it is either a constructor or normal function
504 self.fp.write('(define-function ' + fname + '\n')
505 self.fp.write(' (c-name "' + name + '")\n')
507 # Hmmm... Let's asume that a constructor function name
508 # ends with '_new' and it returns a pointer.
509 m = func_new_pat.match(name)
510 if pointer_pat.match(ret) and m:
# words of the function name that precede '_new'
512 names = m.group(1).split('_')
515 cname = self.namespace
521 self.fp.write(' (is-constructor-of "' + cname + '")\n')
523 self._write_return(ret)
524 self._write_arguments(args)
def _write_method(self, obj, name, ret, args):
    """Write a (define-method ...) stanza for a C function acting on obj.

    obj  -- C type name of the object, e.g. ``GtkLabel``.
    name -- full C function name, e.g. ``gtk_label_get_text``.
    ret  -- return type, passed on to self._write_return.
    args -- argument strings; the first one (the object itself) is not
            written as a parameter.
    """
    # Pattern for the lower-cased type name with an optional underscore
    # after every character, so 'GtkLabel' removes 'gtk_label_'.
    regex = ''.join([char + '_?' for char in obj.lower()])
    mname = re.sub(regex, '', name, count=1)
    if self.prefix:
        # Strip a leading '<prefix>_'.  The earlier code compared
        # len(prefix) + 1 characters with the prefix and then indexed one
        # position too far, so the test could never succeed.
        l = len(self.prefix)
        if mname[:l] == self.prefix and mname[l:l + 1] == '_':
            mname = mname[l + 1:]
    self.fp.write('(define-method ' + mname + '\n')
    self.fp.write(' (of-object "' + obj + '")\n')
    self.fp.write(' (c-name "' + name + '")\n')
    self._write_return(ret)
    self._write_arguments(args[1:])
# Write the (return-type ...) line of a def to self.fp: either the given
# type or the literal "none".
# NOTE(review): the condition that selects between the two writes is missing
# from this copy of the file (presumably a test for a void return -- confirm).
539 def _write_return(self, ret):
541 self.fp.write(' (return-type "' + ret + '")\n')
543 self.fp.write(' (return-type "none")\n')
# Write the (parameters ...) section, an optional (varargs #t) line and the
# closing parenthesis of a def to self.fp.
# NOTE(review): the loop headers and several if/else lines are missing from
# this copy of the file, so the conditions guarding the lines below are not
# visible here.
545 def _write_arguments(self, args):
547 has_args = len(args) > 0
# a lone 'void' argument is recognised separately from real parameters
551 elif arg in ('void', 'void '):
554 self.fp.write(' (parameters\n')
# An argument is written only when it splits into exactly two words, which
# fill the two slots of the ("%s" "%s") pair.
557 tupleArg = tuple(string.split(arg))
558 if len(tupleArg) == 2:
559 self.fp.write(' \'("%s" "%s")\n' % tupleArg)
560 self.fp.write(' )\n')
562 self.fp.write(' (varargs #t)\n')
# closes the stanza opened by the caller
563 self.fp.write(')\n\n')
565 # ------------------ Main function -----------------
# Body of the command-line entry point: parse the options, scan every input
# header for object and enum definitions, then write the defs.
# NOTE(review): the `def` line, the option-variable defaults, the assignments
# inside the option loop and the if/else lines that choose the output mode
# are missing from this copy of the file.
# The print statements and the file() builtin below are Python 2 only.
575 opts, args = getopt.getopt(args[1:], 'vs:m:n:f:',
576 ['onlyenums', 'onlyobjdefs',
577 'modulename=', 'namespace=',
578 'separate=', 'defsfilter='])
# one test per supported option
582 if o == '--onlyenums':
584 if o == '--onlyobjdefs':
586 if o in ('-s', '--separate'):
588 if o in ('-m', '--modulename'):
590 if o in ('-n', '--namespace'):
592 if o in ('-f', '--defsfilter'):
596 print 'Must specify at least one input file name'
599 # read all the object definitions in
# Both finders append to the list passed in, so objdefs and enums collect
# the results of all input files.
602 for filename in args:
603 buf = open(filename).read()
604 find_obj_defs(buf, objdefs)
605 find_enum_defs(buf, enums)
606 objdefs = sort_obj_defs(objdefs)
# Separate-output mode: functions and methods go to <separate>.defs, object
# and enum definitions to <separate>-types.defs.
609 methods = file(separate + '.defs', 'w')
610 types = file(separate + '-types.defs', 'w')
612 dw = DefsWriter(methods, prefix=modulename, ns=namespace,
613 verbose=verbose, defsfilter=defsfilter)
614 dw.write_obj_defs(objdefs, types)
615 dw.write_enum_defs(enums, types)
616 print "Wrote %s-types.defs" % separate
618 for filename in args:
619 dw.write_def(filename)
620 print "Wrote %s.defs" % separate
# Combined-output mode: no file object is passed to DefsWriter here.
622 dw = DefsWriter(prefix=modulename, ns=namespace,
623 verbose=verbose, defsfilter=defsfilter)
626 dw.write_enum_defs(enums)
628 dw.write_obj_defs(objdefs)
630 dw.write_obj_defs(objdefs)
631 dw.write_enum_defs(enums)
633 for filename in args:
634 dw.write_def(filename)
# Script entry guard: main()'s return value becomes the exit status.
636 if __name__ == '__main__':
637 sys.exit(main(sys.argv))