3 # This is the API builder; it parses the C sources and builds the
4 # formal API description in XML.
6 # See Copyright for the status of this software.
17 # C parser analysis code
20 "trio": "too many non standard macros",
21 "trio.c": "too many non standard macros",
22 "trionan.c": "too many non standard macros",
23 "triostr.c": "too many non standard macros",
24 "acconfig.h": "generated portability layer",
25 "config.h": "generated portability layer",
26 "libxml.h": "internal only",
27 "testOOM.c": "out of memory tester",
28 "testOOMlib.h": "out of memory tester",
29 "testOOMlib.c": "out of memory tester",
33 "WINAPI": (0, "Windows keyword"),
34 "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
35 "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
36 "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
37 "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
38 "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
39 "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
40 "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
41 "XMLCALL": (0, "Special macro for win32 calls"),
42 "XSLTCALL": (0, "Special macro for win32 calls"),
43 "EXSLTCALL": (0, "Special macro for win32 calls"),
44 "__declspec": (3, "Windows keyword"),
45 "ATTRIBUTE_UNUSED": (0, "macro keyword"),
46 "LIBEXSLT_PUBLIC": (0, "macro keyword"),
47 "X_IN_Y": (5, "macro function builder"),
48 "XSLT_ITEM_COMMON_FIELDS": (0, "Special macro")
52 raw = string.replace(raw, '&', '&')
53 raw = string.replace(raw, '<', '<')
54 raw = string.replace(raw, '>', '>')
55 raw = string.replace(raw, "'", ''')
56 raw = string.replace(raw, '"', '"')
66 def __init__(self, name, module=None, type=None, lineno = 0,
67 info=None, extra=None):
77 r = "%s %s:" % (self.type, self.name)
80 if self.module != None:
81 r = r + " from %s" % (self.module)
83 r = r + " " + `self.info`
84 if self.extra != None:
85 r = r + " " + `self.extra`
89 def set_module(self, module):
91 def set_type(self, type):
93 def set_info(self, info):
95 def set_extra(self, extra):
97 def set_lineno(self, lineno):
99 def set_static(self, static):
104 def get_module(self):
110 def get_lineno(self):
114 def get_static(self):
117 def update(self, module, type = None, info = None, extra=None):
118 if module != None and self.module == None:
119 self.set_module(module)
120 if type != None and self.type == None:
125 self.set_extra(extra)
129 def __init__(self, name = "noname"):
131 self.identifiers = {}
142 def add_ref(self, name, module, static, type, lineno, info=None, extra=None):
143 if name[0:2] == '__':
147 d = self.identifiers[name]
148 d.update(module, type, lineno, info, extra)
150 d = identifier(name, module, type, lineno, info, extra)
151 self.identifiers[name] = d
153 if d != None and static == 1:
156 if d != None and name != None and type != None:
157 self.references[name] = d
159 def add(self, name, module, static, type, lineno, info=None, extra=None):
160 if name[0:2] == '__':
164 d = self.identifiers[name]
165 d.update(module, type, lineno, info, extra)
167 d = identifier(name, module, type, lineno, info, extra)
168 self.identifiers[name] = d
170 if d != None and static == 1:
173 if d != None and name != None and type != None:
174 if type == "function":
175 self.functions[name] = d
176 elif type == "functype":
177 self.functions[name] = d
178 elif type == "variable":
179 self.variables[name] = d
180 elif type == "include":
181 self.includes[name] = d
182 elif type == "struct":
183 self.structs[name] = d
186 elif type == "typedef":
187 self.typedefs[name] = d
188 elif type == "macro":
189 self.macros[name] = d
191 print "Unable to register type ", type
194 def merge(self, idx):
195 for id in idx.functions.keys():
197 # macro might be used to override functions or variables
200 if self.macros.has_key(id):
202 if self.functions.has_key(id):
203 print "function %s from %s redeclared in %s" % (
204 id, self.functions[id].module, idx.functions[id].module)
206 self.functions[id] = idx.functions[id]
207 self.identifiers[id] = idx.functions[id]
208 for id in idx.variables.keys():
210 # macro might be used to override functions or variables
213 if self.macros.has_key(id):
215 if self.variables.has_key(id):
216 print "variable %s from %s redeclared in %s" % (
217 id, self.variables[id].module, idx.variables[id].module)
219 self.variables[id] = idx.variables[id]
220 self.identifiers[id] = idx.variables[id]
221 for id in idx.structs.keys():
222 if self.structs.has_key(id):
223 print "struct %s from %s redeclared in %s" % (
224 id, self.structs[id].module, idx.structs[id].module)
226 self.structs[id] = idx.structs[id]
227 self.identifiers[id] = idx.structs[id]
228 for id in idx.typedefs.keys():
229 if self.typedefs.has_key(id):
230 print "typedef %s from %s redeclared in %s" % (
231 id, self.typedefs[id].module, idx.typedefs[id].module)
233 self.typedefs[id] = idx.typedefs[id]
234 self.identifiers[id] = idx.typedefs[id]
235 for id in idx.macros.keys():
237 # macro might be used to override functions or variables
240 if self.variables.has_key(id):
242 if self.functions.has_key(id):
244 if self.enums.has_key(id):
246 if self.macros.has_key(id):
247 print "macro %s from %s redeclared in %s" % (
248 id, self.macros[id].module, idx.macros[id].module)
250 self.macros[id] = idx.macros[id]
251 self.identifiers[id] = idx.macros[id]
252 for id in idx.enums.keys():
253 if self.enums.has_key(id):
254 print "enum %s from %s redeclared in %s" % (
255 id, self.enums[id].module, idx.enums[id].module)
257 self.enums[id] = idx.enums[id]
258 self.identifiers[id] = idx.enums[id]
260 def merge_public(self, idx):
261 for id in idx.functions.keys():
262 if self.functions.has_key(id):
263 up = idx.functions[id]
264 self.functions[id].update(None, up.type, up.info, up.extra)
266 # print "Function %s from %s is not declared in headers" % (
267 # id, idx.functions[id].module)
268 # TODO: do the same for variables.
270 def analyze_dict(self, type, dict):
273 for name in dict.keys():
279 print " %d %s , %d public" % (count, type, public)
281 print " %d public %s" % (count, type)
285 self.analyze_dict("functions", self.functions)
286 self.analyze_dict("variables", self.variables)
287 self.analyze_dict("structs", self.structs)
288 self.analyze_dict("typedefs", self.typedefs)
289 self.analyze_dict("macros", self.macros)
292 """A lexer for the C language; it tokenizes the input by reading and
293 analyzing it line by line"""
294 def __init__(self, input):
303 line = self.input.readline()
306 self.lineno = self.lineno + 1
307 line = string.lstrip(line)
308 line = string.rstrip(line)
311 while line[-1] == '\\':
313 n = self.input.readline()
314 self.lineno = self.lineno + 1
326 def push(self, token):
327 self.tokens.insert(0, token);
330 print "Last token: ", self.last
331 print "Token queue: ", self.tokens
332 print "Line %d end: " % (self.lineno), self.line
335 while self.tokens == []:
337 line = self.getline()
345 self.tokens = map((lambda x: ('preproc', x)),
349 if line[0] == '"' or line[0] == "'":
359 self.line = line[i+1:]
369 line = self.getline()
372 self.last = ('string', tok)
375 if l >= 2 and line[0] == '/' and line[1] == '*':
383 if line[i] == '*' and i+1 < l and line[i+1] == '/':
384 self.line = line[i+2:]
394 line = self.getline()
397 self.last = ('comment', tok)
399 if l >= 2 and line[0] == '/' and line[1] == '/':
401 self.last = ('comment', line)
405 if line[i] == '/' and i+1 < l and line[i+1] == '/':
409 if line[i] == '/' and i+1 < l and line[i+1] == '*':
413 if line[i] == '"' or line[i] == "'":
421 if line[i] == ' ' or line[i] == '\t':
425 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
426 (o >= 48 and o <= 57):
430 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
431 (o >= 48 and o <= 57) or string.find(
432 " \t(){}:;,+-*/%&!|[]=><", line[i]) == -1:
436 self.tokens.append(('name', line[s:i]))
438 if string.find("(){}:;,[]", line[i]) != -1:
439 # if line[i] == '(' or line[i] == ')' or line[i] == '{' or \
440 # line[i] == '}' or line[i] == ':' or line[i] == ';' or \
441 # line[i] == ',' or line[i] == '[' or line[i] == ']':
442 self.tokens.append(('sep', line[i]))
445 if string.find("+-*><=/%&!|.", line[i]) != -1:
446 # if line[i] == '+' or line[i] == '-' or line[i] == '*' or \
447 # line[i] == '>' or line[i] == '<' or line[i] == '=' or \
448 # line[i] == '/' or line[i] == '%' or line[i] == '&' or \
449 # line[i] == '!' or line[i] == '|' or line[i] == '.':
450 if line[i] == '.' and i + 2 < l and \
451 line[i+1] == '.' and line[i+2] == '.':
452 self.tokens.append(('name', '...'))
458 string.find("+-*><=/%&!|", line[j]) != -1):
459 # line[j] == '+' or line[j] == '-' or line[j] == '*' or \
460 # line[j] == '>' or line[j] == '<' or line[j] == '=' or \
461 # line[j] == '/' or line[j] == '%' or line[j] == '&' or \
462 # line[j] == '!' or line[j] == '|'):
463 self.tokens.append(('op', line[i:j+1]))
466 self.tokens.append(('op', line[i]))
472 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
473 (o >= 48 and o <= 57) or (
474 string.find(" \t(){}:;,+-*/%&!|[]=><", line[i]) == -1):
475 # line[i] != ' ' and line[i] != '\t' and
476 # line[i] != '(' and line[i] != ')' and
477 # line[i] != '{' and line[i] != '}' and
478 # line[i] != ':' and line[i] != ';' and
479 # line[i] != ',' and line[i] != '+' and
480 # line[i] != '-' and line[i] != '*' and
481 # line[i] != '/' and line[i] != '%' and
482 # line[i] != '&' and line[i] != '!' and
483 # line[i] != '|' and line[i] != '[' and
484 # line[i] != ']' and line[i] != '=' and
485 # line[i] != '*' and line[i] != '>' and
490 self.tokens.append(('name', line[s:i]))
493 self.tokens = self.tokens[1:]
498 """The C module parser"""
499 def __init__(self, filename, idx = None):
500 self.filename = filename
501 if len(filename) > 2 and filename[-2:] == '.h':
505 self.input = open(filename)
506 self.lexer = CLexer(self.input)
511 self.top_comment = ""
512 self.last_comment = ""
517 def collect_references(self):
520 def stop_error(self):
523 def start_error(self):
527 return self.lexer.getlineno()
529 def index_add(self, name, module, static, type, info=None, extra = None):
530 self.index.add(name, module, static, type, self.lineno(),
533 def index_add_ref(self, name, module, static, type, info=None,
535 self.index.add_ref(name, module, static, type, self.lineno(),
538 def warning(self, msg):
543 def error(self, msg, token=-1):
547 print "Parse Error: " + msg
549 print "Got token ", token
553 def debug(self, msg, token=-1):
554 print "Debug: " + msg
556 print "Got token ", token
559 def parseTopComment(self, comment):
561 lines = string.split(comment, "\n")
564 while line != "" and (line[0] == ' ' or line[0] == '\t'):
566 while line != "" and line[0] == '*':
568 while line != "" and (line[0] == ' ' or line[0] == '\t'):
571 (it, line) = string.split(line, ":", 1)
573 while line != "" and (line[0] == ' ' or line[0] == '\t'):
575 if res.has_key(item):
576 res[item] = res[item] + " " + line
581 if res.has_key(item):
582 res[item] = res[item] + " " + line
585 self.index.info = res
587 def parseComment(self, token):
588 if self.top_comment == "":
589 self.top_comment = token[1]
590 if self.comment == None or token[1][0] == '*':
591 self.comment = token[1];
593 self.comment = self.comment + token[1]
594 token = self.lexer.token()
596 if string.find(self.comment, "DOC_DISABLE") != -1:
599 if string.find(self.comment, "DOC_ENABLE") != -1:
605 # Parse a comment block associated with a macro
607 def parseMacroComment(self, name, quiet = 0):
608 if name[0:2] == '__':
614 if self.comment == None:
616 self.warning("Missing comment for macro %s" % (name))
618 if self.comment[0] != '*':
620 self.warning("Missing * in macro comment for %s" % (name))
622 lines = string.split(self.comment, '\n')
625 if lines[0] != "* %s:" % (name):
627 self.warning("Misformatted macro comment for %s" % (name))
628 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0]))
631 while lines[0] == '*':
633 while len(lines) > 0 and lines[0][0:3] == '* @':
636 (arg, desc) = string.split(l, ':', 1)
637 desc=string.strip(desc)
638 arg=string.strip(arg)
641 self.warning("Misformatted macro comment for %s" % (name))
642 self.warning(" problem with '%s'" % (lines[0]))
646 l = string.strip(lines[0])
647 while len(l) > 2 and l[0:3] != '* @':
650 desc = desc + ' ' + string.strip(l)
655 args.append((arg, desc))
656 while len(lines) > 0 and lines[0] == '*':
659 while len(lines) > 0:
661 while len(l) > 0 and l[0] == '*':
664 desc = desc + " " + l
667 desc = string.strip(desc)
671 self.warning("Macro comment for %s lack description of the macro" % (name))
676 # Parse a comment block and merge the information found in the
677 # parameter descriptions; finally returns a block as complete
680 def mergeFunctionComment(self, name, description, quiet = 0):
683 if name[0:2] == '__':
686 (ret, args) = description
690 if self.comment == None:
692 self.warning("Missing comment for function %s" % (name))
693 return(((ret[0], retdesc), args, desc))
694 if self.comment[0] != '*':
696 self.warning("Missing * in function comment for %s" % (name))
697 return(((ret[0], retdesc), args, desc))
698 lines = string.split(self.comment, '\n')
701 if lines[0] != "* %s:" % (name):
703 self.warning("Misformatted function comment for %s" % (name))
704 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0]))
705 return(((ret[0], retdesc), args, desc))
707 while len(lines) > 0 and lines[0] == '*':
710 while len(lines) > 0 and lines[0][0:3] == '* @':
713 (arg, desc) = string.split(l, ':', 1)
714 desc=string.strip(desc)
715 arg=string.strip(arg)
718 self.warning("Misformatted function comment for %s" % (name))
719 self.warning(" problem with '%s'" % (lines[0]))
723 l = string.strip(lines[0])
724 while len(l) > 2 and l[0:3] != '* @':
727 desc = desc + ' ' + string.strip(l)
734 if args[i][1] == arg:
735 args[i] = (args[i][0], arg, desc)
740 self.warning("Unable to find arg %s from function comment for %s" % (
742 while len(lines) > 0 and lines[0] == '*':
745 while len(lines) > 0:
747 while len(l) > 0 and l[0] == '*':
750 if len(l) >= 6 and l[0:6] == "return" or l[0:6] == "Return":
752 l = string.split(l, ' ', 1)[1]
755 retdesc = string.strip(l)
757 while len(lines) > 0:
759 while len(l) > 0 and l[0] == '*':
762 retdesc = retdesc + " " + l
765 desc = desc + " " + l
768 retdesc = string.strip(retdesc)
769 desc = string.strip(desc)
773 # report missing comments
777 if args[i][2] == None and args[i][0] != "void" and args[i][1] != None:
778 self.warning("Function comment for %s lack description of arg %s" % (name, args[i][1]))
780 if retdesc == "" and ret[0] != "void":
781 self.warning("Function comment for %s lack description of return value" % (name))
783 self.warning("Function comment for %s lack description of the function" % (name))
786 return(((ret[0], retdesc), args, desc))
788 def parsePreproc(self, token):
790 if name == "#include":
791 token = self.lexer.token()
794 if token[0] == 'preproc':
795 self.index_add(token[1], self.filename, not self.is_header,
797 return self.lexer.token()
799 if name == "#define":
800 token = self.lexer.token()
803 if token[0] == 'preproc':
804 # TODO macros with arguments
807 token = self.lexer.token()
808 while token != None and token[0] == 'preproc' and \
811 token = self.lexer.token()
813 name = string.split(name, '(') [0]
816 info = self.parseMacroComment(name, not self.is_header)
817 self.index_add(name, self.filename, not self.is_header,
820 token = self.lexer.token()
821 while token != None and token[0] == 'preproc' and \
823 token = self.lexer.token()
827 # Token acquisition on top of the lexer; it handles preprocessor
828 # directives and comments internally, since they are logically not part of
829 # the program structure.
834 token = self.lexer.token()
836 if token[0] == 'comment':
837 token = self.parseComment(token)
839 elif token[0] == 'preproc':
840 token = self.parsePreproc(token)
842 elif token[0] == "name" and ignored_words.has_key(token[1]):
843 (n, info) = ignored_words[token[1]]
846 token = self.lexer.token()
848 token = self.lexer.token()
857 # Parse a typedef, it records the type and its name.
859 def parseTypedef(self, token):
862 token = self.parseType(token)
864 self.error("parsing typedef")
866 base_type = self.type
868 #self.debug("end typedef type", token)
870 if token[0] == "name":
872 signature = self.signature
873 if signature != None:
874 type = string.split(type, '(')[0]
875 d = self.mergeFunctionComment(name,
876 ((type, None), signature), 1)
877 self.index_add(name, self.filename, not self.is_header,
880 if base_type == "struct":
881 self.index_add(name, self.filename, not self.is_header,
883 base_type = "struct " + name
885 self.index_add(name, self.filename, not self.is_header,
889 self.error("parsing typedef: expecting a name")
891 #self.debug("end typedef", token)
892 if token != None and token[0] == 'sep' and token[1] == ',':
895 while token != None and token[0] == "op":
896 type = type + token[1]
898 elif token != None and token[0] == 'sep' and token[1] == ';':
900 elif token != None and token[0] == 'name':
904 self.error("parsing typedef: expecting ';'", token)
910 # Parse a C code block, used for functions; it parses up to and
911 # including the balancing }
913 def parseBlock(self, token):
915 if token[0] == "sep" and token[1] == "{":
917 token = self.parseBlock(token)
918 elif token[0] == "sep" and token[1] == "}":
923 if self.collect_ref == 1:
926 if oldtok[0] == "name" and oldtok[1][0:3] == "xml":
927 if token[0] == "sep" and token[1] == "(":
928 self.index_add_ref(oldtok[1], self.filename,
931 elif token[0] == "name":
933 if token[0] == "sep" and (token[1] == ";" or
934 token[1] == "," or token[1] == "="):
935 self.index_add_ref(oldtok[1], self.filename,
937 elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_":
938 self.index_add_ref(oldtok[1], self.filename,
940 elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_":
941 self.index_add_ref(oldtok[1], self.filename,
949 # Parse a C struct definition till the balancing }
951 def parseStruct(self, token):
953 #self.debug("start parseStruct", token)
955 if token[0] == "sep" and token[1] == "{":
957 token = self.parseTypeBlock(token)
958 elif token[0] == "sep" and token[1] == "}":
959 self.struct_fields = fields
960 #self.debug("end parseStruct", token)
965 base_type = self.type
966 #self.debug("before parseType", token)
967 token = self.parseType(token)
968 #self.debug("after parseType", token)
969 if token != None and token[0] == "name":
972 if token[0] == "sep" and token[1] == ";":
975 fields.append((self.type, fname, self.comment))
978 self.error("parseStruct: expecting ;", token)
979 elif token != None and token[0] == "sep" and token[1] == "{":
981 token = self.parseTypeBlock(token)
982 if token != None and token[0] == "name":
984 if token != None and token[0] == "sep" and token[1] == ";":
987 self.error("parseStruct: expecting ;", token)
989 self.error("parseStruct: name", token)
991 self.type = base_type;
992 self.struct_fields = fields
993 #self.debug("end parseStruct", token)
998 # Parse a C enum block, parse till the balancing }
1000 def parseEnumBlock(self, token):
1006 while token != None:
1007 if token[0] == "sep" and token[1] == "{":
1008 token = self.token()
1009 token = self.parseTypeBlock(token)
1010 elif token[0] == "sep" and token[1] == "}":
1012 if self.comment != None:
1013 comment = self.comment
1015 self.enums.append((name, value, comment))
1016 token = self.token()
1018 elif token[0] == "name":
1020 if self.comment != None:
1021 comment = string.strip(self.comment)
1023 self.enums.append((name, value, comment))
1026 token = self.token()
1027 if token[0] == "op" and token[1][0] == "=":
1029 if len(token[1]) > 1:
1030 value = token[1][1:]
1031 token = self.token()
1032 while token[0] != "sep" or (token[1] != ',' and
1034 value = value + token[1]
1035 token = self.token()
1038 value = "%d" % (int(value) + 1)
1040 self.warning("Failed to compute value of enum %s" % (name))
1042 if token[0] == "sep" and token[1] == ",":
1043 token = self.token()
1045 token = self.token()
1049 # Parse a C definition block, used for structs; it parses till
1052 def parseTypeBlock(self, token):
1053 while token != None:
1054 if token[0] == "sep" and token[1] == "{":
1055 token = self.token()
1056 token = self.parseTypeBlock(token)
1057 elif token[0] == "sep" and token[1] == "}":
1058 token = self.token()
1061 token = self.token()
1065 # Parse a type: the fact that the type name can occur either after
1066 # the definition or within the definition makes it a little harder.
1067 # If inside, the name token is pushed back before returning.
1069 def parseType(self, token):
1071 self.struct_fields = []
1072 self.signature = None
1076 while token[0] == "name" and (
1077 token[1] == "const" or token[1] == "unsigned" or
1078 token[1] == "signed"):
1080 self.type = token[1]
1082 self.type = self.type + " " + token[1]
1083 token = self.token()
1085 if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
1087 self.type = token[1]
1089 self.type = self.type + " " + token[1]
1090 if token[0] == "name" and token[1] == "int":
1094 self.type = self.type + " " + tmp[1]
1096 elif token[0] == "name" and token[1] == "struct":
1098 self.type = token[1]
1100 self.type = self.type + " " + token[1]
1101 token = self.token()
1103 if token[0] == "name":
1105 token = self.token()
1106 if token != None and token[0] == "sep" and token[1] == "{":
1107 token = self.token()
1108 token = self.parseStruct(token)
1109 elif token != None and token[0] == "op" and token[1] == "*":
1110 self.type = self.type + " " + nametok[1] + " *"
1111 token = self.token()
1112 while token != None and token[0] == "op" and token[1] == "*":
1113 self.type = self.type + " *"
1114 token = self.token()
1115 if token[0] == "name":
1117 token = self.token()
1119 self.error("struct : expecting name", token)
1121 elif token != None and token[0] == "name" and nametok != None:
1122 self.type = self.type + " " + nametok[1]
1126 self.lexer.push(token)
1130 elif token[0] == "name" and token[1] == "enum":
1132 self.type = token[1]
1134 self.type = self.type + " " + token[1]
1136 token = self.token()
1137 if token != None and token[0] == "sep" and token[1] == "{":
1138 token = self.token()
1139 token = self.parseEnumBlock(token)
1141 self.error("parsing enum: expecting '{'", token)
1143 if token != None and token[0] != "name":
1144 self.lexer.push(token)
1145 token = ("name", "enum")
1147 enum_type = token[1]
1148 for enum in self.enums:
1149 self.index_add(enum[0], self.filename,
1150 not self.is_header, "enum",
1151 (enum[1], enum[2], enum_type))
1154 elif token[0] == "name":
1156 self.type = token[1]
1158 self.type = self.type + " " + token[1]
1160 self.error("parsing type %s: expecting a name" % (self.type),
1163 token = self.token()
1164 while token != None and (token[0] == "op" or
1165 token[0] == "name" and token[1] == "const"):
1166 self.type = self.type + " " + token[1]
1167 token = self.token()
1170 # if there is a parenthesis here, this means a function type
1172 if token != None and token[0] == "sep" and token[1] == '(':
1173 self.type = self.type + token[1]
1174 token = self.token()
1175 while token != None and token[0] == "op" and token[1] == '*':
1176 self.type = self.type + token[1]
1177 token = self.token()
1178 if token == None or token[0] != "name" :
1179 self.error("parsing function type, name expected", token);
1181 self.type = self.type + token[1]
1183 token = self.token()
1184 if token != None and token[0] == "sep" and token[1] == ')':
1185 self.type = self.type + token[1]
1186 token = self.token()
1187 if token != None and token[0] == "sep" and token[1] == '(':
1188 token = self.token()
1190 token = self.parseSignature(token);
1193 self.error("parsing function type, '(' expected", token);
1196 self.error("parsing function type, ')' expected", token);
1198 self.lexer.push(token)
1203 # do some lookahead for arrays
1205 if token != None and token[0] == "name":
1207 token = self.token()
1208 if token != None and token[0] == "sep" and token[1] == '[':
1209 self.type = self.type + nametok[1]
1210 while token != None and token[0] == "sep" and token[1] == '[':
1211 self.type = self.type + token[1]
1212 token = self.token()
1213 while token != None and token[0] != 'sep' and \
1214 token[1] != ']' and token[1] != ';':
1215 self.type = self.type + token[1]
1216 token = self.token()
1217 if token != None and token[0] == 'sep' and token[1] == ']':
1218 self.type = self.type + token[1]
1219 token = self.token()
1221 self.error("parsing array type, ']' expected", token);
1223 elif token != None and token[0] == "sep" and token[1] == ':':
1224 # remove :12 in case it's a limited int size
1225 token = self.token()
1226 token = self.token()
1227 self.lexer.push(token)
1233 # Parse a signature: '(' has been parsed and we scan the type definition
1234 # up to and including the ')'
1235 def parseSignature(self, token):
1237 if token != None and token[0] == "sep" and token[1] == ')':
1239 token = self.token()
1241 while token != None:
1242 token = self.parseType(token)
1243 if token != None and token[0] == "name":
1244 signature.append((self.type, token[1], None))
1245 token = self.token()
1246 elif token != None and token[0] == "sep" and token[1] == ',':
1247 token = self.token()
1249 elif token != None and token[0] == "sep" and token[1] == ')':
1250 # only the type was provided
1251 if self.type == "...":
1252 signature.append((self.type, "...", None))
1254 signature.append((self.type, None, None))
1255 if token != None and token[0] == "sep":
1257 token = self.token()
1259 elif token[1] == ')':
1260 token = self.token()
1262 self.signature = signature
1266 # Parse a global definition, be it a type, variable or function;
1267 # the extern "C" blocks are a bit nasty and require it to recurse.
1269 def parseGlobal(self, token):
1271 if token[1] == 'extern':
1272 token = self.token()
1275 if token[0] == 'string':
1277 token = self.token()
1280 if token[0] == 'sep' and token[1] == "{":
1281 token = self.token()
1282 # print 'Entering extern "C line ', self.lineno()
1283 while token != None and (token[0] != 'sep' or
1285 if token[0] == 'name':
1286 token = self.parseGlobal(token)
1289 "token %s %s unexpected at the top level" % (
1290 token[0], token[1]))
1291 token = self.parseGlobal(token)
1292 # print 'Exiting extern "C" line', self.lineno()
1293 token = self.token()
1297 elif token[1] == 'static':
1299 token = self.token()
1300 if token == None or token[0] != 'name':
1303 if token[1] == 'typedef':
1304 token = self.token()
1305 return self.parseTypedef(token)
1307 token = self.parseType(token)
1308 type_orig = self.type
1309 if token == None or token[0] != "name":
1312 self.name = token[1]
1313 token = self.token()
1314 while token != None and (token[0] == "sep" or token[0] == "op"):
1315 if token[0] == "sep":
1317 type = type + token[1]
1318 token = self.token()
1319 while token != None and (token[0] != "sep" or \
1321 type = type + token[1]
1322 token = self.token()
1324 if token != None and token[0] == "op" and token[1] == "=":
1326 # Skip the initialization of the variable
1328 token = self.token()
1329 if token[0] == 'sep' and token[1] == '{':
1330 token = self.token()
1331 token = self.parseBlock(token)
1334 while token != None and (token[0] != "sep" or \
1335 (token[1] != ';' and token[1] != ',')):
1336 token = self.token()
1338 if token == None or token[0] != "sep" or (token[1] != ';' and
1340 self.error("missing ';' or ',' after value")
1342 if token != None and token[0] == "sep":
1345 token = self.token()
1346 if type == "struct":
1347 self.index_add(self.name, self.filename,
1348 not self.is_header, "struct", self.struct_fields)
1350 self.index_add(self.name, self.filename,
1351 not self.is_header, "variable", type)
1353 elif token[1] == "(":
1354 token = self.token()
1355 token = self.parseSignature(token)
1358 if token[0] == "sep" and token[1] == ";":
1359 d = self.mergeFunctionComment(self.name,
1360 ((type, None), self.signature), 1)
1361 self.index_add(self.name, self.filename, static,
1363 token = self.token()
1364 elif token[0] == "sep" and token[1] == "{":
1365 d = self.mergeFunctionComment(self.name,
1366 ((type, None), self.signature), static)
1367 self.index_add(self.name, self.filename, static,
1369 token = self.token()
1370 token = self.parseBlock(token);
1371 elif token[1] == ',':
1373 self.index_add(self.name, self.filename, static,
1376 token = self.token()
1377 while token != None and token[0] == "sep":
1378 type = type + token[1]
1379 token = self.token()
1380 if token != None and token[0] == "name":
1381 self.name = token[1]
1382 token = self.token()
1389 self.warning("Parsing %s" % (self.filename))
1390 token = self.token()
1391 while token != None:
1392 if token[0] == 'name':
1393 token = self.parseGlobal(token)
1395 self.error("token %s %s unexpected at the top level" % (
1396 token[0], token[1]))
1397 token = self.parseGlobal(token)
1399 self.parseTopComment(self.top_comment)
1404 """A documentation builder"""
1405 def __init__(self, name, directories=['.'], excludes=[]):
1407 self.directories = directories
1408 self.excludes = excludes + ignored_files.keys()
1414 if name == 'libxml2':
1415 self.basename = 'libxml'
1417 self.basename = name
1419 def indexString(self, id, str):
1422 str = string.replace(str, "'", ' ')
1423 str = string.replace(str, '"', ' ')
1424 str = string.replace(str, "/", ' ')
1425 str = string.replace(str, '*', ' ')
1426 str = string.replace(str, "[", ' ')
1427 str = string.replace(str, "]", ' ')
1428 str = string.replace(str, "(", ' ')
1429 str = string.replace(str, ")", ' ')
1430 str = string.replace(str, "<", ' ')
1431 str = string.replace(str, '>', ' ')
1432 str = string.replace(str, "&", ' ')
1433 str = string.replace(str, '#', ' ')
1434 str = string.replace(str, ",", ' ')
1435 str = string.replace(str, '.', ' ')
1436 str = string.replace(str, ';', ' ')
1437 tokens = string.split(str)
1438 for token in tokens:
1441 if string.find(string.letters, c) < 0:
1443 elif len(token) < 3:
1446 lower = string.lower(token)
1447 # TODO: generalize this a bit
1448 if lower == 'and' or lower == 'the':
1450 elif self.xref.has_key(token):
1451 self.xref[token].append(id)
1453 self.xref[token] = [id]
1458 print "Project %s : %d headers, %d modules" % (self.name, len(self.headers.keys()), len(self.modules.keys()))
1461 def scanHeaders(self):
1462 for header in self.headers.keys():
1463 parser = CParser(header)
1464 idx = parser.parse()
1465 self.headers[header] = idx;
1468 def scanModules(self):
1469 for module in self.modules.keys():
1470 parser = CParser(module)
1471 idx = parser.parse()
1473 self.modules[module] = idx
1474 self.idx.merge_public(idx)
1477 for directory in self.directories:
1478 files = glob.glob(directory + "/*.c")
1481 for excl in self.excludes:
1482 if string.find(file, excl) != -1:
1486 self.modules[file] = None;
1487 files = glob.glob(directory + "/*.h")
1490 for excl in self.excludes:
1491 if string.find(file, excl) != -1:
1495 self.headers[file] = None;
1499 def modulename_file(self, file):
1500 module = os.path.basename(file)
1501 if module[-2:] == '.h':
1502 module = module[:-2]
1505 def serialize_enum(self, output, name):
1506 id = self.idx.enums[name]
1507 output.write(" <enum name='%s' file='%s'" % (name,
1508 self.modulename_file(id.module)))
1511 if info[0] != None and info[0] != '':
1516 output.write(" value='%s'" % (val));
1517 if info[2] != None and info[2] != '':
1518 output.write(" type='%s'" % info[2]);
1519 if info[1] != None and info[1] != '':
1520 output.write(" info='%s'" % escape(info[1]));
1521 output.write("/>\n")
1523 def serialize_macro(self, output, name):
# Write a <macro> element for the macro `name` looked up in
# self.idx.macros: an optional <info> child carrying the escaped
# description, then <arg> children, then the closing tag.
# Non-empty descriptions are also passed to self.indexString().
# NOTE(review): the loop over `args` (and any try/except around the
# unpacking) is not visible in this listing; in the <arg> lines below
# `name` and `desc` presumably refer to the current argument rather than
# to the macro itself -- confirm.
1524 id = self.idx.macros[name]
1525 output.write(" <macro name='%s' file='%s'>\n" % (name,
1526 self.modulename_file(id.module)))
# id.info is unpacked as a 2-tuple: (args, desc).
1529 (args, desc) = id.info
1530 if desc != None and desc != "":
1531 output.write(" <info>%s</info>\n" % (escape(desc)))
1532 self.indexString(name, desc)
# An <arg> with an info attribute when a description exists ...
1535 if desc != None and desc != "":
1536 output.write(" <arg name='%s' info='%s'/>\n" % (
1537 name, escape(desc)))
1538 self.indexString(name, desc)
# ... and a bare <arg> otherwise (the `else:` line is not visible here).
1540 output.write(" <arg name='%s'/>\n" % (name))
1543 output.write(" </macro>\n")
1545 def serialize_typedef(self, output, name):
# Write the XML for the typedef `name` looked up in self.idx.typedefs.
# When the typedef's info string starts with 'struct ' a <struct> element
# is started; a <typedef .../> element form also exists further down.
# NOTE(review): the `else:`/`try:`/`except:` lines and the assignment of
# `desc` are not visible in this listing, so the exact branch structure
# must be confirmed against the full file.
1546 id = self.idx.typedefs[name]
1547 if id.info[0:7] == 'struct ':
1548 output.write(" <struct name='%s' file='%s' type='%s'" % (
1549 name, self.modulename_file(id.module), id.info))
# Fields are considered only when a struct of the same name is indexed and
# its info is a tuple or a list.
1551 if self.idx.structs.has_key(name) and ( \
1552 type(self.idx.structs[name].info) == type(()) or
1553 type(self.idx.structs[name].info) == type([])):
1554 output.write(">\n");
1556 for field in self.idx.structs[name].info:
1558 self.indexString(name, desc)
# field[1] is written as the field name and field[0] as its type.
1563 output.write(" <field name='%s' type='%s' info='%s'/>\n" % (field[1] , field[0], desc))
# Diagnostic for a struct that could not be written; presumably printed from
# an exception handler -- confirm.
1565 print "Failed to serialize struct %s" % (name)
1566 output.write(" </struct>\n")
# Self-closing form of the <struct> element started above.
1568 output.write("/>\n");
# Plain typedef form.
1570 output.write(" <typedef name='%s' file='%s' type='%s'/>\n" % (
1571 name, self.modulename_file(id.module), id.info))
1573 def serialize_variable(self, output, name):
# Write a <variable .../> element for the variable `name` looked up in
# self.idx.variables, either with a type attribute taken from id.info or
# without one.
# NOTE(review): the condition choosing between the two forms is not visible
# in this listing; presumably it tests whether id.info is set -- confirm.
1574 id = self.idx.variables[name]
1576 output.write(" <variable name='%s' file='%s' type='%s'/>\n" % (
1577 name, self.modulename_file(id.module), id.info))
1579 output.write(" <variable name='%s' file='%s'/>\n" % (
1580 name, self.modulename_file(id.module)))
1582 def serialize_function(self, output, name):
# Write the XML element for the function `name` looked up in
# self.idx.functions. The element's tag is id.type; it contains an <info>
# child, a <return> child and one <arg> child per parameter. Description
# texts are also passed to self.indexString().
# NOTE(review): the `try:`/`except:`/`else:` lines and the statement run for
# 'void' parameters are not visible in this listing; presumably such
# parameters are skipped and the "Failed to save" message is printed from
# an exception handler -- confirm.
1583 id = self.idx.functions[name]
1584 output.write(" <%s name='%s' file='%s'>\n" % (id.type, name,
1585 self.modulename_file(id.module)))
# id.info is unpacked as a 3-tuple: (ret, params, desc).
1587 (ret, params, desc) = id.info
1588 output.write(" <info>%s</info>\n" % (escape(desc)))
1589 self.indexString(name, desc)
# ret[0] is the return type and ret[1] its description.
1591 if ret[0] == "void":
1592 output.write(" <return type='void'/>\n")
1594 output.write(" <return type='%s' info='%s'/>\n" % (
1595 ret[0], escape(ret[1])))
1596 self.indexString(name, ret[1])
# Each param is indexed as [0] type, [1] name, [2] description.
1597 for param in params:
1598 if param[0] == 'void':
# A parameter without a description gets an empty info attribute.
1600 if param[2] == None:
1601 output.write(" <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
1603 output.write(" <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
1604 self.indexString(name, param[2])
1606 print "Failed to save function %s info: " % name, `id.info`
# The closing tag reuses id.type, matching the opening tag.
1607 output.write(" </%s>\n" % (id.type))
1609 def serialize_exports(self, output, file):
# Write a <file> element for one header: optional Summary/Description/Author
# children taken from the header's info mapping, a <deprecated/> marker
# when the Description contains "DEPRECATED", and one <exports> child per
# symbol, grouped as macros, enums, typedefs, structs, variables, functions.
# NOTE(review): several statements (try/except lines, the bodies of the
# `has_key` checks, and any sorting of `ids`) are not visible in this
# listing; the hedged notes below must be confirmed against the full file.
1610 module = self.modulename_file(file)
1611 output.write(" <file name='%s'>\n" % (module))
# `dict` is the per-header index stored in self.headers.
1612 dict = self.headers[file]
1613 if dict.info != None:
1614 for data in ('Summary', 'Description', 'Author'):
1616 output.write(" <%s>%s</%s>\n" % (
1618 escape(dict.info[data]),
1619 string.lower(data)))
# Reported when a key is unavailable; presumably from an exception handler.
1621 print "Header %s lacks a %s description" % (module, data)
1622 if dict.info.has_key('Description'):
1623 desc = dict.info['Description']
1624 if string.find(desc, "DEPRECATED") != -1:
1625 output.write(" <deprecated/>\n")
1627 ids = dict.macros.keys()
1629 for id in uniq(ids):
1630 # Macros are sometime used to masquerade other types.
# A macro name that is also indexed as another symbol kind is tested for
# here; the action taken is not visible (presumably it is skipped so the
# symbol is exported under its other kind only).
1631 if dict.functions.has_key(id):
1633 if dict.variables.has_key(id):
1635 if dict.typedefs.has_key(id):
1637 if dict.structs.has_key(id):
1639 if dict.enums.has_key(id):
1641 output.write(" <exports symbol='%s' type='macro'/>\n" % (id))
1642 ids = dict.enums.keys()
1644 for id in uniq(ids):
1645 output.write(" <exports symbol='%s' type='enum'/>\n" % (id))
1646 ids = dict.typedefs.keys()
1648 for id in uniq(ids):
1649 output.write(" <exports symbol='%s' type='typedef'/>\n" % (id))
1650 ids = dict.structs.keys()
1652 for id in uniq(ids):
1653 output.write(" <exports symbol='%s' type='struct'/>\n" % (id))
1654 ids = dict.variables.keys()
1656 for id in uniq(ids):
1657 output.write(" <exports symbol='%s' type='variable'/>\n" % (id))
1658 ids = dict.functions.keys()
1660 for id in uniq(ids):
1661 output.write(" <exports symbol='%s' type='function'/>\n" % (id))
1662 output.write(" </file>\n")
1664 def serialize_xrefs_files(self, output):
# For every header in self.headers write a <file> element containing one
# <ref> per symbol name found in that header's index.
# NOTE(review): the loop over `ids` (and any sorting of `headers`/`ids`) is
# not visible in this listing -- confirm.
1665 headers = self.headers.keys()
1667 for file in headers:
1668 module = self.modulename_file(file)
1669 output.write(" <file name='%s'>\n" % (module))
1670 dict = self.headers[file]
# Names from all six symbol tables are concatenated and passed through uniq().
1671 ids = uniq(dict.functions.keys() + dict.variables.keys() + \
1672 dict.macros.keys() + dict.typedefs.keys() + \
1673 dict.structs.keys() + dict.enums.keys())
1676 output.write(" <ref name='%s'/>\n" % (id))
1677 output.write(" </file>\n")
1680 def serialize_xrefs_functions(self, output):
# Cross-reference functions by parameter type: build `funcs`, a mapping from
# a parameter's type (param[0]) to the list of function names that take it,
# then write one <type> element per type with a <ref> per function.
# NOTE(review): the initialisation of `funcs`, the loops over its keys and
# values, and the statements executed for skipped cases are not visible in
# this listing -- confirm.
1682 for name in self.idx.functions.keys():
1683 id = self.idx.functions[name]
1685 (ret, params, desc) = id.info
1686 for param in params:
1687 if param[0] == 'void':
# Append to an existing list, or start a new one for a type seen first here.
1689 if funcs.has_key(param[0]):
1690 funcs[param[0]].append(name)
1692 funcs[param[0]] = [name]
# Empty and very common types are tested for here; presumably they are
# left out of the output.
1698 if type == '' or type == 'void' or type == "int" or \
1699 type == "char *" or type == "const char *" :
1701 output.write(" <type name='%s'>\n" % (type))
1704 pid = '' # not sure why we have dups, but get rid of them!
1707 output.write(" <ref name='%s'/>\n" % (id))
1709 output.write(" </type>\n")
1711 def serialize_xrefs_constructors(self, output):
# Cross-reference functions by return type: build `funcs`, a mapping from a
# function's return type (ret[0]) to the list of function names returning
# it, then write one <type> element per type with a <ref> per function.
# NOTE(review): the initialisation of `funcs`, the loops over its keys and
# values, and the statements executed for skipped cases are not visible in
# this listing -- confirm.
1713 for name in self.idx.functions.keys():
1714 id = self.idx.functions[name]
1716 (ret, params, desc) = id.info
1717 if ret[0] == "void":
# Append to an existing list, or start a new one for a type seen first here.
1719 if funcs.has_key(ret[0]):
1720 funcs[ret[0]].append(name)
1722 funcs[ret[0]] = [name]
# Empty and very common types are tested for here; presumably they are
# left out of the output.
1728 if type == '' or type == 'void' or type == "int" or \
1729 type == "char *" or type == "const char *" :
1731 output.write(" <type name='%s'>\n" % (type))
1735 output.write(" <ref name='%s'/>\n" % (id))
1736 output.write(" </type>\n")
1738 def serialize_xrefs_alpha(self, output):
# Write the identifiers of self.idx.identifiers grouped into <letter>
# elements, each holding one <ref> per identifier.
# NOTE(review): the loop over `ids`, the tracking of the current `letter`
# and the conditions around the open/close writes are not visible in this
# listing; presumably a new <letter> starts whenever the identifier's first
# character changes -- confirm.
1740 ids = self.idx.identifiers.keys()
# Closes the previous group before a new one is opened.
1745 output.write(" </letter>\n")
1747 output.write(" <letter name='%s'>\n" % (letter))
1748 output.write(" <ref name='%s'/>\n" % (id))
# Closes the last group that was opened.
1750 output.write(" </letter>\n")
1752 def serialize_xrefs_references(self, output):
# Write one <reference> element per identifier in self.idx.identifiers; the
# href is assembled from 'html/', self.basename, '-', the module name and
# '.html#' followed by a fragment.
# NOTE(review): the loop over `typ`, the assignment of `module`, and the end
# of the final expression (the fragment after '.html#') are not visible in
# this listing -- confirm.
1753 typ = self.idx.identifiers.keys()
1756 idf = self.idx.identifiers[id]
1758 output.write(" <reference name='%s' href='%s'/>\n" % (id,
1759 'html/' + self.basename + '-' +
1760 self.modulename_file(module) + '.html#' +
1763 def serialize_xrefs_index(self, output):
# Write the word index as <chunk> elements containing <letter> elements
# containing <word> elements with their <ref> children, followed by a
# <chunks> summary listing each chunk's name, start letter and end letter.
# NOTE(review): the initialisation of `index`, `chunks`, `chunk`, `count`,
# `letter` and `first_letter`, the main loop header, and several branch
# bodies are not visible in this listing; the notes below describe only
# the visible statements -- confirm the rest against the full file.
# Words with more than 30 entries are tested for here; the action taken is
# not visible (presumably they are left out as too common).
1772 if len(index[id]) > 30:
# A new chunk starts at the first word or once `count` exceeds 200.
1775 if letter == None or count > 200:
1777 output.write(" </letter>\n")
1778 output.write(" </chunk>\n")
# Record the chunk just finished: [name, first letter, last letter].
1780 chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
1781 output.write(" <chunk name='chunk%s'>\n" % (chunk))
1782 first_letter = id[0]
1784 elif letter != None:
1785 output.write(" </letter>\n")
1787 output.write(" <letter name='%s'>\n" % (letter))
1788 output.write(" <word name='%s'>\n" % (id))
1792 for token in tokens:
1796 output.write(" <ref name='%s'/>\n" % (token))
1798 output.write(" </word>\n")
# Close the last open <letter> and <chunk> and record that final chunk.
1800 output.write(" </letter>\n")
1801 output.write(" </chunk>\n")
1803 chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
1804 output.write(" <chunks>\n")
# Each recorded chunk is written as name / start / end.
1806 output.write(" <chunk name='%s' start='%s' end='%s'/>\n" % (
1807 ch[0], ch[1], ch[2]))
1808 output.write(" </chunks>\n")
1810 def serialize_xrefs(self, output):
# Write the six cross-reference sections to `output`, in this order:
# <references>, <alpha>, <constructors>, <functions>, <files>, <index>.
# Each section is an opening tag, the output of the matching
# serialize_xrefs_* method, and a closing tag.
1811 output.write(" <references>\n")
1812 self.serialize_xrefs_references(output)
1813 output.write(" </references>\n")
1814 output.write(" <alpha>\n")
1815 self.serialize_xrefs_alpha(output)
1816 output.write(" </alpha>\n")
1817 output.write(" <constructors>\n")
1818 self.serialize_xrefs_constructors(output)
1819 output.write(" </constructors>\n")
1820 output.write(" <functions>\n")
1821 self.serialize_xrefs_functions(output)
1822 output.write(" </functions>\n")
1823 output.write(" <files>\n")
1824 self.serialize_xrefs_files(output)
1825 output.write(" </files>\n")
1826 output.write(" <index>\n")
1827 self.serialize_xrefs_index(output)
1828 output.write(" </index>\n")
1830 def serialize(self, outdir):
# Write two XML files into `outdir`: "<name>-api.xml" with the API
# description (a <files> section built by serialize_exports() and a
# <symbols> section listing macros, enums, typedefs, variables and
# functions) and "<name>-refs.xml" with the cross references produced by
# serialize_xrefs().
# NOTE(review): any sorting of the key lists, the loop header over `enums`,
# and any closing of the output files are not visible in this listing --
# confirm against the full file.
# '%' binds tighter than '+', so `outdir` is prepended as-is: it must
# already end with a path separator.
1831 filename = outdir + "%s-api.xml" % self.name
1832 print "Saving XML description %s" % (filename)
1833 output = open(filename, "w")
1834 output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
1835 output.write("<api name='%s'>\n" % self.name)
1836 output.write(" <files>\n")
1837 headers = self.headers.keys()
1839 for file in headers:
1840 self.serialize_exports(output, file)
1841 output.write(" </files>\n")
1842 output.write(" <symbols>\n")
1843 macros = self.idx.macros.keys()
1845 for macro in macros:
1846 self.serialize_macro(output, macro)
1847 enums = self.idx.enums.keys()
1850 self.serialize_enum(output, enum)
1851 typedefs = self.idx.typedefs.keys()
1853 for typedef in typedefs:
1854 self.serialize_typedef(output, typedef)
1855 variables = self.idx.variables.keys()
1857 for variable in variables:
1858 self.serialize_variable(output, variable)
1859 functions = self.idx.functions.keys()
1861 for function in functions:
1862 self.serialize_function(output, function)
1863 output.write(" </symbols>\n")
1864 output.write("</api>\n")
# Second file: the cross references; `filename` and `output` are rebound.
1867 filename = outdir + "%s-refs.xml" % self.name
1868 print "Saving XML Cross References %s" % (filename)
1869 output = open(filename, "w")
1870 output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
1871 output.write("<apirefs name='%s'>\n" % self.name)
1872 self.serialize_xrefs(output)
1873 output.write("</apirefs>\n")
# Decide which project to document by probing for marker source files with
# glob, build the matching docBuilder, and serialize it into the current
# directory; when the libexslt sources are present a second builder is
# created and serialized into "EXSLT/".
# NOTE(review): the enclosing `def` line, the `else:` before the failure
# message, and the calls made on the builders between construction and
# serialize() are not visible in this listing. The docBuilder arguments
# look like (project name, directories to scan, exclude patterns) --
# confirm against the constructor.
# Case 1: running from the libxml2 source directory itself.
1879 if glob.glob("parser.c") != [] :
1880 print "Rebuilding API description for libxml2"
1881 builder = docBuilder("libxml2", [".", "."],
1882 ["xmlwin32version.h", "tst.c"])
# Case 2: running from a direct subdirectory of the libxml2 sources.
1883 elif glob.glob("../parser.c") != [] :
1884 print "Rebuilding API description for libxml2"
1885 builder = docBuilder("libxml2", ["..", "../include/libxml"],
1886 ["xmlwin32version.h", "tst.c"])
# Case 3: running from a direct subdirectory of the libxslt sources.
1887 elif glob.glob("../libxslt/transform.c") != [] :
1888 print "Rebuilding API description for libxslt"
1889 builder = docBuilder("libxslt", ["../libxslt"],
1890 ["win32config.h", "libxslt.h", "tst.c"])
# No marker file was found.
1892 print "rebuild() failed, unable to guess the module"
# The directory argument ends with '/' because serialize() concatenates it
# directly with the file name.
1896 builder.serialize("./")
1897 if glob.glob("../libexslt/exslt.c") != [] :
1898 extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
1901 extra.serialize("EXSLT/")
1905 # for debugging the parser
1907 def parse(filename):
# Run CParser over the single file `filename` and bind the result of
# parser.parse() to `idx`.
# NOTE(review): the return statement is not visible in this listing;
# presumably `idx` is returned -- confirm.
1908 parser = CParser(filename)
1909 idx = parser.parse()
1912 if __name__ == "__main__":