3 # This is the API builder: it parses the C sources and builds the
4 # formal API description in XML.
6 # See Copyright for the status of this software.
17 # C parser analysis code
20 "trio": "too many non standard macros",
21 "trio.c": "too many non standard macros",
22 "trionan.c": "too many non standard macros",
23 "triostr.c": "too many non standard macros",
24 "acconfig.h": "generated portability layer",
25 "config.h": "generated portability layer",
26 "libxml.h": "internal only",
27 "testOOM.c": "out of memory tester",
28 "testOOMlib.h": "out of memory tester",
29 "testOOMlib.c": "out of memory tester",
33 "WINAPI": (0, "Windows keyword"),
34 "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
35 "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
36 "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
37 "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
38 "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
39 "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
40 "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
41 "XMLCALL": (0, "Special macro for win32 calls"),
42 "XSLTCALL": (0, "Special macro for win32 calls"),
43 "EXSLTCALL": (0, "Special macro for win32 calls"),
44 "__declspec": (3, "Windows keyword"),
45 "ATTRIBUTE_UNUSED": (0, "macro keyword"),
46 "LIBEXSLT_PUBLIC": (0, "macro keyword"),
47 "X_IN_Y": (5, "macro function builder"),
48 "XSLT_ITEM_COMMON_FIELDS": (0, "Special macro"),
49 "CALLBACK": (0, "macro keyword"),
50 "LIBXSLT_ATTR_FORMAT": (5, "macro for gcc printf args checking extension"),
54 raw = string.replace(raw, '&', '&amp;')
55 raw = string.replace(raw, '<', '&lt;')
56 raw = string.replace(raw, '>', '&gt;')
57 raw = string.replace(raw, "'", '&apos;')
58 raw = string.replace(raw, '"', '&quot;')
68 def __init__(self, name, module=None, type=None, lineno = 0,
69 info=None, extra=None):
79 r = "%s %s:" % (self.type, self.name)
82 if self.module != None:
83 r = r + " from %s" % (self.module)
85 r = r + " " + `self.info`
86 if self.extra != None:
87 r = r + " " + `self.extra`
91 def set_module(self, module):
93 def set_type(self, type):
95 def set_info(self, info):
97 def set_extra(self, extra):
99 def set_lineno(self, lineno):
101 def set_static(self, static):
106 def get_module(self):
112 def get_lineno(self):
116 def get_static(self):
119 def update(self, module, type = None, info = None, extra=None):
120 if module != None and self.module == None:
121 self.set_module(module)
122 if type != None and self.type == None:
127 self.set_extra(extra)
131 def __init__(self, name = "noname"):
133 self.identifiers = {}
144 def add_ref(self, name, module, static, type, lineno, info=None, extra=None):
145 if name[0:2] == '__':
149 d = self.identifiers[name]
150 d.update(module, type, lineno, info, extra)
152 d = identifier(name, module, type, lineno, info, extra)
153 self.identifiers[name] = d
155 if d != None and static == 1:
158 if d != None and name != None and type != None:
159 self.references[name] = d
161 def add(self, name, module, static, type, lineno, info=None, extra=None):
162 if name[0:2] == '__':
166 d = self.identifiers[name]
167 d.update(module, type, lineno, info, extra)
169 d = identifier(name, module, type, lineno, info, extra)
170 self.identifiers[name] = d
172 if d != None and static == 1:
175 if d != None and name != None and type != None:
176 if type == "function":
177 self.functions[name] = d
178 elif type == "functype":
179 self.functions[name] = d
180 elif type == "variable":
181 self.variables[name] = d
182 elif type == "include":
183 self.includes[name] = d
184 elif type == "struct":
185 self.structs[name] = d
188 elif type == "typedef":
189 self.typedefs[name] = d
190 elif type == "macro":
191 self.macros[name] = d
193 print "Unable to register type ", type
196 def merge(self, idx):
197 for id in idx.functions.keys():
199 # macro might be used to override functions or variables
202 if self.macros.has_key(id):
204 if self.functions.has_key(id):
205 print "function %s from %s redeclared in %s" % (
206 id, self.functions[id].module, idx.functions[id].module)
208 self.functions[id] = idx.functions[id]
209 self.identifiers[id] = idx.functions[id]
210 for id in idx.variables.keys():
212 # macro might be used to override functions or variables
215 if self.macros.has_key(id):
217 if self.variables.has_key(id):
218 print "variable %s from %s redeclared in %s" % (
219 id, self.variables[id].module, idx.variables[id].module)
221 self.variables[id] = idx.variables[id]
222 self.identifiers[id] = idx.variables[id]
223 for id in idx.structs.keys():
224 if self.structs.has_key(id):
225 print "struct %s from %s redeclared in %s" % (
226 id, self.structs[id].module, idx.structs[id].module)
228 self.structs[id] = idx.structs[id]
229 self.identifiers[id] = idx.structs[id]
230 for id in idx.typedefs.keys():
231 if self.typedefs.has_key(id):
232 print "typedef %s from %s redeclared in %s" % (
233 id, self.typedefs[id].module, idx.typedefs[id].module)
235 self.typedefs[id] = idx.typedefs[id]
236 self.identifiers[id] = idx.typedefs[id]
237 for id in idx.macros.keys():
239 # macro might be used to override functions or variables
242 if self.variables.has_key(id):
244 if self.functions.has_key(id):
246 if self.enums.has_key(id):
248 if self.macros.has_key(id):
249 print "macro %s from %s redeclared in %s" % (
250 id, self.macros[id].module, idx.macros[id].module)
252 self.macros[id] = idx.macros[id]
253 self.identifiers[id] = idx.macros[id]
254 for id in idx.enums.keys():
255 if self.enums.has_key(id):
256 print "enum %s from %s redeclared in %s" % (
257 id, self.enums[id].module, idx.enums[id].module)
259 self.enums[id] = idx.enums[id]
260 self.identifiers[id] = idx.enums[id]
262 def merge_public(self, idx):
263 for id in idx.functions.keys():
264 if self.functions.has_key(id):
265 up = idx.functions[id]
266 self.functions[id].update(None, up.type, up.info, up.extra)
268 # print "Function %s from %s is not declared in headers" % (
269 # id, idx.functions[id].module)
270 # TODO: do the same for variables.
272 def analyze_dict(self, type, dict):
275 for name in dict.keys():
281 print " %d %s , %d public" % (count, type, public)
283 print " %d public %s" % (count, type)
287 self.analyze_dict("functions", self.functions)
288 self.analyze_dict("variables", self.variables)
289 self.analyze_dict("structs", self.structs)
290 self.analyze_dict("typedefs", self.typedefs)
291 self.analyze_dict("macros", self.macros)
294 """A lexer for the C language, tokenize the input by reading and
295 analyzing it line by line"""
296 def __init__(self, input):
305 line = self.input.readline()
308 self.lineno = self.lineno + 1
309 line = string.lstrip(line)
310 line = string.rstrip(line)
313 while line[-1] == '\\':
315 n = self.input.readline()
316 self.lineno = self.lineno + 1
328 def push(self, token):
329 self.tokens.insert(0, token);
332 print "Last token: ", self.last
333 print "Token queue: ", self.tokens
334 print "Line %d end: " % (self.lineno), self.line
337 while self.tokens == []:
339 line = self.getline()
347 self.tokens = map((lambda x: ('preproc', x)),
351 if line[0] == '"' or line[0] == "'":
361 self.line = line[i+1:]
371 line = self.getline()
374 self.last = ('string', tok)
377 if l >= 2 and line[0] == '/' and line[1] == '*':
385 if line[i] == '*' and i+1 < l and line[i+1] == '/':
386 self.line = line[i+2:]
396 line = self.getline()
399 self.last = ('comment', tok)
401 if l >= 2 and line[0] == '/' and line[1] == '/':
403 self.last = ('comment', line)
407 if line[i] == '/' and i+1 < l and line[i+1] == '/':
411 if line[i] == '/' and i+1 < l and line[i+1] == '*':
415 if line[i] == '"' or line[i] == "'":
423 if line[i] == ' ' or line[i] == '\t':
427 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
428 (o >= 48 and o <= 57):
432 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
433 (o >= 48 and o <= 57) or string.find(
434 " \t(){}:;,+-*/%&!|[]=><", line[i]) == -1:
438 self.tokens.append(('name', line[s:i]))
440 if string.find("(){}:;,[]", line[i]) != -1:
441 # if line[i] == '(' or line[i] == ')' or line[i] == '{' or \
442 # line[i] == '}' or line[i] == ':' or line[i] == ';' or \
443 # line[i] == ',' or line[i] == '[' or line[i] == ']':
444 self.tokens.append(('sep', line[i]))
447 if string.find("+-*><=/%&!|.", line[i]) != -1:
448 # if line[i] == '+' or line[i] == '-' or line[i] == '*' or \
449 # line[i] == '>' or line[i] == '<' or line[i] == '=' or \
450 # line[i] == '/' or line[i] == '%' or line[i] == '&' or \
451 # line[i] == '!' or line[i] == '|' or line[i] == '.':
452 if line[i] == '.' and i + 2 < l and \
453 line[i+1] == '.' and line[i+2] == '.':
454 self.tokens.append(('name', '...'))
460 string.find("+-*><=/%&!|", line[j]) != -1):
461 # line[j] == '+' or line[j] == '-' or line[j] == '*' or \
462 # line[j] == '>' or line[j] == '<' or line[j] == '=' or \
463 # line[j] == '/' or line[j] == '%' or line[j] == '&' or \
464 # line[j] == '!' or line[j] == '|'):
465 self.tokens.append(('op', line[i:j+1]))
468 self.tokens.append(('op', line[i]))
474 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
475 (o >= 48 and o <= 57) or (
476 string.find(" \t(){}:;,+-*/%&!|[]=><", line[i]) == -1):
477 # line[i] != ' ' and line[i] != '\t' and
478 # line[i] != '(' and line[i] != ')' and
479 # line[i] != '{' and line[i] != '}' and
480 # line[i] != ':' and line[i] != ';' and
481 # line[i] != ',' and line[i] != '+' and
482 # line[i] != '-' and line[i] != '*' and
483 # line[i] != '/' and line[i] != '%' and
484 # line[i] != '&' and line[i] != '!' and
485 # line[i] != '|' and line[i] != '[' and
486 # line[i] != ']' and line[i] != '=' and
487 # line[i] != '*' and line[i] != '>' and
492 self.tokens.append(('name', line[s:i]))
495 self.tokens = self.tokens[1:]
500 """The C module parser"""
501 def __init__(self, filename, idx = None):
502 self.filename = filename
503 if len(filename) > 2 and filename[-2:] == '.h':
507 self.input = open(filename)
508 self.lexer = CLexer(self.input)
513 self.top_comment = ""
514 self.last_comment = ""
519 def collect_references(self):
522 def stop_error(self):
525 def start_error(self):
529 return self.lexer.getlineno()
531 def index_add(self, name, module, static, type, info=None, extra = None):
532 self.index.add(name, module, static, type, self.lineno(),
535 def index_add_ref(self, name, module, static, type, info=None,
537 self.index.add_ref(name, module, static, type, self.lineno(),
540 def warning(self, msg):
545 def error(self, msg, token=-1):
549 print "Parse Error: " + msg
551 print "Got token ", token
555 def debug(self, msg, token=-1):
556 print "Debug: " + msg
558 print "Got token ", token
561 def parseTopComment(self, comment):
563 lines = string.split(comment, "\n")
566 while line != "" and (line[0] == ' ' or line[0] == '\t'):
568 while line != "" and line[0] == '*':
570 while line != "" and (line[0] == ' ' or line[0] == '\t'):
573 (it, line) = string.split(line, ":", 1)
575 while line != "" and (line[0] == ' ' or line[0] == '\t'):
577 if res.has_key(item):
578 res[item] = res[item] + " " + line
583 if res.has_key(item):
584 res[item] = res[item] + " " + line
587 self.index.info = res
589 def parseComment(self, token):
590 if self.top_comment == "":
591 self.top_comment = token[1]
592 if self.comment == None or token[1][0] == '*':
593 self.comment = token[1];
595 self.comment = self.comment + token[1]
596 token = self.lexer.token()
598 if string.find(self.comment, "DOC_DISABLE") != -1:
601 if string.find(self.comment, "DOC_ENABLE") != -1:
607 # Parse a comment block associated with a macro
609 def parseMacroComment(self, name, quiet = 0):
610 if name[0:2] == '__':
616 if self.comment == None:
618 self.warning("Missing comment for macro %s" % (name))
620 if self.comment[0] != '*':
622 self.warning("Missing * in macro comment for %s" % (name))
624 lines = string.split(self.comment, '\n')
627 if lines[0] != "* %s:" % (name):
629 self.warning("Misformatted macro comment for %s" % (name))
630 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0]))
633 while lines[0] == '*':
635 while len(lines) > 0 and lines[0][0:3] == '* @':
638 (arg, desc) = string.split(l, ':', 1)
639 desc=string.strip(desc)
640 arg=string.strip(arg)
643 self.warning("Misformatted macro comment for %s" % (name))
644 self.warning(" problem with '%s'" % (lines[0]))
648 l = string.strip(lines[0])
649 while len(l) > 2 and l[0:3] != '* @':
652 desc = desc + ' ' + string.strip(l)
657 args.append((arg, desc))
658 while len(lines) > 0 and lines[0] == '*':
661 while len(lines) > 0:
663 while len(l) > 0 and l[0] == '*':
666 desc = desc + " " + l
669 desc = string.strip(desc)
673 self.warning("Macro comment for %s lack description of the macro" % (name))
678 # Parse a comment block and merge the information found in the
679 # parameter descriptions; finally return a block as complete
682 def mergeFunctionComment(self, name, description, quiet = 0):
685 if name[0:2] == '__':
688 (ret, args) = description
692 if self.comment == None:
694 self.warning("Missing comment for function %s" % (name))
695 return(((ret[0], retdesc), args, desc))
696 if self.comment[0] != '*':
698 self.warning("Missing * in function comment for %s" % (name))
699 return(((ret[0], retdesc), args, desc))
700 lines = string.split(self.comment, '\n')
703 if lines[0] != "* %s:" % (name):
705 self.warning("Misformatted function comment for %s" % (name))
706 self.warning(" Expecting '* %s:' got '%s'" % (name, lines[0]))
707 return(((ret[0], retdesc), args, desc))
709 while len(lines) > 0 and lines[0] == '*':
712 while len(lines) > 0 and lines[0][0:3] == '* @':
715 (arg, desc) = string.split(l, ':', 1)
716 desc=string.strip(desc)
717 arg=string.strip(arg)
720 self.warning("Misformatted function comment for %s" % (name))
721 self.warning(" problem with '%s'" % (lines[0]))
725 l = string.strip(lines[0])
726 while len(l) > 2 and l[0:3] != '* @':
729 desc = desc + ' ' + string.strip(l)
736 if args[i][1] == arg:
737 args[i] = (args[i][0], arg, desc)
742 self.warning("Unable to find arg %s from function comment for %s" % (
744 while len(lines) > 0 and lines[0] == '*':
747 while len(lines) > 0:
749 while len(l) > 0 and l[0] == '*':
752 if len(l) >= 6 and l[0:6] == "return" or l[0:6] == "Return":
754 l = string.split(l, ' ', 1)[1]
757 retdesc = string.strip(l)
759 while len(lines) > 0:
761 while len(l) > 0 and l[0] == '*':
764 retdesc = retdesc + " " + l
767 desc = desc + " " + l
770 retdesc = string.strip(retdesc)
771 desc = string.strip(desc)
775 # report missing comments
779 if args[i][2] == None and args[i][0] != "void" and args[i][1] != None:
780 self.warning("Function comment for %s lack description of arg %s" % (name, args[i][1]))
782 if retdesc == "" and ret[0] != "void":
783 self.warning("Function comment for %s lack description of return value" % (name))
785 self.warning("Function comment for %s lack description of the function" % (name))
788 return(((ret[0], retdesc), args, desc))
790 def parsePreproc(self, token):
792 if name == "#include":
793 token = self.lexer.token()
796 if token[0] == 'preproc':
797 self.index_add(token[1], self.filename, not self.is_header,
799 return self.lexer.token()
801 if name == "#define":
802 token = self.lexer.token()
805 if token[0] == 'preproc':
806 # TODO macros with arguments
809 token = self.lexer.token()
810 while token != None and token[0] == 'preproc' and \
813 token = self.lexer.token()
815 name = string.split(name, '(') [0]
818 info = self.parseMacroComment(name, not self.is_header)
819 self.index_add(name, self.filename, not self.is_header,
822 token = self.lexer.token()
823 while token != None and token[0] == 'preproc' and \
825 token = self.lexer.token()
829 # Token acquisition on top of the lexer; it handles the preprocessor
830 # and comments internally, since they are logically not part of
831 # the program structure.
836 token = self.lexer.token()
838 if token[0] == 'comment':
839 token = self.parseComment(token)
841 elif token[0] == 'preproc':
842 token = self.parsePreproc(token)
844 elif token[0] == "name" and ignored_words.has_key(token[1]):
845 (n, info) = ignored_words[token[1]]
848 token = self.lexer.token()
850 token = self.lexer.token()
859 # Parse a typedef, it records the type and its name.
861 def parseTypedef(self, token):
864 token = self.parseType(token)
866 self.error("parsing typedef")
868 base_type = self.type
870 #self.debug("end typedef type", token)
872 if token[0] == "name":
874 signature = self.signature
875 if signature != None:
876 type = string.split(type, '(')[0]
877 d = self.mergeFunctionComment(name,
878 ((type, None), signature), 1)
879 self.index_add(name, self.filename, not self.is_header,
882 if base_type == "struct":
883 self.index_add(name, self.filename, not self.is_header,
885 base_type = "struct " + name
887 self.index_add(name, self.filename, not self.is_header,
891 self.error("parsing typedef: expecting a name")
893 #self.debug("end typedef", token)
894 if token != None and token[0] == 'sep' and token[1] == ',':
897 while token != None and token[0] == "op":
898 type = type + token[1]
900 elif token != None and token[0] == 'sep' and token[1] == ';':
902 elif token != None and token[0] == 'name':
906 self.error("parsing typedef: expecting ';'", token)
912 # Parse a C code block, used for functions; it parses up to
913 # and including the balancing }.
915 def parseBlock(self, token):
917 if token[0] == "sep" and token[1] == "{":
919 token = self.parseBlock(token)
920 elif token[0] == "sep" and token[1] == "}":
925 if self.collect_ref == 1:
928 if oldtok[0] == "name" and oldtok[1][0:3] == "xml":
929 if token[0] == "sep" and token[1] == "(":
930 self.index_add_ref(oldtok[1], self.filename,
933 elif token[0] == "name":
935 if token[0] == "sep" and (token[1] == ";" or
936 token[1] == "," or token[1] == "="):
937 self.index_add_ref(oldtok[1], self.filename,
939 elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_":
940 self.index_add_ref(oldtok[1], self.filename,
942 elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_":
943 self.index_add_ref(oldtok[1], self.filename,
951 # Parse a C struct definition till the balancing }
953 def parseStruct(self, token):
955 #self.debug("start parseStruct", token)
957 if token[0] == "sep" and token[1] == "{":
959 token = self.parseTypeBlock(token)
960 elif token[0] == "sep" and token[1] == "}":
961 self.struct_fields = fields
962 #self.debug("end parseStruct", token)
967 base_type = self.type
968 #self.debug("before parseType", token)
969 token = self.parseType(token)
970 #self.debug("after parseType", token)
971 if token != None and token[0] == "name":
974 if token[0] == "sep" and token[1] == ";":
977 fields.append((self.type, fname, self.comment))
980 self.error("parseStruct: expecting ;", token)
981 elif token != None and token[0] == "sep" and token[1] == "{":
983 token = self.parseTypeBlock(token)
984 if token != None and token[0] == "name":
986 if token != None and token[0] == "sep" and token[1] == ";":
989 self.error("parseStruct: expecting ;", token)
991 self.error("parseStruct: name", token)
993 self.type = base_type;
994 self.struct_fields = fields
995 #self.debug("end parseStruct", token)
1000 # Parse a C enum block up to the balancing }
1002 def parseEnumBlock(self, token):
1008 while token != None:
1009 if token[0] == "sep" and token[1] == "{":
1010 token = self.token()
1011 token = self.parseTypeBlock(token)
1012 elif token[0] == "sep" and token[1] == "}":
1014 if self.comment != None:
1015 comment = self.comment
1017 self.enums.append((name, value, comment))
1018 token = self.token()
1020 elif token[0] == "name":
1022 if self.comment != None:
1023 comment = string.strip(self.comment)
1025 self.enums.append((name, value, comment))
1028 token = self.token()
1029 if token[0] == "op" and token[1][0] == "=":
1031 if len(token[1]) > 1:
1032 value = token[1][1:]
1033 token = self.token()
1034 while token[0] != "sep" or (token[1] != ',' and
1036 value = value + token[1]
1037 token = self.token()
1040 value = "%d" % (int(value) + 1)
1042 self.warning("Failed to compute value of enum %s" % (name))
1044 if token[0] == "sep" and token[1] == ",":
1045 token = self.token()
1047 token = self.token()
1051 # Parse a C definition block, used for structs; it parses till
1054 def parseTypeBlock(self, token):
1055 while token != None:
1056 if token[0] == "sep" and token[1] == "{":
1057 token = self.token()
1058 token = self.parseTypeBlock(token)
1059 elif token[0] == "sep" and token[1] == "}":
1060 token = self.token()
1063 token = self.token()
1067 # Parse a type: the fact that the type name can occur either after
1068 # the definition or within the definition makes it a little harder;
1069 # if inside, the name token is pushed back before returning.
1071 def parseType(self, token):
1073 self.struct_fields = []
1074 self.signature = None
1078 while token[0] == "name" and (
1079 token[1] == "const" or token[1] == "unsigned" or
1080 token[1] == "signed"):
1082 self.type = token[1]
1084 self.type = self.type + " " + token[1]
1085 token = self.token()
1087 if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
1089 self.type = token[1]
1091 self.type = self.type + " " + token[1]
1092 if token[0] == "name" and token[1] == "int":
1096 self.type = self.type + " " + tmp[1]
1098 elif token[0] == "name" and token[1] == "struct":
1100 self.type = token[1]
1102 self.type = self.type + " " + token[1]
1103 token = self.token()
1105 if token[0] == "name":
1107 token = self.token()
1108 if token != None and token[0] == "sep" and token[1] == "{":
1109 token = self.token()
1110 token = self.parseStruct(token)
1111 elif token != None and token[0] == "op" and token[1] == "*":
1112 self.type = self.type + " " + nametok[1] + " *"
1113 token = self.token()
1114 while token != None and token[0] == "op" and token[1] == "*":
1115 self.type = self.type + " *"
1116 token = self.token()
1117 if token[0] == "name":
1119 token = self.token()
1121 self.error("struct : expecting name", token)
1123 elif token != None and token[0] == "name" and nametok != None:
1124 self.type = self.type + " " + nametok[1]
1128 self.lexer.push(token)
1132 elif token[0] == "name" and token[1] == "enum":
1134 self.type = token[1]
1136 self.type = self.type + " " + token[1]
1138 token = self.token()
1139 if token != None and token[0] == "sep" and token[1] == "{":
1140 token = self.token()
1141 token = self.parseEnumBlock(token)
1143 self.error("parsing enum: expecting '{'", token)
1145 if token != None and token[0] != "name":
1146 self.lexer.push(token)
1147 token = ("name", "enum")
1149 enum_type = token[1]
1150 for enum in self.enums:
1151 self.index_add(enum[0], self.filename,
1152 not self.is_header, "enum",
1153 (enum[1], enum[2], enum_type))
1156 elif token[0] == "name":
1158 self.type = token[1]
1160 self.type = self.type + " " + token[1]
1162 self.error("parsing type %s: expecting a name" % (self.type),
1165 token = self.token()
1166 while token != None and (token[0] == "op" or
1167 token[0] == "name" and token[1] == "const"):
1168 self.type = self.type + " " + token[1]
1169 token = self.token()
1172 # if there is a parenthesis here, this means a function type
1174 if token != None and token[0] == "sep" and token[1] == '(':
1175 self.type = self.type + token[1]
1176 token = self.token()
1177 while token != None and token[0] == "op" and token[1] == '*':
1178 self.type = self.type + token[1]
1179 token = self.token()
1180 if token == None or token[0] != "name" :
1181 self.error("parsing function type, name expected", token);
1183 self.type = self.type + token[1]
1185 token = self.token()
1186 if token != None and token[0] == "sep" and token[1] == ')':
1187 self.type = self.type + token[1]
1188 token = self.token()
1189 if token != None and token[0] == "sep" and token[1] == '(':
1190 token = self.token()
1192 token = self.parseSignature(token);
1195 self.error("parsing function type, '(' expected", token);
1198 self.error("parsing function type, ')' expected", token);
1200 self.lexer.push(token)
1205 # do some lookahead for arrays
1207 if token != None and token[0] == "name":
1209 token = self.token()
1210 if token != None and token[0] == "sep" and token[1] == '[':
1211 self.type = self.type + nametok[1]
1212 while token != None and token[0] == "sep" and token[1] == '[':
1213 self.type = self.type + token[1]
1214 token = self.token()
1215 while token != None and token[0] != 'sep' and \
1216 token[1] != ']' and token[1] != ';':
1217 self.type = self.type + token[1]
1218 token = self.token()
1219 if token != None and token[0] == 'sep' and token[1] == ']':
1220 self.type = self.type + token[1]
1221 token = self.token()
1223 self.error("parsing array type, ']' expected", token);
1225 elif token != None and token[0] == "sep" and token[1] == ':':
1226 # remove :12 in case it's a limited int size
1227 token = self.token()
1228 token = self.token()
1229 self.lexer.push(token)
1235 # Parse a signature: '(' has been parsed and we scan the type definition
1236 # up to and including the ')'.
1237 def parseSignature(self, token):
1239 if token != None and token[0] == "sep" and token[1] == ')':
1241 token = self.token()
1243 while token != None:
1244 token = self.parseType(token)
1245 if token != None and token[0] == "name":
1246 signature.append((self.type, token[1], None))
1247 token = self.token()
1248 elif token != None and token[0] == "sep" and token[1] == ',':
1249 token = self.token()
1251 elif token != None and token[0] == "sep" and token[1] == ')':
1252 # only the type was provided
1253 if self.type == "...":
1254 signature.append((self.type, "...", None))
1256 signature.append((self.type, None, None))
1257 if token != None and token[0] == "sep":
1259 token = self.token()
1261 elif token[1] == ')':
1262 token = self.token()
1264 self.signature = signature
1268 # Parse a global definition, be it a type, variable or function;
1269 # the extern "C" blocks are a bit nasty and require it to recurse.
1271 def parseGlobal(self, token):
1273 if token[1] == 'extern':
1274 token = self.token()
1277 if token[0] == 'string':
1279 token = self.token()
1282 if token[0] == 'sep' and token[1] == "{":
1283 token = self.token()
1284 # print 'Entering extern "C line ', self.lineno()
1285 while token != None and (token[0] != 'sep' or
1287 if token[0] == 'name':
1288 token = self.parseGlobal(token)
1291 "token %s %s unexpected at the top level" % (
1292 token[0], token[1]))
1293 token = self.parseGlobal(token)
1294 # print 'Exiting extern "C" line', self.lineno()
1295 token = self.token()
1299 elif token[1] == 'static':
1301 token = self.token()
1302 if token == None or token[0] != 'name':
1305 if token[1] == 'typedef':
1306 token = self.token()
1307 return self.parseTypedef(token)
1309 token = self.parseType(token)
1310 type_orig = self.type
1311 if token == None or token[0] != "name":
1314 self.name = token[1]
1315 token = self.token()
1316 while token != None and (token[0] == "sep" or token[0] == "op"):
1317 if token[0] == "sep":
1319 type = type + token[1]
1320 token = self.token()
1321 while token != None and (token[0] != "sep" or \
1323 type = type + token[1]
1324 token = self.token()
1326 if token != None and token[0] == "op" and token[1] == "=":
1328 # Skip the initialization of the variable
1330 token = self.token()
1331 if token[0] == 'sep' and token[1] == '{':
1332 token = self.token()
1333 token = self.parseBlock(token)
1336 while token != None and (token[0] != "sep" or \
1337 (token[1] != ';' and token[1] != ',')):
1338 token = self.token()
1340 if token == None or token[0] != "sep" or (token[1] != ';' and
1342 self.error("missing ';' or ',' after value")
1344 if token != None and token[0] == "sep":
1347 token = self.token()
1348 if type == "struct":
1349 self.index_add(self.name, self.filename,
1350 not self.is_header, "struct", self.struct_fields)
1352 self.index_add(self.name, self.filename,
1353 not self.is_header, "variable", type)
1355 elif token[1] == "(":
1356 token = self.token()
1357 token = self.parseSignature(token)
1360 if token[0] == "sep" and token[1] == ";":
1361 d = self.mergeFunctionComment(self.name,
1362 ((type, None), self.signature), 1)
1363 self.index_add(self.name, self.filename, static,
1365 token = self.token()
1366 elif token[0] == "sep" and token[1] == "{":
1367 d = self.mergeFunctionComment(self.name,
1368 ((type, None), self.signature), static)
1369 self.index_add(self.name, self.filename, static,
1371 token = self.token()
1372 token = self.parseBlock(token);
1373 elif token[1] == ',':
1375 self.index_add(self.name, self.filename, static,
1378 token = self.token()
1379 while token != None and token[0] == "sep":
1380 type = type + token[1]
1381 token = self.token()
1382 if token != None and token[0] == "name":
1383 self.name = token[1]
1384 token = self.token()
1391 self.warning("Parsing %s" % (self.filename))
1392 token = self.token()
1393 while token != None:
1394 if token[0] == 'name':
1395 token = self.parseGlobal(token)
1397 self.error("token %s %s unexpected at the top level" % (
1398 token[0], token[1]))
1399 token = self.parseGlobal(token)
1401 self.parseTopComment(self.top_comment)
1406 """A documentation builder"""
1407 def __init__(self, name, directories=['.'], excludes=[]):
1409 self.directories = directories
1410 self.excludes = excludes + ignored_files.keys()
1416 if name == 'libxml2':
1417 self.basename = 'libxml'
1419 self.basename = name
1421 def indexString(self, id, str):
1424 str = string.replace(str, "'", ' ')
1425 str = string.replace(str, '"', ' ')
1426 str = string.replace(str, "/", ' ')
1427 str = string.replace(str, '*', ' ')
1428 str = string.replace(str, "[", ' ')
1429 str = string.replace(str, "]", ' ')
1430 str = string.replace(str, "(", ' ')
1431 str = string.replace(str, ")", ' ')
1432 str = string.replace(str, "<", ' ')
1433 str = string.replace(str, '>', ' ')
1434 str = string.replace(str, "&", ' ')
1435 str = string.replace(str, '#', ' ')
1436 str = string.replace(str, ",", ' ')
1437 str = string.replace(str, '.', ' ')
1438 str = string.replace(str, ';', ' ')
1439 tokens = string.split(str)
1440 for token in tokens:
1443 if string.find(string.letters, c) < 0:
1445 elif len(token) < 3:
1448 lower = string.lower(token)
1449 # TODO: generalize this a bit
1450 if lower == 'and' or lower == 'the':
1452 elif self.xref.has_key(token):
1453 self.xref[token].append(id)
1455 self.xref[token] = [id]
1460 print "Project %s : %d headers, %d modules" % (self.name, len(self.headers.keys()), len(self.modules.keys()))
1463 def scanHeaders(self):
1464 for header in self.headers.keys():
1465 parser = CParser(header)
1466 idx = parser.parse()
1467 self.headers[header] = idx;
1470 def scanModules(self):
1471 for module in self.modules.keys():
1472 parser = CParser(module)
1473 idx = parser.parse()
1475 self.modules[module] = idx
1476 self.idx.merge_public(idx)
1479 for directory in self.directories:
1480 files = glob.glob(directory + "/*.c")
1483 for excl in self.excludes:
1484 if string.find(file, excl) != -1:
1488 self.modules[file] = None;
1489 files = glob.glob(directory + "/*.h")
1492 for excl in self.excludes:
1493 if string.find(file, excl) != -1:
1497 self.headers[file] = None;
1501 def modulename_file(self, file):
1502 module = os.path.basename(file)
1503 if module[-2:] == '.h':
1504 module = module[:-2]
1507 def serialize_enum(self, output, name):
1508 id = self.idx.enums[name]
1509 output.write(" <enum name='%s' file='%s'" % (name,
1510 self.modulename_file(id.module)))
1513 if info[0] != None and info[0] != '':
1518 output.write(" value='%s'" % (val));
1519 if info[2] != None and info[2] != '':
1520 output.write(" type='%s'" % info[2]);
1521 if info[1] != None and info[1] != '':
1522 output.write(" info='%s'" % escape(info[1]));
1523 output.write("/>\n")
# serialize_macro: write a <macro> element for the macro `name` to
# `output`, with an optional <info> child and <arg> children.
# Description texts are also passed to self.indexString().
# NOTE(review): listing lines 1529-1530, 1535-1536, 1541 and 1543-1544 are
# missing from this excerpt; the loop over the macro arguments (which
# rebinds `name` and `desc`) and any try/except/else lines are not visible.
1525 def serialize_macro(self, output, name):
1526 id = self.idx.macros[name]
1527 output.write(" <macro name='%s' file='%s'>\n" % (name,
1528 self.modulename_file(id.module)))
# id.info is unpacked as a pair: the argument list and the description.
1531 (args, desc) = id.info
1532 if desc != None and desc != "":
1533 output.write(" <info>%s</info>\n" % (escape(desc)))
1534 self.indexString(name, desc)
# Documented arguments get an escaped info attribute and are indexed.
1537 if desc != None and desc != "":
1538 output.write(" <arg name='%s' info='%s'/>\n" % (
1539 name, escape(desc)))
1540 self.indexString(name, desc)
# Presumably the branch for arguments without a description - TODO confirm.
1542 output.write(" <arg name='%s'/>\n" % (name))
1545 output.write(" </macro>\n")
# serialize_typedef: write the XML element describing the typedef `name`.
# When the recorded type text (id.info) starts with 'struct ' a <struct>
# element is written; a plain <typedef> element is also written below.
# If self.idx.structs holds a tuple or list for `name`, one <field> child
# is written per entry.
# NOTE(review): listing lines 1552, 1557, 1559, 1561-1564, 1566, 1569 and
# 1571 are missing from this excerpt; the else/try/except lines and the
# binding of `desc` are not visible, so branch ownership of the statements
# below is partly inferred.
1547 def serialize_typedef(self, output, name):
1548 id = self.idx.typedefs[name]
1549 if id.info[0:7] == 'struct ':
1550 output.write(" <struct name='%s' file='%s' type='%s'" % (
1551 name, self.modulename_file(id.module), id.info))
# Fields are expanded only when they were recorded as a tuple or a list.
1553 if self.idx.structs.has_key(name) and ( \
1554 type(self.idx.structs[name].info) == type(()) or
1555 type(self.idx.structs[name].info) == type([])):
1556 output.write(">\n");
1558 for field in self.idx.structs[name].info:
1560 self.indexString(name, desc)
# field[1] is written as the name attribute and field[0] as the type.
1565 output.write(" <field name='%s' type='%s' info='%s'/>\n" % (field[1] , field[0], desc))
# Diagnostic printed on failure; the guarding statement is not visible here.
1567 print "Failed to serialize struct %s" % (name)
1568 output.write(" </struct>\n")
# Presumably closes the <struct> start tag as an empty element when no
# field list is available - TODO confirm against the full source.
1570 output.write("/>\n");
# Presumably the non-struct branch - TODO confirm.
1572 output.write(" <typedef name='%s' file='%s' type='%s'/>\n" % (
1573 name, self.modulename_file(id.module), id.info))
# serialize_variable: write a self-closing <variable> element for the
# variable `name` to `output`, with or without a type attribute.
# NOTE(review): listing lines 1577 and 1580 are missing from this excerpt;
# the condition selecting between the two writes is not visible
# (presumably whether id.info is set - TODO confirm).
1575 def serialize_variable(self, output, name):
1576 id = self.idx.variables[name]
# Form carrying the type: id.info is written as the type attribute.
1578 output.write(" <variable name='%s' file='%s' type='%s'/>\n" % (
1579 name, self.modulename_file(id.module), id.info))
# Form without a type attribute.
1581 output.write(" <variable name='%s' file='%s'/>\n" % (
1582 name, self.modulename_file(id.module)))
# serialize_function: write the XML element describing the function
# `name` to `output`. The element tag is taken from id.type, and the
# element contains <info>, <return> and <arg> children.
# Description texts are also passed to self.indexString().
# NOTE(review): listing lines 1588, 1592, 1595, 1601, 1604 and 1607 are
# missing from this excerpt; the try/else/except lines and the statement
# executed for 'void' parameters are not visible.
1584 def serialize_function(self, output, name):
1585 id = self.idx.functions[name]
1586 output.write(" <%s name='%s' file='%s'>\n" % (id.type, name,
1587 self.modulename_file(id.module)))
# id.info is unpacked as (return description, parameter list, text).
1589 (ret, params, desc) = id.info
1590 output.write(" <info>%s</info>\n" % (escape(desc)))
1591 self.indexString(name, desc)
# ret[0] is the return type; ret[1] is its description text.
1593 if ret[0] == "void":
1594 output.write(" <return type='void'/>\n")
1596 output.write(" <return type='%s' info='%s'/>\n" % (
1597 ret[0], escape(ret[1])))
1598 self.indexString(name, ret[1])
# Each param is indexed as: [0] type, [1] name, [2] description.
1599 for param in params:
1600 if param[0] == 'void':
# A missing description is written as an empty info attribute.
1602 if param[2] == None:
1603 output.write(" <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
1605 output.write(" <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
1606 self.indexString(name, param[2])
# Diagnostic printed on failure; the guarding statement is not visible here.
1608 print "Failed to save function %s info: " % name, `id.info`
1609 output.write(" </%s>\n" % (id.type))
# serialize_exports: write the <file> element for one header to `output`.
# The element holds the header's Summary/Description/Author texts, an
# optional <deprecated/> marker, and one <exports> child per symbol found
# in that header's index (self.headers[file]).
# NOTE(review): many listing lines are missing from this excerpt (1617,
# 1619, 1622, 1628, 1630, 1634, 1636, 1638, 1640, 1642, 1645, 1649, 1653,
# 1657, 1661); try/except lines, sort calls and the statements executed by
# the has_key() tests below are not visible.
1611 def serialize_exports(self, output, file):
1612 module = self.modulename_file(file)
1613 output.write(" <file name='%s'>\n" % (module))
# `dict` is the parsed index stored for this header.
1614 dict = self.headers[file]
1615 if dict.info != None:
1616 for data in ('Summary', 'Description', 'Author'):
# The text of each entry is escaped; the closing tag uses the lower-cased key.
1618 output.write(" <%s>%s</%s>\n" % (
1620 escape(dict.info[data]),
1621 string.lower(data)))
1623 print "Header %s lacks a %s description" % (module, data)
# A header is flagged deprecated when its Description mentions DEPRECATED.
1624 if dict.info.has_key('Description'):
1625 desc = dict.info['Description']
1626 if string.find(desc, "DEPRECATED") != -1:
1627 output.write(" <deprecated/>\n")
1629 ids = dict.macros.keys()
1631 for id in uniq(ids):
1632 # Macros are sometimes used to masquerade as other types.
1633 if dict.functions.has_key(id):
1635 if dict.variables.has_key(id):
1637 if dict.typedefs.has_key(id):
1639 if dict.structs.has_key(id):
1641 if dict.enums.has_key(id):
1643 output.write(" <exports symbol='%s' type='macro'/>\n" % (id))
# The remaining symbol kinds are written in the order: enum, typedef,
# struct, variable, function.
1644 ids = dict.enums.keys()
1646 for id in uniq(ids):
1647 output.write(" <exports symbol='%s' type='enum'/>\n" % (id))
1648 ids = dict.typedefs.keys()
1650 for id in uniq(ids):
1651 output.write(" <exports symbol='%s' type='typedef'/>\n" % (id))
1652 ids = dict.structs.keys()
1654 for id in uniq(ids):
1655 output.write(" <exports symbol='%s' type='struct'/>\n" % (id))
1656 ids = dict.variables.keys()
1658 for id in uniq(ids):
1659 output.write(" <exports symbol='%s' type='variable'/>\n" % (id))
1660 ids = dict.functions.keys()
1662 for id in uniq(ids):
1663 output.write(" <exports symbol='%s' type='function'/>\n" % (id))
1664 output.write(" </file>\n")
# serialize_xrefs_files: for every header in self.headers write a <file>
# element to `output` containing one <ref> per identifier found in that
# header's index.
# NOTE(review): listing lines 1668, 1676-1677 and 1680-1681 are missing
# from this excerpt; any sorting and the loop header binding `id` are not
# visible.
1666 def serialize_xrefs_files(self, output):
1667 headers = self.headers.keys()
1669 for file in headers:
1670 module = self.modulename_file(file)
1671 output.write(" <file name='%s'>\n" % (module))
1672 dict = self.headers[file]
# All symbol kinds are pooled and passed through uniq().
1673 ids = uniq(dict.functions.keys() + dict.variables.keys() + \
1674 dict.macros.keys() + dict.typedefs.keys() + \
1675 dict.structs.keys() + dict.enums.keys())
1678 output.write(" <ref name='%s'/>\n" % (id))
1679 output.write(" </file>\n")
# serialize_xrefs_functions: group function names by the type of their
# parameters and write one <type> element per type to `output`, each
# containing <ref> children.
# NOTE(review): listing lines 1683, 1686, 1690, 1693, 1695-1699, 1702,
# 1704-1705, 1707-1708 and 1710 are missing from this excerpt; the
# initialisation of `funcs`, the loops over it, and the duplicate test
# that uses `pid` are not visible.
1682 def serialize_xrefs_functions(self, output):
1684 for name in self.idx.functions.keys():
1685 id = self.idx.functions[name]
1687 (ret, params, desc) = id.info
1688 for param in params:
# param[0] is the parameter type and serves as the grouping key.
1689 if param[0] == 'void':
1691 if funcs.has_key(param[0]):
1692 funcs[param[0]].append(name)
1694 funcs[param[0]] = [name]
# Empty, void, int and plain C string types are singled out here;
# presumably they are skipped - TODO confirm against the full source.
1700 if type == '' or type == 'void' or type == "int" or \
1701 type == "char *" or type == "const char *" :
1703 output.write(" <type name='%s'>\n" % (type))
1706 pid = '' # not sure why we have dups, but get rid of them!
1709 output.write(" <ref name='%s'/>\n" % (id))
1711 output.write(" </type>\n")
# serialize_xrefs_constructors: group function names by their return type
# and write one <type> element per type to `output`, each containing
# <ref> children.
# NOTE(review): listing lines 1714, 1717, 1720, 1723, 1725-1729, 1732 and
# 1734-1736 are missing from this excerpt; the initialisation of `funcs`
# and the loops over it are not visible.
1713 def serialize_xrefs_constructors(self, output):
1715 for name in self.idx.functions.keys():
1716 id = self.idx.functions[name]
1718 (ret, params, desc) = id.info
# ret[0] is the return type and serves as the grouping key.
1719 if ret[0] == "void":
1721 if funcs.has_key(ret[0]):
1722 funcs[ret[0]].append(name)
1724 funcs[ret[0]] = [name]
# Empty, void, int and plain C string types are singled out here;
# presumably they are skipped - TODO confirm against the full source.
1730 if type == '' or type == 'void' or type == "int" or \
1731 type == "char *" or type == "const char *" :
1733 output.write(" <type name='%s'>\n" % (type))
1737 output.write(" <ref name='%s'/>\n" % (id))
1738 output.write(" </type>\n")
# serialize_xrefs_alpha: write the identifiers of self.idx.identifiers to
# `output` as <ref> elements wrapped in <letter name='...'> groups.
# NOTE(review): listing lines 1741, 1743-1746, 1748 and 1751 are missing
# from this excerpt; the loop over `ids`, the tracking of `letter` and the
# conditions guarding the </letter> writes are not visible.
1740 def serialize_xrefs_alpha(self, output):
1742 ids = self.idx.identifiers.keys()
# Closes a previously opened group before a new one is started.
1747 output.write(" </letter>\n")
1749 output.write(" <letter name='%s'>\n" % (letter))
1750 output.write(" <ref name='%s'/>\n" % (id))
# Closes a group after the identifiers have been written; the guarding
# condition, if any, is not visible here.
1752 output.write(" </letter>\n")
# serialize_xrefs_references: write one <reference> element per identifier
# of self.idx.identifiers to `output`, pairing the name with an href.
# NOTE(review): listing lines 1756-1757, 1759 and 1763-1764 are missing
# from this excerpt; the loop binding `id`, the binding of `module`, and
# the end of the href expression are not visible.
1754 def serialize_xrefs_references(self, output):
1755 typ = self.idx.identifiers.keys()
1758 idf = self.idx.identifiers[id]
# The href starts with 'html/' + self.basename + '-' + the module name
# + '.html#'; the fragment appended after '#' is on a line not shown here.
1760 output.write(" <reference name='%s' href='%s'/>\n" % (id,
1761 'html/' + self.basename + '-' +
1762 self.modulename_file(module) + '.html#' +
# serialize_xrefs_index: write a word index to `output` as nested
# <chunk>/<letter>/<word>/<ref> elements, followed by a <chunks> summary
# listing each chunk name with its start and end letters.
# NOTE(review): this excerpt is heavily elided (listing lines 1766-1773,
# 1775-1776, 1778, 1781, 1785, 1788, 1791-1793, 1795-1797, 1799, 1801,
# 1804 and 1807 are missing); the source of `index`, the loop headers and
# the updates of `count`, `chunk` and `letter` are not visible.
1765 def serialize_xrefs_index(self, output):
# Words with more than 30 entries are singled out; presumably skipped as
# too common - TODO confirm.
1774 if len(index[id]) > 30:
# A new chunk is started on the first word or once `count` exceeds 200.
1777 if letter == None or count > 200:
1779 output.write(" </letter>\n")
1780 output.write(" </chunk>\n")
# Each finished chunk is recorded as [name, first letter, last letter].
1782 chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
1783 output.write(" <chunk name='chunk%s'>\n" % (chunk))
1784 first_letter = id[0]
1786 elif letter != None:
1787 output.write(" </letter>\n")
1789 output.write(" <letter name='%s'>\n" % (letter))
1790 output.write(" <word name='%s'>\n" % (id))
1794 for token in tokens:
1798 output.write(" <ref name='%s'/>\n" % (token))
1800 output.write(" </word>\n")
# Closing tags and bookkeeping for the last chunk.
1802 output.write(" </letter>\n")
1803 output.write(" </chunk>\n")
1805 chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
# Summary of all chunks with their letter ranges.
1806 output.write(" <chunks>\n")
1808 output.write(" <chunk name='%s' start='%s' end='%s'/>\n" % (
1809 ch[0], ch[1], ch[2]))
1810 output.write(" </chunks>\n")
def serialize_xrefs(self, output):
    """Write every cross-reference section to *output*.

    The sections are emitted in a fixed order: references, alpha,
    constructors, functions, files and index.  Each one is wrapped in an
    element named after the section, and its content is produced by the
    method called ``serialize_xrefs_<section>``.
    """
    section_names = ("references", "alpha", "constructors",
                     "functions", "files", "index")
    for section in section_names:
        output.write(" <%s>\n" % section)
        # The writer is looked up only when its section is reached, so
        # lookups and writes interleave as in a hand-unrolled sequence.
        write_section = getattr(self, "serialize_xrefs_" + section)
        write_section(output)
        output.write(" </%s>\n" % section)
# serialize: write the two XML outputs of the project.
#   <outdir><name>-api.xml   - exported files plus every macro, enum,
#                              typedef, variable and function symbol
#   <outdir><name>-refs.xml  - the cross references (serialize_xrefs)
# `outdir` is concatenated directly with the file name, so it has to end
# with a path separator (the visible callers pass "./" and "EXSLT/").
# NOTE(review): listing lines 1840, 1846, 1850-1851, 1854, 1858, 1862 and
# 1867-1868 are missing from this excerpt; any sort() calls, the loop
# header binding `enum`, and whatever happens to the first output file
# before the second is opened are not visible.
1832 def serialize(self, outdir):
1833 filename = outdir + "%s-api.xml" % self.name
1834 print "Saving XML description %s" % (filename)
1835 output = open(filename, "w")
1836 output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
1837 output.write("<api name='%s'>\n" % self.name)
# First section: one <file> entry per header.
1838 output.write(" <files>\n")
1839 headers = self.headers.keys()
1841 for file in headers:
1842 self.serialize_exports(output, file)
1843 output.write(" </files>\n")
# Second section: symbols, written kind by kind from the merged index.
1844 output.write(" <symbols>\n")
1845 macros = self.idx.macros.keys()
1847 for macro in macros:
1848 self.serialize_macro(output, macro)
1849 enums = self.idx.enums.keys()
1852 self.serialize_enum(output, enum)
1853 typedefs = self.idx.typedefs.keys()
1855 for typedef in typedefs:
1856 self.serialize_typedef(output, typedef)
1857 variables = self.idx.variables.keys()
1859 for variable in variables:
1860 self.serialize_variable(output, variable)
1861 functions = self.idx.functions.keys()
1863 for function in functions:
1864 self.serialize_function(output, function)
1865 output.write(" </symbols>\n")
1866 output.write("</api>\n")
# Second file: the cross references. `output` is rebound to the new file.
1869 filename = outdir + "%s-refs.xml" % self.name
1870 print "Saving XML Cross References %s" % (filename)
1871 output = open(filename, "w")
1872 output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
1873 output.write("<apirefs name='%s'>\n" % self.name)
1874 self.serialize_xrefs(output)
1875 output.write("</apirefs>\n")
1881 if glob.glob("parser.c") != [] :
1882 print "Rebuilding API description for libxml2"
1883 builder = docBuilder("libxml2", [".", "."],
1884 ["xmlwin32version.h", "tst.c"])
1885 elif glob.glob("../parser.c") != [] :
1886 print "Rebuilding API description for libxml2"
1887 builder = docBuilder("libxml2", ["..", "../include/libxml"],
1888 ["xmlwin32version.h", "tst.c"])
1889 elif glob.glob("../libxslt/transform.c") != [] :
1890 print "Rebuilding API description for libxslt"
1891 builder = docBuilder("libxslt", ["../libxslt"],
1892 ["win32config.h", "libxslt.h", "tst.c"])
1894 print "rebuild() failed, unable to guess the module"
1898 builder.serialize("./")
1899 if glob.glob("../libexslt/exslt.c") != [] :
1900 extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
1903 extra.serialize("EXSLT/")
1907 # for debugging the parser
# parse: debugging helper that runs CParser over a single file.
# NOTE(review): listing lines 1912-1913 are missing from this excerpt;
# what is done with `idx` after parsing (presumably returned) is not
# visible - TODO confirm.
1909 def parse(filename):
1910 parser = CParser(filename)
1911 idx = parser.parse()
1914 if __name__ == "__main__":