doc/apibuild.py

   1 #!/usr/bin/python -u
   2 #
   3 # This is the API builder, it parses the C sources and build the
   4 # API formal description in XML.
   5 #
   6 # See Copyright for the status of this software.
   7 #
   8 # daniel@veillard.com
   9 #
  10 import os, sys
  11 import string
  12 import glob
  13
# Module-level debug flag; nothing in the code shown in this section reads it.
debug=0

#
# C parser analysis code
#
# File name -> reason the file is left out of the analysis.
# NOTE(review): the code consulting this table is outside this section;
# presumably the file-scanning driver skips these names -- confirm there.
# NOTE(review): "trio" has no extension, unlike the other entries -- confirm
# whether that is intentional.
ignored_files = {
  "trio": "too many non standard macros",
  "trio.c": "too many non standard macros",
  "trionan.c": "too many non standard macros",
  "triostr.c": "too many non standard macros",
  "acconfig.h": "generated portability layer",
  "config.h": "generated portability layer",
  "libxml.h": "internal only",
  "testOOM.c": "out of memory tester",
  "testOOMlib.h": "out of memory tester",
  "testOOMlib.c": "out of memory tester",
}

# Word -> (count, reason).  CParser.token() looks up every 'name' token here
# and unpacks the value as (n, info).
# NOTE(review): n is presumably the number of tokens following the word that
# must be discarded with it (e.g. the parenthesised argument of __declspec);
# the loop that uses it is only partly visible -- confirm.
ignored_words = {
  "WINAPI": (0, "Windows keyword"),
  "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
  "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
  "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
  "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  "XMLCALL": (0, "Special macro for win32 calls"),
  "XSLTCALL": (0, "Special macro for win32 calls"),
  "EXSLTCALL": (0, "Special macro for win32 calls"),
  "__declspec": (3, "Windows keyword"),
  "ATTRIBUTE_UNUSED": (0, "macro keyword"),
  "LIBEXSLT_PUBLIC": (0, "macro keyword"),
  "X_IN_Y": (5, "macro function builder"),
  "XSLT_ITEM_COMMON_FIELDS": (0, "Special macro"),
  "CALLBACK": (0, "macro keyword"),
  "LIBXSLT_ATTR_FORMAT": (5, "macro for gcc printf args checking extension"),
}
  52
  53 def escape(raw):
  54     raw = string.replace(raw, '&', '&amp;')
  55     raw = string.replace(raw, '<', '&lt;')
  56     raw = string.replace(raw, '>', '&gt;')
  57     raw = string.replace(raw, "'", '&apos;')
  58     raw = string.replace(raw, '"', '&quot;')
  59     return raw
  60
  61 def uniq(items):
  62     d = {}
  63     for item in items:
  64         d[item]=1
  65     return d.keys()
  66
  67 class identifier:
  68     def __init__(self, name, module=None, type=None, lineno = 0,
  69                  info=None, extra=None):
  70         self.name = name
  71         self.module = module
  72         self.type = type
  73         self.info = info
  74         self.extra = extra
  75         self.lineno = lineno
  76         self.static = 0
  77
  78     def __repr__(self):
  79         r = "%s %s:" % (self.type, self.name)
  80         if self.static:
  81             r = r + " static"
  82         if self.module != None:
  83             r = r + " from %s" % (self.module)
  84         if self.info != None:
  85             r = r + " " +  `self.info`
  86         if self.extra != None:
  87             r = r + " " + `self.extra`
  88         return r
  89
  90
  91     def set_module(self, module):
  92         self.module = module
  93     def set_type(self, type):
  94         self.type = type
  95     def set_info(self, info):
  96         self.info = info
  97     def set_extra(self, extra):
  98         self.extra = extra
  99     def set_lineno(self, lineno):
 100         self.lineno = lineno
 101     def set_static(self, static):
 102         self.static = static
 103
 104     def get_name(self):
 105         return self.name
 106     def get_module(self):
 107         return self.module
 108     def get_type(self):
 109         return self.type
 110     def get_info(self):
 111         return self.info
 112     def get_lineno(self):
 113         return self.lineno
 114     def get_extra(self):
 115         return self.extra
 116     def get_static(self):
 117         return self.static
 118
 119     def update(self, module, type = None, info = None, extra=None):
 120         if module != None and self.module == None:
 121             self.set_module(module)
 122         if type != None and self.type == None:
 123             self.set_type(type)
 124         if info != None:
 125             self.set_info(info)
 126         if extra != None:
 127             self.set_extra(extra)
 128
 129
 130 class index:
 131     def __init__(self, name = "noname"):
 132         self.name = name
 133         self.identifiers = {}
 134         self.functions = {}
 135         self.variables = {}
 136         self.includes = {}
 137         self.structs = {}
 138         self.enums = {}
 139         self.typedefs = {}
 140         self.macros = {}
 141         self.references = {}
 142         self.info = {}
 143
 144     def add_ref(self, name, module, static, type, lineno, info=None, extra=None):
 145         if name[0:2] == '__':
 146             return None
 147         d = None
 148         try:
 149            d = self.identifiers[name]
 150            d.update(module, type, lineno, info, extra)
 151         except:
 152            d = identifier(name, module, type, lineno, info, extra)
 153            self.identifiers[name] = d
 154
 155         if d != None and static == 1:
 156             d.set_static(1)
 157
 158         if d != None and name != None and type != None:
 159             self.references[name] = d
 160
 161     def add(self, name, module, static, type, lineno, info=None, extra=None):
 162         if name[0:2] == '__':
 163             return None
 164         d = None
 165         try:
 166            d = self.identifiers[name]
 167            d.update(module, type, lineno, info, extra)
 168         except:
 169            d = identifier(name, module, type, lineno, info, extra)
 170            self.identifiers[name] = d
 171
 172         if d != None and static == 1:
 173             d.set_static(1)
 174
 175         if d != None and name != None and type != None:
 176             if type == "function":
 177                 self.functions[name] = d
 178             elif type == "functype":
 179                 self.functions[name] = d
 180             elif type == "variable":
 181                 self.variables[name] = d
 182             elif type == "include":
 183                 self.includes[name] = d
 184             elif type == "struct":
 185                 self.structs[name] = d
 186             elif type == "enum":
 187                 self.enums[name] = d
 188             elif type == "typedef":
 189                 self.typedefs[name] = d
 190             elif type == "macro":
 191                 self.macros[name] = d
 192             else:
 193                 print "Unable to register type ", type
 194         return d
 195
 196     def merge(self, idx):
 197         for id in idx.functions.keys():
 198               #
 199               # macro might be used to override functions or variables
 200               # definitions
 201               #
 202              if self.macros.has_key(id):
 203                  del self.macros[id]
 204              if self.functions.has_key(id):
 205                  print "function %s from %s redeclared in %s" % (
 206                     id, self.functions[id].module, idx.functions[id].module)
 207              else:
 208                  self.functions[id] = idx.functions[id]
 209                  self.identifiers[id] = idx.functions[id]
 210         for id in idx.variables.keys():
 211               #
 212               # macro might be used to override functions or variables
 213               # definitions
 214               #
 215              if self.macros.has_key(id):
 216                  del self.macros[id]
 217              if self.variables.has_key(id):
 218                  print "variable %s from %s redeclared in %s" % (
 219                     id, self.variables[id].module, idx.variables[id].module)
 220              else:
 221                  self.variables[id] = idx.variables[id]
 222                  self.identifiers[id] = idx.variables[id]
 223         for id in idx.structs.keys():
 224              if self.structs.has_key(id):
 225                  print "struct %s from %s redeclared in %s" % (
 226                     id, self.structs[id].module, idx.structs[id].module)
 227              else:
 228                  self.structs[id] = idx.structs[id]
 229                  self.identifiers[id] = idx.structs[id]
 230         for id in idx.typedefs.keys():
 231              if self.typedefs.has_key(id):
 232                  print "typedef %s from %s redeclared in %s" % (
 233                     id, self.typedefs[id].module, idx.typedefs[id].module)
 234              else:
 235                  self.typedefs[id] = idx.typedefs[id]
 236                  self.identifiers[id] = idx.typedefs[id]
 237         for id in idx.macros.keys():
 238               #
 239               # macro might be used to override functions or variables
 240               # definitions
 241               #
 242              if self.variables.has_key(id):
 243                  continue
 244              if self.functions.has_key(id):
 245                  continue
 246              if self.enums.has_key(id):
 247                  continue
 248              if self.macros.has_key(id):
 249                  print "macro %s from %s redeclared in %s" % (
 250                     id, self.macros[id].module, idx.macros[id].module)
 251              else:
 252                  self.macros[id] = idx.macros[id]
 253                  self.identifiers[id] = idx.macros[id]
 254         for id in idx.enums.keys():
 255              if self.enums.has_key(id):
 256                  print "enum %s from %s redeclared in %s" % (
 257                     id, self.enums[id].module, idx.enums[id].module)
 258              else:
 259                  self.enums[id] = idx.enums[id]
 260                  self.identifiers[id] = idx.enums[id]
 261
 262     def merge_public(self, idx):
 263         for id in idx.functions.keys():
 264              if self.functions.has_key(id):
 265                  up = idx.functions[id]
 266                  self.functions[id].update(None, up.type, up.info, up.extra)
 267          #     else:
 268          #         print "Function %s from %s is not declared in headers" % (
 269         #               id, idx.functions[id].module)
 270          # TODO: do the same for variables.
 271
 272     def analyze_dict(self, type, dict):
 273         count = 0
 274         public = 0
 275         for name in dict.keys():
 276             id = dict[name]
 277             count = count + 1
 278             if id.static == 0:
 279                 public = public + 1
 280         if count != public:
 281             print "  %d %s , %d public" % (count, type, public)
 282         elif count != 0:
 283             print "  %d public %s" % (count, type)
 284
 285
 286     def analyze(self):
 287         self.analyze_dict("functions", self.functions)
 288         self.analyze_dict("variables", self.variables)
 289         self.analyze_dict("structs", self.structs)
 290         self.analyze_dict("typedefs", self.typedefs)
 291         self.analyze_dict("macros", self.macros)
 292
 293 class CLexer:
 294     """A lexer for the C language, tokenize the input by reading and
 295        analyzing it line by line"""
 296     def __init__(self, input):
 297         self.input = input
 298         self.tokens = []
 299         self.line = ""
 300         self.lineno = 0
 301
 302     def getline(self):
 303         line = ''
 304         while line == '':
 305             line = self.input.readline()
 306             if not line:
 307                 return None
 308             self.lineno = self.lineno + 1
 309             line = string.lstrip(line)
 310             line = string.rstrip(line)
 311             if line == '':
 312                 continue
 313             while line[-1] == '\\':
 314                 line = line[:-1]
 315                 n = self.input.readline()
 316                 self.lineno = self.lineno + 1
 317                 n = string.lstrip(n)
 318                 n = string.rstrip(n)
 319                 if not n:
 320                     break
 321                 else:
 322                     line = line + n
 323         return line
 324
 325     def getlineno(self):
 326         return self.lineno
 327
 328     def push(self, token):
 329         self.tokens.insert(0, token);
 330
 331     def debug(self):
 332         print "Last token: ", self.last
 333         print "Token queue: ", self.tokens
 334         print "Line %d end: " % (self.lineno), self.line
 335
 336     def token(self):
 337         while self.tokens == []:
 338             if self.line == "":
 339                 line = self.getline()
 340             else:
 341                 line = self.line
 342                 self.line = ""
 343             if line == None:
 344                 return None
 345
 346             if line[0] == '#':
 347                 self.tokens = map((lambda x: ('preproc', x)),
 348                                   string.split(line))
 349                 break;
 350             l = len(line)
 351             if line[0] == '"' or line[0] == "'":
 352                 end = line[0]
 353                 line = line[1:]
 354                 found = 0
 355                 tok = ""
 356                 while found == 0:
 357                     i = 0
 358                     l = len(line)
 359                     while i < l:
 360                         if line[i] == end:
 361                             self.line = line[i+1:]
 362                             line = line[:i]
 363                             l = i
 364                             found = 1
 365                             break
 366                         if line[i] == '\\':
 367                             i = i + 1
 368                         i = i + 1
 369                     tok = tok + line
 370                     if found == 0:
 371                         line = self.getline()
 372                         if line == None:
 373                             return None
 374                 self.last = ('string', tok)
 375                 return self.last
 376
 377             if l >= 2 and line[0] == '/' and line[1] == '*':
 378                 line = line[2:]
 379                 found = 0
 380                 tok = ""
 381                 while found == 0:
 382                     i = 0
 383                     l = len(line)
 384                     while i < l:
 385                         if line[i] == '*' and i+1 < l and line[i+1] == '/':
 386                             self.line = line[i+2:]
 387                             line = line[:i-1]
 388                             l = i
 389                             found = 1
 390                             break
 391                         i = i + 1
 392                     if tok != "":
 393                         tok = tok + "\n"
 394                     tok = tok + line
 395                     if found == 0:
 396                         line = self.getline()
 397                         if line == None:
 398                             return None
 399                 self.last = ('comment', tok)
 400                 return self.last
 401             if l >= 2 and line[0] == '/' and line[1] == '/':
 402                 line = line[2:]
 403                 self.last = ('comment', line)
 404                 return self.last
 405             i = 0
 406             while i < l:
 407                 if line[i] == '/' and i+1 < l and line[i+1] == '/':
 408                     self.line = line[i:]
 409                     line = line[:i]
 410                     break
 411                 if line[i] == '/' and i+1 < l and line[i+1] == '*':
 412                     self.line = line[i:]
 413                     line = line[:i]
 414                     break
 415                 if line[i] == '"' or line[i] == "'":
 416                     self.line = line[i:]
 417                     line = line[:i]
 418                     break
 419                 i = i + 1
 420             l = len(line)
 421             i = 0
 422             while i < l:
 423                 if line[i] == ' ' or line[i] == '\t':
 424                     i = i + 1
 425                     continue
 426                 o = ord(line[i])
 427                 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
 428                    (o >= 48 and o <= 57):
 429                     s = i
 430                     while i < l:
 431                         o = ord(line[i])
 432                         if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
 433                            (o >= 48 and o <= 57) or string.find(
 434                                " \t(){}:;,+-*/%&!|[]=><", line[i]) == -1:
 435                             i = i + 1
 436                         else:
 437                             break
 438                     self.tokens.append(('name', line[s:i]))
 439                     continue
 440                 if string.find("(){}:;,[]", line[i]) != -1:
 441 #                 if line[i] == '(' or line[i] == ')' or line[i] == '{' or \
 442 #                   line[i] == '}' or line[i] == ':' or line[i] == ';' or \
 443 #                   line[i] == ',' or line[i] == '[' or line[i] == ']':
 444                     self.tokens.append(('sep', line[i]))
 445                     i = i + 1
 446                     continue
 447                 if string.find("+-*><=/%&!|.", line[i]) != -1:
 448 #                 if line[i] == '+' or line[i] == '-' or line[i] == '*' or \
 449 #                   line[i] == '>' or line[i] == '<' or line[i] == '=' or \
 450 #                   line[i] == '/' or line[i] == '%' or line[i] == '&' or \
 451 #                   line[i] == '!' or line[i] == '|' or line[i] == '.':
 452                     if line[i] == '.' and  i + 2 < l and \
 453                        line[i+1] == '.' and line[i+2] == '.':
 454                         self.tokens.append(('name', '...'))
 455                         i = i + 3
 456                         continue
 457
 458                     j = i + 1
 459                     if j < l and (
 460                        string.find("+-*><=/%&!|", line[j]) != -1):
 461 #                       line[j] == '+' or line[j] == '-' or line[j] == '*' or \
 462 #                       line[j] == '>' or line[j] == '<' or line[j] == '=' or \
 463 #                       line[j] == '/' or line[j] == '%' or line[j] == '&' or \
 464 #                       line[j] == '!' or line[j] == '|'):
 465                         self.tokens.append(('op', line[i:j+1]))
 466                         i = j + 1
 467                     else:
 468                         self.tokens.append(('op', line[i]))
 469                         i = i + 1
 470                     continue
 471                 s = i
 472                 while i < l:
 473                     o = ord(line[i])
 474                     if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
 475                        (o >= 48 and o <= 57) or (
 476                         string.find(" \t(){}:;,+-*/%&!|[]=><", line[i]) == -1):
 477 #                        line[i] != ' ' and line[i] != '\t' and
 478 #                        line[i] != '(' and line[i] != ')' and
 479 #                        line[i] != '{'  and line[i] != '}' and
 480 #                        line[i] != ':' and line[i] != ';' and
 481 #                        line[i] != ',' and line[i] != '+' and
 482 #                        line[i] != '-' and line[i] != '*' and
 483 #                        line[i] != '/' and line[i] != '%' and
 484 #                        line[i] != '&' and line[i] != '!' and
 485 #                        line[i] != '|' and line[i] != '[' and
 486 #                        line[i] != ']' and line[i] != '=' and
 487 #                        line[i] != '*' and line[i] != '>' and
 488 #                        line[i] != '<'):
 489                         i = i + 1
 490                     else:
 491                         break
 492                 self.tokens.append(('name', line[s:i]))
 493
 494         tok = self.tokens[0]
 495         self.tokens = self.tokens[1:]
 496         self.last = tok
 497         return tok
 498
 499 class CParser:
 500     """The C module parser"""
 501     def __init__(self, filename, idx = None):
 502         self.filename = filename
 503         if len(filename) > 2 and filename[-2:] == '.h':
 504             self.is_header = 1
 505         else:
 506             self.is_header = 0
 507         self.input = open(filename)
 508         self.lexer = CLexer(self.input)
 509         if idx == None:
 510             self.index = index()
 511         else:
 512             self.index = idx
 513         self.top_comment = ""
 514         self.last_comment = ""
 515         self.comment = None
 516         self.collect_ref = 0
 517         self.no_error = 0
 518
    def collect_references(self):
        """Set the collect_ref flag (initialised to 0 by __init__) to 1."""
        self.collect_ref = 1
 521
    def stop_error(self):
        """Silence warning() and error(): both return at once while
        no_error is set."""
        self.no_error = 1
 524
    def start_error(self):
        """Re-enable the reports of warning() and error()."""
        self.no_error = 0
 527
    def lineno(self):
        """Return the current line number as reported by the lexer."""
        return self.lexer.getlineno()
 530
    def index_add(self, name, module, static, type, info=None, extra = None):
        """Register a symbol in the index, stamped with the current line.

        The value returned by index.add() is not passed on; this method
        returns None.
        """
        self.index.add(name, module, static, type, self.lineno(),
                       info, extra)
 534
    def index_add_ref(self, name, module, static, type, info=None,
                      extra = None):
        """Register a reference to a symbol in the index, stamped with the
        current line."""
        self.index.add_ref(name, module, static, type, self.lineno(),
                       info, extra)
 539
 540     def warning(self, msg):
 541         if self.no_error:
 542             return
 543         print msg
 544
 545     def error(self, msg, token=-1):
 546         if self.no_error:
 547             return
 548
 549         print "Parse Error: " + msg
 550         if token != -1:
 551             print "Got token ", token
 552         self.lexer.debug()
 553         sys.exit(1)
 554
 555     def debug(self, msg, token=-1):
 556         print "Debug: " + msg
 557         if token != -1:
 558             print "Got token ", token
 559         self.lexer.debug()
 560
 561     def parseTopComment(self, comment):
 562         res = {}
 563         lines = string.split(comment, "\n")
 564         item = None
 565         for line in lines:
 566             while line != "" and (line[0] == ' ' or line[0] == '\t'):
 567                 line = line[1:]
 568             while line != "" and line[0] == '*':
 569                 line = line[1:]
 570             while line != "" and (line[0] == ' ' or line[0] == '\t'):
 571                 line = line[1:]
 572             try:
 573                 (it, line) = string.split(line, ":", 1)
 574                 item = it
 575                 while line != "" and (line[0] == ' ' or line[0] == '\t'):
 576                     line = line[1:]
 577                 if res.has_key(item):
 578                     res[item] = res[item] + " " + line
 579                 else:
 580                     res[item] = line
 581             except:
 582                 if item != None:
 583                     if res.has_key(item):
 584                         res[item] = res[item] + " " + line
 585                     else:
 586                         res[item] = line
 587         self.index.info = res
 588
 589     def parseComment(self, token):
 590         if self.top_comment == "":
 591             self.top_comment = token[1]
 592         if self.comment == None or token[1][0] == '*':
 593             self.comment = token[1];
 594         else:
 595             self.comment = self.comment + token[1]
 596         token = self.lexer.token()
 597
 598         if string.find(self.comment, "DOC_DISABLE") != -1:
 599             self.stop_error()
 600
 601         if string.find(self.comment, "DOC_ENABLE") != -1:
 602             self.start_error()
 603
 604         return token
 605
 606      #
 607      # Parse a comment block associated with a macro
 608      #
 609     def parseMacroComment(self, name, quiet = 0):
 610         if name[0:2] == '__':
 611             quiet = 1
 612
 613         args = []
 614         desc = ""
 615
 616         if self.comment == None:
 617             if not quiet:
 618                 self.warning("Missing comment for macro %s" % (name))
 619             return((args, desc))
 620         if self.comment[0] != '*':
 621             if not quiet:
 622                 self.warning("Missing * in macro comment for %s" % (name))
 623             return((args, desc))
 624         lines = string.split(self.comment, '\n')
 625         if lines[0] == '*':
 626             del lines[0]
 627         if lines[0] != "* %s:" % (name):
 628             if not quiet:
 629                 self.warning("Misformatted macro comment for %s" % (name))
 630                 self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
 631             return((args, desc))
 632         del lines[0]
 633         while lines[0] == '*':
 634             del lines[0]
 635         while len(lines) > 0 and lines[0][0:3] == '* @':
 636             l = lines[0][3:]
 637             try:
 638                 (arg, desc) = string.split(l, ':', 1)
 639                 desc=string.strip(desc)
 640                 arg=string.strip(arg)
 641             except:
 642                 if not quiet:
 643                     self.warning("Misformatted macro comment for %s" % (name))
 644                     self.warning("  problem with '%s'" % (lines[0]))
 645                 del lines[0]
 646                 continue
 647             del lines[0]
 648             l = string.strip(lines[0])
 649             while len(l) > 2 and l[0:3] != '* @':
 650                 while l[0] == '*':
 651                     l = l[1:]
 652                 desc = desc + ' ' + string.strip(l)
 653                 del lines[0]
 654                 if len(lines) == 0:
 655                     break
 656                 l = lines[0]
 657             args.append((arg, desc))
 658         while len(lines) > 0 and lines[0] == '*':
 659             del lines[0]
 660         desc = ""
 661         while len(lines) > 0:
 662             l = lines[0]
 663             while len(l) > 0 and l[0] == '*':
 664                 l = l[1:]
 665             l = string.strip(l)
 666             desc = desc + " " + l
 667             del lines[0]
 668
 669         desc = string.strip(desc)
 670
 671         if quiet == 0:
 672             if desc == "":
 673                 self.warning("Macro comment for %s lack description of the macro" % (name))
 674
 675         return((args, desc))
 676
 677      #
 678      # Parse a comment block and merge the information found in the
 679      # parameter descriptions; finally return a block that is as
 680      # complete as possible
 681      #
 682     def mergeFunctionComment(self, name, description, quiet = 0):
 683         if name == 'main':
 684             quiet = 1
 685         if name[0:2] == '__':
 686             quiet = 1
 687
 688         (ret, args) = description
 689         desc = ""
 690         retdesc = ""
 691
 692         if self.comment == None:
 693             if not quiet:
 694                 self.warning("Missing comment for function %s" % (name))
 695             return(((ret[0], retdesc), args, desc))
 696         if self.comment[0] != '*':
 697             if not quiet:
 698                 self.warning("Missing * in function comment for %s" % (name))
 699             return(((ret[0], retdesc), args, desc))
 700         lines = string.split(self.comment, '\n')
 701         if lines[0] == '*':
 702             del lines[0]
 703         if lines[0] != "* %s:" % (name):
 704             if not quiet:
 705                 self.warning("Misformatted function comment for %s" % (name))
 706                 self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
 707             return(((ret[0], retdesc), args, desc))
 708         del lines[0]
 709         while len(lines) > 0 and lines[0] == '*':
 710             del lines[0]
 711         nbargs = len(args)
 712         while len(lines) > 0 and lines[0][0:3] == '* @':
 713             l = lines[0][3:]
 714             try:
 715                 (arg, desc) = string.split(l, ':', 1)
 716                 desc=string.strip(desc)
 717                 arg=string.strip(arg)
 718             except:
 719                 if not quiet:
 720                     self.warning("Misformatted function comment for %s" % (name))
 721                     self.warning("  problem with '%s'" % (lines[0]))
 722                 del lines[0]
 723                 continue
 724             del lines[0]
 725             l = string.strip(lines[0])
 726             while len(l) > 2 and l[0:3] != '* @':
 727                 while l[0] == '*':
 728                     l = l[1:]
 729                 desc = desc + ' ' + string.strip(l)
 730                 del lines[0]
 731                 if len(lines) == 0:
 732                     break
 733                 l = lines[0]
 734             i = 0
 735             while i < nbargs:
 736                 if args[i][1] == arg:
 737                     args[i] = (args[i][0], arg, desc)
 738                     break;
 739                 i = i + 1
 740             if i >= nbargs:
 741                 if not quiet:
 742                     self.warning("Unable to find arg %s from function comment for %s" % (
 743                        arg, name))
 744         while len(lines) > 0 and lines[0] == '*':
 745             del lines[0]
 746         desc = ""
 747         while len(lines) > 0:
 748             l = lines[0]
 749             while len(l) > 0 and l[0] == '*':
 750                 l = l[1:]
 751             l = string.strip(l)
 752             if len(l) >= 6 and  l[0:6] == "return" or l[0:6] == "Return":
 753                 try:
 754                     l = string.split(l, ' ', 1)[1]
 755                 except:
 756                     l = ""
 757                 retdesc = string.strip(l)
 758                 del lines[0]
 759                 while len(lines) > 0:
 760                     l = lines[0]
 761                     while len(l) > 0 and l[0] == '*':
 762                         l = l[1:]
 763                     l = string.strip(l)
 764                     retdesc = retdesc + " " + l
 765                     del lines[0]
 766             else:
 767                 desc = desc + " " + l
 768                 del lines[0]
 769
 770         retdesc = string.strip(retdesc)
 771         desc = string.strip(desc)
 772
 773         if quiet == 0:
 774              #
 775              # report missing comments
 776              #
 777             i = 0
 778             while i < nbargs:
 779                 if args[i][2] == None and args[i][0] != "void" and args[i][1] != None:
 780                     self.warning("Function comment for %s lack description of arg %s" % (name, args[i][1]))
 781                 i = i + 1
 782             if retdesc == "" and ret[0] != "void":
 783                 self.warning("Function comment for %s lack description of return value" % (name))
 784             if desc == "":
 785                 self.warning("Function comment for %s lack description of the function" % (name))
 786
 787
 788         return(((ret[0], retdesc), args, desc))
 789
 790     def parsePreproc(self, token):
 791         name = token[1]
 792         if name == "#include":
 793             token = self.lexer.token()
 794             if token == None:
 795                 return None
 796             if token[0] == 'preproc':
 797                 self.index_add(token[1], self.filename, not self.is_header,
 798                                 "include")
 799                 return self.lexer.token()
 800             return token
 801         if name == "#define":
 802             token = self.lexer.token()
 803             if token == None:
 804                 return None
 805             if token[0] == 'preproc':
 806                  # TODO macros with arguments
 807                 name = token[1]
 808                 lst = []
 809                 token = self.lexer.token()
 810                 while token != None and token[0] == 'preproc' and \
 811                       token[1][0] != '#':
 812                     lst.append(token[1])
 813                     token = self.lexer.token()
 814                 try:
 815                     name = string.split(name, '(') [0]
 816                 except:
 817                     pass
 818                 info = self.parseMacroComment(name, not self.is_header)
 819                 self.index_add(name, self.filename, not self.is_header,
 820                                 "macro", info)
 821                 return token
 822         token = self.lexer.token()
 823         while token != None and token[0] == 'preproc' and \
 824             token[1][0] != '#':
 825             token = self.lexer.token()
 826         return token
 827
 828      #
 829      # token acquisition on top of the lexer, it handle internally
 830      # preprocessor and comments since they are logically not part of
 831      # the program structure.
 832      #
 833     def token(self):
 834         global ignored_words
 835
 836         token = self.lexer.token()
 837         while token != None:
 838             if token[0] == 'comment':
 839                 token = self.parseComment(token)
 840                 continue
 841             elif token[0] == 'preproc':
 842                 token = self.parsePreproc(token)
 843                 continue
 844             elif token[0] == "name" and ignored_words.has_key(token[1]):
 845                 (n, info) = ignored_words[token[1]]
 846                 i = 0
 847                 while i < n:
 848                     token = self.lexer.token()
 849                     i = i + 1
 850                 token = self.lexer.token()
 851                 continue
 852             else:
 853                 if debug:
 854                     print "=> ", token
 855                 return token
 856         return None
 857
 858      #
 859      # Parse a typedef, it records the type and its name.
 860      #
 861     def parseTypedef(self, token):
 862         if token == None:
 863             return None
 864         token = self.parseType(token)
 865         if token == None:
 866             self.error("parsing typedef")
 867             return None
 868         base_type = self.type
 869         type = base_type
 870          #self.debug("end typedef type", token)
 871         while token != None:
 872             if token[0] == "name":
 873                 name = token[1]
 874                 signature = self.signature
 875                 if signature != None:
 876                     type = string.split(type, '(')[0]
 877                     d = self.mergeFunctionComment(name,
 878                             ((type, None), signature), 1)
 879                     self.index_add(name, self.filename, not self.is_header,
 880                                     "functype", d)
 881                 else:
 882                     if base_type == "struct":
 883                         self.index_add(name, self.filename, not self.is_header,
 884                                         "struct", type)
 885                         base_type = "struct " + name
 886                     else:
 887                         self.index_add(name, self.filename, not self.is_header,
 888                                     "typedef", type)
 889                 token = self.token()
 890             else:
 891                 self.error("parsing typedef: expecting a name")
 892                 return token
 893              #self.debug("end typedef", token)
 894             if token != None and token[0] == 'sep' and token[1] == ',':
 895                 type = base_type
 896                 token = self.token()
 897                 while token != None and token[0] == "op":
 898                     type = type + token[1]
 899                     token = self.token()
 900             elif token != None and token[0] == 'sep' and token[1] == ';':
 901                 break;
 902             elif token != None and token[0] == 'name':
 903                 type = base_type
 904                 continue;
 905             else:
 906                 self.error("parsing typedef: expecting ';'", token)
 907                 return token
 908         token = self.token()
 909         return token
 910
 911      #
 912      # Parse a C code block, used for functions it parse till
 913      # the balancing } included
 914      #
 915     def parseBlock(self, token):
 916         while token != None:
 917             if token[0] == "sep" and token[1] == "{":
 918                 token = self.token()
 919                 token = self.parseBlock(token)
 920             elif token[0] == "sep" and token[1] == "}":
 921                 self.comment = None
 922                 token = self.token()
 923                 return token
 924             else:
 925                 if self.collect_ref == 1:
 926                     oldtok = token
 927                     token = self.token()
 928                     if oldtok[0] == "name" and oldtok[1][0:3] == "xml":
 929                         if token[0] == "sep" and token[1] == "(":
 930                             self.index_add_ref(oldtok[1], self.filename,
 931                                                 0, "function")
 932                             token = self.token()
 933                         elif token[0] == "name":
 934                             token = self.token()
 935                             if token[0] == "sep" and (token[1] == ";" or
 936                                token[1] == "," or token[1] == "="):
 937                                 self.index_add_ref(oldtok[1], self.filename,
 938                                                     0, "type")
 939                     elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_":
 940                         self.index_add_ref(oldtok[1], self.filename,
 941                                             0, "typedef")
 942                     elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_":
 943                         self.index_add_ref(oldtok[1], self.filename,
 944                                             0, "typedef")
 945
 946                 else:
 947                     token = self.token()
 948         return token
 949
 950      #
 951      # Parse a C struct definition till the balancing }
 952      #
 953     def parseStruct(self, token):
 954         fields = []
 955          #self.debug("start parseStruct", token)
 956         while token != None:
 957             if token[0] == "sep" and token[1] == "{":
 958                 token = self.token()
 959                 token = self.parseTypeBlock(token)
 960             elif token[0] == "sep" and token[1] == "}":
 961                 self.struct_fields = fields
 962                  #self.debug("end parseStruct", token)
 963                  #print fields
 964                 token = self.token()
 965                 return token
 966             else:
 967                 base_type = self.type
 968                  #self.debug("before parseType", token)
 969                 token = self.parseType(token)
 970                  #self.debug("after parseType", token)
 971                 if token != None and token[0] == "name":
 972                     fname = token[1]
 973                     token = self.token()
 974                     if token[0] == "sep" and token[1] == ";":
 975                         self.comment = None
 976                         token = self.token()
 977                         fields.append((self.type, fname, self.comment))
 978                         self.comment = None
 979                     else:
 980                         self.error("parseStruct: expecting ;", token)
 981                 elif token != None and token[0] == "sep" and token[1] == "{":
 982                     token = self.token()
 983                     token = self.parseTypeBlock(token)
 984                     if token != None and token[0] == "name":
 985                         token = self.token()
 986                     if token != None and token[0] == "sep" and token[1] == ";":
 987                         token = self.token()
 988                     else:
 989                         self.error("parseStruct: expecting ;", token)
 990                 else:
 991                     self.error("parseStruct: name", token)
 992                     token = self.token()
 993                 self.type = base_type;
 994         self.struct_fields = fields
 995          #self.debug("end parseStruct", token)
 996          #print fields
 997         return token
 998
 999      #
1000      # Parse a C enum block, parse till the balancing }
1001      #
1002     def parseEnumBlock(self, token):
1003         self.enums = []
1004         name = None
1005         self.comment = None
1006         comment = ""
1007         value = "0"
1008         while token != None:
1009             if token[0] == "sep" and token[1] == "{":
1010                 token = self.token()
1011                 token = self.parseTypeBlock(token)
1012             elif token[0] == "sep" and token[1] == "}":
1013                 if name != None:
1014                     if self.comment != None:
1015                         comment = self.comment
1016                         self.comment = None
1017                     self.enums.append((name, value, comment))
1018                 token = self.token()
1019                 return token
1020             elif token[0] == "name":
1021                     if name != None:
1022                         if self.comment != None:
1023                             comment = string.strip(self.comment)
1024                             self.comment = None
1025                         self.enums.append((name, value, comment))
1026                     name = token[1]
1027                     comment = ""
1028                     token = self.token()
1029                     if token[0] == "op" and token[1][0] == "=":
1030                         value = ""
1031                         if len(token[1]) > 1:
1032                             value = token[1][1:]
1033                         token = self.token()
1034                         while token[0] != "sep" or (token[1] != ',' and
1035                               token[1] != '}'):
1036                             value = value + token[1]
1037                             token = self.token()
1038                     else:
1039                         try:
1040                             value = "%d" % (int(value) + 1)
1041                         except:
1042                             self.warning("Failed to compute value of enum %s" % (name))
1043                             value=""
1044                     if token[0] == "sep" and token[1] == ",":
1045                         token = self.token()
1046             else:
1047                 token = self.token()
1048         return token
1049
1050      #
1051      # Parse a C definition block, used for structs it parse till
1052      # the balancing }
1053      #
1054     def parseTypeBlock(self, token):
1055         while token != None:
1056             if token[0] == "sep" and token[1] == "{":
1057                 token = self.token()
1058                 token = self.parseTypeBlock(token)
1059             elif token[0] == "sep" and token[1] == "}":
1060                 token = self.token()
1061                 return token
1062             else:
1063                 token = self.token()
1064         return token
1065
1066      #
1067      # Parse a type: the fact that the type name can either occur after
1068      #    the definition or within the definition makes it a little harder
1069      #    if inside, the name token is pushed back before returning
1070      #
1071     def parseType(self, token):
1072         self.type = ""
1073         self.struct_fields = []
1074         self.signature = None
1075         if token == None:
1076             return token
1077
1078         while token[0] == "name" and (
1079               token[1] == "const" or token[1] == "unsigned" or
1080               token[1] == "signed"):
1081             if self.type == "":
1082                 self.type = token[1]
1083             else:
1084                 self.type = self.type + " " + token[1]
1085             token = self.token()
1086
1087         if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
1088             if self.type == "":
1089                 self.type = token[1]
1090             else:
1091                 self.type = self.type + " " + token[1]
1092             if token[0] == "name" and token[1] == "int":
1093                 if self.type == "":
1094                     self.type = tmp[1]
1095                 else:
1096                     self.type = self.type + " " + tmp[1]
1097
1098         elif token[0] == "name" and token[1] == "struct":
1099             if self.type == "":
1100                 self.type = token[1]
1101             else:
1102                 self.type = self.type + " " + token[1]
1103             token = self.token()
1104             nametok = None
1105             if token[0] == "name":
1106                 nametok = token
1107                 token = self.token()
1108             if token != None and token[0] == "sep" and token[1] == "{":
1109                 token = self.token()
1110                 token = self.parseStruct(token)
1111             elif token != None and token[0] == "op" and token[1] == "*":
1112                 self.type = self.type + " " + nametok[1] + " *"
1113                 token = self.token()
1114                 while token != None and token[0] == "op" and token[1] == "*":
1115                     self.type = self.type + " *"
1116                     token = self.token()
1117                 if token[0] == "name":
1118                     nametok = token
1119                     token = self.token()
1120                 else:
1121                     self.error("struct : expecting name", token)
1122                     return token
1123             elif token != None and token[0] == "name" and nametok != None:
1124                 self.type = self.type + " " + nametok[1]
1125                 return token
1126
1127             if nametok != None:
1128                 self.lexer.push(token)
1129                 token = nametok
1130             return token
1131
1132         elif token[0] == "name" and token[1] == "enum":
1133             if self.type == "":
1134                 self.type = token[1]
1135             else:
1136                 self.type = self.type + " " + token[1]
1137             self.enums = []
1138             token = self.token()
1139             if token != None and token[0] == "sep" and token[1] == "{":
1140                 token = self.token()
1141                 token = self.parseEnumBlock(token)
1142             else:
1143                 self.error("parsing enum: expecting '{'", token)
1144             enum_type = None
1145             if token != None and token[0] != "name":
1146                 self.lexer.push(token)
1147                 token = ("name", "enum")
1148             else:
1149                 enum_type = token[1]
1150             for enum in self.enums:
1151                 self.index_add(enum[0], self.filename,
1152                                not self.is_header, "enum",
1153                                (enum[1], enum[2], enum_type))
1154             return token
1155
1156         elif token[0] == "name":
1157             if self.type == "":
1158                 self.type = token[1]
1159             else:
1160                 self.type = self.type + " " + token[1]
1161         else:
1162             self.error("parsing type %s: expecting a name" % (self.type),
1163                        token)
1164             return token
1165         token = self.token()
1166         while token != None and (token[0] == "op" or
1167               token[0] == "name" and token[1] == "const"):
1168             self.type = self.type + " " + token[1]
1169             token = self.token()
1170
1171          #
1172          # if there is a parenthesis here, this means a function type
1173          #
1174         if token != None and token[0] == "sep" and token[1] == '(':
1175             self.type = self.type + token[1]
1176             token = self.token()
1177             while token != None and token[0] == "op" and token[1] == '*':
1178                 self.type = self.type + token[1]
1179                 token = self.token()
1180             if token == None or token[0] != "name" :
1181                 self.error("parsing function type, name expected", token);
1182                 return token
1183             self.type = self.type + token[1]
1184             nametok = token
1185             token = self.token()
1186             if token != None and token[0] == "sep" and token[1] == ')':
1187                 self.type = self.type + token[1]
1188                 token = self.token()
1189                 if token != None and token[0] == "sep" and token[1] == '(':
1190                     token = self.token()
1191                     type = self.type;
1192                     token = self.parseSignature(token);
1193                     self.type = type;
1194                 else:
1195                     self.error("parsing function type, '(' expected", token);
1196                     return token
1197             else:
1198                 self.error("parsing function type, ')' expected", token);
1199                 return token
1200             self.lexer.push(token)
1201             token = nametok
1202             return token
1203
1204          #
1205          # do some lookahead for arrays
1206          #
1207         if token != None and token[0] == "name":
1208             nametok = token
1209             token = self.token()
1210             if token != None and token[0] == "sep" and token[1] == '[':
1211                 self.type = self.type + nametok[1]
1212                 while token != None and token[0] == "sep" and token[1] == '[':
1213                     self.type = self.type + token[1]
1214                     token = self.token()
1215                     while token != None and token[0] != 'sep' and \
1216                           token[1] != ']' and token[1] != ';':
1217                         self.type = self.type + token[1]
1218                         token = self.token()
1219                 if token != None and token[0] == 'sep' and token[1] == ']':
1220                     self.type = self.type + token[1]
1221                     token = self.token()
1222                 else:
1223                     self.error("parsing array type, ']' expected", token);
1224                     return token
1225             elif token != None and token[0] == "sep" and token[1] == ':':
1226                  # remove :12 in case it's a limited int size
1227                 token = self.token()
1228                 token = self.token()
1229             self.lexer.push(token)
1230             token = nametok
1231
1232         return token
1233
1234      #
1235      # Parse a signature: '(' has been parsed and we scan the type definition
1236      #    up to the ')' included
1237     def parseSignature(self, token):
1238         signature = []
1239         if token != None and token[0] == "sep" and token[1] == ')':
1240             self.signature = []
1241             token = self.token()
1242             return token
1243         while token != None:
1244             token = self.parseType(token)
1245             if token != None and token[0] == "name":
1246                 signature.append((self.type, token[1], None))
1247                 token = self.token()
1248             elif token != None and token[0] == "sep" and token[1] == ',':
1249                 token = self.token()
1250                 continue
1251             elif token != None and token[0] == "sep" and token[1] == ')':
1252                  # only the type was provided
1253                 if self.type == "...":
1254                     signature.append((self.type, "...", None))
1255                 else:
1256                     signature.append((self.type, None, None))
1257             if token != None and token[0] == "sep":
1258                 if token[1] == ',':
1259                     token = self.token()
1260                     continue
1261                 elif token[1] == ')':
1262                     token = self.token()
1263                     break
1264         self.signature = signature
1265         return token
1266
1267      #
1268      # Parse a global definition, be it a type, variable or function
1269      # the extern "C" blocks are a bit nasty and require it to recurse.
1270      #
    def parseGlobal(self, token):
        """Parse one top level definition: a type, a variable or a function.

        token is the first token of the definition.  An 'extern "C" {'
        block is handled by calling parseGlobal() again for each
        definition it contains.  A typedef is delegated to
        self.parseTypedef(); anything else is read with self.parseType()
        and the declarators that follow are registered with
        self.index_add() as "struct", "variable" or "function".

        Returns the token following the definition, or None at end of
        input.
        """
        # set to 1 when the definition carries the 'static' keyword; it is
        # the third argument of index_add() for functions and for the
        # variables of a comma separated list
        static = 0
        if token[1] == 'extern':
            token = self.token()
            if token == None:
                return token
            if token[0] == 'string':
                if token[1] == 'C':
                    token = self.token()
                    if token == None:
                        return token
                    if token[0] == 'sep' and token[1] == "{":
                        token = self.token()
#                        print 'Entering extern "C line ', self.lineno()
                        # parse every definition up to the closing '}'
                        while token != None and (token[0] != 'sep' or
                              token[1] != "}"):
                            if token[0] == 'name':
                                token = self.parseGlobal(token)
                            else:
                                self.error(
                                 "token %s %s unexpected at the top level" % (
                                        token[0], token[1]))
                                token = self.parseGlobal(token)
#                        print 'Exiting extern "C" line', self.lineno()
                        token = self.token()
                        return token
                else:
                    # extern followed by a string other than "C"
                    return token
        elif token[1] == 'static':
            static = 1
            token = self.token()
            if token == None or  token[0] != 'name':
                return token

        if token[1] == 'typedef':
            token = self.token()
            return self.parseTypedef(token)
        else:
            token = self.parseType(token)
            # kept so that each declarator of a comma separated list can
            # restart from the base type
            type_orig = self.type
        if token == None or token[0] != "name":
            return token
        type = type_orig
        self.name = token[1]
        token = self.token()
        while token != None and (token[0] == "sep" or token[0] == "op"):
            if token[0] == "sep":
                if token[1] == "[":
                    # array: everything up to the ';' is appended to the type
                    type = type + token[1]
                    token = self.token()
                    while token != None and (token[0] != "sep" or \
                          token[1] != ";"):
                        type = type + token[1]
                        token = self.token()

            if token != None and token[0] == "op" and token[1] == "=":
                #
                # Skip the initialization of the variable
                #
                token = self.token()
                if token[0] == 'sep' and token[1] == '{':
                    token = self.token()
                    token = self.parseBlock(token)
                else:
                    self.comment = None
                    while token != None and (token[0] != "sep" or \
                          (token[1] != ';' and token[1] != ',')):
                            token = self.token()
                self.comment = None
                if token == None or token[0] != "sep" or (token[1] != ';' and
                   token[1] != ','):
                    self.error("missing ';' or ',' after value")

            if token != None and token[0] == "sep":
                if token[1] == ";":
                    # end of a variable (or struct) definition
                    self.comment = None
                    token = self.token()
                    if type == "struct":
                        self.index_add(self.name, self.filename,
                             not self.is_header, "struct", self.struct_fields)
                    else:
                        self.index_add(self.name, self.filename,
                             not self.is_header, "variable", type)
                    break
                elif token[1] == "(":
                    # function: a prototype ends with ';', a definition
                    # is followed by its body
                    token = self.token()
                    token = self.parseSignature(token)
                    if token == None:
                        return None
                    if token[0] == "sep" and token[1] == ";":
                        d = self.mergeFunctionComment(self.name,
                                ((type, None), self.signature), 1)
                        self.index_add(self.name, self.filename, static,
                                        "function", d)
                        token = self.token()
                    elif token[0] == "sep" and token[1] == "{":
                        d = self.mergeFunctionComment(self.name,
                                ((type, None), self.signature), static)
                        self.index_add(self.name, self.filename, static,
                                        "function", d)
                        token = self.token()
                        token = self.parseBlock(token);
                elif token[1] == ',':
                    # more declarators follow: register this one, then
                    # rebuild the type and read the next name
                    self.comment = None
                    self.index_add(self.name, self.filename, static,
                                    "variable", type)
                    type = type_orig
                    token = self.token()
                    while token != None and token[0] == "sep":
                        type = type + token[1]
                        token = self.token()
                    if token != None and token[0] == "name":
                        self.name = token[1]
                        token = self.token()
                else:
                    break

        return token
1389
1390     def parse(self):
1391         self.warning("Parsing %s" % (self.filename))
1392         token = self.token()
1393         while token != None:
1394             if token[0] == 'name':
1395                 token = self.parseGlobal(token)
1396             else:
1397                 self.error("token %s %s unexpected at the top level" % (
1398                        token[0], token[1]))
1399                 token = self.parseGlobal(token)
1400                 return
1401         self.parseTopComment(self.top_comment)
1402         return self.index
1403
1404
1405 class docBuilder:
1406     """A documentation builder"""
1407     def __init__(self, name, directories=['.'], excludes=[]):
1408         self.name = name
1409         self.directories = directories
1410         self.excludes = excludes + ignored_files.keys()
1411         self.modules = {}
1412         self.headers = {}
1413         self.idx = index()
1414         self.xref = {}
1415         self.index = {}
1416         if name == 'libxml2':
1417             self.basename = 'libxml'
1418         else:
1419             self.basename = name
1420
1421     def indexString(self, id, str):
1422         if str == None:
1423             return
1424         str = string.replace(str, "'", ' ')
1425         str = string.replace(str, '"', ' ')
1426         str = string.replace(str, "/", ' ')
1427         str = string.replace(str, '*', ' ')
1428         str = string.replace(str, "[", ' ')
1429         str = string.replace(str, "]", ' ')
1430         str = string.replace(str, "(", ' ')
1431         str = string.replace(str, ")", ' ')
1432         str = string.replace(str, "<", ' ')
1433         str = string.replace(str, '>', ' ')
1434         str = string.replace(str, "&", ' ')
1435         str = string.replace(str, '#', ' ')
1436         str = string.replace(str, ",", ' ')
1437         str = string.replace(str, '.', ' ')
1438         str = string.replace(str, ';', ' ')
1439         tokens = string.split(str)
1440         for token in tokens:
1441             try:
1442                 c = token[0]
1443                 if string.find(string.letters, c) < 0:
1444                     pass
1445                 elif len(token) < 3:
1446                     pass
1447                 else:
1448                     lower = string.lower(token)
1449                     # TODO: generalize this a bit
1450                     if lower == 'and' or lower == 'the':
1451                         pass
1452                     elif self.xref.has_key(token):
1453                         self.xref[token].append(id)
1454                     else:
1455                         self.xref[token] = [id]
1456             except:
1457                 pass
1458
1459     def analyze(self):
1460         print "Project %s : %d headers, %d modules" % (self.name, len(self.headers.keys()), len(self.modules.keys()))
1461         self.idx.analyze()
1462
1463     def scanHeaders(self):
1464         for header in self.headers.keys():
1465             parser = CParser(header)
1466             idx = parser.parse()
1467             self.headers[header] = idx;
1468             self.idx.merge(idx)
1469
1470     def scanModules(self):
1471         for module in self.modules.keys():
1472             parser = CParser(module)
1473             idx = parser.parse()
1474             # idx.analyze()
1475             self.modules[module] = idx
1476             self.idx.merge_public(idx)
1477
1478     def scan(self):
1479         for directory in self.directories:
1480             files = glob.glob(directory + "/*.c")
1481             for file in files:
1482                 skip = 0
1483                 for excl in self.excludes:
1484                     if string.find(file, excl) != -1:
1485                         skip = 1;
1486                         break
1487                 if skip == 0:
1488                     self.modules[file] = None;
1489             files = glob.glob(directory + "/*.h")
1490             for file in files:
1491                 skip = 0
1492                 for excl in self.excludes:
1493                     if string.find(file, excl) != -1:
1494                         skip = 1;
1495                         break
1496                 if skip == 0:
1497                     self.headers[file] = None;
1498         self.scanHeaders()
1499         self.scanModules()
1500
1501     def modulename_file(self, file):
1502         module = os.path.basename(file)
1503         if module[-2:] == '.h':
1504             module = module[:-2]
1505         return module
1506
1507     def serialize_enum(self, output, name):
1508         id = self.idx.enums[name]
1509         output.write("    <enum name='%s' file='%s'" % (name,
1510                      self.modulename_file(id.module)))
1511         if id.info != None:
1512             info = id.info
1513             if info[0] != None and info[0] != '':
1514                 try:
1515                     val = eval(info[0])
1516                 except:
1517                     val = info[0]
1518                 output.write(" value='%s'" % (val));
1519             if info[2] != None and info[2] != '':
1520                 output.write(" type='%s'" % info[2]);
1521             if info[1] != None and info[1] != '':
1522                 output.write(" info='%s'" % escape(info[1]));
1523         output.write("/>\n")
1524
1525     def serialize_macro(self, output, name):
1526         id = self.idx.macros[name]
1527         output.write("    <macro name='%s' file='%s'>\n" % (name,
1528                      self.modulename_file(id.module)))
1529         if id.info != None:
1530             try:
1531                 (args, desc) = id.info
1532                 if desc != None and desc != "":
1533                     output.write("      <info>%s</info>\n" % (escape(desc)))
1534                     self.indexString(name, desc)
1535                 for arg in args:
1536                     (name, desc) = arg
1537                     if desc != None and desc != "":
1538                         output.write("      <arg name='%s' info='%s'/>\n" % (
1539                                      name, escape(desc)))
1540                         self.indexString(name, desc)
1541                     else:
1542                         output.write("      <arg name='%s'/>\n" % (name))
1543             except:
1544                 pass
1545         output.write("    </macro>\n")
1546
1547     def serialize_typedef(self, output, name):
1548         id = self.idx.typedefs[name]
1549         if id.info[0:7] == 'struct ':
1550             output.write("    <struct name='%s' file='%s' type='%s'" % (
1551                      name, self.modulename_file(id.module), id.info))
1552             name = id.info[7:]
1553             if self.idx.structs.has_key(name) and ( \
1554                type(self.idx.structs[name].info) == type(()) or
1555                 type(self.idx.structs[name].info) == type([])):
1556                 output.write(">\n");
1557                 try:
1558                     for field in self.idx.structs[name].info:
1559                         desc = field[2]
1560                         self.indexString(name, desc)
1561                         if desc == None:
1562                             desc = ''
1563                         else:
1564                             desc = escape(desc)
1565                         output.write("      <field name='%s' type='%s' info='%s'/>\n" % (field[1] , field[0], desc))
1566                 except:
1567                     print "Failed to serialize struct %s" % (name)
1568                 output.write("    </struct>\n")
1569             else:
1570                 output.write("/>\n");
1571         else :
1572             output.write("    <typedef name='%s' file='%s' type='%s'/>\n" % (
1573                      name, self.modulename_file(id.module), id.info))
1574
1575     def serialize_variable(self, output, name):
1576         id = self.idx.variables[name]
1577         if id.info != None:
1578             output.write("    <variable name='%s' file='%s' type='%s'/>\n" % (
1579                     name, self.modulename_file(id.module), id.info))
1580         else:
1581             output.write("    <variable name='%s' file='%s'/>\n" % (
1582                     name, self.modulename_file(id.module)))
1583
1584     def serialize_function(self, output, name):
1585         id = self.idx.functions[name]
1586         output.write("    <%s name='%s' file='%s'>\n" % (id.type, name,
1587                      self.modulename_file(id.module)))
1588         try:
1589             (ret, params, desc) = id.info
1590             output.write("      <info>%s</info>\n" % (escape(desc)))
1591             self.indexString(name, desc)
1592             if ret[0] != None:
1593                 if ret[0] == "void":
1594                     output.write("      <return type='void'/>\n")
1595                 else:
1596                     output.write("      <return type='%s' info='%s'/>\n" % (
1597                              ret[0], escape(ret[1])))
1598                     self.indexString(name, ret[1])
1599             for param in params:
1600                 if param[0] == 'void':
1601                     continue
1602                 if param[2] == None:
1603                     output.write("      <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
1604                 else:
1605                     output.write("      <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
1606                     self.indexString(name, param[2])
1607         except:
1608             print "Failed to save function %s info: " % name, `id.info`
1609         output.write("    </%s>\n" % (id.type))
1610
1611     def serialize_exports(self, output, file):
1612         module = self.modulename_file(file)
1613         output.write("    <file name='%s'>\n" % (module))
1614         dict = self.headers[file]
1615         if dict.info != None:
1616             for data in ('Summary', 'Description', 'Author'):
1617                 try:
1618                     output.write("     <%s>%s</%s>\n" % (
1619                                  string.lower(data),
1620                                  escape(dict.info[data]),
1621                                  string.lower(data)))
1622                 except:
1623                     print "Header %s lacks a %s description" % (module, data)
1624             if dict.info.has_key('Description'):
1625                 desc = dict.info['Description']
1626                 if string.find(desc, "DEPRECATED") != -1:
1627                     output.write("     <deprecated/>\n")
1628
1629         ids = dict.macros.keys()
1630         ids.sort()
1631         for id in uniq(ids):
1632             # Macros are sometime used to masquerade other types.
1633             if dict.functions.has_key(id):
1634                 continue
1635             if dict.variables.has_key(id):
1636                 continue
1637             if dict.typedefs.has_key(id):
1638                 continue
1639             if dict.structs.has_key(id):
1640                 continue
1641             if dict.enums.has_key(id):
1642                 continue
1643             output.write("     <exports symbol='%s' type='macro'/>\n" % (id))
1644         ids = dict.enums.keys()
1645         ids.sort()
1646         for id in uniq(ids):
1647             output.write("     <exports symbol='%s' type='enum'/>\n" % (id))
1648         ids = dict.typedefs.keys()
1649         ids.sort()
1650         for id in uniq(ids):
1651             output.write("     <exports symbol='%s' type='typedef'/>\n" % (id))
1652         ids = dict.structs.keys()
1653         ids.sort()
1654         for id in uniq(ids):
1655             output.write("     <exports symbol='%s' type='struct'/>\n" % (id))
1656         ids = dict.variables.keys()
1657         ids.sort()
1658         for id in uniq(ids):
1659             output.write("     <exports symbol='%s' type='variable'/>\n" % (id))
1660         ids = dict.functions.keys()
1661         ids.sort()
1662         for id in uniq(ids):
1663             output.write("     <exports symbol='%s' type='function'/>\n" % (id))
1664         output.write("    </file>\n")
1665
1666     def serialize_xrefs_files(self, output):
1667         headers = self.headers.keys()
1668         headers.sort()
1669         for file in headers:
1670             module = self.modulename_file(file)
1671             output.write("    <file name='%s'>\n" % (module))
1672             dict = self.headers[file]
1673             ids = uniq(dict.functions.keys() + dict.variables.keys() + \
1674                   dict.macros.keys() + dict.typedefs.keys() + \
1675                   dict.structs.keys() + dict.enums.keys())
1676             ids.sort()
1677             for id in ids:
1678                 output.write("      <ref name='%s'/>\n" % (id))
1679             output.write("    </file>\n")
1680         pass
1681
1682     def serialize_xrefs_functions(self, output):
1683         funcs = {}
1684         for name in self.idx.functions.keys():
1685             id = self.idx.functions[name]
1686             try:
1687                 (ret, params, desc) = id.info
1688                 for param in params:
1689                     if param[0] == 'void':
1690                         continue
1691                     if funcs.has_key(param[0]):
1692                         funcs[param[0]].append(name)
1693                     else:
1694                         funcs[param[0]] = [name]
1695             except:
1696                 pass
1697         typ = funcs.keys()
1698         typ.sort()
1699         for type in typ:
1700             if type == '' or type == 'void' or type == "int" or \
1701                type == "char *" or type == "const char *" :
1702                 continue
1703             output.write("    <type name='%s'>\n" % (type))
1704             ids = funcs[type]
1705             ids.sort()
1706             pid = ''    # not sure why we have dups, but get rid of them!
1707             for id in ids:
1708                 if id != pid:
1709                     output.write("      <ref name='%s'/>\n" % (id))
1710                     pid = id
1711             output.write("    </type>\n")
1712
1713     def serialize_xrefs_constructors(self, output):
1714         funcs = {}
1715         for name in self.idx.functions.keys():
1716             id = self.idx.functions[name]
1717             try:
1718                 (ret, params, desc) = id.info
1719                 if ret[0] == "void":
1720                     continue
1721                 if funcs.has_key(ret[0]):
1722                     funcs[ret[0]].append(name)
1723                 else:
1724                     funcs[ret[0]] = [name]
1725             except:
1726                 pass
1727         typ = funcs.keys()
1728         typ.sort()
1729         for type in typ:
1730             if type == '' or type == 'void' or type == "int" or \
1731                type == "char *" or type == "const char *" :
1732                 continue
1733             output.write("    <type name='%s'>\n" % (type))
1734             ids = funcs[type]
1735             ids.sort()
1736             for id in ids:
1737                 output.write("      <ref name='%s'/>\n" % (id))
1738             output.write("    </type>\n")
1739
1740     def serialize_xrefs_alpha(self, output):
1741         letter = None
1742         ids = self.idx.identifiers.keys()
1743         ids.sort()
1744         for id in ids:
1745             if id[0] != letter:
1746                 if letter != None:
1747                     output.write("    </letter>\n")
1748                 letter = id[0]
1749                 output.write("    <letter name='%s'>\n" % (letter))
1750             output.write("      <ref name='%s'/>\n" % (id))
1751         if letter != None:
1752             output.write("    </letter>\n")
1753
1754     def serialize_xrefs_references(self, output):
1755         typ = self.idx.identifiers.keys()
1756         typ.sort()
1757         for id in typ:
1758             idf = self.idx.identifiers[id]
1759             module = idf.module
1760             output.write("    <reference name='%s' href='%s'/>\n" % (id,
1761                          'html/' + self.basename + '-' +
1762                          self.modulename_file(module) + '.html#' +
1763                          id))
1764
1765     def serialize_xrefs_index(self, output):
1766         index = self.xref
1767         typ = index.keys()
1768         typ.sort()
1769         letter = None
1770         count = 0
1771         chunk = 0
1772         chunks = []
1773         for id in typ:
1774             if len(index[id]) > 30:
1775                 continue
1776             if id[0] != letter:
1777                 if letter == None or count > 200:
1778                     if letter != None:
1779                         output.write("      </letter>\n")
1780                         output.write("    </chunk>\n")
1781                         count = 0
1782                         chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
1783                     output.write("    <chunk name='chunk%s'>\n" % (chunk))
1784                     first_letter = id[0]
1785                     chunk = chunk + 1
1786                 elif letter != None:
1787                     output.write("      </letter>\n")
1788                 letter = id[0]
1789                 output.write("      <letter name='%s'>\n" % (letter))
1790             output.write("        <word name='%s'>\n" % (id))
1791             tokens = index[id];
1792             tokens.sort()
1793             tok = None
1794             for token in tokens:
1795                 if tok == token:
1796                     continue
1797                 tok = token
1798                 output.write("          <ref name='%s'/>\n" % (token))
1799                 count = count + 1
1800             output.write("        </word>\n")
1801         if letter != None:
1802             output.write("      </letter>\n")
1803             output.write("    </chunk>\n")
1804             if count != 0:
1805                 chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
1806             output.write("    <chunks>\n")
1807             for ch in chunks:
1808                 output.write("      <chunk name='%s' start='%s' end='%s'/>\n" % (
1809                              ch[0], ch[1], ch[2]))
1810             output.write("    </chunks>\n")
1811
1812     def serialize_xrefs(self, output):
1813         output.write("  <references>\n")
1814         self.serialize_xrefs_references(output)
1815         output.write("  </references>\n")
1816         output.write("  <alpha>\n")
1817         self.serialize_xrefs_alpha(output)
1818         output.write("  </alpha>\n")
1819         output.write("  <constructors>\n")
1820         self.serialize_xrefs_constructors(output)
1821         output.write("  </constructors>\n")
1822         output.write("  <functions>\n")
1823         self.serialize_xrefs_functions(output)
1824         output.write("  </functions>\n")
1825         output.write("  <files>\n")
1826         self.serialize_xrefs_files(output)
1827         output.write("  </files>\n")
1828         output.write("  <index>\n")
1829         self.serialize_xrefs_index(output)
1830         output.write("  </index>\n")
1831
1832     def serialize(self, outdir):
1833         filename = outdir + "%s-api.xml" % self.name
1834         print "Saving XML description %s" % (filename)
1835         output = open(filename, "w")
1836         output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
1837         output.write("<api name='%s'>\n" % self.name)
1838         output.write("  <files>\n")
1839         headers = self.headers.keys()
1840         headers.sort()
1841         for file in headers:
1842             self.serialize_exports(output, file)
1843         output.write("  </files>\n")
1844         output.write("  <symbols>\n")
1845         macros = self.idx.macros.keys()
1846         macros.sort()
1847         for macro in macros:
1848             self.serialize_macro(output, macro)
1849         enums = self.idx.enums.keys()
1850         enums.sort()
1851         for enum in enums:
1852             self.serialize_enum(output, enum)
1853         typedefs = self.idx.typedefs.keys()
1854         typedefs.sort()
1855         for typedef in typedefs:
1856             self.serialize_typedef(output, typedef)
1857         variables = self.idx.variables.keys()
1858         variables.sort()
1859         for variable in variables:
1860             self.serialize_variable(output, variable)
1861         functions = self.idx.functions.keys()
1862         functions.sort()
1863         for function in functions:
1864             self.serialize_function(output, function)
1865         output.write("  </symbols>\n")
1866         output.write("</api>\n")
1867         output.close()
1868
1869         filename = outdir + "%s-refs.xml" % self.name
1870         print "Saving XML Cross References %s" % (filename)
1871         output = open(filename, "w")
1872         output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
1873         output.write("<apirefs name='%s'>\n" % self.name)
1874         self.serialize_xrefs(output)
1875         output.write("</apirefs>\n")
1876         output.close()
1877
1878
def rebuild():
    """Guess which library is being documented and rebuild its description.

    The layout is recognised from the first marker file found, looked up
    relative to the current directory.  The matching docBuilder is run
    (scan, analyze, serialize into "./") and returned; None is returned,
    after printing a message, when no marker file is found.  When
    ../libexslt/exslt.c also exists, a second description is built for
    libexslt and serialized into "EXSLT/".
    """
    # (marker file, message, name, directories to scan, excluded names)
    layouts = (
        ("parser.c",
         "Rebuilding API description for libxml2",
         "libxml2", [".", "."],
         ["xmlwin32version.h", "tst.c"]),
        ("../parser.c",
         "Rebuilding API description for libxml2",
         "libxml2", ["..", "../include/libxml"],
         ["xmlwin32version.h", "tst.c"]),
        ("../libxslt/transform.c",
         "Rebuilding API description for libxslt",
         "libxslt", ["../libxslt"],
         ["win32config.h", "libxslt.h", "tst.c"]),
    )
    builder = None
    for marker, message, name, directories, excludes in layouts:
        if glob.glob(marker):
            print(message)
            builder = docBuilder(name, directories, excludes)
            break
    if builder is None:
        print("rebuild() failed, unable to guess the module")
        return None
    builder.scan()
    builder.analyze()
    builder.serialize("./")
    if glob.glob("../libexslt/exslt.c"):
        extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
        extra.scan()
        extra.analyze()
        extra.serialize("EXSLT/")
    return builder
1905
1906 #
1907 # for debugging the parser
1908 #
def parse(filename):
    """Run CParser on the single file *filename* and return its index."""
    return CParser(filename).parse()
1913
# Script entry point: rebuild the API description when this file is
# executed directly rather than imported.
if __name__ == "__main__":
    rebuild()