doc/apibuild.py

   1 #!/usr/bin/python -u
   2 #
# This is the API builder: it parses the C sources and builds the
# formal API description in XML.
   5 #
   6 # See Copyright for the status of this software.
   7 #
   8 # daniel@veillard.com
   9 #
  10 import os, sys
  11 import string
  12 import glob
  13
  14 debug=0
  15
  16 #
  17 # C parser analysis code
  18 #
  19 ignored_files = {
  20   "trio": "too many non standard macros",
  21   "trio.c": "too many non standard macros",
  22   "trionan.c": "too many non standard macros",
  23   "triostr.c": "too many non standard macros",
  24   "acconfig.h": "generated portability layer",
  25   "config.h": "generated portability layer",
  26   "libxml.h": "internal only",
  27   "testOOM.c": "out of memory tester",
  28   "testOOMlib.h": "out of memory tester",
  29   "testOOMlib.c": "out of memory tester",
  30 }
  31
  32 ignored_words = {
  33   "WINAPI": (0, "Windows keyword"),
  34   "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
  35   "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
  36   "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  37   "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  38   "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
  39   "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  40   "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  41   "XMLCALL": (0, "Special macro for win32 calls"),
  42   "XSLTCALL": (0, "Special macro for win32 calls"),
  43   "EXSLTCALL": (0, "Special macro for win32 calls"),
  44   "__declspec": (3, "Windows keyword"),
  45   "ATTRIBUTE_UNUSED": (0, "macro keyword"),
  46   "LIBEXSLT_PUBLIC": (0, "macro keyword"),
  47   "X_IN_Y": (5, "macro function builder"),
  48 }
  49
  50 def escape(raw):
  51     raw = string.replace(raw, '&', '&amp;')
  52     raw = string.replace(raw, '<', '&lt;')
  53     raw = string.replace(raw, '>', '&gt;')
  54     raw = string.replace(raw, "'", '&apos;')
  55     raw = string.replace(raw, '"', '&quot;')
  56     return raw
  57
  58 def uniq(items):
  59     d = {}
  60     for item in items:
  61         d[item]=1
  62     return d.keys()
  63
  64 class identifier:
  65     def __init__(self, name, module=None, type=None, lineno = 0,
  66                  info=None, extra=None):
  67         self.name = name
  68         self.module = module
  69         self.type = type
  70         self.info = info
  71         self.extra = extra
  72         self.lineno = lineno
  73         self.static = 0
  74
  75     def __repr__(self):
  76         r = "%s %s:" % (self.type, self.name)
  77         if self.static:
  78             r = r + " static"
  79         if self.module != None:
  80             r = r + " from %s" % (self.module)
  81         if self.info != None:
  82             r = r + " " +  `self.info`
  83         if self.extra != None:
  84             r = r + " " + `self.extra`
  85         return r
  86
  87
  88     def set_module(self, module):
  89         self.module = module
  90     def set_type(self, type):
  91         self.type = type
  92     def set_info(self, info):
  93         self.info = info
  94     def set_extra(self, extra):
  95         self.extra = extra
  96     def set_lineno(self, lineno):
  97         self.lineno = lineno
  98     def set_static(self, static):
  99         self.static = static
 100
 101     def get_name(self):
 102         return self.name
 103     def get_module(self):
 104         return self.module
 105     def get_type(self):
 106         return self.type
 107     def get_info(self):
 108         return self.info
 109     def get_lineno(self):
 110         return self.lineno
 111     def get_extra(self):
 112         return self.extra
 113     def get_static(self):
 114         return self.static
 115
 116     def update(self, module, type = None, info = None, extra=None):
 117         if module != None and self.module == None:
 118             self.set_module(module)
 119         if type != None and self.type == None:
 120             self.set_type(type)
 121         if info != None:
 122             self.set_info(info)
 123         if extra != None:
 124             self.set_extra(extra)
 125
 126
 127 class index:
 128     def __init__(self, name = "noname"):
 129         self.name = name
 130         self.identifiers = {}
 131         self.functions = {}
 132         self.variables = {}
 133         self.includes = {}
 134         self.structs = {}
 135         self.enums = {}
 136         self.typedefs = {}
 137         self.macros = {}
 138         self.references = {}
 139         self.info = {}
 140
 141     def add_ref(self, name, module, static, type, lineno, info=None, extra=None):
 142         if name[0:2] == '__':
 143             return None
 144         d = None
 145         try:
 146            d = self.identifiers[name]
 147            d.update(module, type, lineno, info, extra)
 148         except:
 149            d = identifier(name, module, type, lineno, info, extra)
 150            self.identifiers[name] = d
 151
 152         if d != None and static == 1:
 153             d.set_static(1)
 154
 155         if d != None and name != None and type != None:
 156             self.references[name] = d
 157
 158     def add(self, name, module, static, type, lineno, info=None, extra=None):
 159         if name[0:2] == '__':
 160             return None
 161         d = None
 162         try:
 163            d = self.identifiers[name]
 164            d.update(module, type, lineno, info, extra)
 165         except:
 166            d = identifier(name, module, type, lineno, info, extra)
 167            self.identifiers[name] = d
 168
 169         if d != None and static == 1:
 170             d.set_static(1)
 171
 172         if d != None and name != None and type != None:
 173             if type == "function":
 174                 self.functions[name] = d
 175             elif type == "functype":
 176                 self.functions[name] = d
 177             elif type == "variable":
 178                 self.variables[name] = d
 179             elif type == "include":
 180                 self.includes[name] = d
 181             elif type == "struct":
 182                 self.structs[name] = d
 183             elif type == "enum":
 184                 self.enums[name] = d
 185             elif type == "typedef":
 186                 self.typedefs[name] = d
 187             elif type == "macro":
 188                 self.macros[name] = d
 189             else:
 190                 print "Unable to register type ", type
 191         return d
 192
 193     def merge(self, idx):
 194         for id in idx.functions.keys():
 195               #
 196               # macro might be used to override functions or variables
 197               # definitions
 198               #
 199              if self.macros.has_key(id):
 200                  del self.macros[id]
 201              if self.functions.has_key(id):
 202                  print "function %s from %s redeclared in %s" % (
 203                     id, self.functions[id].module, idx.functions[id].module)
 204              else:
 205                  self.functions[id] = idx.functions[id]
 206                  self.identifiers[id] = idx.functions[id]
 207         for id in idx.variables.keys():
 208               #
 209               # macro might be used to override functions or variables
 210               # definitions
 211               #
 212              if self.macros.has_key(id):
 213                  del self.macros[id]
 214              if self.variables.has_key(id):
 215                  print "variable %s from %s redeclared in %s" % (
 216                     id, self.variables[id].module, idx.variables[id].module)
 217              else:
 218                  self.variables[id] = idx.variables[id]
 219                  self.identifiers[id] = idx.variables[id]
 220         for id in idx.structs.keys():
 221              if self.structs.has_key(id):
 222                  print "struct %s from %s redeclared in %s" % (
 223                     id, self.structs[id].module, idx.structs[id].module)
 224              else:
 225                  self.structs[id] = idx.structs[id]
 226                  self.identifiers[id] = idx.structs[id]
 227         for id in idx.typedefs.keys():
 228              if self.typedefs.has_key(id):
 229                  print "typedef %s from %s redeclared in %s" % (
 230                     id, self.typedefs[id].module, idx.typedefs[id].module)
 231              else:
 232                  self.typedefs[id] = idx.typedefs[id]
 233                  self.identifiers[id] = idx.typedefs[id]
 234         for id in idx.macros.keys():
 235               #
 236               # macro might be used to override functions or variables
 237               # definitions
 238               #
 239              if self.variables.has_key(id):
 240                  continue
 241              if self.functions.has_key(id):
 242                  continue
 243              if self.enums.has_key(id):
 244                  continue
 245              if self.macros.has_key(id):
 246                  print "macro %s from %s redeclared in %s" % (
 247                     id, self.macros[id].module, idx.macros[id].module)
 248              else:
 249                  self.macros[id] = idx.macros[id]
 250                  self.identifiers[id] = idx.macros[id]
 251         for id in idx.enums.keys():
 252              if self.enums.has_key(id):
 253                  print "enum %s from %s redeclared in %s" % (
 254                     id, self.enums[id].module, idx.enums[id].module)
 255              else:
 256                  self.enums[id] = idx.enums[id]
 257                  self.identifiers[id] = idx.enums[id]
 258
 259     def merge_public(self, idx):
 260         for id in idx.functions.keys():
 261              if self.functions.has_key(id):
 262                  up = idx.functions[id]
 263                  self.functions[id].update(None, up.type, up.info, up.extra)
 264          #     else:
 265          #         print "Function %s from %s is not declared in headers" % (
 266         #               id, idx.functions[id].module)
 267          # TODO: do the same for variables.
 268
 269     def analyze_dict(self, type, dict):
 270         count = 0
 271         public = 0
 272         for name in dict.keys():
 273             id = dict[name]
 274             count = count + 1
 275             if id.static == 0:
 276                 public = public + 1
 277         if count != public:
 278             print "  %d %s , %d public" % (count, type, public)
 279         elif count != 0:
 280             print "  %d public %s" % (count, type)
 281
 282
 283     def analyze(self):
 284         self.analyze_dict("functions", self.functions)
 285         self.analyze_dict("variables", self.variables)
 286         self.analyze_dict("structs", self.structs)
 287         self.analyze_dict("typedefs", self.typedefs)
 288         self.analyze_dict("macros", self.macros)
 289
 290 class CLexer:
 291     """A lexer for the C language, tokenize the input by reading and
 292        analyzing it line by line"""
 293     def __init__(self, input):
 294         self.input = input
 295         self.tokens = []
 296         self.line = ""
 297         self.lineno = 0
 298
 299     def getline(self):
 300         line = ''
 301         while line == '':
 302             line = self.input.readline()
 303             if not line:
 304                 return None
 305             self.lineno = self.lineno + 1
 306             line = string.lstrip(line)
 307             line = string.rstrip(line)
 308             if line == '':
 309                 continue
 310             while line[-1] == '\\':
 311                 line = line[:-1]
 312                 n = self.input.readline()
 313                 self.lineno = self.lineno + 1
 314                 n = string.lstrip(n)
 315                 n = string.rstrip(n)
 316                 if not n:
 317                     break
 318                 else:
 319                     line = line + n
 320         return line
 321
 322     def getlineno(self):
 323         return self.lineno
 324
 325     def push(self, token):
 326         self.tokens.insert(0, token);
 327
 328     def debug(self):
 329         print "Last token: ", self.last
 330         print "Token queue: ", self.tokens
 331         print "Line %d end: " % (self.lineno), self.line
 332
 333     def token(self):
 334         while self.tokens == []:
 335             if self.line == "":
 336                 line = self.getline()
 337             else:
 338                 line = self.line
 339                 self.line = ""
 340             if line == None:
 341                 return None
 342
 343             if line[0] == '#':
 344                 self.tokens = map((lambda x: ('preproc', x)),
 345                                   string.split(line))
 346                 break;
 347             l = len(line)
 348             if line[0] == '"' or line[0] == "'":
 349                 end = line[0]
 350                 line = line[1:]
 351                 found = 0
 352                 tok = ""
 353                 while found == 0:
 354                     i = 0
 355                     l = len(line)
 356                     while i < l:
 357                         if line[i] == end:
 358                             self.line = line[i+1:]
 359                             line = line[:i]
 360                             l = i
 361                             found = 1
 362                             break
 363                         if line[i] == '\\':
 364                             i = i + 1
 365                         i = i + 1
 366                     tok = tok + line
 367                     if found == 0:
 368                         line = self.getline()
 369                         if line == None:
 370                             return None
 371                 self.last = ('string', tok)
 372                 return self.last
 373
 374             if l >= 2 and line[0] == '/' and line[1] == '*':
 375                 line = line[2:]
 376                 found = 0
 377                 tok = ""
 378                 while found == 0:
 379                     i = 0
 380                     l = len(line)
 381                     while i < l:
 382                         if line[i] == '*' and i+1 < l and line[i+1] == '/':
 383                             self.line = line[i+2:]
 384                             line = line[:i-1]
 385                             l = i
 386                             found = 1
 387                             break
 388                         i = i + 1
 389                     if tok != "":
 390                         tok = tok + "\n"
 391                     tok = tok + line
 392                     if found == 0:
 393                         line = self.getline()
 394                         if line == None:
 395                             return None
 396                 self.last = ('comment', tok)
 397                 return self.last
 398             if l >= 2 and line[0] == '/' and line[1] == '/':
 399                 line = line[2:]
 400                 self.last = ('comment', line)
 401                 return self.last
 402             i = 0
 403             while i < l:
 404                 if line[i] == '/' and i+1 < l and line[i+1] == '/':
 405                     self.line = line[i:]
 406                     line = line[:i]
 407                     break
 408                 if line[i] == '/' and i+1 < l and line[i+1] == '*':
 409                     self.line = line[i:]
 410                     line = line[:i]
 411                     break
 412                 if line[i] == '"' or line[i] == "'":
 413                     self.line = line[i:]
 414                     line = line[:i]
 415                     break
 416                 i = i + 1
 417             l = len(line)
 418             i = 0
 419             while i < l:
 420                 if line[i] == ' ' or line[i] == '\t':
 421                     i = i + 1
 422                     continue
 423                 o = ord(line[i])
 424                 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
 425                    (o >= 48 and o <= 57):
 426                     s = i
 427                     while i < l:
 428                         o = ord(line[i])
 429                         if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
 430                            (o >= 48 and o <= 57) or string.find(
 431                                " \t(){}:;,+-*/%&!|[]=><", line[i]) == -1:
 432                             i = i + 1
 433                         else:
 434                             break
 435                     self.tokens.append(('name', line[s:i]))
 436                     continue
 437                 if string.find("(){}:;,[]", line[i]) != -1:
 438 #                 if line[i] == '(' or line[i] == ')' or line[i] == '{' or \
 439 #                   line[i] == '}' or line[i] == ':' or line[i] == ';' or \
 440 #                   line[i] == ',' or line[i] == '[' or line[i] == ']':
 441                     self.tokens.append(('sep', line[i]))
 442                     i = i + 1
 443                     continue
 444                 if string.find("+-*><=/%&!|.", line[i]) != -1:
 445 #                 if line[i] == '+' or line[i] == '-' or line[i] == '*' or \
 446 #                   line[i] == '>' or line[i] == '<' or line[i] == '=' or \
 447 #                   line[i] == '/' or line[i] == '%' or line[i] == '&' or \
 448 #                   line[i] == '!' or line[i] == '|' or line[i] == '.':
 449                     if line[i] == '.' and  i + 2 < l and \
 450                        line[i+1] == '.' and line[i+2] == '.':
 451                         self.tokens.append(('name', '...'))
 452                         i = i + 3
 453                         continue
 454
 455                     j = i + 1
 456                     if j < l and (
 457                        string.find("+-*><=/%&!|", line[j]) != -1):
 458 #                       line[j] == '+' or line[j] == '-' or line[j] == '*' or \
 459 #                       line[j] == '>' or line[j] == '<' or line[j] == '=' or \
 460 #                       line[j] == '/' or line[j] == '%' or line[j] == '&' or \
 461 #                       line[j] == '!' or line[j] == '|'):
 462                         self.tokens.append(('op', line[i:j+1]))
 463                         i = j + 1
 464                     else:
 465                         self.tokens.append(('op', line[i]))
 466                         i = i + 1
 467                     continue
 468                 s = i
 469                 while i < l:
 470                     o = ord(line[i])
 471                     if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
 472                        (o >= 48 and o <= 57) or (
 473                         string.find(" \t(){}:;,+-*/%&!|[]=><", line[i]) == -1):
 474 #                        line[i] != ' ' and line[i] != '\t' and
 475 #                        line[i] != '(' and line[i] != ')' and
 476 #                        line[i] != '{'  and line[i] != '}' and
 477 #                        line[i] != ':' and line[i] != ';' and
 478 #                        line[i] != ',' and line[i] != '+' and
 479 #                        line[i] != '-' and line[i] != '*' and
 480 #                        line[i] != '/' and line[i] != '%' and
 481 #                        line[i] != '&' and line[i] != '!' and
 482 #                        line[i] != '|' and line[i] != '[' and
 483 #                        line[i] != ']' and line[i] != '=' and
 484 #                        line[i] != '*' and line[i] != '>' and
 485 #                        line[i] != '<'):
 486                         i = i + 1
 487                     else:
 488                         break
 489                 self.tokens.append(('name', line[s:i]))
 490
 491         tok = self.tokens[0]
 492         self.tokens = self.tokens[1:]
 493         self.last = tok
 494         return tok
 495
 496 class CParser:
 497     """The C module parser"""
 498     def __init__(self, filename, idx = None):
 499         self.filename = filename
 500         if len(filename) > 2 and filename[-2:] == '.h':
 501             self.is_header = 1
 502         else:
 503             self.is_header = 0
 504         self.input = open(filename)
 505         self.lexer = CLexer(self.input)
 506         if idx == None:
 507             self.index = index()
 508         else:
 509             self.index = idx
 510         self.top_comment = ""
 511         self.last_comment = ""
 512         self.comment = None
 513         self.collect_ref = 0
 514         self.no_error = 0
 515
    def collect_references(self):
        """Raise the collect_ref flag (set to 1)."""
        self.collect_ref = 1
 518
    def stop_error(self):
        """Set no_error: warning() and error() then return without
           reporting anything."""
        self.no_error = 1
 521
    def start_error(self):
        """Clear no_error: warning() and error() report again."""
        self.no_error = 0
 524
    def lineno(self):
        """Return the line number currently reached by the lexer."""
        return self.lexer.getlineno()
 527
 528     def index_add(self, name, module, static, type, info=None, extra = None):
 529         self.index.add(name, module, static, type, self.lineno(),
 530                        info, extra)
 531
 532     def index_add_ref(self, name, module, static, type, info=None,
 533                       extra = None):
 534         self.index.add_ref(name, module, static, type, self.lineno(),
 535                        info, extra)
 536
 537     def warning(self, msg):
 538         if self.no_error:
 539             return
 540         print msg
 541
 542     def error(self, msg, token=-1):
 543         if self.no_error:
 544             return
 545
 546         print "Parse Error: " + msg
 547         if token != -1:
 548             print "Got token ", token
 549         self.lexer.debug()
 550         sys.exit(1)
 551
 552     def debug(self, msg, token=-1):
 553         print "Debug: " + msg
 554         if token != -1:
 555             print "Got token ", token
 556         self.lexer.debug()
 557
 558     def parseTopComment(self, comment):
 559         res = {}
 560         lines = string.split(comment, "\n")
 561         item = None
 562         for line in lines:
 563             while line != "" and (line[0] == ' ' or line[0] == '\t'):
 564                 line = line[1:]
 565             while line != "" and line[0] == '*':
 566                 line = line[1:]
 567             while line != "" and (line[0] == ' ' or line[0] == '\t'):
 568                 line = line[1:]
 569             try:
 570                 (it, line) = string.split(line, ":", 1)
 571                 item = it
 572                 while line != "" and (line[0] == ' ' or line[0] == '\t'):
 573                     line = line[1:]
 574                 if res.has_key(item):
 575                     res[item] = res[item] + " " + line
 576                 else:
 577                     res[item] = line
 578             except:
 579                 if item != None:
 580                     if res.has_key(item):
 581                         res[item] = res[item] + " " + line
 582                     else:
 583                         res[item] = line
 584         self.index.info = res
 585
 586     def parseComment(self, token):
 587         if self.top_comment == "":
 588             self.top_comment = token[1]
 589         if self.comment == None or token[1][0] == '*':
 590             self.comment = token[1];
 591         else:
 592             self.comment = self.comment + token[1]
 593         token = self.lexer.token()
 594
 595         if string.find(self.comment, "DOC_DISABLE") != -1:
 596             self.stop_error()
 597
 598         if string.find(self.comment, "DOC_ENABLE") != -1:
 599             self.start_error()
 600
 601         return token
 602
     #
     # Parse the comment block associated with a macro
     #
 606     def parseMacroComment(self, name, quiet = 0):
 607         if name[0:2] == '__':
 608             quiet = 1
 609
 610         args = []
 611         desc = ""
 612
 613         if self.comment == None:
 614             if not quiet:
 615                 self.warning("Missing comment for macro %s" % (name))
 616             return((args, desc))
 617         if self.comment[0] != '*':
 618             if not quiet:
 619                 self.warning("Missing * in macro comment for %s" % (name))
 620             return((args, desc))
 621         lines = string.split(self.comment, '\n')
 622         if lines[0] == '*':
 623             del lines[0]
 624         if lines[0] != "* %s:" % (name):
 625             if not quiet:
 626                 self.warning("Misformatted macro comment for %s" % (name))
 627                 self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
 628             return((args, desc))
 629         del lines[0]
 630         while lines[0] == '*':
 631             del lines[0]
 632         while len(lines) > 0 and lines[0][0:3] == '* @':
 633             l = lines[0][3:]
 634             try:
 635                 (arg, desc) = string.split(l, ':', 1)
 636                 desc=string.strip(desc)
 637                 arg=string.strip(arg)
 638             except:
 639                 if not quiet:
 640                     self.warning("Misformatted macro comment for %s" % (name))
 641                     self.warning("  problem with '%s'" % (lines[0]))
 642                 del lines[0]
 643                 continue
 644             del lines[0]
 645             l = string.strip(lines[0])
 646             while len(l) > 2 and l[0:3] != '* @':
 647                 while l[0] == '*':
 648                     l = l[1:]
 649                 desc = desc + ' ' + string.strip(l)
 650                 del lines[0]
 651                 if len(lines) == 0:
 652                     break
 653                 l = lines[0]
 654             args.append((arg, desc))
 655         while len(lines) > 0 and lines[0] == '*':
 656             del lines[0]
 657         desc = ""
 658         while len(lines) > 0:
 659             l = lines[0]
 660             while len(l) > 0 and l[0] == '*':
 661                 l = l[1:]
 662             l = string.strip(l)
 663             desc = desc + " " + l
 664             del lines[0]
 665
 666         desc = string.strip(desc)
 667
 668         if quiet == 0:
 669             if desc == "":
 670                 self.warning("Macro comment for %s lack description of the macro" % (name))
 671
 672         return((args, desc))
 673
     #
     # Parse a comment block and merge the information found in the
     # parameter descriptions; finally return a block that is as
     # complete as possible
     #
 679     def mergeFunctionComment(self, name, description, quiet = 0):
 680         if name == 'main':
 681             quiet = 1
 682         if name[0:2] == '__':
 683             quiet = 1
 684
 685         (ret, args) = description
 686         desc = ""
 687         retdesc = ""
 688
 689         if self.comment == None:
 690             if not quiet:
 691                 self.warning("Missing comment for function %s" % (name))
 692             return(((ret[0], retdesc), args, desc))
 693         if self.comment[0] != '*':
 694             if not quiet:
 695                 self.warning("Missing * in function comment for %s" % (name))
 696             return(((ret[0], retdesc), args, desc))
 697         lines = string.split(self.comment, '\n')
 698         if lines[0] == '*':
 699             del lines[0]
 700         if lines[0] != "* %s:" % (name):
 701             if not quiet:
 702                 self.warning("Misformatted function comment for %s" % (name))
 703                 self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
 704             return(((ret[0], retdesc), args, desc))
 705         del lines[0]
 706         while lines[0] == '*':
 707             del lines[0]
 708         nbargs = len(args)
 709         while len(lines) > 0 and lines[0][0:3] == '* @':
 710             l = lines[0][3:]
 711             try:
 712                 (arg, desc) = string.split(l, ':', 1)
 713                 desc=string.strip(desc)
 714                 arg=string.strip(arg)
 715             except:
 716                 if not quiet:
 717                     self.warning("Misformatted function comment for %s" % (name))
 718                     self.warning("  problem with '%s'" % (lines[0]))
 719                 del lines[0]
 720                 continue
 721             del lines[0]
 722             l = string.strip(lines[0])
 723             while len(l) > 2 and l[0:3] != '* @':
 724                 while l[0] == '*':
 725                     l = l[1:]
 726                 desc = desc + ' ' + string.strip(l)
 727                 del lines[0]
 728                 if len(lines) == 0:
 729                     break
 730                 l = lines[0]
 731             i = 0
 732             while i < nbargs:
 733                 if args[i][1] == arg:
 734                     args[i] = (args[i][0], arg, desc)
 735                     break;
 736                 i = i + 1
 737             if i >= nbargs:
 738                 if not quiet:
 739                     self.warning("Unable to find arg %s from function comment for %s" % (
 740                        arg, name))
 741         while len(lines) > 0 and lines[0] == '*':
 742             del lines[0]
 743         desc = ""
 744         while len(lines) > 0:
 745             l = lines[0]
 746             while len(l) > 0 and l[0] == '*':
 747                 l = l[1:]
 748             l = string.strip(l)
 749             if len(l) >= 6 and  l[0:6] == "return" or l[0:6] == "Return":
 750                 try:
 751                     l = string.split(l, ' ', 1)[1]
 752                 except:
 753                     l = ""
 754                 retdesc = string.strip(l)
 755                 del lines[0]
 756                 while len(lines) > 0:
 757                     l = lines[0]
 758                     while len(l) > 0 and l[0] == '*':
 759                         l = l[1:]
 760                     l = string.strip(l)
 761                     retdesc = retdesc + " " + l
 762                     del lines[0]
 763             else:
 764                 desc = desc + " " + l
 765                 del lines[0]
 766
 767         retdesc = string.strip(retdesc)
 768         desc = string.strip(desc)
 769
 770         if quiet == 0:
 771              #
 772              # report missing comments
 773              #
 774             i = 0
 775             while i < nbargs:
 776                 if args[i][2] == None and args[i][0] != "void" and args[i][1] != None:
 777                     self.warning("Function comment for %s lack description of arg %s" % (name, args[i][1]))
 778                 i = i + 1
 779             if retdesc == "" and ret[0] != "void":
 780                 self.warning("Function comment for %s lack description of return value" % (name))
 781             if desc == "":
 782                 self.warning("Function comment for %s lack description of the function" % (name))
 783
 784
 785         return(((ret[0], retdesc), args, desc))
 786
 787     def parsePreproc(self, token):
 788         name = token[1]
 789         if name == "#include":
 790             token = self.lexer.token()
 791             if token == None:
 792                 return None
 793             if token[0] == 'preproc':
 794                 self.index_add(token[1], self.filename, not self.is_header,
 795                                 "include")
 796                 return self.lexer.token()
 797             return token
 798         if name == "#define":
 799             token = self.lexer.token()
 800             if token == None:
 801                 return None
 802             if token[0] == 'preproc':
 803                  # TODO macros with arguments
 804                 name = token[1]
 805                 lst = []
 806                 token = self.lexer.token()
 807                 while token != None and token[0] == 'preproc' and \
 808                       token[1][0] != '#':
 809                     lst.append(token[1])
 810                     token = self.lexer.token()
 811                 try:
 812                     name = string.split(name, '(') [0]
 813                 except:
 814                     pass
 815                 info = self.parseMacroComment(name, not self.is_header)
 816                 self.index_add(name, self.filename, not self.is_header,
 817                                 "macro", info)
 818                 return token
 819         token = self.lexer.token()
 820         while token != None and token[0] == 'preproc' and \
 821             token[1][0] != '#':
 822             token = self.lexer.token()
 823         return token
 824
     #
     # Token acquisition on top of the lexer: preprocessor directives and
     # comments are handled internally, since they are logically not part
     # of the program structure.
     #
 830     def token(self):
 831         global ignored_words
 832
 833         token = self.lexer.token()
 834         while token != None:
 835             if token[0] == 'comment':
 836                 token = self.parseComment(token)
 837                 continue
 838             elif token[0] == 'preproc':
 839                 token = self.parsePreproc(token)
 840                 continue
 841             elif token[0] == "name" and ignored_words.has_key(token[1]):
 842                 (n, info) = ignored_words[token[1]]
 843                 i = 0
 844                 while i < n:
 845                     token = self.lexer.token()
 846                     i = i + 1
 847                 token = self.lexer.token()
 848                 continue
 849             else:
 850                 if debug:
 851                     print "=> ", token
 852                 return token
 853         return None
 854
 855      #
 856      # Parse a typedef, it records the type and its name.
 857      #
 858     def parseTypedef(self, token):
 859         if token == None:
 860             return None
 861         token = self.parseType(token)
 862         if token == None:
 863             self.error("parsing typedef")
 864             return None
 865         base_type = self.type
 866         type = base_type
 867          #self.debug("end typedef type", token)
 868         while token != None:
 869             if token[0] == "name":
 870                 name = token[1]
 871                 signature = self.signature
 872                 if signature != None:
 873                     type = string.split(type, '(')[0]
 874                     d = self.mergeFunctionComment(name,
 875                             ((type, None), signature), 1)
 876                     self.index_add(name, self.filename, not self.is_header,
 877                                     "functype", d)
 878                 else:
 879                     if base_type == "struct":
 880                         self.index_add(name, self.filename, not self.is_header,
 881                                         "struct", type)
 882                         base_type = "struct " + name
 883                     else:
 884                         self.index_add(name, self.filename, not self.is_header,
 885                                     "typedef", type)
 886                 token = self.token()
 887             else:
 888                 self.error("parsing typedef: expecting a name")
 889                 return token
 890              #self.debug("end typedef", token)
 891             if token != None and token[0] == 'sep' and token[1] == ',':
 892                 type = base_type
 893                 token = self.token()
 894                 while token != None and token[0] == "op":
 895                     type = type + token[1]
 896                     token = self.token()
 897             elif token != None and token[0] == 'sep' and token[1] == ';':
 898                 break;
 899             elif token != None and token[0] == 'name':
 900                 type = base_type
 901                 continue;
 902             else:
 903                 self.error("parsing typedef: expecting ';'", token)
 904                 return token
 905         token = self.token()
 906         return token
 907
     #
     # Parse a C code block (used for function bodies): everything up to
     # and including the balancing } is consumed.
     #
 912     def parseBlock(self, token):
 913         while token != None:
 914             if token[0] == "sep" and token[1] == "{":
 915                 token = self.token()
 916                 token = self.parseBlock(token)
 917             elif token[0] == "sep" and token[1] == "}":
 918                 self.comment = None
 919                 token = self.token()
 920                 return token
 921             else:
 922                 if self.collect_ref == 1:
 923                     oldtok = token
 924                     token = self.token()
 925                     if oldtok[0] == "name" and oldtok[1][0:3] == "xml":
 926                         if token[0] == "sep" and token[1] == "(":
 927                             self.index_add_ref(oldtok[1], self.filename,
 928                                                 0, "function")
 929                             token = self.token()
 930                         elif token[0] == "name":
 931                             token = self.token()
 932                             if token[0] == "sep" and (token[1] == ";" or
 933                                token[1] == "," or token[1] == "="):
 934                                 self.index_add_ref(oldtok[1], self.filename,
 935                                                     0, "type")
 936                     elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_":
 937                         self.index_add_ref(oldtok[1], self.filename,
 938                                             0, "typedef")
 939                     elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_":
 940                         self.index_add_ref(oldtok[1], self.filename,
 941                                             0, "typedef")
 942
 943                 else:
 944                     token = self.token()
 945         return token
 946
 947      #
 948      # Parse a C struct definition till the balancing }
 949      #
 950     def parseStruct(self, token):
 951         fields = []
 952          #self.debug("start parseStruct", token)
 953         while token != None:
 954             if token[0] == "sep" and token[1] == "{":
 955                 token = self.token()
 956                 token = self.parseTypeBlock(token)
 957             elif token[0] == "sep" and token[1] == "}":
 958                 self.struct_fields = fields
 959                  #self.debug("end parseStruct", token)
 960                  #print fields
 961                 token = self.token()
 962                 return token
 963             else:
 964                 base_type = self.type
 965                  #self.debug("before parseType", token)
 966                 token = self.parseType(token)
 967                  #self.debug("after parseType", token)
 968                 if token != None and token[0] == "name":
 969                     fname = token[1]
 970                     token = self.token()
 971                     if token[0] == "sep" and token[1] == ";":
 972                         self.comment = None
 973                         token = self.token()
 974                         fields.append((self.type, fname, self.comment))
 975                         self.comment = None
 976                     else:
 977                         self.error("parseStruct: expecting ;", token)
 978                 elif token != None and token[0] == "sep" and token[1] == "{":
 979                     token = self.token()
 980                     token = self.parseTypeBlock(token)
 981                     if token != None and token[0] == "name":
 982                         token = self.token()
 983                     if token != None and token[0] == "sep" and token[1] == ";":
 984                         token = self.token()
 985                     else:
 986                         self.error("parseStruct: expecting ;", token)
 987                 else:
 988                     self.error("parseStruct: name", token)
 989                     token = self.token()
 990                 self.type = base_type;
 991         self.struct_fields = fields
 992          #self.debug("end parseStruct", token)
 993          #print fields
 994         return token
 995
 996      #
 997      # Parse a C enum block, parse till the balancing }
 998      #
 999     def parseEnumBlock(self, token):
1000         self.enums = []
1001         name = None
1002         self.comment = None
1003         comment = ""
1004         value = "0"
1005         while token != None:
1006             if token[0] == "sep" and token[1] == "{":
1007                 token = self.token()
1008                 token = self.parseTypeBlock(token)
1009             elif token[0] == "sep" and token[1] == "}":
1010                 if name != None:
1011                     if self.comment != None:
1012                         comment = self.comment
1013                         self.comment = None
1014                     self.enums.append((name, value, comment))
1015                 token = self.token()
1016                 return token
1017             elif token[0] == "name":
1018                     if name != None:
1019                         if self.comment != None:
1020                             comment = string.strip(self.comment)
1021                             self.comment = None
1022                         self.enums.append((name, value, comment))
1023                     name = token[1]
1024                     comment = ""
1025                     token = self.token()
1026                     if token[0] == "op" and token[1][0] == "=":
1027                         value = ""
1028                         if len(token[1]) > 1:
1029                             value = token[1][1:]
1030                         token = self.token()
1031                         while token[0] != "sep" or (token[1] != ',' and
1032                               token[1] != '}'):
1033                             value = value + token[1]
1034                             token = self.token()
1035                     else:
1036                         try:
1037                             value = "%d" % (int(value) + 1)
1038                         except:
1039                             self.warning("Failed to compute value of enum %s" % (name))
1040                             value=""
1041                     if token[0] == "sep" and token[1] == ",":
1042                         token = self.token()
1043             else:
1044                 token = self.token()
1045         return token
1046
     #
     # Parse a C definition block (used for structs): everything up to
     # and including the balancing } is consumed.
     #
1051     def parseTypeBlock(self, token):
1052         while token != None:
1053             if token[0] == "sep" and token[1] == "{":
1054                 token = self.token()
1055                 token = self.parseTypeBlock(token)
1056             elif token[0] == "sep" and token[1] == "}":
1057                 token = self.token()
1058                 return token
1059             else:
1060                 token = self.token()
1061         return token
1062
     #
     # Parse a type. This is made a little harder by the fact that the
     #    declared name can occur either after the type or within it;
     #    when it occurs within, the name token is pushed back before returning.
     #
1068     def parseType(self, token):
1069         self.type = ""
1070         self.struct_fields = []
1071         self.signature = None
1072         if token == None:
1073             return token
1074
1075         while token[0] == "name" and (
1076               token[1] == "const" or token[1] == "unsigned"):
1077             if self.type == "":
1078                 self.type = token[1]
1079             else:
1080                 self.type = self.type + " " + token[1]
1081             token = self.token()
1082
1083         if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
1084             if self.type == "":
1085                 self.type = token[1]
1086             else:
1087                 self.type = self.type + " " + token[1]
1088             if token[0] == "name" and token[1] == "int":
1089                 if self.type == "":
1090                     self.type = tmp[1]
1091                 else:
1092                     self.type = self.type + " " + tmp[1]
1093
1094         elif token[0] == "name" and token[1] == "struct":
1095             if self.type == "":
1096                 self.type = token[1]
1097             else:
1098                 self.type = self.type + " " + token[1]
1099             token = self.token()
1100             nametok = None
1101             if token[0] == "name":
1102                 nametok = token
1103                 token = self.token()
1104             if token != None and token[0] == "sep" and token[1] == "{":
1105                 token = self.token()
1106                 token = self.parseStruct(token)
1107             elif token != None and token[0] == "op" and token[1] == "*":
1108                 self.type = self.type + " " + nametok[1] + " *"
1109                 token = self.token()
1110                 while token != None and token[0] == "op" and token[1] == "*":
1111                     self.type = self.type + " *"
1112                     token = self.token()
1113                 if token[0] == "name":
1114                     nametok = token
1115                     token = self.token()
1116                 else:
1117                     self.error("struct : expecting name", token)
1118                     return token
1119             elif token != None and token[0] == "name" and nametok != None:
1120                 self.type = self.type + " " + nametok[1]
1121                 return token
1122
1123             if nametok != None:
1124                 self.lexer.push(token)
1125                 token = nametok
1126             return token
1127
1128         elif token[0] == "name" and token[1] == "enum":
1129             if self.type == "":
1130                 self.type = token[1]
1131             else:
1132                 self.type = self.type + " " + token[1]
1133             self.enums = []
1134             token = self.token()
1135             if token != None and token[0] == "sep" and token[1] == "{":
1136                 token = self.token()
1137                 token = self.parseEnumBlock(token)
1138             else:
1139                 self.error("parsing enum: expecting '{'", token)
1140             enum_type = None
1141             if token != None and token[0] != "name":
1142                 self.lexer.push(token)
1143                 token = ("name", "enum")
1144             else:
1145                 enum_type = token[1]
1146             for enum in self.enums:
1147                 self.index_add(enum[0], self.filename,
1148                                not self.is_header, "enum",
1149                                (enum[1], enum[2], enum_type))
1150             return token
1151
1152         elif token[0] == "name":
1153             if self.type == "":
1154                 self.type = token[1]
1155             else:
1156                 self.type = self.type + " " + token[1]
1157         else:
1158             self.error("parsing type %s: expecting a name" % (self.type),
1159                        token)
1160             return token
1161         token = self.token()
1162         while token != None and (token[0] == "op" or
1163               token[0] == "name" and token[1] == "const"):
1164             self.type = self.type + " " + token[1]
1165             token = self.token()
1166
1167          #
1168          # if there is a parenthesis here, this means a function type
1169          #
1170         if token != None and token[0] == "sep" and token[1] == '(':
1171             self.type = self.type + token[1]
1172             token = self.token()
1173             while token != None and token[0] == "op" and token[1] == '*':
1174                 self.type = self.type + token[1]
1175                 token = self.token()
1176             if token == None or token[0] != "name" :
1177                 self.error("parsing function type, name expected", token);
1178                 return token
1179             self.type = self.type + token[1]
1180             nametok = token
1181             token = self.token()
1182             if token != None and token[0] == "sep" and token[1] == ')':
1183                 self.type = self.type + token[1]
1184                 token = self.token()
1185                 if token != None and token[0] == "sep" and token[1] == '(':
1186                     token = self.token()
1187                     type = self.type;
1188                     token = self.parseSignature(token);
1189                     self.type = type;
1190                 else:
1191                     self.error("parsing function type, '(' expected", token);
1192                     return token
1193             else:
1194                 self.error("parsing function type, ')' expected", token);
1195                 return token
1196             self.lexer.push(token)
1197             token = nametok
1198             return token
1199
1200          #
1201          # do some lookahead for arrays
1202          #
1203         if token != None and token[0] == "name":
1204             nametok = token
1205             token = self.token()
1206             if token != None and token[0] == "sep" and token[1] == '[':
1207                 self.type = self.type + nametok[1]
1208                 while token != None and token[0] == "sep" and token[1] == '[':
1209                     self.type = self.type + token[1]
1210                     token = self.token()
1211                     while token != None and token[0] != 'sep' and \
1212                           token[1] != ']' and token[1] != ';':
1213                         self.type = self.type + token[1]
1214                         token = self.token()
1215                 if token != None and token[0] == 'sep' and token[1] == ']':
1216                     self.type = self.type + token[1]
1217                     token = self.token()
1218                 else:
1219                     self.error("parsing array type, ']' expected", token);
1220                     return token
1221             elif token != None and token[0] == "sep" and token[1] == ':':
1222                  # remove :12 in case it's a limited int size
1223                 token = self.token()
1224                 token = self.token()
1225             self.lexer.push(token)
1226             token = nametok
1227
1228         return token
1229
1230      #
1231      # Parse a signature: '(' has been parsed and we scan the type definition
1232      #    up to the ')' included
1233     def parseSignature(self, token):
1234         signature = []
1235         if token != None and token[0] == "sep" and token[1] == ')':
1236             self.signature = []
1237             token = self.token()
1238             return token
1239         while token != None:
1240             token = self.parseType(token)
1241             if token != None and token[0] == "name":
1242                 signature.append((self.type, token[1], None))
1243                 token = self.token()
1244             elif token != None and token[0] == "sep" and token[1] == ',':
1245                 token = self.token()
1246                 continue
1247             elif token != None and token[0] == "sep" and token[1] == ')':
1248                  # only the type was provided
1249                 if self.type == "...":
1250                     signature.append((self.type, "...", None))
1251                 else:
1252                     signature.append((self.type, None, None))
1253             if token != None and token[0] == "sep":
1254                 if token[1] == ',':
1255                     token = self.token()
1256                     continue
1257                 elif token[1] == ')':
1258                     token = self.token()
1259                     break
1260         self.signature = signature
1261         return token
1262
1263      #
1264      # Parse a global definition, be it a type, variable or function
1265      # the extern "C" blocks are a bit nasty and require it to recurse.
1266      #
1267     def parseGlobal(self, token):
1268         static = 0
1269         if token[1] == 'extern':
1270             token = self.token()
1271             if token == None:
1272                 return token
1273             if token[0] == 'string':
1274                 if token[1] == 'C':
1275                     token = self.token()
1276                     if token == None:
1277                         return token
1278                     if token[0] == 'sep' and token[1] == "{":
1279                         token = self.token()
1280 #                        print 'Entering extern "C line ', self.lineno()
1281                         while token != None and (token[0] != 'sep' or
1282                               token[1] != "}"):
1283                             if token[0] == 'name':
1284                                 token = self.parseGlobal(token)
1285                             else:
1286                                 self.error(
1287                                  "token %s %s unexpected at the top level" % (
1288                                         token[0], token[1]))
1289                                 token = self.parseGlobal(token)
1290 #                        print 'Exiting extern "C" line', self.lineno()
1291                         token = self.token()
1292                         return token
1293                 else:
1294                     return token
1295         elif token[1] == 'static':
1296             static = 1
1297             token = self.token()
1298             if token == None or  token[0] != 'name':
1299                 return token
1300
1301         if token[1] == 'typedef':
1302             token = self.token()
1303             return self.parseTypedef(token)
1304         else:
1305             token = self.parseType(token)
1306             type_orig = self.type
1307         if token == None or token[0] != "name":
1308             return token
1309         type = type_orig
1310         self.name = token[1]
1311         token = self.token()
1312         while token != None and (token[0] == "sep" or token[0] == "op"):
1313             if token[0] == "sep":
1314                 if token[1] == "[":
1315                     type = type + token[1]
1316                     token = self.token()
1317                     while token != None and (token[0] != "sep" or \
1318                           token[1] != ";"):
1319                         type = type + token[1]
1320                         token = self.token()
1321
1322             if token != None and token[0] == "op" and token[1] == "=":
1323                  #
1324                  # Skip the initialization of the variable
1325                  #
1326                 token = self.token()
1327                 if token[0] == 'sep' and token[1] == '{':
1328                     token = self.token()
1329                     token = self.parseBlock(token)
1330                 else:
1331                     self.comment = None
1332                     while token != None and (token[0] != "sep" or \
1333                           (token[1] != ';' and token[1] != ',')):
1334                             token = self.token()
1335                 self.comment = None
1336                 if token == None or token[0] != "sep" or (token[1] != ';' and
1337                    token[1] != ','):
1338                     self.error("missing ';' or ',' after value")
1339
1340             if token != None and token[0] == "sep":
1341                 if token[1] == ";":
1342                     self.comment = None
1343                     token = self.token()
1344                     if type == "struct":
1345                         self.index_add(self.name, self.filename,
1346                              not self.is_header, "struct", self.struct_fields)
1347                     else:
1348                         self.index_add(self.name, self.filename,
1349                              not self.is_header, "variable", type)
1350                     break
1351                 elif token[1] == "(":
1352                     token = self.token()
1353                     token = self.parseSignature(token)
1354                     if token == None:
1355                         return None
1356                     if token[0] == "sep" and token[1] == ";":
1357                         d = self.mergeFunctionComment(self.name,
1358                                 ((type, None), self.signature), 1)
1359                         self.index_add(self.name, self.filename, static,
1360                                         "function", d)
1361                         token = self.token()
1362                     elif token[0] == "sep" and token[1] == "{":
1363                         d = self.mergeFunctionComment(self.name,
1364                                 ((type, None), self.signature), static)
1365                         self.index_add(self.name, self.filename, static,
1366                                         "function", d)
1367                         token = self.token()
1368                         token = self.parseBlock(token);
1369                 elif token[1] == ',':
1370                     self.comment = None
1371                     self.index_add(self.name, self.filename, static,
1372                                     "variable", type)
1373                     type = type_orig
1374                     token = self.token()
1375                     while token != None and token[0] == "sep":
1376                         type = type + token[1]
1377                         token = self.token()
1378                     if token != None and token[0] == "name":
1379                         self.name = token[1]
1380                         token = self.token()
1381                 else:
1382                     break
1383
1384         return token
1385
1386     def parse(self):
1387         self.warning("Parsing %s" % (self.filename))
1388         token = self.token()
1389         while token != None:
1390             if token[0] == 'name':
1391                 token = self.parseGlobal(token)
1392             else:
1393                 self.error("token %s %s unexpected at the top level" % (
1394                        token[0], token[1]))
1395                 token = self.parseGlobal(token)
1396                 return
1397         self.parseTopComment(self.top_comment)
1398         return self.index
1399
1400
1401 class docBuilder:
1402     """A documentation builder"""
1403     def __init__(self, name, directories=['.'], excludes=[]):
1404         self.name = name
1405         self.directories = directories
1406         self.excludes = excludes + ignored_files.keys()
1407         self.modules = {}
1408         self.headers = {}
1409         self.idx = index()
1410         self.xref = {}
1411         self.index = {}
1412         if name == 'libxml2':
1413             self.basename = 'libxml'
1414         else:
1415             self.basename = name
1416
1417     def indexString(self, id, str):
1418         if str == None:
1419             return
1420         str = string.replace(str, "'", ' ')
1421         str = string.replace(str, '"', ' ')
1422         str = string.replace(str, "/", ' ')
1423         str = string.replace(str, '*', ' ')
1424         str = string.replace(str, "[", ' ')
1425         str = string.replace(str, "]", ' ')
1426         str = string.replace(str, "(", ' ')
1427         str = string.replace(str, ")", ' ')
1428         str = string.replace(str, "<", ' ')
1429         str = string.replace(str, '>', ' ')
1430         str = string.replace(str, "&", ' ')
1431         str = string.replace(str, '#', ' ')
1432         str = string.replace(str, ",", ' ')
1433         str = string.replace(str, '.', ' ')
1434         str = string.replace(str, ';', ' ')
1435         tokens = string.split(str)
1436         for token in tokens:
1437             try:
1438                 c = token[0]
1439                 if string.find(string.letters, c) < 0:
1440                     pass
1441                 elif len(token) < 3:
1442                     pass
1443                 else:
1444                     lower = string.lower(token)
1445                     # TODO: generalize this a bit
1446                     if lower == 'and' or lower == 'the':
1447                         pass
1448                     elif self.xref.has_key(token):
1449                         self.xref[token].append(id)
1450                     else:
1451                         self.xref[token] = [id]
1452             except:
1453                 pass
1454
1455     def analyze(self):
1456         print "Project %s : %d headers, %d modules" % (self.name, len(self.headers.keys()), len(self.modules.keys()))
1457         self.idx.analyze()
1458
1459     def scanHeaders(self):
1460         for header in self.headers.keys():
1461             parser = CParser(header)
1462             idx = parser.parse()
1463             self.headers[header] = idx;
1464             self.idx.merge(idx)
1465
1466     def scanModules(self):
1467         for module in self.modules.keys():
1468             parser = CParser(module)
1469             idx = parser.parse()
1470             # idx.analyze()
1471             self.modules[module] = idx
1472             self.idx.merge_public(idx)
1473
1474     def scan(self):
1475         for directory in self.directories:
1476             files = glob.glob(directory + "/*.c")
1477             for file in files:
1478                 skip = 0
1479                 for excl in self.excludes:
1480                     if string.find(file, excl) != -1:
1481                         skip = 1;
1482                         break
1483                 if skip == 0:
1484                     self.modules[file] = None;
1485             files = glob.glob(directory + "/*.h")
1486             for file in files:
1487                 skip = 0
1488                 for excl in self.excludes:
1489                     if string.find(file, excl) != -1:
1490                         skip = 1;
1491                         break
1492                 if skip == 0:
1493                     self.headers[file] = None;
1494         self.scanHeaders()
1495         self.scanModules()
1496
1497     def modulename_file(self, file):
1498         module = os.path.basename(file)
1499         if module[-2:] == '.h':
1500             module = module[:-2]
1501         return module
1502
1503     def serialize_enum(self, output, name):
1504         id = self.idx.enums[name]
1505         output.write("    <enum name='%s' file='%s'" % (name,
1506                      self.modulename_file(id.module)))
1507         if id.info != None:
1508             info = id.info
1509             if info[0] != None and info[0] != '':
1510                 try:
1511                     val = eval(info[0])
1512                 except:
1513                     val = info[0]
1514                 output.write(" value='%s'" % (val));
1515             if info[2] != None and info[2] != '':
1516                 output.write(" type='%s'" % info[2]);
1517             if info[1] != None and info[1] != '':
1518                 output.write(" info='%s'" % escape(info[1]));
1519         output.write("/>\n")
1520
1521     def serialize_macro(self, output, name):
1522         id = self.idx.macros[name]
1523         output.write("    <macro name='%s' file='%s'>\n" % (name,
1524                      self.modulename_file(id.module)))
1525         if id.info != None:
1526             try:
1527                 (args, desc) = id.info
1528                 if desc != None and desc != "":
1529                     output.write("      <info>%s</info>\n" % (escape(desc)))
1530                     self.indexString(name, desc)
1531                 for arg in args:
1532                     (name, desc) = arg
1533                     if desc != None and desc != "":
1534                         output.write("      <arg name='%s' info='%s'/>\n" % (
1535                                      name, escape(desc)))
1536                         self.indexString(name, desc)
1537                     else:
1538                         output.write("      <arg name='%s'/>\n" % (name))
1539             except:
1540                 pass
1541         output.write("    </macro>\n")
1542
1543     def serialize_typedef(self, output, name):
1544         id = self.idx.typedefs[name]
1545         if id.info[0:7] == 'struct ':
1546             output.write("    <struct name='%s' file='%s' type='%s'" % (
1547                      name, self.modulename_file(id.module), id.info))
1548             name = id.info[7:]
1549             if self.idx.structs.has_key(name) and ( \
1550                type(self.idx.structs[name].info) == type(()) or
1551                 type(self.idx.structs[name].info) == type([])):
1552                 output.write(">\n");
1553                 try:
1554                     for field in self.idx.structs[name].info:
1555                         desc = field[2]
1556                         self.indexString(name, desc)
1557                         if desc == None:
1558                             desc = ''
1559                         else:
1560                             desc = escape(desc)
1561                         output.write("      <field name='%s' type='%s' info='%s'/>\n" % (field[1] , field[0], desc))
1562                 except:
1563                     print "Failed to serialize struct %s" % (name)
1564                 output.write("    </struct>\n")
1565             else:
1566                 output.write("/>\n");
1567         else :
1568             output.write("    <typedef name='%s' file='%s' type='%s'/>\n" % (
1569                      name, self.modulename_file(id.module), id.info))
1570
1571     def serialize_variable(self, output, name):
1572         id = self.idx.variables[name]
1573         if id.info != None:
1574             output.write("    <variable name='%s' file='%s' type='%s'/>\n" % (
1575                     name, self.modulename_file(id.module), id.info))
1576         else:
1577             output.write("    <variable name='%s' file='%s'/>\n" % (
1578                     name, self.modulename_file(id.module)))
1579
1580     def serialize_function(self, output, name):
1581         id = self.idx.functions[name]
1582         output.write("    <%s name='%s' file='%s'>\n" % (id.type, name,
1583                      self.modulename_file(id.module)))
1584         try:
1585             (ret, params, desc) = id.info
1586             output.write("      <info>%s</info>\n" % (escape(desc)))
1587             self.indexString(name, desc)
1588             if ret[0] != None:
1589                 if ret[0] == "void":
1590                     output.write("      <return type='void'/>\n")
1591                 else:
1592                     output.write("      <return type='%s' info='%s'/>\n" % (
1593                              ret[0], escape(ret[1])))
1594                     self.indexString(name, ret[1])
1595             for param in params:
1596                 if param[0] == 'void':
1597                     continue
1598                 if param[2] == None:
1599                     output.write("      <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
1600                 else:
1601                     output.write("      <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
1602                     self.indexString(name, param[2])
1603         except:
1604             print "Failed to save function %s info: " % name, `id.info`
1605         output.write("    </%s>\n" % (id.type))
1606
1607     def serialize_exports(self, output, file):
1608         module = self.modulename_file(file)
1609         output.write("    <file name='%s'>\n" % (module))
1610         dict = self.headers[file]
1611         if dict.info != None:
1612             for data in ('Summary', 'Description', 'Author'):
1613                 try:
1614                     output.write("     <%s>%s</%s>\n" % (
1615                                  string.lower(data),
1616                                  escape(dict.info[data]),
1617                                  string.lower(data)))
1618                 except:
1619                     print "Header %s lacks a %s description" % (module, data)
1620             if dict.info.has_key('Description'):
1621                 desc = dict.info['Description']
1622                 if string.find(desc, "DEPRECATED") != -1:
1623                     output.write("     <deprecated/>\n")
1624
1625         ids = dict.macros.keys()
1626         ids.sort()
1627         for id in uniq(ids):
1628             # Macros are sometime used to masquerade other types.
1629             if dict.functions.has_key(id):
1630                 continue
1631             if dict.variables.has_key(id):
1632                 continue
1633             if dict.typedefs.has_key(id):
1634                 continue
1635             if dict.structs.has_key(id):
1636                 continue
1637             if dict.enums.has_key(id):
1638                 continue
1639             output.write("     <exports symbol='%s' type='macro'/>\n" % (id))
1640         ids = dict.enums.keys()
1641         ids.sort()
1642         for id in uniq(ids):
1643             output.write("     <exports symbol='%s' type='enum'/>\n" % (id))
1644         ids = dict.typedefs.keys()
1645         ids.sort()
1646         for id in uniq(ids):
1647             output.write("     <exports symbol='%s' type='typedef'/>\n" % (id))
1648         ids = dict.structs.keys()
1649         ids.sort()
1650         for id in uniq(ids):
1651             output.write("     <exports symbol='%s' type='struct'/>\n" % (id))
1652         ids = dict.variables.keys()
1653         ids.sort()
1654         for id in uniq(ids):
1655             output.write("     <exports symbol='%s' type='variable'/>\n" % (id))
1656         ids = dict.functions.keys()
1657         ids.sort()
1658         for id in uniq(ids):
1659             output.write("     <exports symbol='%s' type='function'/>\n" % (id))
1660         output.write("    </file>\n")
1661
1662     def serialize_xrefs_files(self, output):
1663         headers = self.headers.keys()
1664         headers.sort()
1665         for file in headers:
1666             module = self.modulename_file(file)
1667             output.write("    <file name='%s'>\n" % (module))
1668             dict = self.headers[file]
1669             ids = dict.functions.keys() + dict.variables.keys() + \
1670                   dict.macros.keys() + dict.typedefs.keys() + \
1671                   dict.structs.keys() + dict.enums.keys()
1672             ids.sort()
1673             for id in uniq(ids):
1674                 output.write("      <ref name='%s'/>\n" % (id))
1675             output.write("    </file>\n")
1676         pass
1677
1678     def serialize_xrefs_functions(self, output):
1679         funcs = {}
1680         for name in self.idx.functions.keys():
1681             id = self.idx.functions[name]
1682             try:
1683                 (ret, params, desc) = id.info
1684                 for param in params:
1685                     if param[0] == 'void':
1686                         continue
1687                     if funcs.has_key(param[0]):
1688                         funcs[param[0]].append(name)
1689                     else:
1690                         funcs[param[0]] = [name]
1691             except:
1692                 pass
1693         typ = funcs.keys()
1694         typ.sort()
1695         for type in typ:
1696             if type == '' or type == 'void' or type == "int" or \
1697                type == "char *" or type == "const char *" :
1698                 continue
1699             output.write("    <type name='%s'>\n" % (type))
1700             ids = funcs[type]
1701             ids.sort()
1702             for id in ids:
1703                 output.write("      <ref name='%s'/>\n" % (id))
1704             output.write("    </type>\n")
1705
1706     def serialize_xrefs_constructors(self, output):
1707         funcs = {}
1708         for name in self.idx.functions.keys():
1709             id = self.idx.functions[name]
1710             try:
1711                 (ret, params, desc) = id.info
1712                 if ret[0] == "void":
1713                     continue
1714                 if funcs.has_key(ret[0]):
1715                     funcs[ret[0]].append(name)
1716                 else:
1717                     funcs[ret[0]] = [name]
1718             except:
1719                 pass
1720         typ = funcs.keys()
1721         typ.sort()
1722         for type in typ:
1723             if type == '' or type == 'void' or type == "int" or \
1724                type == "char *" or type == "const char *" :
1725                 continue
1726             output.write("    <type name='%s'>\n" % (type))
1727             ids = funcs[type]
1728             for id in ids:
1729                 output.write("      <ref name='%s'/>\n" % (id))
1730             output.write("    </type>\n")
1731
1732     def serialize_xrefs_alpha(self, output):
1733         letter = None
1734         ids = self.idx.identifiers.keys()
1735         ids.sort()
1736         for id in ids:
1737             if id[0] != letter:
1738                 if letter != None:
1739                     output.write("    </letter>\n")
1740                 letter = id[0]
1741                 output.write("    <letter name='%s'>\n" % (letter))
1742             output.write("      <ref name='%s'/>\n" % (id))
1743         if letter != None:
1744             output.write("    </letter>\n")
1745
1746     def serialize_xrefs_references(self, output):
1747         typ = self.idx.identifiers.keys()
1748         typ.sort()
1749         for id in typ:
1750             idf = self.idx.identifiers[id]
1751             module = idf.module
1752             output.write("    <reference name='%s' href='%s'/>\n" % (id,
1753                          'html/' + self.basename + '-' +
1754                          self.modulename_file(module) + '.html#' +
1755                          id))
1756
    def serialize_xrefs_index(self, output):
        # Write the word index held in self.xref, a mapping from a word to
        # the list of names that reference it.  Words are emitted in sorted
        # order, grouped in <letter> elements by first character, and the
        # letters are themselves grouped in numbered <chunk> elements.  A
        # closing <chunks> summary lists each recorded chunk with its first
        # and last letter.
        index = self.xref
        typ = index.keys()
        typ.sort()
        letter = None        # first character of the <letter> currently open
        count = 0            # <ref/> elements written in the current chunk
        chunk = 0            # number the next opened chunk will carry
        chunks = []          # [name, first letter, last letter] per chunk
        for id in typ:
            # Words with more than 30 entries are left out of the index.
            if len(index[id]) > 30:
                continue
            if id[0] != letter:
                # A chunk is opened for the very first word and, later, at
                # a letter change once more than 200 refs were written in
                # the current chunk.
                if letter == None or count > 200:
                    if letter != None:
                        # Close the running chunk and record its range.
                        output.write("      </letter>\n")
                        output.write("    </chunk>\n")
                        count = 0
                        chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
                    output.write("    <chunk name='chunk%s'>\n" % (chunk))
                    first_letter = id[0]
                    chunk = chunk + 1
                # letter is never None on this branch: the previous letter
                # is closed while the chunk stays open.
                elif letter != None:
                    output.write("      </letter>\n")
                letter = id[0]
                output.write("      <letter name='%s'>\n" % (letter))
            output.write("        <word name='%s'>\n" % (id))
            # The entry list is sorted in place (this modifies self.xref)
            # so that duplicates are adjacent and can be skipped below.
            tokens = index[id];
            tokens.sort()
            tok = None
            for token in index[id]:
                if tok == token:
                    continue
                tok = token
                output.write("          <ref name='%s'/>\n" % (token))
                count = count + 1
            output.write("        </word>\n")
        # Nothing at all is written when no word was emitted.
        if letter != None:
            output.write("      </letter>\n")
            output.write("    </chunk>\n")
            # The last chunk is only recorded when it holds at least one ref.
            if count != 0:
                chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
            output.write("    <chunks>\n")
            for ch in chunks:
                output.write("      <chunk name='%s' start='%s' end='%s'/>\n" % (
                             ch[0], ch[1], ch[2]))
            output.write("    </chunks>\n")
1803
1804     def serialize_xrefs(self, output):
1805         output.write("  <references>\n")
1806         self.serialize_xrefs_references(output)
1807         output.write("  </references>\n")
1808         output.write("  <alpha>\n")
1809         self.serialize_xrefs_alpha(output)
1810         output.write("  </alpha>\n")
1811         output.write("  <constructors>\n")
1812         self.serialize_xrefs_constructors(output)
1813         output.write("  </constructors>\n")
1814         output.write("  <functions>\n")
1815         self.serialize_xrefs_functions(output)
1816         output.write("  </functions>\n")
1817         output.write("  <files>\n")
1818         self.serialize_xrefs_files(output)
1819         output.write("  </files>\n")
1820         output.write("  <index>\n")
1821         self.serialize_xrefs_index(output)
1822         output.write("  </index>\n")
1823
1824     def serialize(self, outdir):
1825         filename = outdir + "%s-api.xml" % self.name
1826         print "Saving XML description %s" % (filename)
1827         output = open(filename, "w")
1828         output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
1829         output.write("<api name='%s'>\n" % self.name)
1830         output.write("  <files>\n")
1831         headers = self.headers.keys()
1832         headers.sort()
1833         for file in headers:
1834             self.serialize_exports(output, file)
1835         output.write("  </files>\n")
1836         output.write("  <symbols>\n")
1837         macros = self.idx.macros.keys()
1838         macros.sort()
1839         for macro in macros:
1840             self.serialize_macro(output, macro)
1841         enums = self.idx.enums.keys()
1842         enums.sort()
1843         for enum in enums:
1844             self.serialize_enum(output, enum)
1845         typedefs = self.idx.typedefs.keys()
1846         typedefs.sort()
1847         for typedef in typedefs:
1848             self.serialize_typedef(output, typedef)
1849         variables = self.idx.variables.keys()
1850         variables.sort()
1851         for variable in variables:
1852             self.serialize_variable(output, variable)
1853         functions = self.idx.functions.keys()
1854         functions.sort()
1855         for function in functions:
1856             self.serialize_function(output, function)
1857         output.write("  </symbols>\n")
1858         output.write("</api>\n")
1859         output.close()
1860
1861         filename = outdir + "%s-refs.xml" % self.name
1862         print "Saving XML Cross References %s" % (filename)
1863         output = open(filename, "w")
1864         output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
1865         output.write("<apirefs name='%s'>\n" % self.name)
1866         self.serialize_xrefs(output)
1867         output.write("</apirefs>\n")
1868         output.close()
1869
1870
def rebuild():
    """Guess which project the script runs in and regenerate its XML files.

    The project is recognised from the first marker file found, relative to
    the current directory.  Its sources are scanned, analyzed and serialized
    into "./".  When ../libexslt/exslt.c exists, libexslt is processed as
    well and serialized into "EXSLT/".

    Returns the main docBuilder, or None when no known layout is found.
    """
    layouts = (
        ("parser.c", "Rebuilding API description for libxml2",
         "libxml2", [".", "."],
         ["xmlwin32version.h", "tst.c"]),
        ("../parser.c", "Rebuilding API description for libxml2",
         "libxml2", ["..", "../include/libxml"],
         ["xmlwin32version.h", "tst.c"]),
        ("../libxslt/transform.c", "Rebuilding API description for libxslt",
         "libxslt", ["../libxslt"],
         ["win32config.h", "libxslt.h", "tst.c"]),
    )
    for marker, banner, project, directories, excludes in layouts:
        if glob.glob(marker):
            print(banner)
            builder = docBuilder(project, directories, excludes)
            break
    else:
        print("rebuild() failed, unable to guess the module")
        return None

    builder.scan()
    builder.analyze()
    builder.serialize("./")
    if glob.glob("../libexslt/exslt.c"):
        extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
        extra.scan()
        extra.analyze()
        extra.serialize("EXSLT/")
    return builder
1897
1898 #
1899 # for debugging the parser
1900 #
def parse(filename):
    """Run the C parser on a single file and return the index it builds."""
    return CParser(filename).parse()
1905
# Script entry point: regenerate the API description when the file is run
# directly rather than imported.
if __name__ == "__main__":
    rebuild()