doc/apibuild.py

   1 #!/usr/bin/python -u
   2 #
# This is the API builder: it parses the C sources and builds the
# formal API description in XML.
   5 #
   6 # See Copyright for the status of this software.
   7 #
   8 # daniel@veillard.com
   9 #
  10 import os, sys
  11 import string
  12 import glob
  13
# Debugging switch, off by default.
# NOTE(review): nothing in this part of the file reads `debug`; presumably it
# is consulted further down - confirm before removing.
debug=0
# Name of a single symbol to trace: identifier.__init__/update and
# index.add/add_ref print a message whenever they handle an identifier whose
# name equals debugsym.  None disables the tracing.
#debugsym='ignorableWhitespaceSAXFunc'
debugsym=None
  17
  18 #
  19 # C parser analysis code
  20 #
  21 ignored_files = {
  22   "trio": "too many non standard macros",
  23   "trio.c": "too many non standard macros",
  24   "trionan.c": "too many non standard macros",
  25   "triostr.c": "too many non standard macros",
  26   "acconfig.h": "generated portability layer",
  27   "config.h": "generated portability layer",
  28   "libxml.h": "internal only",
  29   "testOOM.c": "out of memory tester",
  30   "testOOMlib.h": "out of memory tester",
  31   "testOOMlib.c": "out of memory tester",
  32   "rngparser.c": "not yet integrated",
  33   "rngparser.h": "not yet integrated",
  34   "elfgcchack.h": "not a normal header",
  35   "testHTML.c": "test tool",
  36   "testReader.c": "test tool",
  37   "testSchemas.c": "test tool",
  38   "testXPath.c": "test tool",
  39   "testAutomata.c": "test tool",
  40   "testModule.c": "test tool",
  41   "testRegexp.c": "test tool",
  42   "testThreads.c": "test tool",
  43   "testC14N.c": "test tool",
  44   "testRelax.c": "test tool",
  45   "testThreadsWin32.c": "test tool",
  46   "testSAX.c": "test tool",
  47   "testURI.c": "test tool",
  48   "testapi.c": "generated regression tests",
  49   "runtest.c": "regression tests program",
  50   "runsuite.c": "regression tests program",
  51   "tst.c": "not part of the library",
  52   "test.c": "not part of the library",
  53   "testdso.c": "test for dynamid shared libraries",
  54   "testrecurse.c": "test for entities recursions",
  55   "xzlib.h": "Internal API only 2.8.0",
  56   "buf.h": "Internal API only 2.9.0",
  57   "enc.h": "Internal API only 2.9.0",
  58   "/save.h": "Internal API only 2.9.0",
  59   "timsort.h": "Internal header only for xpath.c 2.9.0",
  60 }
  61
  62 ignored_words = {
  63   "WINAPI": (0, "Windows keyword"),
  64   "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
  65   "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
  66   "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  67   "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  68   "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
  69   "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  70   "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  71   "XMLCALL": (0, "Special macro for win32 calls"),
  72   "XSLTCALL": (0, "Special macro for win32 calls"),
  73   "XMLCDECL": (0, "Special macro for win32 calls"),
  74   "EXSLTCALL": (0, "Special macro for win32 calls"),
  75   "__declspec": (3, "Windows keyword"),
  76   "__stdcall": (0, "Windows keyword"),
  77   "ATTRIBUTE_UNUSED": (0, "macro keyword"),
  78   "LIBEXSLT_PUBLIC": (0, "macro keyword"),
  79   "X_IN_Y": (5, "macro function builder"),
  80   "ATTRIBUTE_ALLOC_SIZE": (3, "macro for gcc checking extension"),
  81   "ATTRIBUTE_PRINTF": (5, "macro for gcc printf args checking extension"),
  82   "LIBXML_ATTR_FORMAT": (5, "macro for gcc printf args checking extension"),
  83   "LIBXML_ATTR_ALLOC_SIZE": (3, "macro for gcc checking extension"),
  84 }
  85
  86 def escape(raw):
  87     raw = raw.replace('&', '&amp;')
  88     raw = raw.replace('<', '&lt;')
  89     raw = raw.replace('>', '&gt;')
  90     raw = raw.replace("'", '&apos;')
  91     raw = raw.replace('"', '&quot;')
  92     return raw
  93
  94 def uniq(items):
  95     d = {}
  96     for item in items:
  97         d[item]=1
  98     return list(d.keys())
  99
 100 class identifier:
 101     def __init__(self, name, header=None, module=None, type=None, lineno = 0,
 102                  info=None, extra=None, conditionals = None):
 103         self.name = name
 104         self.header = header
 105         self.module = module
 106         self.type = type
 107         self.info = info
 108         self.extra = extra
 109         self.lineno = lineno
 110         self.static = 0
 111         if conditionals == None or len(conditionals) == 0:
 112             self.conditionals = None
 113         else:
 114             self.conditionals = conditionals[:]
 115         if self.name == debugsym:
 116             print("=> define %s : %s" % (debugsym, (module, type, info,
 117                                          extra, conditionals)))
 118
 119     def __repr__(self):
 120         r = "%s %s:" % (self.type, self.name)
 121         if self.static:
 122             r = r + " static"
 123         if self.module != None:
 124             r = r + " from %s" % (self.module)
 125         if self.info != None:
 126             r = r + " " +  repr(self.info)
 127         if self.extra != None:
 128             r = r + " " + repr(self.extra)
 129         if self.conditionals != None:
 130             r = r + " " + repr(self.conditionals)
 131         return r
 132
 133
 134     def set_header(self, header):
 135         self.header = header
 136     def set_module(self, module):
 137         self.module = module
 138     def set_type(self, type):
 139         self.type = type
 140     def set_info(self, info):
 141         self.info = info
 142     def set_extra(self, extra):
 143         self.extra = extra
 144     def set_lineno(self, lineno):
 145         self.lineno = lineno
 146     def set_static(self, static):
 147         self.static = static
 148     def set_conditionals(self, conditionals):
 149         if conditionals == None or len(conditionals) == 0:
 150             self.conditionals = None
 151         else:
 152             self.conditionals = conditionals[:]
 153
 154     def get_name(self):
 155         return self.name
 156     def get_header(self):
 157         return self.module
 158     def get_module(self):
 159         return self.module
 160     def get_type(self):
 161         return self.type
 162     def get_info(self):
 163         return self.info
 164     def get_lineno(self):
 165         return self.lineno
 166     def get_extra(self):
 167         return self.extra
 168     def get_static(self):
 169         return self.static
 170     def get_conditionals(self):
 171         return self.conditionals
 172
 173     def update(self, header, module, type = None, info = None, extra=None,
 174                conditionals=None):
 175         if self.name == debugsym:
 176             print("=> update %s : %s" % (debugsym, (module, type, info,
 177                                          extra, conditionals)))
 178         if header != None and self.header == None:
 179             self.set_header(module)
 180         if module != None and (self.module == None or self.header == self.module):
 181             self.set_module(module)
 182         if type != None and self.type == None:
 183             self.set_type(type)
 184         if info != None:
 185             self.set_info(info)
 186         if extra != None:
 187             self.set_extra(extra)
 188         if conditionals != None:
 189             self.set_conditionals(conditionals)
 190
 191 class index:
 192     def __init__(self, name = "noname"):
 193         self.name = name
 194         self.identifiers = {}
 195         self.functions = {}
 196         self.variables = {}
 197         self.includes = {}
 198         self.structs = {}
 199         self.enums = {}
 200         self.typedefs = {}
 201         self.macros = {}
 202         self.references = {}
 203         self.info = {}
 204
 205     def add_ref(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
 206         if name[0:2] == '__':
 207             return None
 208         d = None
 209         try:
 210            d = self.identifiers[name]
 211            d.update(header, module, type, lineno, info, extra, conditionals)
 212         except:
 213            d = identifier(name, header, module, type, lineno, info, extra, conditionals)
 214            self.identifiers[name] = d
 215
 216         if d != None and static == 1:
 217             d.set_static(1)
 218
 219         if d != None and name != None and type != None:
 220             self.references[name] = d
 221
 222         if name == debugsym:
 223             print("New ref: %s" % (d))
 224
 225         return d
 226
 227     def add(self, name, header, module, static, type, lineno, info=None, extra=None, conditionals = None):
 228         if name[0:2] == '__':
 229             return None
 230         d = None
 231         try:
 232            d = self.identifiers[name]
 233            d.update(header, module, type, lineno, info, extra, conditionals)
 234         except:
 235            d = identifier(name, header, module, type, lineno, info, extra, conditionals)
 236            self.identifiers[name] = d
 237
 238         if d != None and static == 1:
 239             d.set_static(1)
 240
 241         if d != None and name != None and type != None:
 242             if type == "function":
 243                 self.functions[name] = d
 244             elif type == "functype":
 245                 self.functions[name] = d
 246             elif type == "variable":
 247                 self.variables[name] = d
 248             elif type == "include":
 249                 self.includes[name] = d
 250             elif type == "struct":
 251                 self.structs[name] = d
 252             elif type == "enum":
 253                 self.enums[name] = d
 254             elif type == "typedef":
 255                 self.typedefs[name] = d
 256             elif type == "macro":
 257                 self.macros[name] = d
 258             else:
 259                 print("Unable to register type ", type)
 260
 261         if name == debugsym:
 262             print("New symbol: %s" % (d))
 263
 264         return d
 265
 266     def merge(self, idx):
 267         for id in list(idx.functions.keys()):
 268               #
 269               # macro might be used to override functions or variables
 270               # definitions
 271               #
 272              if id in self.macros:
 273                  del self.macros[id]
 274              if id in self.functions:
 275                  print("function %s from %s redeclared in %s" % (
 276                     id, self.functions[id].header, idx.functions[id].header))
 277              else:
 278                  self.functions[id] = idx.functions[id]
 279                  self.identifiers[id] = idx.functions[id]
 280         for id in list(idx.variables.keys()):
 281               #
 282               # macro might be used to override functions or variables
 283               # definitions
 284               #
 285              if id in self.macros:
 286                  del self.macros[id]
 287              if id in self.variables:
 288                  print("variable %s from %s redeclared in %s" % (
 289                     id, self.variables[id].header, idx.variables[id].header))
 290              else:
 291                  self.variables[id] = idx.variables[id]
 292                  self.identifiers[id] = idx.variables[id]
 293         for id in list(idx.structs.keys()):
 294              if id in self.structs:
 295                  print("struct %s from %s redeclared in %s" % (
 296                     id, self.structs[id].header, idx.structs[id].header))
 297              else:
 298                  self.structs[id] = idx.structs[id]
 299                  self.identifiers[id] = idx.structs[id]
 300         for id in list(idx.typedefs.keys()):
 301              if id in self.typedefs:
 302                  print("typedef %s from %s redeclared in %s" % (
 303                     id, self.typedefs[id].header, idx.typedefs[id].header))
 304              else:
 305                  self.typedefs[id] = idx.typedefs[id]
 306                  self.identifiers[id] = idx.typedefs[id]
 307         for id in list(idx.macros.keys()):
 308               #
 309               # macro might be used to override functions or variables
 310               # definitions
 311               #
 312              if id in self.variables:
 313                  continue
 314              if id in self.functions:
 315                  continue
 316              if id in self.enums:
 317                  continue
 318              if id in self.macros:
 319                  print("macro %s from %s redeclared in %s" % (
 320                     id, self.macros[id].header, idx.macros[id].header))
 321              else:
 322                  self.macros[id] = idx.macros[id]
 323                  self.identifiers[id] = idx.macros[id]
 324         for id in list(idx.enums.keys()):
 325              if id in self.enums:
 326                  print("enum %s from %s redeclared in %s" % (
 327                     id, self.enums[id].header, idx.enums[id].header))
 328              else:
 329                  self.enums[id] = idx.enums[id]
 330                  self.identifiers[id] = idx.enums[id]
 331
 332     def merge_public(self, idx):
 333         for id in list(idx.functions.keys()):
 334              if id in self.functions:
 335                  # check that function condition agrees with header
 336                  if idx.functions[id].conditionals != \
 337                     self.functions[id].conditionals:
 338                      print("Header condition differs from Function for %s:" \
 339                         % id)
 340                      print("  H: %s" % self.functions[id].conditionals)
 341                      print("  C: %s" % idx.functions[id].conditionals)
 342                  up = idx.functions[id]
 343                  self.functions[id].update(None, up.module, up.type, up.info, up.extra)
 344          #     else:
 345          #         print "Function %s from %s is not declared in headers" % (
 346          #                id, idx.functions[id].module)
 347          # TODO: do the same for variables.
 348
 349     def analyze_dict(self, type, dict):
 350         count = 0
 351         public = 0
 352         for name in list(dict.keys()):
 353             id = dict[name]
 354             count = count + 1
 355             if id.static == 0:
 356                 public = public + 1
 357         if count != public:
 358             print("  %d %s , %d public" % (count, type, public))
 359         elif count != 0:
 360             print("  %d public %s" % (count, type))
 361
 362
 363     def analyze(self):
 364         self.analyze_dict("functions", self.functions)
 365         self.analyze_dict("variables", self.variables)
 366         self.analyze_dict("structs", self.structs)
 367         self.analyze_dict("typedefs", self.typedefs)
 368         self.analyze_dict("macros", self.macros)
 369
 370 class CLexer:
 371     """A lexer for the C language, tokenize the input by reading and
 372        analyzing it line by line"""
 373     def __init__(self, input):
 374         self.input = input
 375         self.tokens = []
 376         self.line = ""
 377         self.lineno = 0
 378
 379     def getline(self):
 380         line = ''
 381         while line == '':
 382             line = self.input.readline()
 383             if not line:
 384                 return None
 385             self.lineno = self.lineno + 1
 386             line = line.lstrip()
 387             line = line.rstrip()
 388             if line == '':
 389                 continue
 390             while line[-1] == '\\':
 391                 line = line[:-1]
 392                 n = self.input.readline()
 393                 self.lineno = self.lineno + 1
 394                 n = n.lstrip()
 395                 n = n.rstrip()
 396                 if not n:
 397                     break
 398                 else:
 399                     line = line + n
 400         return line
 401
 402     def getlineno(self):
 403         return self.lineno
 404
 405     def push(self, token):
 406         self.tokens.insert(0, token);
 407
 408     def debug(self):
 409         print("Last token: ", self.last)
 410         print("Token queue: ", self.tokens)
 411         print("Line %d end: " % (self.lineno), self.line)
 412
 413     def token(self):
 414         while self.tokens == []:
 415             if self.line == "":
 416                 line = self.getline()
 417             else:
 418                 line = self.line
 419                 self.line = ""
 420             if line == None:
 421                 return None
 422
 423             if line[0] == '#':
 424                 self.tokens = list(map((lambda x: ('preproc', x)),
 425                                   line.split()))
 426                 break;
 427             l = len(line)
 428             if line[0] == '"' or line[0] == "'":
 429                 end = line[0]
 430                 line = line[1:]
 431                 found = 0
 432                 tok = ""
 433                 while found == 0:
 434                     i = 0
 435                     l = len(line)
 436                     while i < l:
 437                         if line[i] == end:
 438                             self.line = line[i+1:]
 439                             line = line[:i]
 440                             l = i
 441                             found = 1
 442                             break
 443                         if line[i] == '\\':
 444                             i = i + 1
 445                         i = i + 1
 446                     tok = tok + line
 447                     if found == 0:
 448                         line = self.getline()
 449                         if line == None:
 450                             return None
 451                 self.last = ('string', tok)
 452                 return self.last
 453
 454             if l >= 2 and line[0] == '/' and line[1] == '*':
 455                 line = line[2:]
 456                 found = 0
 457                 tok = ""
 458                 while found == 0:
 459                     i = 0
 460                     l = len(line)
 461                     while i < l:
 462                         if line[i] == '*' and i+1 < l and line[i+1] == '/':
 463                             self.line = line[i+2:]
 464                             line = line[:i-1]
 465                             l = i
 466                             found = 1
 467                             break
 468                         i = i + 1
 469                     if tok != "":
 470                         tok = tok + "\n"
 471                     tok = tok + line
 472                     if found == 0:
 473                         line = self.getline()
 474                         if line == None:
 475                             return None
 476                 self.last = ('comment', tok)
 477                 return self.last
 478             if l >= 2 and line[0] == '/' and line[1] == '/':
 479                 line = line[2:]
 480                 self.last = ('comment', line)
 481                 return self.last
 482             i = 0
 483             while i < l:
 484                 if line[i] == '/' and i+1 < l and line[i+1] == '/':
 485                     self.line = line[i:]
 486                     line = line[:i]
 487                     break
 488                 if line[i] == '/' and i+1 < l and line[i+1] == '*':
 489                     self.line = line[i:]
 490                     line = line[:i]
 491                     break
 492                 if line[i] == '"' or line[i] == "'":
 493                     self.line = line[i:]
 494                     line = line[:i]
 495                     break
 496                 i = i + 1
 497             l = len(line)
 498             i = 0
 499             while i < l:
 500                 if line[i] == ' ' or line[i] == '\t':
 501                     i = i + 1
 502                     continue
 503                 o = ord(line[i])
 504                 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
 505                    (o >= 48 and o <= 57):
 506                     s = i
 507                     while i < l:
 508                         o = ord(line[i])
 509                         if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
 510                            (o >= 48 and o <= 57) or \
 511                            (" \t(){}:;,+-*/%&!|[]=><".find(line[i])) == -1:
 512                             i = i + 1
 513                         else:
 514                             break
 515                     self.tokens.append(('name', line[s:i]))
 516                     continue
 517                 if "(){}:;,[]".find(line[i]) != -1:
 518 #                 if line[i] == '(' or line[i] == ')' or line[i] == '{' or \
 519 #                    line[i] == '}' or line[i] == ':' or line[i] == ';' or \
 520 #                    line[i] == ',' or line[i] == '[' or line[i] == ']':
 521                     self.tokens.append(('sep', line[i]))
 522                     i = i + 1
 523                     continue
 524                 if "+-*><=/%&!|.".find(line[i]) != -1:
 525 #                 if line[i] == '+' or line[i] == '-' or line[i] == '*' or \
 526 #                    line[i] == '>' or line[i] == '<' or line[i] == '=' or \
 527 #                    line[i] == '/' or line[i] == '%' or line[i] == '&' or \
 528 #                    line[i] == '!' or line[i] == '|' or line[i] == '.':
 529                     if line[i] == '.' and  i + 2 < l and \
 530                        line[i+1] == '.' and line[i+2] == '.':
 531                         self.tokens.append(('name', '...'))
 532                         i = i + 3
 533                         continue
 534
 535                     j = i + 1
 536                     if j < l and (
 537                        "+-*><=/%&!|".find(line[j]) != -1):
 538 #                        line[j] == '+' or line[j] == '-' or line[j] == '*' or \
 539 #                        line[j] == '>' or line[j] == '<' or line[j] == '=' or \
 540 #                        line[j] == '/' or line[j] == '%' or line[j] == '&' or \
 541 #                        line[j] == '!' or line[j] == '|'):
 542                         self.tokens.append(('op', line[i:j+1]))
 543                         i = j + 1
 544                     else:
 545                         self.tokens.append(('op', line[i]))
 546                         i = i + 1
 547                     continue
 548                 s = i
 549                 while i < l:
 550                     o = ord(line[i])
 551                     if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
 552                        (o >= 48 and o <= 57) or (
 553                         " \t(){}:;,+-*/%&!|[]=><".find(line[i]) == -1):
 554 #                         line[i] != ' ' and line[i] != '\t' and
 555 #                         line[i] != '(' and line[i] != ')' and
 556 #                         line[i] != '{'  and line[i] != '}' and
 557 #                         line[i] != ':' and line[i] != ';' and
 558 #                         line[i] != ',' and line[i] != '+' and
 559 #                         line[i] != '-' and line[i] != '*' and
 560 #                         line[i] != '/' and line[i] != '%' and
 561 #                         line[i] != '&' and line[i] != '!' and
 562 #                         line[i] != '|' and line[i] != '[' and
 563 #                         line[i] != ']' and line[i] != '=' and
 564 #                         line[i] != '*' and line[i] != '>' and
 565 #                         line[i] != '<'):
 566                         i = i + 1
 567                     else:
 568                         break
 569                 self.tokens.append(('name', line[s:i]))
 570
 571         tok = self.tokens[0]
 572         self.tokens = self.tokens[1:]
 573         self.last = tok
 574         return tok
 575
 576 class CParser:
 577     """The C module parser"""
 578     def __init__(self, filename, idx = None):
 579         self.filename = filename
 580         if len(filename) > 2 and filename[-2:] == '.h':
 581             self.is_header = 1
 582         else:
 583             self.is_header = 0
 584         self.input = open(filename)
 585         self.lexer = CLexer(self.input)
 586         if idx == None:
 587             self.index = index()
 588         else:
 589             self.index = idx
 590         self.top_comment = ""
 591         self.last_comment = ""
 592         self.comment = None
 593         self.collect_ref = 0
 594         self.no_error = 0
 595         self.conditionals = []
 596         self.defines = []
 597
    def collect_references(self):
        """Set the collect_ref flag to 1.

        NOTE(review): the code reading collect_ref is outside this section;
        presumably it turns on the recording of symbol references - confirm.
        """
        self.collect_ref = 1
 600
    def stop_error(self):
        """Silence warning() and error(): both return at once while the
        no_error flag is set."""
        self.no_error = 1
 603
    def start_error(self):
        """Clear the no_error flag so warning() and error() report again."""
        self.no_error = 0
 606
    def lineno(self):
        """Return the current line number as reported by the lexer."""
        return self.lexer.getlineno()
 609
 610     def index_add(self, name, module, static, type, info=None, extra = None):
 611         if self.is_header == 1:
 612             self.index.add(name, module, module, static, type, self.lineno(),
 613                            info, extra, self.conditionals)
 614         else:
 615             self.index.add(name, None, module, static, type, self.lineno(),
 616                            info, extra, self.conditionals)
 617
 618     def index_add_ref(self, name, module, static, type, info=None,
 619                       extra = None):
 620         if self.is_header == 1:
 621             self.index.add_ref(name, module, module, static, type,
 622                                self.lineno(), info, extra, self.conditionals)
 623         else:
 624             self.index.add_ref(name, None, module, static, type, self.lineno(),
 625                                info, extra, self.conditionals)
 626
 627     def warning(self, msg):
 628         if self.no_error:
 629             return
 630         print(msg)
 631
 632     def error(self, msg, token=-1):
 633         if self.no_error:
 634             return
 635
 636         print("Parse Error: " + msg)
 637         if token != -1:
 638             print("Got token ", token)
 639         self.lexer.debug()
 640         sys.exit(1)
 641
 642     def debug(self, msg, token=-1):
 643         print("Debug: " + msg)
 644         if token != -1:
 645             print("Got token ", token)
 646         self.lexer.debug()
 647
 648     def parseTopComment(self, comment):
 649         res = {}
 650         lines = comment.split("\n")
 651         item = None
 652         for line in lines:
 653             while line != "" and (line[0] == ' ' or line[0] == '\t'):
 654                 line = line[1:]
 655             while line != "" and line[0] == '*':
 656                 line = line[1:]
 657             while line != "" and (line[0] == ' ' or line[0] == '\t'):
 658                 line = line[1:]
 659             try:
 660                 (it, line) = line.split(":", 1)
 661                 item = it
 662                 while line != "" and (line[0] == ' ' or line[0] == '\t'):
 663                     line = line[1:]
 664                 if item in res:
 665                     res[item] = res[item] + " " + line
 666                 else:
 667                     res[item] = line
 668             except:
 669                 if item != None:
 670                     if item in res:
 671                         res[item] = res[item] + " " + line
 672                     else:
 673                         res[item] = line
 674         self.index.info = res
 675
 676     def parseComment(self, token):
 677         if self.top_comment == "":
 678             self.top_comment = token[1]
 679         if self.comment == None or token[1][0] == '*':
 680             self.comment = token[1];
 681         else:
 682             self.comment = self.comment + token[1]
 683         token = self.lexer.token()
 684
 685         if self.comment.find("DOC_DISABLE") != -1:
 686             self.stop_error()
 687
 688         if self.comment.find("DOC_ENABLE") != -1:
 689             self.start_error()
 690
 691         return token
 692
    #
    # Parse the comment block associated with a typedef
    #
 696     def parseTypeComment(self, name, quiet = 0):
 697         if name[0:2] == '__':
 698             quiet = 1
 699
 700         args = []
 701         desc = ""
 702
 703         if self.comment == None:
 704             if not quiet:
 705                 self.warning("Missing comment for type %s" % (name))
 706             return((args, desc))
 707         if self.comment[0] != '*':
 708             if not quiet:
 709                 self.warning("Missing * in type comment for %s" % (name))
 710             return((args, desc))
 711         lines = self.comment.split('\n')
 712         if lines[0] == '*':
 713             del lines[0]
 714         if lines[0] != "* %s:" % (name):
 715             if not quiet:
 716                 self.warning("Misformatted type comment for %s" % (name))
 717                 self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
 718             return((args, desc))
 719         del lines[0]
 720         while len(lines) > 0 and lines[0] == '*':
 721             del lines[0]
 722         desc = ""
 723         while len(lines) > 0:
 724             l = lines[0]
 725             while len(l) > 0 and l[0] == '*':
 726                 l = l[1:]
 727             l = l.strip()
 728             desc = desc + " " + l
 729             del lines[0]
 730
 731         desc = desc.strip()
 732
 733         if quiet == 0:
 734             if desc == "":
 735                 self.warning("Type comment for %s lack description of the macro" % (name))
 736
 737         return(desc)
    #
    # Parse the comment block associated with a macro
    #
 741     def parseMacroComment(self, name, quiet = 0):
 742         if name[0:2] == '__':
 743             quiet = 1
 744
 745         args = []
 746         desc = ""
 747
 748         if self.comment == None:
 749             if not quiet:
 750                 self.warning("Missing comment for macro %s" % (name))
 751             return((args, desc))
 752         if self.comment[0] != '*':
 753             if not quiet:
 754                 self.warning("Missing * in macro comment for %s" % (name))
 755             return((args, desc))
 756         lines = self.comment.split('\n')
 757         if lines[0] == '*':
 758             del lines[0]
 759         if lines[0] != "* %s:" % (name):
 760             if not quiet:
 761                 self.warning("Misformatted macro comment for %s" % (name))
 762                 self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
 763             return((args, desc))
 764         del lines[0]
 765         while lines[0] == '*':
 766             del lines[0]
 767         while len(lines) > 0 and lines[0][0:3] == '* @':
 768             l = lines[0][3:]
 769             try:
 770                 (arg, desc) = l.split(':', 1)
 771                 desc=desc.strip()
 772                 arg=arg.strip()
 773             except:
 774                 if not quiet:
 775                     self.warning("Misformatted macro comment for %s" % (name))
 776                     self.warning("  problem with '%s'" % (lines[0]))
 777                 del lines[0]
 778                 continue
 779             del lines[0]
 780             l = lines[0].strip()
 781             while len(l) > 2 and l[0:3] != '* @':
 782                 while l[0] == '*':
 783                     l = l[1:]
 784                 desc = desc + ' ' + l.strip()
 785                 del lines[0]
 786                 if len(lines) == 0:
 787                     break
 788                 l = lines[0]
 789             args.append((arg, desc))
 790         while len(lines) > 0 and lines[0] == '*':
 791             del lines[0]
 792         desc = ""
 793         while len(lines) > 0:
 794             l = lines[0]
 795             while len(l) > 0 and l[0] == '*':
 796                 l = l[1:]
 797             l = l.strip()
 798             desc = desc + " " + l
 799             del lines[0]
 800
 801         desc = desc.strip()
 802
 803         if quiet == 0:
 804             if desc == "":
 805                 self.warning("Macro comment for %s lack description of the macro" % (name))
 806
 807         return((args, desc))
 808
 809      #
     # Parse a comment block and merge the information found in the
     # parameter descriptions; finally return a block as complete
     # as possible
 813      #
 814     def mergeFunctionComment(self, name, description, quiet = 0):
 815         if name == 'main':
 816             quiet = 1
 817         if name[0:2] == '__':
 818             quiet = 1
 819
 820         (ret, args) = description
 821         desc = ""
 822         retdesc = ""
 823
 824         if self.comment == None:
 825             if not quiet:
 826                 self.warning("Missing comment for function %s" % (name))
 827             return(((ret[0], retdesc), args, desc))
 828         if self.comment[0] != '*':
 829             if not quiet:
 830                 self.warning("Missing * in function comment for %s" % (name))
 831             return(((ret[0], retdesc), args, desc))
 832         lines = self.comment.split('\n')
 833         if lines[0] == '*':
 834             del lines[0]
 835         if lines[0] != "* %s:" % (name):
 836             if not quiet:
 837                 self.warning("Misformatted function comment for %s" % (name))
 838                 self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
 839             return(((ret[0], retdesc), args, desc))
 840         del lines[0]
 841         while lines[0] == '*':
 842             del lines[0]
 843         nbargs = len(args)
 844         while len(lines) > 0 and lines[0][0:3] == '* @':
 845             l = lines[0][3:]
 846             try:
 847                 (arg, desc) = l.split(':', 1)
 848                 desc=desc.strip()
 849                 arg=arg.strip()
 850             except:
 851                 if not quiet:
 852                     self.warning("Misformatted function comment for %s" % (name))
 853                     self.warning("  problem with '%s'" % (lines[0]))
 854                 del lines[0]
 855                 continue
 856             del lines[0]
 857             l = lines[0].strip()
 858             while len(l) > 2 and l[0:3] != '* @':
 859                 while l[0] == '*':
 860                     l = l[1:]
 861                 desc = desc + ' ' + l.strip()
 862                 del lines[0]
 863                 if len(lines) == 0:
 864                     break
 865                 l = lines[0]
 866             i = 0
 867             while i < nbargs:
 868                 if args[i][1] == arg:
 869                     args[i] = (args[i][0], arg, desc)
 870                     break;
 871                 i = i + 1
 872             if i >= nbargs:
 873                 if not quiet:
 874                     self.warning("Unable to find arg %s from function comment for %s" % (
 875                        arg, name))
 876         while len(lines) > 0 and lines[0] == '*':
 877             del lines[0]
 878         desc = ""
 879         while len(lines) > 0:
 880             l = lines[0]
 881             while len(l) > 0 and l[0] == '*':
 882                 l = l[1:]
 883             l = l.strip()
 884             if len(l) >= 6 and  l[0:6] == "return" or l[0:6] == "Return":
 885                 try:
 886                     l = l.split(' ', 1)[1]
 887                 except:
 888                     l = ""
 889                 retdesc = l.strip()
 890                 del lines[0]
 891                 while len(lines) > 0:
 892                     l = lines[0]
 893                     while len(l) > 0 and l[0] == '*':
 894                         l = l[1:]
 895                     l = l.strip()
 896                     retdesc = retdesc + " " + l
 897                     del lines[0]
 898             else:
 899                 desc = desc + " " + l
 900                 del lines[0]
 901
 902         retdesc = retdesc.strip()
 903         desc = desc.strip()
 904
 905         if quiet == 0:
 906              #
 907              # report missing comments
 908              #
 909             i = 0
 910             while i < nbargs:
 911                 if args[i][2] == None and args[i][0] != "void" and \
 912                    ((args[i][1] != None) or (args[i][1] == '')):
 913                     self.warning("Function comment for %s lacks description of arg %s" % (name, args[i][1]))
 914                 i = i + 1
 915             if retdesc == "" and ret[0] != "void":
 916                 self.warning("Function comment for %s lacks description of return value" % (name))
 917             if desc == "":
 918                 self.warning("Function comment for %s lacks description of the function" % (name))
 919
 920         return(((ret[0], retdesc), args, desc))
 921
 922     def parsePreproc(self, token):
 923         if debug:
 924             print("=> preproc ", token, self.lexer.tokens)
 925         name = token[1]
 926         if name == "#include":
 927             token = self.lexer.token()
 928             if token == None:
 929                 return None
 930             if token[0] == 'preproc':
 931                 self.index_add(token[1], self.filename, not self.is_header,
 932                                 "include")
 933                 return self.lexer.token()
 934             return token
 935         if name == "#define":
 936             token = self.lexer.token()
 937             if token == None:
 938                 return None
 939             if token[0] == 'preproc':
 940                  # TODO macros with arguments
 941                 name = token[1]
 942                 lst = []
 943                 token = self.lexer.token()
 944                 while token != None and token[0] == 'preproc' and \
 945                       token[1][0] != '#':
 946                     lst.append(token[1])
 947                     token = self.lexer.token()
 948                 try:
 949                     name = name.split('(') [0]
 950                 except:
 951                     pass
 952                 info = self.parseMacroComment(name, not self.is_header)
 953                 self.index_add(name, self.filename, not self.is_header,
 954                                 "macro", info)
 955                 return token
 956
 957         #
 958         # Processing of conditionals modified by Bill 1/1/05
 959         #
 960         # We process conditionals (i.e. tokens from #ifdef, #ifndef,
 961         # #if, #else and #endif) for headers and mainline code,
 962         # store the ones from the header in libxml2-api.xml, and later
 963         # (in the routine merge_public) verify that the two (header and
 964         # mainline code) agree.
 965         #
 966         # There is a small problem with processing the headers. Some of
 967         # the variables are not concerned with enabling / disabling of
 968         # library functions (e.g. '__XML_PARSER_H__'), and we don't want
 969         # them to be included in libxml2-api.xml, or involved in
 970         # the check between the header and the mainline code.  To
 971         # accomplish this, we ignore any conditional which doesn't include
 972         # the string 'ENABLED'
 973         #
 974         if name == "#ifdef":
 975             apstr = self.lexer.tokens[0][1]
 976             try:
 977                 self.defines.append(apstr)
 978                 if apstr.find('ENABLED') != -1:
 979                     self.conditionals.append("defined(%s)" % apstr)
 980             except:
 981                 pass
 982         elif name == "#ifndef":
 983             apstr = self.lexer.tokens[0][1]
 984             try:
 985                 self.defines.append(apstr)
 986                 if apstr.find('ENABLED') != -1:
 987                     self.conditionals.append("!defined(%s)" % apstr)
 988             except:
 989                 pass
 990         elif name == "#if":
 991             apstr = ""
 992             for tok in self.lexer.tokens:
 993                 if apstr != "":
 994                     apstr = apstr + " "
 995                 apstr = apstr + tok[1]
 996             try:
 997                 self.defines.append(apstr)
 998                 if apstr.find('ENABLED') != -1:
 999                     self.conditionals.append(apstr)
1000             except:
1001                 pass
1002         elif name == "#else":
1003             if self.conditionals != [] and \
1004                self.defines[-1].find('ENABLED') != -1:
1005                 self.conditionals[-1] = "!(%s)" % self.conditionals[-1]
1006         elif name == "#endif":
1007             if self.conditionals != [] and \
1008                self.defines[-1].find('ENABLED') != -1:
1009                 self.conditionals = self.conditionals[:-1]
1010             self.defines = self.defines[:-1]
1011         token = self.lexer.token()
1012         while token != None and token[0] == 'preproc' and \
1013             token[1][0] != '#':
1014             token = self.lexer.token()
1015         return token
1016
1017      #
     # Token acquisition on top of the lexer; it internally handles
     # preprocessor directives and comments since they are logically
     # not part of the program structure.
1021      #
1022     def token(self):
1023         global ignored_words
1024
1025         token = self.lexer.token()
1026         while token != None:
1027             if token[0] == 'comment':
1028                 token = self.parseComment(token)
1029                 continue
1030             elif token[0] == 'preproc':
1031                 token = self.parsePreproc(token)
1032                 continue
1033             elif token[0] == "name" and token[1] == "__const":
1034                 token = ("name", "const")
1035                 return token
1036             elif token[0] == "name" and token[1] == "__attribute":
1037                 token = self.lexer.token()
1038                 while token != None and token[1] != ";":
1039                     token = self.lexer.token()
1040                 return token
1041             elif token[0] == "name" and token[1] in ignored_words:
1042                 (n, info) = ignored_words[token[1]]
1043                 i = 0
1044                 while i < n:
1045                     token = self.lexer.token()
1046                     i = i + 1
1047                 token = self.lexer.token()
1048                 continue
1049             else:
1050                 if debug:
1051                     print("=> ", token)
1052                 return token
1053         return None
1054
1055      #
1056      # Parse a typedef, it records the type and its name.
1057      #
1058     def parseTypedef(self, token):
1059         if token == None:
1060             return None
1061         token = self.parseType(token)
1062         if token == None:
1063             self.error("parsing typedef")
1064             return None
1065         base_type = self.type
1066         type = base_type
1067          #self.debug("end typedef type", token)
1068         while token != None:
1069             if token[0] == "name":
1070                 name = token[1]
1071                 signature = self.signature
1072                 if signature != None:
1073                     type = type.split('(')[0]
1074                     d = self.mergeFunctionComment(name,
1075                             ((type, None), signature), 1)
1076                     self.index_add(name, self.filename, not self.is_header,
1077                                     "functype", d)
1078                 else:
1079                     if base_type == "struct":
1080                         self.index_add(name, self.filename, not self.is_header,
1081                                         "struct", type)
1082                         base_type = "struct " + name
1083                     else:
1084                         # TODO report missing or misformatted comments
1085                         info = self.parseTypeComment(name, 1)
1086                         self.index_add(name, self.filename, not self.is_header,
1087                                     "typedef", type, info)
1088                 token = self.token()
1089             else:
1090                 self.error("parsing typedef: expecting a name")
1091                 return token
1092              #self.debug("end typedef", token)
1093             if token != None and token[0] == 'sep' and token[1] == ',':
1094                 type = base_type
1095                 token = self.token()
1096                 while token != None and token[0] == "op":
1097                     type = type + token[1]
1098                     token = self.token()
1099             elif token != None and token[0] == 'sep' and token[1] == ';':
1100                 break;
1101             elif token != None and token[0] == 'name':
1102                 type = base_type
1103                 continue;
1104             else:
1105                 self.error("parsing typedef: expecting ';'", token)
1106                 return token
1107         token = self.token()
1108         return token
1109
1110      #
     # Parse a C code block; used for functions, it parses up to and
     # including the balancing }
1113      #
1114     def parseBlock(self, token):
1115         while token != None:
1116             if token[0] == "sep" and token[1] == "{":
1117                 token = self.token()
1118                 token = self.parseBlock(token)
1119             elif token[0] == "sep" and token[1] == "}":
1120                 self.comment = None
1121                 token = self.token()
1122                 return token
1123             else:
1124                 if self.collect_ref == 1:
1125                     oldtok = token
1126                     token = self.token()
1127                     if oldtok[0] == "name" and oldtok[1][0:3] == "xml":
1128                         if token[0] == "sep" and token[1] == "(":
1129                             self.index_add_ref(oldtok[1], self.filename,
1130                                                 0, "function")
1131                             token = self.token()
1132                         elif token[0] == "name":
1133                             token = self.token()
1134                             if token[0] == "sep" and (token[1] == ";" or
1135                                token[1] == "," or token[1] == "="):
1136                                 self.index_add_ref(oldtok[1], self.filename,
1137                                                     0, "type")
1138                     elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_":
1139                         self.index_add_ref(oldtok[1], self.filename,
1140                                             0, "typedef")
1141                     elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_":
1142                         self.index_add_ref(oldtok[1], self.filename,
1143                                             0, "typedef")
1144
1145                 else:
1146                     token = self.token()
1147         return token
1148
1149      #
1150      # Parse a C struct definition till the balancing }
1151      #
1152     def parseStruct(self, token):
1153         fields = []
1154          #self.debug("start parseStruct", token)
1155         while token != None:
1156             if token[0] == "sep" and token[1] == "{":
1157                 token = self.token()
1158                 token = self.parseTypeBlock(token)
1159             elif token[0] == "sep" and token[1] == "}":
1160                 self.struct_fields = fields
1161                  #self.debug("end parseStruct", token)
1162                  #print fields
1163                 token = self.token()
1164                 return token
1165             else:
1166                 base_type = self.type
1167                  #self.debug("before parseType", token)
1168                 token = self.parseType(token)
1169                  #self.debug("after parseType", token)
1170                 if token != None and token[0] == "name":
1171                     fname = token[1]
1172                     token = self.token()
1173                     if token[0] == "sep" and token[1] == ";":
1174                         self.comment = None
1175                         token = self.token()
1176                         fields.append((self.type, fname, self.comment))
1177                         self.comment = None
1178                     else:
1179                         self.error("parseStruct: expecting ;", token)
1180                 elif token != None and token[0] == "sep" and token[1] == "{":
1181                     token = self.token()
1182                     token = self.parseTypeBlock(token)
1183                     if token != None and token[0] == "name":
1184                         token = self.token()
1185                     if token != None and token[0] == "sep" and token[1] == ";":
1186                         token = self.token()
1187                     else:
1188                         self.error("parseStruct: expecting ;", token)
1189                 else:
1190                     self.error("parseStruct: name", token)
1191                     token = self.token()
1192                 self.type = base_type;
1193         self.struct_fields = fields
1194          #self.debug("end parseStruct", token)
1195          #print fields
1196         return token
1197
1198      #
1199      # Parse a C enum block, parse till the balancing }
1200      #
1201     def parseEnumBlock(self, token):
1202         self.enums = []
1203         name = None
1204         self.comment = None
1205         comment = ""
1206         value = "0"
1207         while token != None:
1208             if token[0] == "sep" and token[1] == "{":
1209                 token = self.token()
1210                 token = self.parseTypeBlock(token)
1211             elif token[0] == "sep" and token[1] == "}":
1212                 if name != None:
1213                     if self.comment != None:
1214                         comment = self.comment
1215                         self.comment = None
1216                     self.enums.append((name, value, comment))
1217                 token = self.token()
1218                 return token
1219             elif token[0] == "name":
1220                     if name != None:
1221                         if self.comment != None:
1222                             comment = self.comment.strip()
1223                             self.comment = None
1224                         self.enums.append((name, value, comment))
1225                     name = token[1]
1226                     comment = ""
1227                     token = self.token()
1228                     if token[0] == "op" and token[1][0] == "=":
1229                         value = ""
1230                         if len(token[1]) > 1:
1231                             value = token[1][1:]
1232                         token = self.token()
1233                         while token[0] != "sep" or (token[1] != ',' and
1234                               token[1] != '}'):
1235                             value = value + token[1]
1236                             token = self.token()
1237                     else:
1238                         try:
1239                             value = "%d" % (int(value) + 1)
1240                         except:
1241                             self.warning("Failed to compute value of enum %s" % (name))
1242                             value=""
1243                     if token[0] == "sep" and token[1] == ",":
1244                         token = self.token()
1245             else:
1246                 token = self.token()
1247         return token
1248
1249      #
     # Parse a C definition block; used for structs, it parses up to
     # and including the balancing }
1252      #
1253     def parseTypeBlock(self, token):
1254         while token != None:
1255             if token[0] == "sep" and token[1] == "{":
1256                 token = self.token()
1257                 token = self.parseTypeBlock(token)
1258             elif token[0] == "sep" and token[1] == "}":
1259                 token = self.token()
1260                 return token
1261             else:
1262                 token = self.token()
1263         return token
1264
1265      #
1266      # Parse a type: the fact that the type name can either occur after
1267      #    the definition or within the definition makes it a little harder
1268      #    if inside, the name token is pushed back before returning
1269      #
1270     def parseType(self, token):
1271         self.type = ""
1272         self.struct_fields = []
1273         self.signature = None
1274         if token == None:
1275             return token
1276
1277         while token[0] == "name" and (
1278               token[1] == "const" or \
1279               token[1] == "unsigned" or \
1280               token[1] == "signed"):
1281             if self.type == "":
1282                 self.type = token[1]
1283             else:
1284                 self.type = self.type + " " + token[1]
1285             token = self.token()
1286
1287         if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
1288             if self.type == "":
1289                 self.type = token[1]
1290             else:
1291                 self.type = self.type + " " + token[1]
1292             if token[0] == "name" and token[1] == "int":
1293                 if self.type == "":
1294                     self.type = tmp[1]
1295                 else:
1296                     self.type = self.type + " " + tmp[1]
1297
1298         elif token[0] == "name" and token[1] == "struct":
1299             if self.type == "":
1300                 self.type = token[1]
1301             else:
1302                 self.type = self.type + " " + token[1]
1303             token = self.token()
1304             nametok = None
1305             if token[0] == "name":
1306                 nametok = token
1307                 token = self.token()
1308             if token != None and token[0] == "sep" and token[1] == "{":
1309                 token = self.token()
1310                 token = self.parseStruct(token)
1311             elif token != None and token[0] == "op" and token[1] == "*":
1312                 self.type = self.type + " " + nametok[1] + " *"
1313                 token = self.token()
1314                 while token != None and token[0] == "op" and token[1] == "*":
1315                     self.type = self.type + " *"
1316                     token = self.token()
1317                 if token[0] == "name":
1318                     nametok = token
1319                     token = self.token()
1320                 else:
1321                     self.error("struct : expecting name", token)
1322                     return token
1323             elif token != None and token[0] == "name" and nametok != None:
1324                 self.type = self.type + " " + nametok[1]
1325                 return token
1326
1327             if nametok != None:
1328                 self.lexer.push(token)
1329                 token = nametok
1330             return token
1331
1332         elif token[0] == "name" and token[1] == "enum":
1333             if self.type == "":
1334                 self.type = token[1]
1335             else:
1336                 self.type = self.type + " " + token[1]
1337             self.enums = []
1338             token = self.token()
1339             if token != None and token[0] == "sep" and token[1] == "{":
1340                 token = self.token()
1341                 token = self.parseEnumBlock(token)
1342             else:
1343                 self.error("parsing enum: expecting '{'", token)
1344             enum_type = None
1345             if token != None and token[0] != "name":
1346                 self.lexer.push(token)
1347                 token = ("name", "enum")
1348             else:
1349                 enum_type = token[1]
1350             for enum in self.enums:
1351                 self.index_add(enum[0], self.filename,
1352                                not self.is_header, "enum",
1353                                (enum[1], enum[2], enum_type))
1354             return token
1355
1356         elif token[0] == "name":
1357             if self.type == "":
1358                 self.type = token[1]
1359             else:
1360                 self.type = self.type + " " + token[1]
1361         else:
1362             self.error("parsing type %s: expecting a name" % (self.type),
1363                        token)
1364             return token
1365         token = self.token()
1366         while token != None and (token[0] == "op" or
1367               token[0] == "name" and token[1] == "const"):
1368             self.type = self.type + " " + token[1]
1369             token = self.token()
1370
1371          #
1372          # if there is a parenthesis here, this means a function type
1373          #
1374         if token != None and token[0] == "sep" and token[1] == '(':
1375             self.type = self.type + token[1]
1376             token = self.token()
1377             while token != None and token[0] == "op" and token[1] == '*':
1378                 self.type = self.type + token[1]
1379                 token = self.token()
1380             if token == None or token[0] != "name" :
1381                 self.error("parsing function type, name expected", token);
1382                 return token
1383             self.type = self.type + token[1]
1384             nametok = token
1385             token = self.token()
1386             if token != None and token[0] == "sep" and token[1] == ')':
1387                 self.type = self.type + token[1]
1388                 token = self.token()
1389                 if token != None and token[0] == "sep" and token[1] == '(':
1390                     token = self.token()
1391                     type = self.type;
1392                     token = self.parseSignature(token);
1393                     self.type = type;
1394                 else:
1395                     self.error("parsing function type, '(' expected", token);
1396                     return token
1397             else:
1398                 self.error("parsing function type, ')' expected", token);
1399                 return token
1400             self.lexer.push(token)
1401             token = nametok
1402             return token
1403
1404          #
1405          # do some lookahead for arrays
1406          #
1407         if token != None and token[0] == "name":
1408             nametok = token
1409             token = self.token()
1410             if token != None and token[0] == "sep" and token[1] == '[':
1411                 self.type = self.type + nametok[1]
1412                 while token != None and token[0] == "sep" and token[1] == '[':
1413                     self.type = self.type + token[1]
1414                     token = self.token()
1415                     while token != None and token[0] != 'sep' and \
1416                           token[1] != ']' and token[1] != ';':
1417                         self.type = self.type + token[1]
1418                         token = self.token()
1419                 if token != None and token[0] == 'sep' and token[1] == ']':
1420                     self.type = self.type + token[1]
1421                     token = self.token()
1422                 else:
1423                     self.error("parsing array type, ']' expected", token);
1424                     return token
1425             elif token != None and token[0] == "sep" and token[1] == ':':
1426                  # remove :12 in case it's a limited int size
1427                 token = self.token()
1428                 token = self.token()
1429             self.lexer.push(token)
1430             token = nametok
1431
1432         return token
1433
1434      #
1435      # Parse a signature: '(' has been parsed and we scan the type definition
1436      #    up to the ')' included
1437     def parseSignature(self, token):
1438         signature = []
1439         if token != None and token[0] == "sep" and token[1] == ')':
1440             self.signature = []
1441             token = self.token()
1442             return token
1443         while token != None:
1444             token = self.parseType(token)
1445             if token != None and token[0] == "name":
1446                 signature.append((self.type, token[1], None))
1447                 token = self.token()
1448             elif token != None and token[0] == "sep" and token[1] == ',':
1449                 token = self.token()
1450                 continue
1451             elif token != None and token[0] == "sep" and token[1] == ')':
1452                  # only the type was provided
1453                 if self.type == "...":
1454                     signature.append((self.type, "...", None))
1455                 else:
1456                     signature.append((self.type, None, None))
1457             if token != None and token[0] == "sep":
1458                 if token[1] == ',':
1459                     token = self.token()
1460                     continue
1461                 elif token[1] == ')':
1462                     token = self.token()
1463                     break
1464         self.signature = signature
1465         return token
1466
1467      #
1468      # Parse a global definition, be it a type, variable or function
1469      # the extern "C" blocks are a bit nasty and require it to recurse.
1470      #
1471     def parseGlobal(self, token):
1472         static = 0
1473         if token[1] == 'extern':
1474             token = self.token()
1475             if token == None:
1476                 return token
1477             if token[0] == 'string':
1478                 if token[1] == 'C':
1479                     token = self.token()
1480                     if token == None:
1481                         return token
1482                     if token[0] == 'sep' and token[1] == "{":
1483                         token = self.token()
1484 #                         print 'Entering extern "C line ', self.lineno()
1485                         while token != None and (token[0] != 'sep' or
1486                               token[1] != "}"):
1487                             if token[0] == 'name':
1488                                 token = self.parseGlobal(token)
1489                             else:
1490                                 self.error(
1491                                  "token %s %s unexpected at the top level" % (
1492                                         token[0], token[1]))
1493                                 token = self.parseGlobal(token)
1494 #                         print 'Exiting extern "C" line', self.lineno()
1495                         token = self.token()
1496                         return token
1497                 else:
1498                     return token
1499         elif token[1] == 'static':
1500             static = 1
1501             token = self.token()
1502             if token == None or  token[0] != 'name':
1503                 return token
1504
1505         if token[1] == 'typedef':
1506             token = self.token()
1507             return self.parseTypedef(token)
1508         else:
1509             token = self.parseType(token)
1510             type_orig = self.type
1511         if token == None or token[0] != "name":
1512             return token
1513         type = type_orig
1514         self.name = token[1]
1515         token = self.token()
1516         while token != None and (token[0] == "sep" or token[0] == "op"):
1517             if token[0] == "sep":
1518                 if token[1] == "[":
1519                     type = type + token[1]
1520                     token = self.token()
1521                     while token != None and (token[0] != "sep" or \
1522                           token[1] != ";"):
1523                         type = type + token[1]
1524                         token = self.token()
1525
1526             if token != None and token[0] == "op" and token[1] == "=":
1527                  #
1528                  # Skip the initialization of the variable
1529                  #
1530                 token = self.token()
1531                 if token[0] == 'sep' and token[1] == '{':
1532                     token = self.token()
1533                     token = self.parseBlock(token)
1534                 else:
1535                     self.comment = None
1536                     while token != None and (token[0] != "sep" or \
1537                           (token[1] != ';' and token[1] != ',')):
1538                             token = self.token()
1539                 self.comment = None
1540                 if token == None or token[0] != "sep" or (token[1] != ';' and
1541                    token[1] != ','):
1542                     self.error("missing ';' or ',' after value")
1543
1544             if token != None and token[0] == "sep":
1545                 if token[1] == ";":
1546                     self.comment = None
1547                     token = self.token()
1548                     if type == "struct":
1549                         self.index_add(self.name, self.filename,
1550                              not self.is_header, "struct", self.struct_fields)
1551                     else:
1552                         self.index_add(self.name, self.filename,
1553                              not self.is_header, "variable", type)
1554                     break
1555                 elif token[1] == "(":
1556                     token = self.token()
1557                     token = self.parseSignature(token)
1558                     if token == None:
1559                         return None
1560                     if token[0] == "sep" and token[1] == ";":
1561                         d = self.mergeFunctionComment(self.name,
1562                                 ((type, None), self.signature), 1)
1563                         self.index_add(self.name, self.filename, static,
1564                                         "function", d)
1565                         token = self.token()
1566                     elif token[0] == "sep" and token[1] == "{":
1567                         d = self.mergeFunctionComment(self.name,
1568                                 ((type, None), self.signature), static)
1569                         self.index_add(self.name, self.filename, static,
1570                                         "function", d)
1571                         token = self.token()
1572                         token = self.parseBlock(token);
1573                 elif token[1] == ',':
1574                     self.comment = None
1575                     self.index_add(self.name, self.filename, static,
1576                                     "variable", type)
1577                     type = type_orig
1578                     token = self.token()
1579                     while token != None and token[0] == "sep":
1580                         type = type + token[1]
1581                         token = self.token()
1582                     if token != None and token[0] == "name":
1583                         self.name = token[1]
1584                         token = self.token()
1585                 else:
1586                     break
1587
1588         return token
1589
1590     def parse(self):
1591         self.warning("Parsing %s" % (self.filename))
1592         token = self.token()
1593         while token != None:
1594             if token[0] == 'name':
1595                 token = self.parseGlobal(token)
1596             else:
1597                 self.error("token %s %s unexpected at the top level" % (
1598                        token[0], token[1]))
1599                 token = self.parseGlobal(token)
1600                 return
1601         self.parseTopComment(self.top_comment)
1602         return self.index
1603
1604
class docBuilder:
    """A documentation builder.

    Collects the C modules and headers found in a set of directories, runs
    CParser on each of them, merges the resulting indexes and serializes
    everything to <name>-api.xml and <name>-refs.xml.
    """

    # Characters replaced by a blank before a description is cut into words.
    _WORD_SEPARATORS = "'\"/*[]()<>&#,.;"
    # Words which are never entered in the word index.
    # TODO: generalize this a bit
    _STOP_WORDS = ('and', 'the')
    # Type names left out of the constructors/functions cross references.
    _UNLISTED_TYPES = ('', 'void', 'int', 'char *', 'const char *')

    def __init__(self, name, directories=None, excludes=None):
        """Set up an empty builder.

        name: project name, used to build the output file names.
        directories: directories scanned for *.c and *.h (default ['.']).
        excludes: extra file name fragments to skip, in addition to the
                  keys of the module level ignored_files table.
        """
        # Fresh containers per instance rather than shared mutable defaults.
        if directories is None:
            directories = ['.']
        if excludes is None:
            excludes = []
        self.name = name
        self.directories = directories
        self.excludes = list(excludes) + list(ignored_files.keys())
        self.modules = {}
        self.headers = {}
        self.idx = index()
        self.xref = {}
        self.index = {}
        if name == 'libxml2':
            self.basename = 'libxml'
        else:
            self.basename = name

    @staticmethod
    def _has_text(value):
        """Tell whether value is neither None nor the empty string."""
        return value is not None and value != ''

    def indexString(self, id, str):
        """Register the words of the description `str` as pointing to `id`.

        Punctuation is blanked out first.  Words which do not start with an
        ASCII letter, are shorter than 3 characters or are stop words are
        ignored.  Nothing is done when `str` is None.
        """
        if str is None:
            return
        text = str
        for separator in self._WORD_SEPARATORS:
            text = text.replace(separator, ' ')
        for word in text.split():
            # split() never yields an empty word, so word[0] is safe.
            if word[0] not in string.ascii_letters:
                continue
            if len(word) < 3:
                continue
            if word.lower() in self._STOP_WORDS:
                continue
            self.xref.setdefault(word, []).append(id)

    def analyze(self):
        """Print the number of headers and modules, then analyze the index."""
        print("Project %s : %d headers, %d modules" % (self.name, len(self.headers), len(self.modules)))
        self.idx.analyze()

    def scanHeaders(self):
        """Parse every registered header and merge its index."""
        for header in list(self.headers.keys()):
            parser = CParser(header)
            idx = parser.parse()
            self.headers[header] = idx
            self.idx.merge(idx)

    def scanModules(self):
        """Parse every registered module and merge its public index."""
        for module in list(self.modules.keys()):
            parser = CParser(module)
            idx = parser.parse()
            self.modules[module] = idx
            self.idx.merge_public(idx)

    def _is_excluded(self, path):
        """Tell whether path contains one of the excluded fragments."""
        for fragment in self.excludes:
            if fragment in path:
                return True
        return False

    def _register_files(self, pattern, registry):
        """Add the non excluded files matching pattern as keys of registry."""
        for path in glob.glob(pattern):
            if self._is_excluded(path):
                print("Skipping %s" % path)
            else:
                registry[path] = None

    def scan(self):
        """Find the modules and headers of all directories and parse them."""
        for directory in self.directories:
            self._register_files(directory + "/*.c", self.modules)
            self._register_files(directory + "/*.h", self.headers)
        self.scanHeaders()
        self.scanModules()

    def modulename_file(self, file):
        """Return the base name of file without a trailing .h or .c"""
        module = os.path.basename(file)
        if module[-2:] in ('.h', '.c'):
            module = module[:-2]
        return module

    def serialize_enum(self, output, name):
        """Write the <enum> element describing the enum value `name`."""
        entry = self.idx.enums[name]
        output.write("    <enum name='%s' file='%s'" % (name,
                     self.modulename_file(entry.header)))
        if entry.info is not None:
            info = entry.info
            if self._has_text(info[0]):
                # NOTE(review): eval() runs text taken from the parsed
                # headers; only use this tool on trusted sources.
                try:
                    val = eval(info[0])
                except Exception:
                    # Not a Python expression, keep the text as is.
                    val = info[0]
                # One-element tuple so a tuple result is formatted whole.
                output.write(" value='%s'" % (val,))
            if self._has_text(info[2]):
                output.write(" type='%s'" % info[2])
            if self._has_text(info[1]):
                output.write(" info='%s'" % escape(info[1]))
        output.write("/>\n")

    def serialize_macro(self, output, name):
        """Write the <macro> element for `name` and index its descriptions."""
        entry = self.idx.macros[name]
        output.write("    <macro name='%s' file='%s'>\n" % (name,
                     self.modulename_file(entry.header)))
        if entry.info is not None:
            try:
                (args, desc) = entry.info
                if self._has_text(desc):
                    output.write("      <info>%s</info>\n" % (escape(desc)))
                    self.indexString(name, desc)
                for arg in args:
                    (arg_name, arg_desc) = arg
                    if self._has_text(arg_desc):
                        output.write("      <arg name='%s' info='%s'/>\n" % (
                                     arg_name, escape(arg_desc)))
                        # Words of an argument description refer to the
                        # macro itself, not to the argument.
                        self.indexString(name, arg_desc)
                    else:
                        output.write("      <arg name='%s'/>\n" % (arg_name,))
            except Exception:
                # Best effort: info which does not have the expected
                # (args, desc) layout is left out.
                pass
        output.write("    </macro>\n")

    def serialize_typedef(self, output, name):
        """Write the <struct> or <typedef> element for the typedef `name`.

        A typedef whose type starts with 'struct ' is written as a <struct>,
        with its fields when the structure is known to the index.
        """
        entry = self.idx.typedefs[name]
        if entry.info[0:7] == 'struct ':
            output.write("    <struct name='%s' file='%s' type='%s'" % (
                     name, self.modulename_file(entry.header), entry.info))
            struct_name = entry.info[7:]
            if struct_name in self.idx.structs and isinstance(
               self.idx.structs[struct_name].info, (tuple, list)):
                output.write(">\n")
                try:
                    for field in self.idx.structs[struct_name].info:
                        desc = field[2]
                        self.indexString(struct_name, desc)
                        if desc is None:
                            desc = ''
                        else:
                            desc = escape(desc)
                        output.write("      <field name='%s' type='%s' info='%s'/>\n" % (field[1] , field[0], desc))
                except Exception:
                    print("Failed to serialize struct %s" % (struct_name))
                output.write("    </struct>\n")
            else:
                output.write("/>\n")
        else:
            output.write("    <typedef name='%s' file='%s' type='%s'" % (
                         name, self.modulename_file(entry.header), entry.info))
            try:
                desc = entry.extra
                if self._has_text(desc):
                    output.write(">\n      <info>%s</info>\n" % (escape(desc)))
                    output.write("    </typedef>\n")
                else:
                    output.write("/>\n")
            except Exception:
                # No usable extra description: close the element empty.
                output.write("/>\n")

    def serialize_variable(self, output, name):
        """Write the <variable> element for the global variable `name`."""
        entry = self.idx.variables[name]
        if entry.info is not None:
            output.write("    <variable name='%s' file='%s' type='%s'/>\n" % (
                    name, self.modulename_file(entry.header), entry.info))
        else:
            output.write("    <variable name='%s' file='%s'/>\n" % (
                    name, self.modulename_file(entry.header)))

    def serialize_function(self, output, name):
        """Write the element describing the function `name`.

        The element is named after the type of the index entry.  The
        description, return value and arguments are written and their text
        is entered in the word index.
        """
        entry = self.idx.functions[name]
        if name == debugsym:
            print("=>", entry)

        output.write("    <%s name='%s' file='%s' module='%s'>\n" % (entry.type,
                     name, self.modulename_file(entry.header),
                     self.modulename_file(entry.module)))
        #
        # Processing of conditionals modified by Bill 1/1/05
        #
        if entry.conditionals is not None:
            apstr = ""
            for cond in entry.conditionals:
                if apstr != "":
                    apstr = apstr + " &amp;&amp; "
                apstr = apstr + cond
            output.write("      <cond>%s</cond>\n"% (apstr))
        try:
            (ret, params, desc) = entry.info
            if not self._has_text(desc) and \
               name[0:9] != "xmlThrDef" and name != "xmlDllMain":
                print("%s %s from %s has no description" % (entry.type, name,
                       self.modulename_file(entry.module)))

            output.write("      <info>%s</info>\n" % (escape(desc)))
            self.indexString(name, desc)
            if ret[0] is not None:
                if ret[0] == "void":
                    output.write("      <return type='void'/>\n")
                else:
                    output.write("      <return type='%s' info='%s'/>\n" % (
                             ret[0], escape(ret[1])))
                    self.indexString(name, ret[1])
            for param in params:
                if param[0] == 'void':
                    continue
                if param[2] is None:
                    output.write("      <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
                else:
                    output.write("      <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
                    self.indexString(name, param[2])
        except Exception:
            print("Failed to save function %s info: " % name, repr(entry.info))
        output.write("    </%s>\n" % (entry.type))

    def serialize_exports(self, output, file):
        """Write the <file> element listing the symbols exported by a header."""
        module = self.modulename_file(file)
        output.write("    <file name='%s'>\n" % (module))
        header_idx = self.headers[file]
        if header_idx.info is not None:
            for data in ('Summary', 'Description', 'Author'):
                try:
                    output.write("     <%s>%s</%s>\n" % (
                                 data.lower(),
                                 escape(header_idx.info[data]),
                                 data.lower()))
                except Exception:
                    print("Header %s lacks a %s description" % (module, data))
            if 'Description' in header_idx.info:
                desc = header_idx.info['Description']
                if desc.find("DEPRECATED") != -1:
                    output.write("     <deprecated/>\n")

        other_kinds = (header_idx.functions, header_idx.variables,
                       header_idx.typedefs, header_idx.structs,
                       header_idx.enums)
        for symbol in uniq(sorted(header_idx.macros.keys())):
            # Macros are sometime used to masquerade other types.
            masquerading = False
            for registry in other_kinds:
                if symbol in registry:
                    masquerading = True
                    break
            if masquerading:
                continue
            output.write("     <exports symbol='%s' type='macro'/>\n" % (symbol))
        for registry, kind in ((header_idx.enums, 'enum'),
                               (header_idx.typedefs, 'typedef'),
                               (header_idx.structs, 'struct'),
                               (header_idx.variables, 'variable'),
                               (header_idx.functions, 'function')):
            for symbol in uniq(sorted(registry.keys())):
                output.write("     <exports symbol='%s' type='%s'/>\n" % (
                             symbol, kind))
        output.write("    </file>\n")

    def serialize_xrefs_files(self, output):
        """Write, for each header, the sorted list of symbols it defines."""
        for file in sorted(self.headers.keys()):
            module = self.modulename_file(file)
            output.write("    <file name='%s'>\n" % (module))
            header_idx = self.headers[file]
            ids = uniq(list(header_idx.functions.keys()) +
                       list(header_idx.variables.keys()) +
                       list(header_idx.macros.keys()) +
                       list(header_idx.typedefs.keys()) +
                       list(header_idx.structs.keys()) +
                       list(header_idx.enums.keys()))
            ids.sort()
            for symbol in ids:
                output.write("      <ref name='%s'/>\n" % (symbol))
            output.write("    </file>\n")

    def _write_type_refs(self, output, by_type, drop_duplicates):
        """Write one <type> element per listed type of by_type.

        by_type maps a type name to a list of function names; that list is
        sorted in place.  When drop_duplicates is set, repeated names are
        written only once.
        """
        for type_name in sorted(by_type.keys()):
            if type_name in self._UNLISTED_TYPES:
                continue
            output.write("    <type name='%s'>\n" % (type_name))
            ids = by_type[type_name]
            ids.sort()
            previous = ''
            for ref in ids:
                if drop_duplicates:
                    if ref == previous:
                        continue
                    previous = ref
                output.write("      <ref name='%s'/>\n" % (ref))
            output.write("    </type>\n")

    def serialize_xrefs_functions(self, output):
        """Write, for each argument type, the functions accepting it."""
        funcs = {}
        for name in list(self.idx.functions.keys()):
            entry = self.idx.functions[name]
            try:
                (ret, params, desc) = entry.info
                for param in params:
                    if param[0] == 'void':
                        continue
                    funcs.setdefault(param[0], []).append(name)
            except Exception:
                # Best effort: entries without a usable signature are
                # not cross referenced.
                pass
        # not sure why we have dups, but get rid of them!
        self._write_type_refs(output, funcs, True)

    def serialize_xrefs_constructors(self, output):
        """Write, for each return type, the functions returning it."""
        funcs = {}
        for name in list(self.idx.functions.keys()):
            entry = self.idx.functions[name]
            try:
                (ret, params, desc) = entry.info
                if ret[0] == "void":
                    continue
                funcs.setdefault(ret[0], []).append(name)
            except Exception:
                # Best effort: entries without a usable signature are
                # not cross referenced.
                pass
        self._write_type_refs(output, funcs, False)

    def serialize_xrefs_alpha(self, output):
        """Write all identifiers grouped by their first character."""
        letter = None
        for symbol in sorted(self.idx.identifiers.keys()):
            if symbol[0] != letter:
                if letter is not None:
                    output.write("    </letter>\n")
                letter = symbol[0]
                output.write("    <letter name='%s'>\n" % (letter))
            output.write("      <ref name='%s'/>\n" % (symbol))
        if letter is not None:
            output.write("    </letter>\n")

    def serialize_xrefs_references(self, output):
        """Write, for each identifier, the link to its HTML documentation."""
        for symbol in sorted(self.idx.identifiers.keys()):
            idf = self.idx.identifiers[symbol]
            module = idf.header
            output.write("    <reference name='%s' href='%s'/>\n" % (symbol,
                         'html/' + self.basename + '-' +
                         self.modulename_file(module) + '.html#' +
                         symbol))

    def serialize_xrefs_index(self, output):
        """Write the word index built by indexString(), split in chunks.

        Words referenced by more than 30 symbols are left out.  A new chunk
        is opened on a change of initial once the current chunk holds more
        than 200 references.  A summary of the chunks is written last.
        """
        index = self.xref
        letter = None
        first_letter = None
        count = 0
        chunk = 0
        chunks = []
        for word in sorted(index.keys()):
            if len(index[word]) > 30:
                continue
            if word[0] != letter:
                if letter is None or count > 200:
                    if letter is not None:
                        output.write("      </letter>\n")
                        output.write("    </chunk>\n")
                        count = 0
                        chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
                    output.write("    <chunk name='chunk%s'>\n" % (chunk))
                    first_letter = word[0]
                    chunk = chunk + 1
                elif letter is not None:
                    output.write("      </letter>\n")
                letter = word[0]
                output.write("      <letter name='%s'>\n" % (letter))
            output.write("        <word name='%s'>\n" % (word))
            refs = index[word]
            refs.sort()
            previous = None
            for ref in refs:
                # The list is sorted, duplicates are adjacent.
                if previous == ref:
                    continue
                previous = ref
                output.write("          <ref name='%s'/>\n" % (ref))
                count = count + 1
            output.write("        </word>\n")
        if letter is not None:
            output.write("      </letter>\n")
            output.write("    </chunk>\n")
            if count != 0:
                chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
            output.write("    <chunks>\n")
            for ch in chunks:
                output.write("      <chunk name='%s' start='%s' end='%s'/>\n" % (
                             ch[0], ch[1], ch[2]))
            output.write("    </chunks>\n")

    def serialize_xrefs(self, output):
        """Write all the cross reference sections, each in its own element."""
        sections = (
            ("references", self.serialize_xrefs_references),
            ("alpha", self.serialize_xrefs_alpha),
            ("constructors", self.serialize_xrefs_constructors),
            ("functions", self.serialize_xrefs_functions),
            ("files", self.serialize_xrefs_files),
            ("index", self.serialize_xrefs_index),
        )
        for tag, writer in sections:
            output.write("  <%s>\n" % tag)
            writer(output)
            output.write("  </%s>\n" % tag)

    def serialize(self):
        """Save <name>-api.xml and <name>-refs.xml in the current directory."""
        filename = "%s-api.xml" % self.name
        print("Saving XML description %s" % (filename))
        # 'with' makes sure the file is closed even if a serializer raises.
        with open(filename, "w") as output:
            output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
            output.write("<api name='%s'>\n" % self.name)
            output.write("  <files>\n")
            for file in sorted(self.headers.keys()):
                self.serialize_exports(output, file)
            output.write("  </files>\n")
            output.write("  <symbols>\n")
            for registry, writer in (
                    (self.idx.macros, self.serialize_macro),
                    (self.idx.enums, self.serialize_enum),
                    (self.idx.typedefs, self.serialize_typedef),
                    (self.idx.variables, self.serialize_variable),
                    (self.idx.functions, self.serialize_function)):
                for symbol in sorted(registry.keys()):
                    writer(output, symbol)
            output.write("  </symbols>\n")
            output.write("</api>\n")

        filename = "%s-refs.xml" % self.name
        print("Saving XML Cross References %s" % (filename))
        with open(filename, "w") as output:
            output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
            output.write("<apirefs name='%s'>\n" % self.name)
            self.serialize_xrefs(output)
            output.write("</apirefs>\n")
2109
2110
def rebuild():
    """Guess which project the current directory belongs to and rebuild it.

    The project is recognized from the presence of one of its source files
    relative to the current directory.  Its API description is scanned,
    analyzed and serialized; when ../libexslt/exslt.c exists the same is
    then done for libexslt.

    Returns the docBuilder of the main project, or None when no known
    project layout was found.
    """
    # (marker file, banner, project name, scanned directories, excludes)
    layouts = (
        ("parser.c", "Rebuilding API description for libxml2",
         "libxml2", [".", "."],
         ["xmlwin32version.h", "tst.c"]),
        ("../parser.c", "Rebuilding API description for libxml2",
         "libxml2", ["..", "../include/libxml"],
         ["xmlwin32version.h", "tst.c"]),
        ("../libxslt/transform.c", "Rebuilding API description for libxslt",
         "libxslt", ["../libxslt"],
         ["win32config.h", "libxslt.h", "tst.c"]),
    )
    builder = None
    for marker, banner, project, dirs, skipped in layouts:
        if glob.glob(marker):
            print(banner)
            builder = docBuilder(project, dirs, skipped)
            break
    if builder is None:
        print("rebuild() failed, unable to guess the module")
        return None

    pending = [builder]
    if glob.glob("../libexslt/exslt.c"):
        pending.append(docBuilder("libexslt", ["../libexslt"],
                                  ["libexslt.h"]))
    for job in pending:
        job.scan()
        job.analyze()
        job.serialize()
    return builder
2137
2138 #
2139 # for debugging the parser
2140 #
def parse(filename):
    """Run CParser on a single file and return what its parse() returns."""
    return CParser(filename).parse()
2145
# Without argument the whole API description is rebuilt; with one, only
# that file is parsed, with debugging enabled.
if __name__ == "__main__":
    if len(sys.argv) <= 1:
        rebuild()
    else:
        debug = 1
        parse(sys.argv[1])