doc/apibuild.py

   1 #!/usr/bin/python -u
   2 #
# This is the API builder: it parses the C sources and builds the
# formal API description in XML.
   5 #
   6 # See Copyright for the status of this software.
   7 #
   8 # daniel@veillard.com
   9 #
  10 import os, sys
  11 import string
  12 import glob
  13
  14 debug=0
  15
  16 #
  17 # C parser analysis code
  18 #
  19 ignored_files = {
  20   "trio": "too many non standard macros",
  21   "trio.c": "too many non standard macros",
  22   "trionan.c": "too many non standard macros",
  23   "triostr.c": "too many non standard macros",
  24   "acconfig.h": "generated portability layer",
  25   "config.h": "generated portability layer",
  26   "libxml.h": "internal only",
  27   "testOOM.c": "out of memory tester",
  28   "testOOMlib.h": "out of memory tester",
  29   "testOOMlib.c": "out of memory tester",
  30 }
  31
  32 ignored_words = {
  33   "WINAPI": (0, "Windows keyword"),
  34   "LIBXML_DLL_IMPORT": (0, "Special macro to flag external keywords"),
  35   "XMLPUBVAR": (0, "Special macro for extern vars for win32"),
  36   "XSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  37   "EXSLTPUBVAR": (0, "Special macro for extern vars for win32"),
  38   "XMLPUBFUN": (0, "Special macro for extern funcs for win32"),
  39   "XSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  40   "EXSLTPUBFUN": (0, "Special macro for extern funcs for win32"),
  41   "XMLCALL": (0, "Special macro for win32 calls"),
  42   "XSLTCALL": (0, "Special macro for win32 calls"),
  43   "EXSLTCALL": (0, "Special macro for win32 calls"),
  44   "__declspec": (3, "Windows keyword"),
  45   "ATTRIBUTE_UNUSED": (0, "macro keyword"),
  46   "LIBEXSLT_PUBLIC": (0, "macro keyword"),
  47   "X_IN_Y": (5, "macro function builder"),
  48   "XSLT_ITEM_COMMON_FIELDS": (0, "Special macro")
  49 }
  50
  51 def escape(raw):
  52     raw = string.replace(raw, '&', '&amp;')
  53     raw = string.replace(raw, '<', '&lt;')
  54     raw = string.replace(raw, '>', '&gt;')
  55     raw = string.replace(raw, "'", '&apos;')
  56     raw = string.replace(raw, '"', '&quot;')
  57     return raw
  58
  59 def uniq(items):
  60     d = {}
  61     for item in items:
  62         d[item]=1
  63     return d.keys()
  64
  65 class identifier:
  66     def __init__(self, name, module=None, type=None, lineno = 0,
  67                  info=None, extra=None):
  68         self.name = name
  69         self.module = module
  70         self.type = type
  71         self.info = info
  72         self.extra = extra
  73         self.lineno = lineno
  74         self.static = 0
  75
  76     def __repr__(self):
  77         r = "%s %s:" % (self.type, self.name)
  78         if self.static:
  79             r = r + " static"
  80         if self.module != None:
  81             r = r + " from %s" % (self.module)
  82         if self.info != None:
  83             r = r + " " +  `self.info`
  84         if self.extra != None:
  85             r = r + " " + `self.extra`
  86         return r
  87
  88
  89     def set_module(self, module):
  90         self.module = module
  91     def set_type(self, type):
  92         self.type = type
  93     def set_info(self, info):
  94         self.info = info
  95     def set_extra(self, extra):
  96         self.extra = extra
  97     def set_lineno(self, lineno):
  98         self.lineno = lineno
  99     def set_static(self, static):
 100         self.static = static
 101
 102     def get_name(self):
 103         return self.name
 104     def get_module(self):
 105         return self.module
 106     def get_type(self):
 107         return self.type
 108     def get_info(self):
 109         return self.info
 110     def get_lineno(self):
 111         return self.lineno
 112     def get_extra(self):
 113         return self.extra
 114     def get_static(self):
 115         return self.static
 116
 117     def update(self, module, type = None, info = None, extra=None):
 118         if module != None and self.module == None:
 119             self.set_module(module)
 120         if type != None and self.type == None:
 121             self.set_type(type)
 122         if info != None:
 123             self.set_info(info)
 124         if extra != None:
 125             self.set_extra(extra)
 126
 127
 128 class index:
 129     def __init__(self, name = "noname"):
 130         self.name = name
 131         self.identifiers = {}
 132         self.functions = {}
 133         self.variables = {}
 134         self.includes = {}
 135         self.structs = {}
 136         self.enums = {}
 137         self.typedefs = {}
 138         self.macros = {}
 139         self.references = {}
 140         self.info = {}
 141
 142     def add_ref(self, name, module, static, type, lineno, info=None, extra=None):
 143         if name[0:2] == '__':
 144             return None
 145         d = None
 146         try:
 147            d = self.identifiers[name]
 148            d.update(module, type, lineno, info, extra)
 149         except:
 150            d = identifier(name, module, type, lineno, info, extra)
 151            self.identifiers[name] = d
 152
 153         if d != None and static == 1:
 154             d.set_static(1)
 155
 156         if d != None and name != None and type != None:
 157             self.references[name] = d
 158
 159     def add(self, name, module, static, type, lineno, info=None, extra=None):
 160         if name[0:2] == '__':
 161             return None
 162         d = None
 163         try:
 164            d = self.identifiers[name]
 165            d.update(module, type, lineno, info, extra)
 166         except:
 167            d = identifier(name, module, type, lineno, info, extra)
 168            self.identifiers[name] = d
 169
 170         if d != None and static == 1:
 171             d.set_static(1)
 172
 173         if d != None and name != None and type != None:
 174             if type == "function":
 175                 self.functions[name] = d
 176             elif type == "functype":
 177                 self.functions[name] = d
 178             elif type == "variable":
 179                 self.variables[name] = d
 180             elif type == "include":
 181                 self.includes[name] = d
 182             elif type == "struct":
 183                 self.structs[name] = d
 184             elif type == "enum":
 185                 self.enums[name] = d
 186             elif type == "typedef":
 187                 self.typedefs[name] = d
 188             elif type == "macro":
 189                 self.macros[name] = d
 190             else:
 191                 print "Unable to register type ", type
 192         return d
 193
 194     def merge(self, idx):
 195         for id in idx.functions.keys():
 196               #
 197               # macro might be used to override functions or variables
 198               # definitions
 199               #
 200              if self.macros.has_key(id):
 201                  del self.macros[id]
 202              if self.functions.has_key(id):
 203                  print "function %s from %s redeclared in %s" % (
 204                     id, self.functions[id].module, idx.functions[id].module)
 205              else:
 206                  self.functions[id] = idx.functions[id]
 207                  self.identifiers[id] = idx.functions[id]
 208         for id in idx.variables.keys():
 209               #
 210               # macro might be used to override functions or variables
 211               # definitions
 212               #
 213              if self.macros.has_key(id):
 214                  del self.macros[id]
 215              if self.variables.has_key(id):
 216                  print "variable %s from %s redeclared in %s" % (
 217                     id, self.variables[id].module, idx.variables[id].module)
 218              else:
 219                  self.variables[id] = idx.variables[id]
 220                  self.identifiers[id] = idx.variables[id]
 221         for id in idx.structs.keys():
 222              if self.structs.has_key(id):
 223                  print "struct %s from %s redeclared in %s" % (
 224                     id, self.structs[id].module, idx.structs[id].module)
 225              else:
 226                  self.structs[id] = idx.structs[id]
 227                  self.identifiers[id] = idx.structs[id]
 228         for id in idx.typedefs.keys():
 229              if self.typedefs.has_key(id):
 230                  print "typedef %s from %s redeclared in %s" % (
 231                     id, self.typedefs[id].module, idx.typedefs[id].module)
 232              else:
 233                  self.typedefs[id] = idx.typedefs[id]
 234                  self.identifiers[id] = idx.typedefs[id]
 235         for id in idx.macros.keys():
 236               #
 237               # macro might be used to override functions or variables
 238               # definitions
 239               #
 240              if self.variables.has_key(id):
 241                  continue
 242              if self.functions.has_key(id):
 243                  continue
 244              if self.enums.has_key(id):
 245                  continue
 246              if self.macros.has_key(id):
 247                  print "macro %s from %s redeclared in %s" % (
 248                     id, self.macros[id].module, idx.macros[id].module)
 249              else:
 250                  self.macros[id] = idx.macros[id]
 251                  self.identifiers[id] = idx.macros[id]
 252         for id in idx.enums.keys():
 253              if self.enums.has_key(id):
 254                  print "enum %s from %s redeclared in %s" % (
 255                     id, self.enums[id].module, idx.enums[id].module)
 256              else:
 257                  self.enums[id] = idx.enums[id]
 258                  self.identifiers[id] = idx.enums[id]
 259
 260     def merge_public(self, idx):
 261         for id in idx.functions.keys():
 262              if self.functions.has_key(id):
 263                  up = idx.functions[id]
 264                  self.functions[id].update(None, up.type, up.info, up.extra)
 265          #     else:
 266          #         print "Function %s from %s is not declared in headers" % (
 267         #               id, idx.functions[id].module)
 268          # TODO: do the same for variables.
 269
 270     def analyze_dict(self, type, dict):
 271         count = 0
 272         public = 0
 273         for name in dict.keys():
 274             id = dict[name]
 275             count = count + 1
 276             if id.static == 0:
 277                 public = public + 1
 278         if count != public:
 279             print "  %d %s , %d public" % (count, type, public)
 280         elif count != 0:
 281             print "  %d public %s" % (count, type)
 282
 283
 284     def analyze(self):
 285         self.analyze_dict("functions", self.functions)
 286         self.analyze_dict("variables", self.variables)
 287         self.analyze_dict("structs", self.structs)
 288         self.analyze_dict("typedefs", self.typedefs)
 289         self.analyze_dict("macros", self.macros)
 290
 291 class CLexer:
 292     """A lexer for the C language, tokenize the input by reading and
 293        analyzing it line by line"""
 294     def __init__(self, input):
 295         self.input = input
 296         self.tokens = []
 297         self.line = ""
 298         self.lineno = 0
 299
 300     def getline(self):
 301         line = ''
 302         while line == '':
 303             line = self.input.readline()
 304             if not line:
 305                 return None
 306             self.lineno = self.lineno + 1
 307             line = string.lstrip(line)
 308             line = string.rstrip(line)
 309             if line == '':
 310                 continue
 311             while line[-1] == '\\':
 312                 line = line[:-1]
 313                 n = self.input.readline()
 314                 self.lineno = self.lineno + 1
 315                 n = string.lstrip(n)
 316                 n = string.rstrip(n)
 317                 if not n:
 318                     break
 319                 else:
 320                     line = line + n
 321         return line
 322
 323     def getlineno(self):
 324         return self.lineno
 325
 326     def push(self, token):
 327         self.tokens.insert(0, token);
 328
 329     def debug(self):
 330         print "Last token: ", self.last
 331         print "Token queue: ", self.tokens
 332         print "Line %d end: " % (self.lineno), self.line
 333
 334     def token(self):
 335         while self.tokens == []:
 336             if self.line == "":
 337                 line = self.getline()
 338             else:
 339                 line = self.line
 340                 self.line = ""
 341             if line == None:
 342                 return None
 343
 344             if line[0] == '#':
 345                 self.tokens = map((lambda x: ('preproc', x)),
 346                                   string.split(line))
 347                 break;
 348             l = len(line)
 349             if line[0] == '"' or line[0] == "'":
 350                 end = line[0]
 351                 line = line[1:]
 352                 found = 0
 353                 tok = ""
 354                 while found == 0:
 355                     i = 0
 356                     l = len(line)
 357                     while i < l:
 358                         if line[i] == end:
 359                             self.line = line[i+1:]
 360                             line = line[:i]
 361                             l = i
 362                             found = 1
 363                             break
 364                         if line[i] == '\\':
 365                             i = i + 1
 366                         i = i + 1
 367                     tok = tok + line
 368                     if found == 0:
 369                         line = self.getline()
 370                         if line == None:
 371                             return None
 372                 self.last = ('string', tok)
 373                 return self.last
 374
 375             if l >= 2 and line[0] == '/' and line[1] == '*':
 376                 line = line[2:]
 377                 found = 0
 378                 tok = ""
 379                 while found == 0:
 380                     i = 0
 381                     l = len(line)
 382                     while i < l:
 383                         if line[i] == '*' and i+1 < l and line[i+1] == '/':
 384                             self.line = line[i+2:]
 385                             line = line[:i-1]
 386                             l = i
 387                             found = 1
 388                             break
 389                         i = i + 1
 390                     if tok != "":
 391                         tok = tok + "\n"
 392                     tok = tok + line
 393                     if found == 0:
 394                         line = self.getline()
 395                         if line == None:
 396                             return None
 397                 self.last = ('comment', tok)
 398                 return self.last
 399             if l >= 2 and line[0] == '/' and line[1] == '/':
 400                 line = line[2:]
 401                 self.last = ('comment', line)
 402                 return self.last
 403             i = 0
 404             while i < l:
 405                 if line[i] == '/' and i+1 < l and line[i+1] == '/':
 406                     self.line = line[i:]
 407                     line = line[:i]
 408                     break
 409                 if line[i] == '/' and i+1 < l and line[i+1] == '*':
 410                     self.line = line[i:]
 411                     line = line[:i]
 412                     break
 413                 if line[i] == '"' or line[i] == "'":
 414                     self.line = line[i:]
 415                     line = line[:i]
 416                     break
 417                 i = i + 1
 418             l = len(line)
 419             i = 0
 420             while i < l:
 421                 if line[i] == ' ' or line[i] == '\t':
 422                     i = i + 1
 423                     continue
 424                 o = ord(line[i])
 425                 if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
 426                    (o >= 48 and o <= 57):
 427                     s = i
 428                     while i < l:
 429                         o = ord(line[i])
 430                         if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
 431                            (o >= 48 and o <= 57) or string.find(
 432                                " \t(){}:;,+-*/%&!|[]=><", line[i]) == -1:
 433                             i = i + 1
 434                         else:
 435                             break
 436                     self.tokens.append(('name', line[s:i]))
 437                     continue
 438                 if string.find("(){}:;,[]", line[i]) != -1:
 439 #                 if line[i] == '(' or line[i] == ')' or line[i] == '{' or \
 440 #                   line[i] == '}' or line[i] == ':' or line[i] == ';' or \
 441 #                   line[i] == ',' or line[i] == '[' or line[i] == ']':
 442                     self.tokens.append(('sep', line[i]))
 443                     i = i + 1
 444                     continue
 445                 if string.find("+-*><=/%&!|.", line[i]) != -1:
 446 #                 if line[i] == '+' or line[i] == '-' or line[i] == '*' or \
 447 #                   line[i] == '>' or line[i] == '<' or line[i] == '=' or \
 448 #                   line[i] == '/' or line[i] == '%' or line[i] == '&' or \
 449 #                   line[i] == '!' or line[i] == '|' or line[i] == '.':
 450                     if line[i] == '.' and  i + 2 < l and \
 451                        line[i+1] == '.' and line[i+2] == '.':
 452                         self.tokens.append(('name', '...'))
 453                         i = i + 3
 454                         continue
 455
 456                     j = i + 1
 457                     if j < l and (
 458                        string.find("+-*><=/%&!|", line[j]) != -1):
 459 #                       line[j] == '+' or line[j] == '-' or line[j] == '*' or \
 460 #                       line[j] == '>' or line[j] == '<' or line[j] == '=' or \
 461 #                       line[j] == '/' or line[j] == '%' or line[j] == '&' or \
 462 #                       line[j] == '!' or line[j] == '|'):
 463                         self.tokens.append(('op', line[i:j+1]))
 464                         i = j + 1
 465                     else:
 466                         self.tokens.append(('op', line[i]))
 467                         i = i + 1
 468                     continue
 469                 s = i
 470                 while i < l:
 471                     o = ord(line[i])
 472                     if (o >= 97 and o <= 122) or (o >= 65 and o <= 90) or \
 473                        (o >= 48 and o <= 57) or (
 474                         string.find(" \t(){}:;,+-*/%&!|[]=><", line[i]) == -1):
 475 #                        line[i] != ' ' and line[i] != '\t' and
 476 #                        line[i] != '(' and line[i] != ')' and
 477 #                        line[i] != '{'  and line[i] != '}' and
 478 #                        line[i] != ':' and line[i] != ';' and
 479 #                        line[i] != ',' and line[i] != '+' and
 480 #                        line[i] != '-' and line[i] != '*' and
 481 #                        line[i] != '/' and line[i] != '%' and
 482 #                        line[i] != '&' and line[i] != '!' and
 483 #                        line[i] != '|' and line[i] != '[' and
 484 #                        line[i] != ']' and line[i] != '=' and
 485 #                        line[i] != '*' and line[i] != '>' and
 486 #                        line[i] != '<'):
 487                         i = i + 1
 488                     else:
 489                         break
 490                 self.tokens.append(('name', line[s:i]))
 491
 492         tok = self.tokens[0]
 493         self.tokens = self.tokens[1:]
 494         self.last = tok
 495         return tok
 496
 497 class CParser:
 498     """The C module parser"""
 499     def __init__(self, filename, idx = None):
 500         self.filename = filename
 501         if len(filename) > 2 and filename[-2:] == '.h':
 502             self.is_header = 1
 503         else:
 504             self.is_header = 0
 505         self.input = open(filename)
 506         self.lexer = CLexer(self.input)
 507         if idx == None:
 508             self.index = index()
 509         else:
 510             self.index = idx
 511         self.top_comment = ""
 512         self.last_comment = ""
 513         self.comment = None
 514         self.collect_ref = 0
 515         self.no_error = 0
 516
    def collect_references(self):
        """Set the collect_ref flag to 1."""
        self.collect_ref = 1
 519
    def stop_error(self):
        """Silence warning() and error() by setting the no_error flag."""
        self.no_error = 1
 522
    def start_error(self):
        """Re-enable warning() and error() by clearing the no_error flag."""
        self.no_error = 0
 525
    def lineno(self):
        """Return the current line number as reported by the lexer."""
        return self.lexer.getlineno()
 528
    def index_add(self, name, module, static, type, info=None, extra = None):
        """Forward to self.index.add(), supplying the current line number
        of the lexer.  The value returned by the index is not passed on."""
        self.index.add(name, module, static, type, self.lineno(),
                       info, extra)
 532
    def index_add_ref(self, name, module, static, type, info=None,
                      extra = None):
        """Forward to self.index.add_ref(), supplying the current line
        number of the lexer."""
        self.index.add_ref(name, module, static, type, self.lineno(),
                       info, extra)
 537
 538     def warning(self, msg):
 539         if self.no_error:
 540             return
 541         print msg
 542
 543     def error(self, msg, token=-1):
 544         if self.no_error:
 545             return
 546
 547         print "Parse Error: " + msg
 548         if token != -1:
 549             print "Got token ", token
 550         self.lexer.debug()
 551         sys.exit(1)
 552
 553     def debug(self, msg, token=-1):
 554         print "Debug: " + msg
 555         if token != -1:
 556             print "Got token ", token
 557         self.lexer.debug()
 558
 559     def parseTopComment(self, comment):
 560         res = {}
 561         lines = string.split(comment, "\n")
 562         item = None
 563         for line in lines:
 564             while line != "" and (line[0] == ' ' or line[0] == '\t'):
 565                 line = line[1:]
 566             while line != "" and line[0] == '*':
 567                 line = line[1:]
 568             while line != "" and (line[0] == ' ' or line[0] == '\t'):
 569                 line = line[1:]
 570             try:
 571                 (it, line) = string.split(line, ":", 1)
 572                 item = it
 573                 while line != "" and (line[0] == ' ' or line[0] == '\t'):
 574                     line = line[1:]
 575                 if res.has_key(item):
 576                     res[item] = res[item] + " " + line
 577                 else:
 578                     res[item] = line
 579             except:
 580                 if item != None:
 581                     if res.has_key(item):
 582                         res[item] = res[item] + " " + line
 583                     else:
 584                         res[item] = line
 585         self.index.info = res
 586
 587     def parseComment(self, token):
 588         if self.top_comment == "":
 589             self.top_comment = token[1]
 590         if self.comment == None or token[1][0] == '*':
 591             self.comment = token[1];
 592         else:
 593             self.comment = self.comment + token[1]
 594         token = self.lexer.token()
 595
 596         if string.find(self.comment, "DOC_DISABLE") != -1:
 597             self.stop_error()
 598
 599         if string.find(self.comment, "DOC_ENABLE") != -1:
 600             self.start_error()
 601
 602         return token
 603
 604      #
    # Parse a comment block associated with a macro
 606      #
 607     def parseMacroComment(self, name, quiet = 0):
 608         if name[0:2] == '__':
 609             quiet = 1
 610
 611         args = []
 612         desc = ""
 613
 614         if self.comment == None:
 615             if not quiet:
 616                 self.warning("Missing comment for macro %s" % (name))
 617             return((args, desc))
 618         if self.comment[0] != '*':
 619             if not quiet:
 620                 self.warning("Missing * in macro comment for %s" % (name))
 621             return((args, desc))
 622         lines = string.split(self.comment, '\n')
 623         if lines[0] == '*':
 624             del lines[0]
 625         if lines[0] != "* %s:" % (name):
 626             if not quiet:
 627                 self.warning("Misformatted macro comment for %s" % (name))
 628                 self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
 629             return((args, desc))
 630         del lines[0]
 631         while lines[0] == '*':
 632             del lines[0]
 633         while len(lines) > 0 and lines[0][0:3] == '* @':
 634             l = lines[0][3:]
 635             try:
 636                 (arg, desc) = string.split(l, ':', 1)
 637                 desc=string.strip(desc)
 638                 arg=string.strip(arg)
 639             except:
 640                 if not quiet:
 641                     self.warning("Misformatted macro comment for %s" % (name))
 642                     self.warning("  problem with '%s'" % (lines[0]))
 643                 del lines[0]
 644                 continue
 645             del lines[0]
 646             l = string.strip(lines[0])
 647             while len(l) > 2 and l[0:3] != '* @':
 648                 while l[0] == '*':
 649                     l = l[1:]
 650                 desc = desc + ' ' + string.strip(l)
 651                 del lines[0]
 652                 if len(lines) == 0:
 653                     break
 654                 l = lines[0]
 655             args.append((arg, desc))
 656         while len(lines) > 0 and lines[0] == '*':
 657             del lines[0]
 658         desc = ""
 659         while len(lines) > 0:
 660             l = lines[0]
 661             while len(l) > 0 and l[0] == '*':
 662                 l = l[1:]
 663             l = string.strip(l)
 664             desc = desc + " " + l
 665             del lines[0]
 666
 667         desc = string.strip(desc)
 668
 669         if quiet == 0:
 670             if desc == "":
 671                 self.warning("Macro comment for %s lack description of the macro" % (name))
 672
 673         return((args, desc))
 674
 675      #
    # Parse a comment block and merge the information found in the
    # parameter descriptions; finally return a block as complete
    # as possible
 679      #
 680     def mergeFunctionComment(self, name, description, quiet = 0):
 681         if name == 'main':
 682             quiet = 1
 683         if name[0:2] == '__':
 684             quiet = 1
 685
 686         (ret, args) = description
 687         desc = ""
 688         retdesc = ""
 689
 690         if self.comment == None:
 691             if not quiet:
 692                 self.warning("Missing comment for function %s" % (name))
 693             return(((ret[0], retdesc), args, desc))
 694         if self.comment[0] != '*':
 695             if not quiet:
 696                 self.warning("Missing * in function comment for %s" % (name))
 697             return(((ret[0], retdesc), args, desc))
 698         lines = string.split(self.comment, '\n')
 699         if lines[0] == '*':
 700             del lines[0]
 701         if lines[0] != "* %s:" % (name):
 702             if not quiet:
 703                 self.warning("Misformatted function comment for %s" % (name))
 704                 self.warning("  Expecting '* %s:' got '%s'" % (name, lines[0]))
 705             return(((ret[0], retdesc), args, desc))
 706         del lines[0]
 707         while len(lines) > 0 and lines[0] == '*':
 708             del lines[0]
 709         nbargs = len(args)
 710         while len(lines) > 0 and lines[0][0:3] == '* @':
 711             l = lines[0][3:]
 712             try:
 713                 (arg, desc) = string.split(l, ':', 1)
 714                 desc=string.strip(desc)
 715                 arg=string.strip(arg)
 716             except:
 717                 if not quiet:
 718                     self.warning("Misformatted function comment for %s" % (name))
 719                     self.warning("  problem with '%s'" % (lines[0]))
 720                 del lines[0]
 721                 continue
 722             del lines[0]
 723             l = string.strip(lines[0])
 724             while len(l) > 2 and l[0:3] != '* @':
 725                 while l[0] == '*':
 726                     l = l[1:]
 727                 desc = desc + ' ' + string.strip(l)
 728                 del lines[0]
 729                 if len(lines) == 0:
 730                     break
 731                 l = lines[0]
 732             i = 0
 733             while i < nbargs:
 734                 if args[i][1] == arg:
 735                     args[i] = (args[i][0], arg, desc)
 736                     break;
 737                 i = i + 1
 738             if i >= nbargs:
 739                 if not quiet:
 740                     self.warning("Unable to find arg %s from function comment for %s" % (
 741                        arg, name))
 742         while len(lines) > 0 and lines[0] == '*':
 743             del lines[0]
 744         desc = ""
 745         while len(lines) > 0:
 746             l = lines[0]
 747             while len(l) > 0 and l[0] == '*':
 748                 l = l[1:]
 749             l = string.strip(l)
 750             if len(l) >= 6 and  l[0:6] == "return" or l[0:6] == "Return":
 751                 try:
 752                     l = string.split(l, ' ', 1)[1]
 753                 except:
 754                     l = ""
 755                 retdesc = string.strip(l)
 756                 del lines[0]
 757                 while len(lines) > 0:
 758                     l = lines[0]
 759                     while len(l) > 0 and l[0] == '*':
 760                         l = l[1:]
 761                     l = string.strip(l)
 762                     retdesc = retdesc + " " + l
 763                     del lines[0]
 764             else:
 765                 desc = desc + " " + l
 766                 del lines[0]
 767
 768         retdesc = string.strip(retdesc)
 769         desc = string.strip(desc)
 770
 771         if quiet == 0:
 772              #
 773              # report missing comments
 774              #
 775             i = 0
 776             while i < nbargs:
 777                 if args[i][2] == None and args[i][0] != "void" and args[i][1] != None:
 778                     self.warning("Function comment for %s lack description of arg %s" % (name, args[i][1]))
 779                 i = i + 1
 780             if retdesc == "" and ret[0] != "void":
 781                 self.warning("Function comment for %s lack description of return value" % (name))
 782             if desc == "":
 783                 self.warning("Function comment for %s lack description of the function" % (name))
 784
 785
 786         return(((ret[0], retdesc), args, desc))
 787
 788     def parsePreproc(self, token):
 789         name = token[1]
 790         if name == "#include":
 791             token = self.lexer.token()
 792             if token == None:
 793                 return None
 794             if token[0] == 'preproc':
 795                 self.index_add(token[1], self.filename, not self.is_header,
 796                                 "include")
 797                 return self.lexer.token()
 798             return token
 799         if name == "#define":
 800             token = self.lexer.token()
 801             if token == None:
 802                 return None
 803             if token[0] == 'preproc':
 804                  # TODO macros with arguments
 805                 name = token[1]
 806                 lst = []
 807                 token = self.lexer.token()
 808                 while token != None and token[0] == 'preproc' and \
 809                       token[1][0] != '#':
 810                     lst.append(token[1])
 811                     token = self.lexer.token()
 812                 try:
 813                     name = string.split(name, '(') [0]
 814                 except:
 815                     pass
 816                 info = self.parseMacroComment(name, not self.is_header)
 817                 self.index_add(name, self.filename, not self.is_header,
 818                                 "macro", info)
 819                 return token
 820         token = self.lexer.token()
 821         while token != None and token[0] == 'preproc' and \
 822             token[1][0] != '#':
 823             token = self.lexer.token()
 824         return token
 825
     #
     # Token acquisition on top of the lexer; it internally handles
     # preprocessor directives and comments, since they are logically
     # not part of the program structure.
     #
 831     def token(self):
 832         global ignored_words
 833
 834         token = self.lexer.token()
 835         while token != None:
 836             if token[0] == 'comment':
 837                 token = self.parseComment(token)
 838                 continue
 839             elif token[0] == 'preproc':
 840                 token = self.parsePreproc(token)
 841                 continue
 842             elif token[0] == "name" and ignored_words.has_key(token[1]):
 843                 (n, info) = ignored_words[token[1]]
 844                 i = 0
 845                 while i < n:
 846                     token = self.lexer.token()
 847                     i = i + 1
 848                 token = self.lexer.token()
 849                 continue
 850             else:
 851                 if debug:
 852                     print "=> ", token
 853                 return token
 854         return None
 855
 856      #
 857      # Parse a typedef, it records the type and its name.
 858      #
 859     def parseTypedef(self, token):
 860         if token == None:
 861             return None
 862         token = self.parseType(token)
 863         if token == None:
 864             self.error("parsing typedef")
 865             return None
 866         base_type = self.type
 867         type = base_type
 868          #self.debug("end typedef type", token)
 869         while token != None:
 870             if token[0] == "name":
 871                 name = token[1]
 872                 signature = self.signature
 873                 if signature != None:
 874                     type = string.split(type, '(')[0]
 875                     d = self.mergeFunctionComment(name,
 876                             ((type, None), signature), 1)
 877                     self.index_add(name, self.filename, not self.is_header,
 878                                     "functype", d)
 879                 else:
 880                     if base_type == "struct":
 881                         self.index_add(name, self.filename, not self.is_header,
 882                                         "struct", type)
 883                         base_type = "struct " + name
 884                     else:
 885                         self.index_add(name, self.filename, not self.is_header,
 886                                     "typedef", type)
 887                 token = self.token()
 888             else:
 889                 self.error("parsing typedef: expecting a name")
 890                 return token
 891              #self.debug("end typedef", token)
 892             if token != None and token[0] == 'sep' and token[1] == ',':
 893                 type = base_type
 894                 token = self.token()
 895                 while token != None and token[0] == "op":
 896                     type = type + token[1]
 897                     token = self.token()
 898             elif token != None and token[0] == 'sep' and token[1] == ';':
 899                 break;
 900             elif token != None and token[0] == 'name':
 901                 type = base_type
 902                 continue;
 903             else:
 904                 self.error("parsing typedef: expecting ';'", token)
 905                 return token
 906         token = self.token()
 907         return token
 908
     #
     # Parse a C code block, used for functions; it parses up to and
     # including the balancing }
     #
 913     def parseBlock(self, token):
 914         while token != None:
 915             if token[0] == "sep" and token[1] == "{":
 916                 token = self.token()
 917                 token = self.parseBlock(token)
 918             elif token[0] == "sep" and token[1] == "}":
 919                 self.comment = None
 920                 token = self.token()
 921                 return token
 922             else:
 923                 if self.collect_ref == 1:
 924                     oldtok = token
 925                     token = self.token()
 926                     if oldtok[0] == "name" and oldtok[1][0:3] == "xml":
 927                         if token[0] == "sep" and token[1] == "(":
 928                             self.index_add_ref(oldtok[1], self.filename,
 929                                                 0, "function")
 930                             token = self.token()
 931                         elif token[0] == "name":
 932                             token = self.token()
 933                             if token[0] == "sep" and (token[1] == ";" or
 934                                token[1] == "," or token[1] == "="):
 935                                 self.index_add_ref(oldtok[1], self.filename,
 936                                                     0, "type")
 937                     elif oldtok[0] == "name" and oldtok[1][0:4] == "XML_":
 938                         self.index_add_ref(oldtok[1], self.filename,
 939                                             0, "typedef")
 940                     elif oldtok[0] == "name" and oldtok[1][0:7] == "LIBXML_":
 941                         self.index_add_ref(oldtok[1], self.filename,
 942                                             0, "typedef")
 943
 944                 else:
 945                     token = self.token()
 946         return token
 947
 948      #
 949      # Parse a C struct definition till the balancing }
 950      #
 951     def parseStruct(self, token):
 952         fields = []
 953          #self.debug("start parseStruct", token)
 954         while token != None:
 955             if token[0] == "sep" and token[1] == "{":
 956                 token = self.token()
 957                 token = self.parseTypeBlock(token)
 958             elif token[0] == "sep" and token[1] == "}":
 959                 self.struct_fields = fields
 960                  #self.debug("end parseStruct", token)
 961                  #print fields
 962                 token = self.token()
 963                 return token
 964             else:
 965                 base_type = self.type
 966                  #self.debug("before parseType", token)
 967                 token = self.parseType(token)
 968                  #self.debug("after parseType", token)
 969                 if token != None and token[0] == "name":
 970                     fname = token[1]
 971                     token = self.token()
 972                     if token[0] == "sep" and token[1] == ";":
 973                         self.comment = None
 974                         token = self.token()
 975                         fields.append((self.type, fname, self.comment))
 976                         self.comment = None
 977                     else:
 978                         self.error("parseStruct: expecting ;", token)
 979                 elif token != None and token[0] == "sep" and token[1] == "{":
 980                     token = self.token()
 981                     token = self.parseTypeBlock(token)
 982                     if token != None and token[0] == "name":
 983                         token = self.token()
 984                     if token != None and token[0] == "sep" and token[1] == ";":
 985                         token = self.token()
 986                     else:
 987                         self.error("parseStruct: expecting ;", token)
 988                 else:
 989                     self.error("parseStruct: name", token)
 990                     token = self.token()
 991                 self.type = base_type;
 992         self.struct_fields = fields
 993          #self.debug("end parseStruct", token)
 994          #print fields
 995         return token
 996
 997      #
 998      # Parse a C enum block, parse till the balancing }
 999      #
1000     def parseEnumBlock(self, token):
1001         self.enums = []
1002         name = None
1003         self.comment = None
1004         comment = ""
1005         value = "0"
1006         while token != None:
1007             if token[0] == "sep" and token[1] == "{":
1008                 token = self.token()
1009                 token = self.parseTypeBlock(token)
1010             elif token[0] == "sep" and token[1] == "}":
1011                 if name != None:
1012                     if self.comment != None:
1013                         comment = self.comment
1014                         self.comment = None
1015                     self.enums.append((name, value, comment))
1016                 token = self.token()
1017                 return token
1018             elif token[0] == "name":
1019                     if name != None:
1020                         if self.comment != None:
1021                             comment = string.strip(self.comment)
1022                             self.comment = None
1023                         self.enums.append((name, value, comment))
1024                     name = token[1]
1025                     comment = ""
1026                     token = self.token()
1027                     if token[0] == "op" and token[1][0] == "=":
1028                         value = ""
1029                         if len(token[1]) > 1:
1030                             value = token[1][1:]
1031                         token = self.token()
1032                         while token[0] != "sep" or (token[1] != ',' and
1033                               token[1] != '}'):
1034                             value = value + token[1]
1035                             token = self.token()
1036                     else:
1037                         try:
1038                             value = "%d" % (int(value) + 1)
1039                         except:
1040                             self.warning("Failed to compute value of enum %s" % (name))
1041                             value=""
1042                     if token[0] == "sep" and token[1] == ",":
1043                         token = self.token()
1044             else:
1045                 token = self.token()
1046         return token
1047
     #
     # Parse a C definition block, used for structs; it parses up to and
     # including the balancing }
     #
1052     def parseTypeBlock(self, token):
1053         while token != None:
1054             if token[0] == "sep" and token[1] == "{":
1055                 token = self.token()
1056                 token = self.parseTypeBlock(token)
1057             elif token[0] == "sep" and token[1] == "}":
1058                 token = self.token()
1059                 return token
1060             else:
1061                 token = self.token()
1062         return token
1063
1064      #
1065      # Parse a type: the fact that the type name can either occur after
1066      #    the definition or within the definition makes it a little harder
1067      #    if inside, the name token is pushed back before returning
1068      #
1069     def parseType(self, token):
1070         self.type = ""
1071         self.struct_fields = []
1072         self.signature = None
1073         if token == None:
1074             return token
1075
1076         while token[0] == "name" and (
1077               token[1] == "const" or token[1] == "unsigned" or
1078               token[1] == "signed"):
1079             if self.type == "":
1080                 self.type = token[1]
1081             else:
1082                 self.type = self.type + " " + token[1]
1083             token = self.token()
1084
1085         if token[0] == "name" and (token[1] == "long" or token[1] == "short"):
1086             if self.type == "":
1087                 self.type = token[1]
1088             else:
1089                 self.type = self.type + " " + token[1]
1090             if token[0] == "name" and token[1] == "int":
1091                 if self.type == "":
1092                     self.type = tmp[1]
1093                 else:
1094                     self.type = self.type + " " + tmp[1]
1095
1096         elif token[0] == "name" and token[1] == "struct":
1097             if self.type == "":
1098                 self.type = token[1]
1099             else:
1100                 self.type = self.type + " " + token[1]
1101             token = self.token()
1102             nametok = None
1103             if token[0] == "name":
1104                 nametok = token
1105                 token = self.token()
1106             if token != None and token[0] == "sep" and token[1] == "{":
1107                 token = self.token()
1108                 token = self.parseStruct(token)
1109             elif token != None and token[0] == "op" and token[1] == "*":
1110                 self.type = self.type + " " + nametok[1] + " *"
1111                 token = self.token()
1112                 while token != None and token[0] == "op" and token[1] == "*":
1113                     self.type = self.type + " *"
1114                     token = self.token()
1115                 if token[0] == "name":
1116                     nametok = token
1117                     token = self.token()
1118                 else:
1119                     self.error("struct : expecting name", token)
1120                     return token
1121             elif token != None and token[0] == "name" and nametok != None:
1122                 self.type = self.type + " " + nametok[1]
1123                 return token
1124
1125             if nametok != None:
1126                 self.lexer.push(token)
1127                 token = nametok
1128             return token
1129
1130         elif token[0] == "name" and token[1] == "enum":
1131             if self.type == "":
1132                 self.type = token[1]
1133             else:
1134                 self.type = self.type + " " + token[1]
1135             self.enums = []
1136             token = self.token()
1137             if token != None and token[0] == "sep" and token[1] == "{":
1138                 token = self.token()
1139                 token = self.parseEnumBlock(token)
1140             else:
1141                 self.error("parsing enum: expecting '{'", token)
1142             enum_type = None
1143             if token != None and token[0] != "name":
1144                 self.lexer.push(token)
1145                 token = ("name", "enum")
1146             else:
1147                 enum_type = token[1]
1148             for enum in self.enums:
1149                 self.index_add(enum[0], self.filename,
1150                                not self.is_header, "enum",
1151                                (enum[1], enum[2], enum_type))
1152             return token
1153
1154         elif token[0] == "name":
1155             if self.type == "":
1156                 self.type = token[1]
1157             else:
1158                 self.type = self.type + " " + token[1]
1159         else:
1160             self.error("parsing type %s: expecting a name" % (self.type),
1161                        token)
1162             return token
1163         token = self.token()
1164         while token != None and (token[0] == "op" or
1165               token[0] == "name" and token[1] == "const"):
1166             self.type = self.type + " " + token[1]
1167             token = self.token()
1168
1169          #
1170          # if there is a parenthesis here, this means a function type
1171          #
1172         if token != None and token[0] == "sep" and token[1] == '(':
1173             self.type = self.type + token[1]
1174             token = self.token()
1175             while token != None and token[0] == "op" and token[1] == '*':
1176                 self.type = self.type + token[1]
1177                 token = self.token()
1178             if token == None or token[0] != "name" :
1179                 self.error("parsing function type, name expected", token);
1180                 return token
1181             self.type = self.type + token[1]
1182             nametok = token
1183             token = self.token()
1184             if token != None and token[0] == "sep" and token[1] == ')':
1185                 self.type = self.type + token[1]
1186                 token = self.token()
1187                 if token != None and token[0] == "sep" and token[1] == '(':
1188                     token = self.token()
1189                     type = self.type;
1190                     token = self.parseSignature(token);
1191                     self.type = type;
1192                 else:
1193                     self.error("parsing function type, '(' expected", token);
1194                     return token
1195             else:
1196                 self.error("parsing function type, ')' expected", token);
1197                 return token
1198             self.lexer.push(token)
1199             token = nametok
1200             return token
1201
1202          #
1203          # do some lookahead for arrays
1204          #
1205         if token != None and token[0] == "name":
1206             nametok = token
1207             token = self.token()
1208             if token != None and token[0] == "sep" and token[1] == '[':
1209                 self.type = self.type + nametok[1]
1210                 while token != None and token[0] == "sep" and token[1] == '[':
1211                     self.type = self.type + token[1]
1212                     token = self.token()
1213                     while token != None and token[0] != 'sep' and \
1214                           token[1] != ']' and token[1] != ';':
1215                         self.type = self.type + token[1]
1216                         token = self.token()
1217                 if token != None and token[0] == 'sep' and token[1] == ']':
1218                     self.type = self.type + token[1]
1219                     token = self.token()
1220                 else:
1221                     self.error("parsing array type, ']' expected", token);
1222                     return token
1223             elif token != None and token[0] == "sep" and token[1] == ':':
1224                  # remove :12 in case it's a limited int size
1225                 token = self.token()
1226                 token = self.token()
1227             self.lexer.push(token)
1228             token = nametok
1229
1230         return token
1231
1232      #
1233      # Parse a signature: '(' has been parsed and we scan the type definition
1234      #    up to the ')' included
1235     def parseSignature(self, token):
1236         signature = []
1237         if token != None and token[0] == "sep" and token[1] == ')':
1238             self.signature = []
1239             token = self.token()
1240             return token
1241         while token != None:
1242             token = self.parseType(token)
1243             if token != None and token[0] == "name":
1244                 signature.append((self.type, token[1], None))
1245                 token = self.token()
1246             elif token != None and token[0] == "sep" and token[1] == ',':
1247                 token = self.token()
1248                 continue
1249             elif token != None and token[0] == "sep" and token[1] == ')':
1250                  # only the type was provided
1251                 if self.type == "...":
1252                     signature.append((self.type, "...", None))
1253                 else:
1254                     signature.append((self.type, None, None))
1255             if token != None and token[0] == "sep":
1256                 if token[1] == ',':
1257                     token = self.token()
1258                     continue
1259                 elif token[1] == ')':
1260                     token = self.token()
1261                     break
1262         self.signature = signature
1263         return token
1264
1265      #
1266      # Parse a global definition, be it a type, variable or function
1267      # the extern "C" blocks are a bit nasty and require it to recurse.
1268      #
1269     def parseGlobal(self, token):
1270         static = 0
1271         if token[1] == 'extern':
1272             token = self.token()
1273             if token == None:
1274                 return token
1275             if token[0] == 'string':
1276                 if token[1] == 'C':
1277                     token = self.token()
1278                     if token == None:
1279                         return token
1280                     if token[0] == 'sep' and token[1] == "{":
1281                         token = self.token()
1282 #                        print 'Entering extern "C line ', self.lineno()
1283                         while token != None and (token[0] != 'sep' or
1284                               token[1] != "}"):
1285                             if token[0] == 'name':
1286                                 token = self.parseGlobal(token)
1287                             else:
1288                                 self.error(
1289                                  "token %s %s unexpected at the top level" % (
1290                                         token[0], token[1]))
1291                                 token = self.parseGlobal(token)
1292 #                        print 'Exiting extern "C" line', self.lineno()
1293                         token = self.token()
1294                         return token
1295                 else:
1296                     return token
1297         elif token[1] == 'static':
1298             static = 1
1299             token = self.token()
1300             if token == None or  token[0] != 'name':
1301                 return token
1302
1303         if token[1] == 'typedef':
1304             token = self.token()
1305             return self.parseTypedef(token)
1306         else:
1307             token = self.parseType(token)
1308             type_orig = self.type
1309         if token == None or token[0] != "name":
1310             return token
1311         type = type_orig
1312         self.name = token[1]
1313         token = self.token()
1314         while token != None and (token[0] == "sep" or token[0] == "op"):
1315             if token[0] == "sep":
1316                 if token[1] == "[":
1317                     type = type + token[1]
1318                     token = self.token()
1319                     while token != None and (token[0] != "sep" or \
1320                           token[1] != ";"):
1321                         type = type + token[1]
1322                         token = self.token()
1323
1324             if token != None and token[0] == "op" and token[1] == "=":
1325                  #
1326                  # Skip the initialization of the variable
1327                  #
1328                 token = self.token()
1329                 if token[0] == 'sep' and token[1] == '{':
1330                     token = self.token()
1331                     token = self.parseBlock(token)
1332                 else:
1333                     self.comment = None
1334                     while token != None and (token[0] != "sep" or \
1335                           (token[1] != ';' and token[1] != ',')):
1336                             token = self.token()
1337                 self.comment = None
1338                 if token == None or token[0] != "sep" or (token[1] != ';' and
1339                    token[1] != ','):
1340                     self.error("missing ';' or ',' after value")
1341
1342             if token != None and token[0] == "sep":
1343                 if token[1] == ";":
1344                     self.comment = None
1345                     token = self.token()
1346                     if type == "struct":
1347                         self.index_add(self.name, self.filename,
1348                              not self.is_header, "struct", self.struct_fields)
1349                     else:
1350                         self.index_add(self.name, self.filename,
1351                              not self.is_header, "variable", type)
1352                     break
1353                 elif token[1] == "(":
1354                     token = self.token()
1355                     token = self.parseSignature(token)
1356                     if token == None:
1357                         return None
1358                     if token[0] == "sep" and token[1] == ";":
1359                         d = self.mergeFunctionComment(self.name,
1360                                 ((type, None), self.signature), 1)
1361                         self.index_add(self.name, self.filename, static,
1362                                         "function", d)
1363                         token = self.token()
1364                     elif token[0] == "sep" and token[1] == "{":
1365                         d = self.mergeFunctionComment(self.name,
1366                                 ((type, None), self.signature), static)
1367                         self.index_add(self.name, self.filename, static,
1368                                         "function", d)
1369                         token = self.token()
1370                         token = self.parseBlock(token);
1371                 elif token[1] == ',':
1372                     self.comment = None
1373                     self.index_add(self.name, self.filename, static,
1374                                     "variable", type)
1375                     type = type_orig
1376                     token = self.token()
1377                     while token != None and token[0] == "sep":
1378                         type = type + token[1]
1379                         token = self.token()
1380                     if token != None and token[0] == "name":
1381                         self.name = token[1]
1382                         token = self.token()
1383                 else:
1384                     break
1385
1386         return token
1387
1388     def parse(self):
1389         self.warning("Parsing %s" % (self.filename))
1390         token = self.token()
1391         while token != None:
1392             if token[0] == 'name':
1393                 token = self.parseGlobal(token)
1394             else:
1395                 self.error("token %s %s unexpected at the top level" % (
1396                        token[0], token[1]))
1397                 token = self.parseGlobal(token)
1398                 return
1399         self.parseTopComment(self.top_comment)
1400         return self.index
1401
1402
1403 class docBuilder:
1404     """A documentation builder"""
1405     def __init__(self, name, directories=['.'], excludes=[]):
1406         self.name = name
1407         self.directories = directories
1408         self.excludes = excludes + ignored_files.keys()
1409         self.modules = {}
1410         self.headers = {}
1411         self.idx = index()
1412         self.xref = {}
1413         self.index = {}
1414         if name == 'libxml2':
1415             self.basename = 'libxml'
1416         else:
1417             self.basename = name
1418
1419     def indexString(self, id, str):
1420         if str == None:
1421             return
1422         str = string.replace(str, "'", ' ')
1423         str = string.replace(str, '"', ' ')
1424         str = string.replace(str, "/", ' ')
1425         str = string.replace(str, '*', ' ')
1426         str = string.replace(str, "[", ' ')
1427         str = string.replace(str, "]", ' ')
1428         str = string.replace(str, "(", ' ')
1429         str = string.replace(str, ")", ' ')
1430         str = string.replace(str, "<", ' ')
1431         str = string.replace(str, '>', ' ')
1432         str = string.replace(str, "&", ' ')
1433         str = string.replace(str, '#', ' ')
1434         str = string.replace(str, ",", ' ')
1435         str = string.replace(str, '.', ' ')
1436         str = string.replace(str, ';', ' ')
1437         tokens = string.split(str)
1438         for token in tokens:
1439             try:
1440                 c = token[0]
1441                 if string.find(string.letters, c) < 0:
1442                     pass
1443                 elif len(token) < 3:
1444                     pass
1445                 else:
1446                     lower = string.lower(token)
1447                     # TODO: generalize this a bit
1448                     if lower == 'and' or lower == 'the':
1449                         pass
1450                     elif self.xref.has_key(token):
1451                         self.xref[token].append(id)
1452                     else:
1453                         self.xref[token] = [id]
1454             except:
1455                 pass
1456
1457     def analyze(self):
1458         print "Project %s : %d headers, %d modules" % (self.name, len(self.headers.keys()), len(self.modules.keys()))
1459         self.idx.analyze()
1460
1461     def scanHeaders(self):
1462         for header in self.headers.keys():
1463             parser = CParser(header)
1464             idx = parser.parse()
1465             self.headers[header] = idx;
1466             self.idx.merge(idx)
1467
1468     def scanModules(self):
1469         for module in self.modules.keys():
1470             parser = CParser(module)
1471             idx = parser.parse()
1472             # idx.analyze()
1473             self.modules[module] = idx
1474             self.idx.merge_public(idx)
1475
1476     def scan(self):
1477         for directory in self.directories:
1478             files = glob.glob(directory + "/*.c")
1479             for file in files:
1480                 skip = 0
1481                 for excl in self.excludes:
1482                     if string.find(file, excl) != -1:
1483                         skip = 1;
1484                         break
1485                 if skip == 0:
1486                     self.modules[file] = None;
1487             files = glob.glob(directory + "/*.h")
1488             for file in files:
1489                 skip = 0
1490                 for excl in self.excludes:
1491                     if string.find(file, excl) != -1:
1492                         skip = 1;
1493                         break
1494                 if skip == 0:
1495                     self.headers[file] = None;
1496         self.scanHeaders()
1497         self.scanModules()
1498
1499     def modulename_file(self, file):
1500         module = os.path.basename(file)
1501         if module[-2:] == '.h':
1502             module = module[:-2]
1503         return module
1504
1505     def serialize_enum(self, output, name):
1506         id = self.idx.enums[name]
1507         output.write("    <enum name='%s' file='%s'" % (name,
1508                      self.modulename_file(id.module)))
1509         if id.info != None:
1510             info = id.info
1511             if info[0] != None and info[0] != '':
1512                 try:
1513                     val = eval(info[0])
1514                 except:
1515                     val = info[0]
1516                 output.write(" value='%s'" % (val));
1517             if info[2] != None and info[2] != '':
1518                 output.write(" type='%s'" % info[2]);
1519             if info[1] != None and info[1] != '':
1520                 output.write(" info='%s'" % escape(info[1]));
1521         output.write("/>\n")
1522
1523     def serialize_macro(self, output, name):
1524         id = self.idx.macros[name]
1525         output.write("    <macro name='%s' file='%s'>\n" % (name,
1526                      self.modulename_file(id.module)))
1527         if id.info != None:
1528             try:
1529                 (args, desc) = id.info
1530                 if desc != None and desc != "":
1531                     output.write("      <info>%s</info>\n" % (escape(desc)))
1532                     self.indexString(name, desc)
1533                 for arg in args:
1534                     (name, desc) = arg
1535                     if desc != None and desc != "":
1536                         output.write("      <arg name='%s' info='%s'/>\n" % (
1537                                      name, escape(desc)))
1538                         self.indexString(name, desc)
1539                     else:
1540                         output.write("      <arg name='%s'/>\n" % (name))
1541             except:
1542                 pass
1543         output.write("    </macro>\n")
1544
1545     def serialize_typedef(self, output, name):
1546         id = self.idx.typedefs[name]
1547         if id.info[0:7] == 'struct ':
1548             output.write("    <struct name='%s' file='%s' type='%s'" % (
1549                      name, self.modulename_file(id.module), id.info))
1550             name = id.info[7:]
1551             if self.idx.structs.has_key(name) and ( \
1552                type(self.idx.structs[name].info) == type(()) or
1553                 type(self.idx.structs[name].info) == type([])):
1554                 output.write(">\n");
1555                 try:
1556                     for field in self.idx.structs[name].info:
1557                         desc = field[2]
1558                         self.indexString(name, desc)
1559                         if desc == None:
1560                             desc = ''
1561                         else:
1562                             desc = escape(desc)
1563                         output.write("      <field name='%s' type='%s' info='%s'/>\n" % (field[1] , field[0], desc))
1564                 except:
1565                     print "Failed to serialize struct %s" % (name)
1566                 output.write("    </struct>\n")
1567             else:
1568                 output.write("/>\n");
1569         else :
1570             output.write("    <typedef name='%s' file='%s' type='%s'/>\n" % (
1571                      name, self.modulename_file(id.module), id.info))
1572
1573     def serialize_variable(self, output, name):
1574         id = self.idx.variables[name]
1575         if id.info != None:
1576             output.write("    <variable name='%s' file='%s' type='%s'/>\n" % (
1577                     name, self.modulename_file(id.module), id.info))
1578         else:
1579             output.write("    <variable name='%s' file='%s'/>\n" % (
1580                     name, self.modulename_file(id.module)))
1581
1582     def serialize_function(self, output, name):
1583         id = self.idx.functions[name]
1584         output.write("    <%s name='%s' file='%s'>\n" % (id.type, name,
1585                      self.modulename_file(id.module)))
1586         try:
1587             (ret, params, desc) = id.info
1588             output.write("      <info>%s</info>\n" % (escape(desc)))
1589             self.indexString(name, desc)
1590             if ret[0] != None:
1591                 if ret[0] == "void":
1592                     output.write("      <return type='void'/>\n")
1593                 else:
1594                     output.write("      <return type='%s' info='%s'/>\n" % (
1595                              ret[0], escape(ret[1])))
1596                     self.indexString(name, ret[1])
1597             for param in params:
1598                 if param[0] == 'void':
1599                     continue
1600                 if param[2] == None:
1601                     output.write("      <arg name='%s' type='%s' info=''/>\n" % (param[1], param[0]))
1602                 else:
1603                     output.write("      <arg name='%s' type='%s' info='%s'/>\n" % (param[1], param[0], escape(param[2])))
1604                     self.indexString(name, param[2])
1605         except:
1606             print "Failed to save function %s info: " % name, `id.info`
1607         output.write("    </%s>\n" % (id.type))
1608
1609     def serialize_exports(self, output, file):
1610         module = self.modulename_file(file)
1611         output.write("    <file name='%s'>\n" % (module))
1612         dict = self.headers[file]
1613         if dict.info != None:
1614             for data in ('Summary', 'Description', 'Author'):
1615                 try:
1616                     output.write("     <%s>%s</%s>\n" % (
1617                                  string.lower(data),
1618                                  escape(dict.info[data]),
1619                                  string.lower(data)))
1620                 except:
1621                     print "Header %s lacks a %s description" % (module, data)
1622             if dict.info.has_key('Description'):
1623                 desc = dict.info['Description']
1624                 if string.find(desc, "DEPRECATED") != -1:
1625                     output.write("     <deprecated/>\n")
1626
1627         ids = dict.macros.keys()
1628         ids.sort()
1629         for id in uniq(ids):
1630             # Macros are sometime used to masquerade other types.
1631             if dict.functions.has_key(id):
1632                 continue
1633             if dict.variables.has_key(id):
1634                 continue
1635             if dict.typedefs.has_key(id):
1636                 continue
1637             if dict.structs.has_key(id):
1638                 continue
1639             if dict.enums.has_key(id):
1640                 continue
1641             output.write("     <exports symbol='%s' type='macro'/>\n" % (id))
1642         ids = dict.enums.keys()
1643         ids.sort()
1644         for id in uniq(ids):
1645             output.write("     <exports symbol='%s' type='enum'/>\n" % (id))
1646         ids = dict.typedefs.keys()
1647         ids.sort()
1648         for id in uniq(ids):
1649             output.write("     <exports symbol='%s' type='typedef'/>\n" % (id))
1650         ids = dict.structs.keys()
1651         ids.sort()
1652         for id in uniq(ids):
1653             output.write("     <exports symbol='%s' type='struct'/>\n" % (id))
1654         ids = dict.variables.keys()
1655         ids.sort()
1656         for id in uniq(ids):
1657             output.write("     <exports symbol='%s' type='variable'/>\n" % (id))
1658         ids = dict.functions.keys()
1659         ids.sort()
1660         for id in uniq(ids):
1661             output.write("     <exports symbol='%s' type='function'/>\n" % (id))
1662         output.write("    </file>\n")
1663
1664     def serialize_xrefs_files(self, output):
1665         headers = self.headers.keys()
1666         headers.sort()
1667         for file in headers:
1668             module = self.modulename_file(file)
1669             output.write("    <file name='%s'>\n" % (module))
1670             dict = self.headers[file]
1671             ids = uniq(dict.functions.keys() + dict.variables.keys() + \
1672                   dict.macros.keys() + dict.typedefs.keys() + \
1673                   dict.structs.keys() + dict.enums.keys())
1674             ids.sort()
1675             for id in ids:
1676                 output.write("      <ref name='%s'/>\n" % (id))
1677             output.write("    </file>\n")
1678         pass
1679
1680     def serialize_xrefs_functions(self, output):
1681         funcs = {}
1682         for name in self.idx.functions.keys():
1683             id = self.idx.functions[name]
1684             try:
1685                 (ret, params, desc) = id.info
1686                 for param in params:
1687                     if param[0] == 'void':
1688                         continue
1689                     if funcs.has_key(param[0]):
1690                         funcs[param[0]].append(name)
1691                     else:
1692                         funcs[param[0]] = [name]
1693             except:
1694                 pass
1695         typ = funcs.keys()
1696         typ.sort()
1697         for type in typ:
1698             if type == '' or type == 'void' or type == "int" or \
1699                type == "char *" or type == "const char *" :
1700                 continue
1701             output.write("    <type name='%s'>\n" % (type))
1702             ids = funcs[type]
1703             ids.sort()
1704             pid = ''    # not sure why we have dups, but get rid of them!
1705             for id in ids:
1706                 if id != pid:
1707                     output.write("      <ref name='%s'/>\n" % (id))
1708                     pid = id
1709             output.write("    </type>\n")
1710
1711     def serialize_xrefs_constructors(self, output):
1712         funcs = {}
1713         for name in self.idx.functions.keys():
1714             id = self.idx.functions[name]
1715             try:
1716                 (ret, params, desc) = id.info
1717                 if ret[0] == "void":
1718                     continue
1719                 if funcs.has_key(ret[0]):
1720                     funcs[ret[0]].append(name)
1721                 else:
1722                     funcs[ret[0]] = [name]
1723             except:
1724                 pass
1725         typ = funcs.keys()
1726         typ.sort()
1727         for type in typ:
1728             if type == '' or type == 'void' or type == "int" or \
1729                type == "char *" or type == "const char *" :
1730                 continue
1731             output.write("    <type name='%s'>\n" % (type))
1732             ids = funcs[type]
1733             ids.sort()
1734             for id in ids:
1735                 output.write("      <ref name='%s'/>\n" % (id))
1736             output.write("    </type>\n")
1737
1738     def serialize_xrefs_alpha(self, output):
1739         letter = None
1740         ids = self.idx.identifiers.keys()
1741         ids.sort()
1742         for id in ids:
1743             if id[0] != letter:
1744                 if letter != None:
1745                     output.write("    </letter>\n")
1746                 letter = id[0]
1747                 output.write("    <letter name='%s'>\n" % (letter))
1748             output.write("      <ref name='%s'/>\n" % (id))
1749         if letter != None:
1750             output.write("    </letter>\n")
1751
1752     def serialize_xrefs_references(self, output):
1753         typ = self.idx.identifiers.keys()
1754         typ.sort()
1755         for id in typ:
1756             idf = self.idx.identifiers[id]
1757             module = idf.module
1758             output.write("    <reference name='%s' href='%s'/>\n" % (id,
1759                          'html/' + self.basename + '-' +
1760                          self.modulename_file(module) + '.html#' +
1761                          id))
1762
1763     def serialize_xrefs_index(self, output):
1764         index = self.xref
1765         typ = index.keys()
1766         typ.sort()
1767         letter = None
1768         count = 0
1769         chunk = 0
1770         chunks = []
1771         for id in typ:
1772             if len(index[id]) > 30:
1773                 continue
1774             if id[0] != letter:
1775                 if letter == None or count > 200:
1776                     if letter != None:
1777                         output.write("      </letter>\n")
1778                         output.write("    </chunk>\n")
1779                         count = 0
1780                         chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
1781                     output.write("    <chunk name='chunk%s'>\n" % (chunk))
1782                     first_letter = id[0]
1783                     chunk = chunk + 1
1784                 elif letter != None:
1785                     output.write("      </letter>\n")
1786                 letter = id[0]
1787                 output.write("      <letter name='%s'>\n" % (letter))
1788             output.write("        <word name='%s'>\n" % (id))
1789             tokens = index[id];
1790             tokens.sort()
1791             tok = None
1792             for token in tokens:
1793                 if tok == token:
1794                     continue
1795                 tok = token
1796                 output.write("          <ref name='%s'/>\n" % (token))
1797                 count = count + 1
1798             output.write("        </word>\n")
1799         if letter != None:
1800             output.write("      </letter>\n")
1801             output.write("    </chunk>\n")
1802             if count != 0:
1803                 chunks.append(["chunk%s" % (chunk -1), first_letter, letter])
1804             output.write("    <chunks>\n")
1805             for ch in chunks:
1806                 output.write("      <chunk name='%s' start='%s' end='%s'/>\n" % (
1807                              ch[0], ch[1], ch[2]))
1808             output.write("    </chunks>\n")
1809
1810     def serialize_xrefs(self, output):
1811         output.write("  <references>\n")
1812         self.serialize_xrefs_references(output)
1813         output.write("  </references>\n")
1814         output.write("  <alpha>\n")
1815         self.serialize_xrefs_alpha(output)
1816         output.write("  </alpha>\n")
1817         output.write("  <constructors>\n")
1818         self.serialize_xrefs_constructors(output)
1819         output.write("  </constructors>\n")
1820         output.write("  <functions>\n")
1821         self.serialize_xrefs_functions(output)
1822         output.write("  </functions>\n")
1823         output.write("  <files>\n")
1824         self.serialize_xrefs_files(output)
1825         output.write("  </files>\n")
1826         output.write("  <index>\n")
1827         self.serialize_xrefs_index(output)
1828         output.write("  </index>\n")
1829
1830     def serialize(self, outdir):
1831         filename = outdir + "%s-api.xml" % self.name
1832         print "Saving XML description %s" % (filename)
1833         output = open(filename, "w")
1834         output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
1835         output.write("<api name='%s'>\n" % self.name)
1836         output.write("  <files>\n")
1837         headers = self.headers.keys()
1838         headers.sort()
1839         for file in headers:
1840             self.serialize_exports(output, file)
1841         output.write("  </files>\n")
1842         output.write("  <symbols>\n")
1843         macros = self.idx.macros.keys()
1844         macros.sort()
1845         for macro in macros:
1846             self.serialize_macro(output, macro)
1847         enums = self.idx.enums.keys()
1848         enums.sort()
1849         for enum in enums:
1850             self.serialize_enum(output, enum)
1851         typedefs = self.idx.typedefs.keys()
1852         typedefs.sort()
1853         for typedef in typedefs:
1854             self.serialize_typedef(output, typedef)
1855         variables = self.idx.variables.keys()
1856         variables.sort()
1857         for variable in variables:
1858             self.serialize_variable(output, variable)
1859         functions = self.idx.functions.keys()
1860         functions.sort()
1861         for function in functions:
1862             self.serialize_function(output, function)
1863         output.write("  </symbols>\n")
1864         output.write("</api>\n")
1865         output.close()
1866
1867         filename = outdir + "%s-refs.xml" % self.name
1868         print "Saving XML Cross References %s" % (filename)
1869         output = open(filename, "w")
1870         output.write('<?xml version="1.0" encoding="ISO-8859-1"?>\n')
1871         output.write("<apirefs name='%s'>\n" % self.name)
1872         self.serialize_xrefs(output)
1873         output.write("</apirefs>\n")
1874         output.close()
1875
1876
def rebuild():
    """Guess which project is being documented and rebuild its API files.

    The project is guessed from the files reachable from the current
    directory: "parser.c" or "../parser.c" selects libxml2,
    "../libxslt/transform.c" selects libxslt.  The matching docBuilder is
    scanned, analyzed and serialized into "./".  When "../libexslt/exslt.c"
    exists, a second builder does the same for libexslt, serialized into
    "EXSLT/".

    Returns the first builder, or None when no project could be guessed.
    """
    if glob.glob("parser.c"):
        banner = "Rebuilding API description for libxml2"
        setup = ("libxml2", [".", "."],
                 ["xmlwin32version.h", "tst.c"])
    elif glob.glob("../parser.c"):
        banner = "Rebuilding API description for libxml2"
        setup = ("libxml2", ["..", "../include/libxml"],
                 ["xmlwin32version.h", "tst.c"])
    elif glob.glob("../libxslt/transform.c"):
        banner = "Rebuilding API description for libxslt"
        setup = ("libxslt", ["../libxslt"],
                 ["win32config.h", "libxslt.h", "tst.c"])
    else:
        print("rebuild() failed, unable to guess the module")
        return None

    print(banner)
    builder = docBuilder(*setup)
    builder.scan()
    builder.analyze()
    builder.serialize("./")

    if glob.glob("../libexslt/exslt.c"):
        extra = docBuilder("libexslt", ["../libexslt"], ["libexslt.h"])
        extra.scan()
        extra.analyze()
        extra.serialize("EXSLT/")
    return builder
1903
1904 #
1905 # for debugging the parser
1906 #
def parse(filename):
    """Run CParser on a single file and return the index it produces."""
    return CParser(filename).parse()
1911
if __name__ == "__main__":
    # Executed as a script: rebuild the API description right away.
    rebuild()