Added initial version of rst_parser script
author Andrey Kamaev <no@email>
Tue, 5 Jul 2011 17:29:54 +0000 (17:29 +0000)
committer Andrey Kamaev <no@email>
Tue, 5 Jul 2011 17:29:54 +0000 (17:29 +0000)
modules/java/rst_parser.py [new file with mode: 0644]

diff --git a/modules/java/rst_parser.py b/modules/java/rst_parser.py
new file mode 100644 (file)
index 0000000..aabede4
--- /dev/null
@@ -0,0 +1,372 @@
+import os, sys, re, string, glob
+from string import Template
+
+class DeclarationParser(object):
+    def __init__(self, line=None):
+        if line is None:
+            self.fdecl = ""
+            self.lang = ""
+            self.balance = 0
+            return
+        self.lang = self.getLang(line)
+        assert self.lang is not None
+        self.fdecl = line[line.find("::")+2:].strip()
+        self.balance = self.fdecl.count("(") - self.fdecl.count(")")
+        assert self.balance >= 0
+
+    def append(self, line):
+        self.fdecl += line
+        self.balance = self.fdecl.count("(") - self.fdecl.count(")")
+
+    def isready(self):
+        return self.balance == 0
+
+    def getLang(self, line):
+        if line.startswith(".. ocv:function::"):
+            return "C++"
+        if line.startswith(".. ocv:cfunction::"):
+            return "C"
+        if line.startswith(".. ocv:pyfunction::"):
+            return "Python2"
+        if line.startswith(".. ocv:pyoldfunction::"):
+            return "Python1"
+        if line.startswith(".. ocv:jfunction::"):
+            return "Java"
+        return None
+    
+    def hasDeclaration(self, line):
+        return self.getLang(line) is not None
+
+class ParamParser(object):
+    def __init__(self, line=None):
+        if line is None:
+            self.prefix = ""
+            self.name = ""
+            self.comment = ""
+            self.active = False
+            return
+        offset = line.find(":param")
+        assert offset > 0
+        self.prefix = line[:offset]
+        assert self.prefix==" "*len(self.prefix)
+        line = line[offset + 6:].lstrip()
+        name_end = line.find(":")
+        assert name_end > 0
+        self.name = line[:name_end]
+        self.comment = line[name_end+1:].lstrip()
+        self.active = True
+
+    def append(self, line):
+        assert self.active
+        if (self.hasDeclaration(line)):
+            self.active = False
+        elif line.startswith(self.prefix) or not line:
+            self.comment += "\n" + line.lstrip()
+        else:
+            self.active = False
+            
+    def hasDeclaration(self, line):
+        return line.lstrip().startswith(":param")
+
+class RstParser(object):
+    def __init__(self, cpp_parser):
+        self.cpp_parser = cpp_parser
+        self.definitions = {}
+
+    def parse(self, module_path):
+        doclist = glob.glob(os.path.join(module_path,"doc/*.rst"))
+        for doc in doclist:
+            self.parse_rst_file(doc)
+
+    def parse_section(self, section_name, file_name, lineno, lines):
+        func = {}
+        func["name"] = section_name
+        func["file"] = file_name
+        func["line"] = lineno
+
+        # parse section name
+        class_separator_idx = func["name"].find("::")
+        if class_separator_idx > 0:
+            func["class"] = func["name"][:class_separator_idx]
+            func["method"] = func["name"][class_separator_idx+2:]
+        else:
+            func["method"] = func["name"]
+
+        skip_code_lines = False
+        expected_brief = True
+        fdecl = DeclarationParser()
+        pdecl = ParamParser()
+
+        for l in lines:
+            # read tail of function/method declaration if needed
+            if not fdecl.isready():
+                fdecl.append(ll)
+                if fdecl.isready():
+                    self.add_new_fdecl(func, fdecl)
+                continue
+
+            # skip lines if line-skipping mode is activated
+            if skip_code_lines:
+                if not l or l.startswith(" ") or l.startswith("\t"):
+                    continue
+                else:
+                    skip_code_lines = False
+
+            ll = l.strip()
+            if ll == "..": #strange construction...
+                continue
+
+            # turn on line-skipping mode for code fragments
+            if ll.endswith("::"):
+                skip_code_lines = True
+                ll = ll[:len(ll)-3]
+
+            if ll.startswith(".. code-block::"):
+                skip_code_lines = True
+                continue
+
+            # continue param parsing
+            if pdecl.active:
+                pdecl.append(l)
+                if pdecl.active:
+                    continue
+                else:
+                    self.add_new_pdecl(func, pdecl)
+                    #do not continue - current line can contain next parameter definition
+
+            # todo: parse structure members; skip them for now
+            if ll.startswith(".. ocv:member::"):
+                skip_code_lines = True
+                continue
+
+            # parse class & struct definitions
+            if ll.startswith(".. ocv:class::"):
+                func["class"] = ll[ll.find("::")+2:].strip()
+                if "method" in func:
+                    del func["method"]
+                func["isclass"] = True
+                expected_brief = True
+                continue
+
+            if ll.startswith(".. ocv:struct::"):
+                func["class"] = ll[ll.find("::")+2:].strip()
+                if "method" in func:
+                    del func["method"]
+                func["isstruct"] = True
+                expected_brief = True
+                continue
+
+            # parse function/method definitions
+            if fdecl.hasDeclaration(ll):
+                fdecl = DeclarationParser(ll)
+                if fdecl.isready():
+                    self.add_new_fdecl(func, fdecl)
+                expected_brief = False
+                continue
+
+            # parse parameters
+            if pdecl.hasDeclaration(l):
+                pdecl = ParamParser(l)
+                expected_brief = False
+                continue
+
+            # record brief description
+            if expected_brief and len(ll) == 0:
+                if "brief" in func:
+                    expected_brief = False
+                continue
+            
+            if expected_brief:
+                func["brief"] = func.get("brief", "") + "\n" + ll
+                if skip_code_lines:
+                    expected_brief = False #force end brief if code block begins
+                continue
+
+            # record other lines as long description
+            func["long"] = func.get("long", "") + "\n" + ll
+        # endfor l in lines
+
+        # save last parameter if needed
+        if pdecl.active:
+            self.add_new_pdecl(func, pdecl)
+
+        # add definition to list
+        func = self.normalize(func)
+        if self.validate(func):
+            self.definitions[func["name"]] = func
+            #self.print_info(func)
+        elif func:
+            self.print_info(func, True)
+
+    def parse_rst_file(self, doc):
+        doc = os.path.abspath(doc)
+        lineno = 0
+      
+        lines = []
+        flineno = 0
+        fname = ""
+        prev_line = None
+
+        df = open(doc, "rt")
+        for l in df.readlines():
+            lineno += 1
+            if prev_line == None:
+                prev_line = l.rstrip()
+                continue
+            ll = l.rstrip()
+            if len(prev_line) > 0 and len(ll) >= len(prev_line) and ll == "-" * len(ll):
+                #new function candidate
+                if len(lines) > 1:
+                    self.parse_section(fname, doc, flineno, lines[:len(lines)-1])
+                lines = []
+                flineno = lineno-1
+                fname = prev_line.strip()
+            elif flineno > 0:
+                lines.append(ll)               
+            prev_line = ll
+        df.close()
+
+        #don't forget about the last function section in file!!!
+        if len(lines) > 1:
+            self.parse_section(fname, doc, flineno, lines[:len(lines)])
+
+    def add_new_fdecl(self, func, decl):
+        decls =  func.get("decls",[])
+        if (decl.lang == "C++" or decl.lang == "C"):
+            rst_decl = self.cpp_parser.parse_func_decl_no_wrap(decl.fdecl)
+            decls.append( (decl.lang, decl.fdecl, rst_decl) )
+        else:
+            decls.append( (decl.lang, decl.fdecl) )
+        func["decls"] = decls
+
+    def add_new_pdecl(self, func, decl):
+        params =  func.get("params",{})
+        if decl.name in params:
+            print "Parser error: parameter \"%s\" for %s is defined multiple times. See %s line %s" \
+                 % (decl.name, func["name"], func["file"], func["line"])
+        else:
+            params[decl.name] = decl.comment
+            func["params"] = params
+
+    def print_info(self, func, skipped=False):
+        print ""
+        if skipped:
+            print "SKIPPED DEFINITION:"
+        print "name:      %s" % (func.get("name","~empty~"))
+        print "file:      %s (line %s)" % (func.get("file","~empty~"), func.get("line","~empty~"))
+        print "is class:  %s" % func.get("isclass",False)
+        print "is struct: %s" % func.get("isstruct",False)
+        print "class:     %s" % (func.get("class","~empty~"))
+        print "method:    %s" % (func.get("method","~empty~"))
+        print "brief:     %s" % (func.get("brief","~empty~"))
+        if "decls" in func:
+            print "declarations:"
+            for d in func["decls"]:
+               print "     %7s: %s" % (d[0], re.sub(r"[ \t]+", " ", d[1]))
+        if "params" in func:
+            print "parameters:"
+            for name, comment in func["params"].items():
+                print "%23s:   %s" % (name, comment)
+        if not skipped:
+            print "long:      %s" % (func.get("long","~empty~"))
+
+    def validate(self, func):
+        if func.get("decls",None) is None:
+             if not func.get("isclass",False):
+                 return False
+        if func["name"] in self.definitions:
+             print "Parser error: function/class/struct \"%s\" in %s line %s is already documented in %s line %s" \
+                 % (func["name"], func["file"], func["line"], self.definitions[func["name"]]["file"], self.definitions[func["name"]]["line"])
+             return False
+        #todo: validate parameter names
+        return True
+
+    def normalize(self, func):
+        if not func:
+            return func
+        func["name"] = self.normalizeText(func["name"])
+        if "method" in func:
+            func["method"] = self.normalizeText(func["method"])
+        if "class" in func:
+            func["class"] = self.normalizeText(func["class"])
+        if "brief" in func:
+            func["brief"] = self.normalizeText(func.get("brief",None))
+            if not func["brief"]:
+                del func["brief"]
+        if "long" in func:
+            func["long"] = self.normalizeText(func.get("long",None))
+            if not func["long"]:
+                del func["long"]
+        if "decls" in func:
+            func["decls"].sort()
+        if "params" in func:
+            params = {}
+            for name, comment in func["params"].items():
+                cmt = self.normalizeText(comment)
+                if cmt:
+                    params[name] = cmt
+            func["params"] = params
+        return func
+
+    def normalizeText(self, s):
+        if s is None:
+            return s
+        # normalize line endings
+        s = re.sub(r"\r\n", "\n", s)
+        # remove tailing ::
+        s = re.sub(r"::$", "\n", s)
+        # remove extra line breaks before/after _ or ,
+        s = re.sub(r"\n[ \t]*([_,])\n", r"\1", s)
+        # remove extra line breaks after `
+        #s = re.sub(r"`\n", "` ", s)
+        # remove extra line breaks before *
+        s = re.sub(r"\n\n\*", "\n\*", s)
+        # remove extra line breaks before #.
+        s = re.sub(r"\n\n#\.", "\n#.", s)
+        # remove extra line breaks after #.
+        s = re.sub(r"\n#\.\n", "\n#. ", s)
+        # remove extra line breaks before `
+        s = re.sub(r"\n[ \t]*`", " `", s)
+        # remove trailing whitespaces
+        s = re.sub(r"[ \t]+$", "", s)
+        # remove whitespace before .
+        s = re.sub(r"[ \t]+\.", "\.", s)
+        # remove .. for references
+        s = re.sub(r"\.\. \[", "[", s)
+        # unescape
+        s = re.sub(r"\\(.)", "\\1", s)
+        # compress whitespace
+        s = re.sub(r"[ \t]+", " ", s)
+
+        s = s.replace("**", "")
+        s = s.replace("``", "\"")
+        s = s.replace("`", "\"")
+        s = s.replace("\"\"", "\"")
+        s = s.replace(":ocv:cfunc:","")
+        s = s.replace(":math:", "")
+        s = s.replace(":ocv:class:", "")
+        s = s.replace(":ocv:func:", "")
+        s = s.replace("]_", "]")
+        s = s.strip()
+        return s
+
+if __name__ == "__main__":
+    if len(sys.argv) < 1:
+        print "Usage:\n", os.path.basename(sys.argv[0]), " <module path>"
+        exit(0)
+
+    rst_parser_dir  = os.path.dirname(os.path.abspath(sys.argv[0]))
+    hdr_parser_path = os.path.join(rst_parser_dir, "../python/src2")
+
+    sys.path.append(hdr_parser_path)
+    import hdr_parser
+
+    module = sys.argv[1]
+
+    if not os.path.isdir(os.path.join(rst_parser_dir, "../" + module)):
+        print "Module \"" + module + "\" could not be found."
+        exit(1)
+
+    parser = RstParser(hdr_parser.CppHeaderParser())
+    parser.parse(os.path.join(rst_parser_dir, "../" + module))
+