From: Janos Kovacs Date: Sat, 21 Apr 2012 16:43:24 +0000 (+0300) Subject: murphy: abnf.py script now uses both the flex and bison file to generate ABNF X-Git-Tag: build/2012-07-04.133153~68^2~13 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=3a0abe6143bfcb0c14f21751293c0abf506402a2;p=profile%2Fivi%2Fmurphy.git murphy: abnf.py script now uses both the flex and bison file to generate ABNF * flex support added for the automatic document generation, ie. ABNF is generated from the regexp's in the definition section * more correct ABNF description is generated * the output is directly includable into DocBook xml file --- diff --git a/doc/scripts/abnf.py b/doc/scripts/abnf.py index 74ca065..8691ef1 100755 --- a/doc/scripts/abnf.py +++ b/doc/scripts/abnf.py @@ -10,91 +10,430 @@ import os, sys, re -def canonic_optlist(olist): - canonic = "" - sep = "" - if len(olist) > 0: - members = olist.split(' ') - nmember = len(members) - if nmember > 0: - for member in members: - tag = member.strip() - if len(tag) < 1: +def regexp_to_abnf(string): + return regexp_parse(string, 0)[0] + +def regexp_parse(string, index): + stack = [] + precedence = 0 + escape = False + + while index < len(string): + c = string[index] + + insert = False + + if c == "\\": + escape = True + else: + if c.isalnum() or escape: + if c == "\"": + new_string = "DQUOTE" + elif c == " ": + new_string = "SP" + else: + new_string = "\"%s\"" % c + new_precedence = 100 + escape = False + elif c == ".": + new_precedence = 90 + new_string = "VCHAR" + elif c == "{": + new_precedence = 100 + if index+1 >= len(string) or string.find("}", index+1) < 0: + new_string = "\"{\"" + else: + begin = index+1 + end = string.find("}", begin) + new_string = string[begin:end] + index = end + elif c == "(": + new_precedence = 80 + result, index = regexp_parse(string, index+1) + if len(result) < 1: + index += 1 continue - canonic += sep - sep = " " - if tag.startswith("TKN_"): - canonic += tag[4:] + new_string = "(" + result + ")" + elif c == ")": + break + elif c == "[": + new_precedence = 70 + new_string, index = regexp_character_class(string, index) + elif c == "*": + new_precedence = 43 + new_string = "*" + insert = True + elif c == "+": + new_precedence = 42 + new_string = "1*" + insert = True + elif c == "?": + new_precedence = 40 + new_string = "0*1" + insert = True + elif c == "|": + new_precedence = 30 + new_string = "/" + else: + new_precedence = 100 + if c == "\"": + new_string = "DQUOTE" + elif ord(c) < ord(' '): + new_string = "\%x%x" % ord(c) else: - canonic += tag - return canonic + new_string = "\"" + c + "\"" + + if insert: + last = len(stack) - 1 + if last >= 0: + stack.insert(last, (stack[last][0], new_string)) + stack = regexp_merge(new_precedence, stack) + else: + stack = regexp_merge(new_precedence, stack) + stack.append((new_precedence, new_string)) + precedence = new_precedence + index += 1 -def canonic_complist(clist): - stripped = clist.strip() - canonic = "" + if len(stack) < 1: + result = "" + else: + result = regexp_merge(-1, stack)[0][1] + + + if result.startswith("(") and result.endswith(")"): + strip = True + balance = 0 + for c in result[1:-1]: + if c == "(": + balance += 1 + elif c == ")": + balance -= 1 + if balance < 0: + strip = False + break + if strip and balance == 0: + result = result[1:-1] + + return (result, index) + +def regexp_character_class(string, index): + cnt = 0 + result = "" + backslash = False sep = "" - if len(stripped) > 0: - options = stripped.split('|') - noption = len(options) - if noption == 1: - stripped_option = options[0].strip() - canonic += canonic_optlist(stripped_option) - elif noption > 1: - if len(options[0].strip()) < 1: - canonic = "[" - close = "]" + + if string[index+1] == "^": + index += 1 + ranges = [(32, 126)] + escape = False + while index+1 < len(string): + index += 1 + char = string[index] + + if char == "]": + break + + if char == "\\": + escape = True + continue + + c = ord(char) + + if escape: + if char == "n": + c = 10 + if char == "t": + c = 9 + escape = False + + for r in ranges: + if c >= r[0] and c <= r[1]: + ranges.remove(r) + if c == r[0]: + if c != r[1]: + ranges.append((r[0]+1, r[1])) + break + elif c == r[1]: + if c != r[0]: + ranges.append((r[0], r[1]-1)) + break + else: + ranges.append((r[0], c-1)) + ranges.append((c+1, r[1])) + break + + lower = False + upper = False + digit = False + for r in ranges: + if r[0] <= 65 and r[1] >= 90: + upper = r + if r[0] <= 97 and r[1] >= 122: + lower = r + if r[0] <= 48 and r[1] >= 57: + digit = r + if lower and upper: + ranges = regexp_range_extract(ranges, 97,122) + ranges = regexp_range_extract(ranges, 65,90) + result += sep + "ALPHA" + sep = " / " + cnt += 1 + if digit: + ranges = regexp_range_extract(ranges, 48,57) + result += sep + "DIGIT" + sep = " / " + cnt += 1 + ranges.sort(regexp_range_sort) + for r in ranges: + if r[0] == r[1]: + result += sep + "%x" + "%x" % r[0] else: - canonic = "(" - close = ")" - for option in options: - stripped_option = option.strip() - if len(stripped_option) > 0: - canonic += sep + canonic_optlist(stripped_option) - sep = "|" - canonic += close - return canonic + result += sep + "%x" + "%x-%x" % r + sep = " / " + cnt += 1 + else: + while index+1 < len(string): + index += 1 + c = string[index] + + if c == "]": + break + + if c == "\\": + if not backslash: + backslash = True + continue + else: + if backslash: + backslash = False + c = regexp_escape(c) + elif c == " ": + c = "SP" + elif c == "\"": + c = "DQUOTE" + else: + if string[index:index+3] == "0-9": + index += 2 + c = "DIGIT" + elif string[index:index+6] == "a-zA-Z" or \ + string[index:index+6] == "A-Za-z": + index += 5 + c = "ALPHA" + elif index < len(string)-2 and string[index+1] == "-" and \ + ((string[index].isalpha() and \ + string[index+2].isalpha()) or \ + (string[index].isdigit() and \ + string[index+2].isdigit())): + index += 2 + start = ord(c) + end = ord(string[index])+1 + sep2 = "" + c = "" + + for i in range(start, end): + c += sep2 + "\"" + chr(i) + "\"" + sep2 = " / " + else: + c = "\"" + c + "\"" + + result += sep + c + sep = " / " + cnt += 1 + + if cnt > 1: + result = "(" + result + ")" + + return (result, index) + +def regexp_escape(char): + if char == "n": + return "CRLF" + elif char == "t": + return "HTAB" + elif char == " ": + return "SP" + elif char == "\"": + return "DQUOTE" + elif char == "\\": + return "\"\\\"" + elif char == "^": + return "\"^\"" + + return "\"" + char + "\"" + + +def regexp_merge(new_precedence, stack): + last = len(stack) - 1 + precedence = 0 + + if last >= 0: + for merge in range(last,-1,-1): + precedence = stack[merge][0] + + if new_precedence >= precedence: + break + + append = False + string = "" + sep = "" + + for i in range(merge,last+1): + element = stack[i][1] + string += sep + element + if element.find("*") == len(element)-1: + sep = "" + else: + sep = " " + append = True -def print_canonic_rules(toprules): - print terminals + "\n" - for rule in toprules: - print canonic_rule(toprules, rule, 0, " ") + "\n" + for i in range(last,merge-1,-1): + stack.pop() + if append: + stack.append((new_precedence, string)) -def canonic_rule(toprules, name, rdepth, gap): - canonic = "" - stripped_name = name.strip() - toplevel = name in toprules + return stack - if stripped_name in rules: - rule = rules[stripped_name] - if toplevel: - canonic += "<" + name + "> =" - if rule.startswith("(") and rule.endswith(")"): - stripped_rule = rule[1:-1].strip() +def regexp_range_extract(ranges, l,h): + for r in ranges: + if r[0] <= l and r[1] >= h: + ranges.remove(r) + if l == r[0] and h < r[1]: + ranges.append((h+1, r[1])) + elif l > r[0] and h == r[1]: + ranges.append((r[0], l-1)) + elif l > r[0] and h < r[1]: + ranges.append((r[0],l-1)) + ranges.append((h+1,r[1])) + break + return ranges + +def regexp_range_sort(a,b): + if a[0] < b[0]: + return -1 + elif a[0] > b[0]: + return +1 + return 0 + + +def component_list(input_list): + output_list = "" + sep = "" + if len(input_list) > 0: + components = input_list.split(' ') + ncomponent = len(components) + if ncomponent > 0: + for component in components: + name = component.strip() + if len(name) < 1: + continue + output_list += sep + sep = " " + if name.startswith("TKN_"): + output_list += name[4:] + else: + output_list += name + return output_list + + +def rule_list(rule_def): + stripped_rule_def = rule_def.strip() + rlist = "" + sep = "" + if len(stripped_rule_def) > 0: + rules = stripped_rule_def.split('|') + nrule = len(rules) + if nrule == 1: + stripped_rule = rules[0].strip() + rlist += component_list(stripped_rule) + elif nrule > 1: + if len(rules[0].strip()) < 1: + rlist = "[" + close = "]" + else: + rlist = "(" + close = ")" + for rule in rules: + stripped_rule = rule.strip() + if len(stripped_rule) > 0: + rlist += sep + component_list(stripped_rule) + sep = "|" + rlist += close + return rlist + + +def print_abnf_rules(): + name_len = 0 + prologue = "" + extra_linefeed = "" + + for rule in abnf: + name_len = max(len(rule[0]), name_len) + + for rule in abnf: + line = rule[0].ljust(name_len) + " =" + words = rule[1].split(' ') + margin = len(line) + width = margin + + for word in words: + wl = len(word) + 1 + if width + wl > line_width: + line += "\n".ljust(margin+2) + width = margin + line += " " + word + width += wl + + if rule[0].isupper(): + extra_linefeed = "" + else: + if len(extra_linefeed) < 1: + print "\n" + extra_linefeed = "\n" + + print prologue + line + extra_linefeed + prologue = "" + print epilogue + +def make_abnf_rules(topresults): + for result in topresults: + abnf.append( (result, abnf_rule(topresults, result, 0, " ")) ) + + +def abnf_rule(topresults, result, rdepth, gap): + canonic = "" + component = result.strip() + toplevel = component in topresults + + if component in results: + rule_list = results[component] + if rule_list.startswith("(") and rule_list.endswith(")"): + stripped_rule_list = rule_list[1:-1].strip() if rdepth < 1: close = "" else: canonic += " (" close = ")" gap = "" - elif rule.startswith("[") and rule.endswith("]"): - stripped_rule = rule[1:-1].strip() + elif rule_list.startswith("[") and rule_list.endswith("]"): + stripped_rule_list = rule_list[1:-1].strip() canonic += " [" close = "]" gap = "" else: - stripped_rule = rule.strip() + stripped_rule_list = rule_list.strip() close = "" - escaped_rule = stripped_rule.replace("\"|\"","Ö") - members = escaped_rule.split("|") + escaped_rule_list = stripped_rule_list.replace("\"|\"","Ö") + rules = escaped_rule_list.split("|") sep = "" - for member in members: - tags = member.replace("Ö","\"|\"").split(' ') + for rule in rules: + components = rule.replace("Ö","\"|\"").split(' ') - if tags[0].strip() == name: + if components[0].strip() == result: rept = ")" else: rept = "" @@ -102,46 +441,50 @@ def canonic_rule(toprules, name, rdepth, gap): sep = " /" first = True - for tag in tags: - tagname = tag.strip() - if len(tagname) < 1: + for component in components: + component_name = component.strip() + if len(component_name) < 1: continue - if first and tagname == name: + if first and component_name == result: canonic += " *(" gap = "" - elif tagname in toprules: - canonic += gap + "<" + tagname + ">" + elif component_name in topresults: + canonic += gap + component_name gap = " " else: - canonic += canonic_rule(toprules, tagname, rdepth+1, gap) + canonic += abnf_rule(topresults, component_name, \ + rdepth+1,gap) gap = " " first = False canonic += rept canonic += close else: - canonic += gap + stripped_name + canonic += gap + component return canonic -lfile = sys.argv[1] -yfile = sys.argv[2] -inputdir = os.path.dirname(yfile) -scriptdir = sys.path[0] -start = "" -grammar = False -prologue = False -comment = False -toplevel = False -depth = 0 -result = "" -components = "" -terminals = "" -rules = {} -top_rules = [] -pos = 0 -end = 0 +lfile = sys.argv[1] +yfile = sys.argv[2] +line_width = 78 +inputdir = os.path.dirname(yfile) +scriptdir = sys.path[0] +lname = os.path.basename(lfile) +yname = os.path.basename(yfile) +start = "" +grammar = False +prologue = False +comment = False +toplevel = False +depth = 0 +result = "" +result_def = "" +results = {} +top_results = [] +abnf = [] +pos = 0 +end = 0 @@ -188,18 +531,18 @@ with open(lfile, "r") as l: continue if value.isalpha(): - terminals += key + " = \"" + value.upper() + "\"\n" + abnf.append( (key, " \"" + value.upper() + "\"") ) else: if value[0] == "\\" and len(value) == 2: - rules[key] = "\"%s\"" % value[1] + results[key] = "\"%s\"" % value[1] elif len(value) == 1: - rules[key] = "\"%s\"" % value[0] + results[key] = "\"%s\"" % value[0] else: if value.find("[") < 0 and value.find("(") < 0: - rules[key] = "\"%s\"" % value.replace("/","") + results[key] = "\"%s\"" % value.replace("/","") else: - terminals += "<" + key.lower() + "> = " + value + "\n" - rules[key] = "<" + key.lower() + ">" + abnf.append( (key.lower(), " "+regexp_to_abnf(value)) ) + results[key] = key.lower() l.close() @@ -221,7 +564,7 @@ with open(yfile, "r") as y: slash_star = line.find("/*") if colon > 0 and (slash_star < 0 or slash_star > colon): result = line[:colon] - components = "" + result_def = "" pos = colon + 1 while pos < end-1: if comment: @@ -246,7 +589,7 @@ with open(yfile, "r") as y: (slash_star < 0 or open_brace < slash_star ): if depth == 0: - components += line[pos: open_brace] + result_def += line[pos: open_brace] depth += 1 pos = open_brace + 1 continue @@ -259,15 +602,15 @@ with open(yfile, "r") as y: if depth == 0: semicolon = line.find(";", pos) if semicolon >= pos: - components += line[pos: semicolon] - rules[result] = canonic_complist(components) + result_def += line[pos: semicolon] + results[result] = rule_list(result_def) if toplevel: - top_rules.append(result) + top_results.append(result) result = "" - components = "" + result_def = "" toplevel = False else: - components += line[pos: -1] + result_def += line[pos: -1] break else: if prologue: @@ -283,7 +626,15 @@ with open(yfile, "r") as y: -print_canonic_rules(top_rules) +make_abnf_rules(top_results) + +print "\n" % \ + (lname, yname) + +print "" +print_abnf_rules() +print "" +