1 """Helper functions for XML.
3 This module has misc. helper functions for working with XML DOM nodes."""
import os
import re

#parseDocument(s) has one implementation per platform; both take the XML
#text 's' and return a DOM Document object.
#NOTE(review): the platform test below is reconstructed (os.name is "java"
#under Jython) - confirm it matches the switch originally used here.
if os.name != "java":
    from xml.dom import minidom
    from xml.sax import saxutils

    def parseDocument(s):
        """Parse the XML text 's' with minidom and return the Document."""
        return minidom.parseString(s)
else:
    from javax.xml.parsers import *
    import java

    #One DocumentBuilder is created at import time and reused by every call
    builder = DocumentBuilderFactory.newInstance().newDocumentBuilder()

    def parseDocument(s):
        """Parse the XML text 's' with the Java DOM parser and return the Document."""
        stream = java.io.ByteArrayInputStream(java.lang.String(s).getBytes())
        return builder.parse(stream)
def parseAndStripWhitespace(s):
    """Parse the XML text 's', strip its indentation whitespace, return the root.

    The document is parsed with parseDocument() and its root element is passed
    to stripWhitespace(), which edits the tree in place.

    Returns the root element of the parsed document.

    Raises SyntaxError if parsing fails (the message is str() of the parser's
    exception) or if stripWhitespace() rejects the document.
    """
    import sys

    try:
        element = parseDocument(s).documentElement
    except BaseException:
        #The exception is fetched with sys.exc_info() because the
        #"except X, e" spelling is a syntax error on Python 3.
        #NOTE(review): BaseException is kept as written; presumably it is
        #there so that every kind of parser failure is converted - confirm.
        e = sys.exc_info()[1]
        raise SyntaxError(str(e))
    stripWhitespace(element)
    return element
#Goes through a DOM tree and removes the whitespace text nodes around child
#elements, as long as this whitespace is correctly tab-ified
def stripWhitespace(element, tab=0):
    """Remove indentation text nodes from 'element' and its descendants, in place.

    element -- DOM element to clean up
    tab     -- nesting depth of 'element'; its children must be indented with
               a newline followed by tab+1 tab characters, and its closing tag
               with a newline followed by 'tab' tab characters

    An element whose only child is a text node is treated as text content and
    kept, unless that text is exactly the closing-tag indentation, in which
    case the text node is removed.

    Raises SyntaxError for an element with no children, for whitespace that
    does not match the expected indentation, and for child nodes that are
    neither elements nor text.
    """
    lastSpacer = "\n" + ("\t"*tab)
    spacer = lastSpacer + "\t"

    #Zero children aren't allowed (i.e. <empty/>)
    #This makes writing output simpler, and matches Canonical XML
    if element.childNodes.length==0: #DON'T DO len(element.childNodes) - doesn't work in Jython
        raise SyntaxError("Empty XML elements not allowed")

    #If there's a single child, it must be text content
    if element.childNodes.length==1:
        onlyChild = element.firstChild
        if onlyChild.nodeType == onlyChild.TEXT_NODE:
            #If it's an empty element, remove
            if onlyChild.data == lastSpacer:
                element.removeChild(onlyChild)
            #Text content is final: stop here so it is not run through the
            #indentation checks below
            return
        #If not text content, give an error
        if onlyChild.nodeType == onlyChild.ELEMENT_NODE:
            raise SyntaxError("Bad whitespace under '%s'" % element.tagName)
        raise SyntaxError("Unexpected node type in XML document")

    #Otherwise there are multiple children: elements separated by whitespace
    child = element.firstChild
    while child is not None:
        #Fetch the sibling first, since 'child' may be removed below
        following = child.nextSibling
        if child.nodeType == child.ELEMENT_NODE:
            stripWhitespace(child, tab+1)
        elif child.nodeType == child.TEXT_NODE:
            #The last text node indents the closing tag, so it is one tab
            #shallower than the text nodes that precede child elements
            if following is None:
                expected = lastSpacer
            else:
                expected = spacer
            if child.data != expected:
                raise SyntaxError("Bad whitespace under '%s'" % element.tagName)
            element.removeChild(child)
        else:
            raise SyntaxError("Unexpected node type in XML document")
        child = following
def checkName(element, name):
    """Check that 'element' is an element node with tag name 'name'.

    element -- DOM node to check
    name    -- required tag name, or None to accept any tag name

    Raises SyntaxError if the node is not an element, or if its tag name
    differs from 'name'.
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Missing element: '%s'" % name)

    #getChild() and getLastChild() pass name=None by default, meaning the
    #caller does not care about the tag name
    if name is None:
        return

    if element.tagName != name:
        raise SyntaxError("Wrong element name: should be '%s', is '%s'" % (name, element.tagName))
def getChild(element, index, name=None):
    """Return the child node of 'element' at position 'index'.

    element -- parent DOM element
    index   -- position in element.childNodes
    name    -- tag name handed to checkName() for the child; also used in the
               "Missing child" message

    Raises SyntaxError if 'element' is not an element node, if there is no
    child at 'index', or if checkName() rejects the child.
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Wrong node type in getChild()")

    child = element.childNodes.item(index)
    #Only an absent child is an error; a present one is validated and returned
    if child is None:
        raise SyntaxError("Missing child: '%s'" % name)
    checkName(child, name)
    return child
def getChildIter(element, index):
    """Return a cursor over the child nodes of 'element', starting at 'index'.

    The returned object hands out one child per call and can afterwards
    verify that every child was consumed.

    NOTE(review): the method names next() and checkEnd() are reconstructed;
    confirm them against the callers of getChildIter().
    """
    class ChildIter:
        def __init__(self, element, index):
            self.element = element
            self.index = index

        def next(self):
            """Return the child at the current position and advance, or
            return None (without advancing) once the children are used up."""
            #Use .length: len(element.childNodes) doesn't work in Jython
            if self.index < self.element.childNodes.length:
                retVal = self.element.childNodes.item(self.index)
                self.index += 1
            else:
                retVal = None
            return retVal

        def checkEnd(self):
            """Raise SyntaxError unless the position is exactly at the end
            of the child list."""
            if self.index != self.element.childNodes.length:
                raise SyntaxError("Too many elements under: '%s'" % self.element.tagName)

    return ChildIter(element, index)
def getChildOrNone(element, index):
    """Return the child node of 'element' at position 'index', or None.

    Unlike getChild(), a missing child is not an error and the child's name
    is not checked.

    Raises SyntaxError if 'element' is not an element node.
    """
    if element.nodeType != element.ELEMENT_NODE:
        #The message previously named getChild(), which pointed at the wrong
        #function
        raise SyntaxError("Wrong node type in getChildOrNone()")
    #item() yields None when there is no child at 'index'
    child = element.childNodes.item(index)
    return child
def getLastChild(element, index, name=None):
    """Return the child of 'element' at 'index', requiring it to be the last one.

    element -- parent DOM element
    index   -- position in element.childNodes
    name    -- tag name handed to checkName() for the child; also used in the
               "Missing child" message

    Raises SyntaxError if 'element' is not an element node, if there is no
    child at 'index', if further children follow it, or if checkName()
    rejects the child.
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Wrong node type in getLastChild()")

    child = element.childNodes.item(index)
    #Only an absent child is an error; a present one is validated and returned
    if child is None:
        raise SyntaxError("Missing child: '%s'" % name)
    if child != element.lastChild:
        raise SyntaxError("Too many elements under: '%s'" % element.tagName)
    checkName(child, name)
    return child
#Regular expressions for syntax-checking attribute and element content.
#They are applied with re.match(), which anchors the start of the value;
#the trailing \Z anchors the end, so the whole value must match.
#Raw strings keep the backslash sequences (\Z, \d, \.) intact without
#relying on Python passing unknown string escapes through.
nsRegEx = r"http://trevp.net/cryptoID\Z"
cryptoIDRegEx = r"([a-km-z3-9]{5}\.){3}[a-km-z3-9]{5}\Z"
urlRegEx = r"http(s)?://.{1,100}\Z"
sha1Base64RegEx = r"[A-Za-z0-9+/]{27}=\Z"
base64RegEx = r"[A-Za-z0-9+/]+={0,4}\Z"
certsListRegEx = r"(0)?(1)?(2)?(3)?(4)?(5)?(6)?(7)?(8)?(9)?\Z"
keysListRegEx = r"(A)?(B)?(C)?(D)?(E)?(F)?(G)?(H)?(I)?(J)?(K)?(L)?(M)?(N)?(O)?(P)?(Q)?(R)?(S)?(T)?(U)?(V)?(W)?(X)?(Y)?(Z)?\Z"
dateTimeRegEx = r"\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\dZ\Z"
shortStringRegEx = r".{1,100}\Z"
exprRegEx = r"[a-zA-Z0-9 ,()]{1,200}\Z"
#A number from 0 to (1 billion)-1.  The alternation is wrapped in a
#non-capturing group so that \Z applies to the "0" alternative as well
#(otherwise any value starting with "0" matched); group 1 is unchanged.
notAfterDeltaRegEx = r"(?:0|([1-9][0-9]{0,8}))\Z"
#Anchored like the other patterns so that trailing characters are rejected;
#groups 1 and 2 are unchanged.
booleanRegEx = r"(?:(true)|(false))\Z"
def getReqAttribute(element, attrName, regEx=""):
    """Return the value of a required attribute and remove it from 'element'.

    element  -- DOM element carrying the attribute
    attrName -- name of the attribute
    regEx    -- pattern the value must satisfy under re.match(); the default
                "" accepts any value

    The attribute is removed once read, so that checkNoMoreAttributes() can
    later detect attributes nobody asked for.

    Raises SyntaxError if 'element' is not an element node, if the attribute
    is missing or empty, or if its value does not match 'regEx'.
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Wrong node type in getReqAttribute()")

    value = element.getAttribute(attrName)
    #Only an absent (or empty) value counts as missing
    if not value:
        raise SyntaxError("Missing Attribute: " + attrName)
    if not re.match(regEx, value):
        raise SyntaxError("Bad Attribute Value for '%s': '%s' " % (attrName, value))
    element.removeAttribute(attrName)
    return str(value) #de-unicode it; this is needed for bsddb, for example
def getAttribute(element, attrName, regEx=""):
    """Return the value of an optional attribute and remove it from 'element'.

    element  -- DOM element that may carry the attribute
    attrName -- name of the attribute
    regEx    -- pattern a present value must satisfy under re.match(); the
                default "" accepts any value

    Returns the attribute value as a str, or "" when the attribute is absent
    (getAttribute() on the DOM element yields an empty value in that case).

    Raises SyntaxError if 'element' is not an element node, or if the
    attribute is present and its value does not match 'regEx'.
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Wrong node type in getAttribute()")

    value = element.getAttribute(attrName)
    #The attribute is optional: validate and remove it only when present.
    #Checking or removing an absent attribute would reject documents that
    #merely omit it.
    if value:
        if not re.match(regEx, value):
            raise SyntaxError("Bad Attribute Value for '%s': '%s' " % (attrName, value))
        element.removeAttribute(attrName)
    return str(value) #de-unicode it; this is needed for bsddb, for example
def checkNoMoreAttributes(element):
    """Verify that 'element' has no attributes left on it.

    Raises SyntaxError if 'element' is not an element node, or if its
    attribute map is not empty.
    """
    isElement = element.nodeType == element.ELEMENT_NODE
    if not isElement:
        raise SyntaxError("Wrong node type in checkNoMoreAttributes()")

    leftover = element.attributes.length
    if leftover != 0:
        raise SyntaxError("Extra attributes on '%s'" % element.tagName)
def getText(element, regEx=""):
    """Return the text held by the first child node of 'element'.

    element -- DOM element whose first child should be a text node
    regEx   -- pattern the text must satisfy under re.match(); the default ""
               accepts any text

    Raises SyntaxError if 'element' has no children, if its first child is
    not a text node, or if the text does not match 'regEx'.
    """
    textNode = element.firstChild
    #Only a missing first child means the element is empty
    if textNode is None:
        raise SyntaxError("Empty element '%s'" % element.tagName)
    if textNode.nodeType != textNode.TEXT_NODE:
        raise SyntaxError("Non-text node: '%s'" % element.tagName)
    if not re.match(regEx, textNode.data):
        raise SyntaxError("Bad Text Value for '%s': '%s' " % (element.tagName, textNode.data))
    return str(textNode.data) #de-unicode it; this is needed for bsddb, for example
#Function for adding tabs to a string
def indent(s, steps, ch="\t"):
    """Return 's' with every line prefixed by 'steps' copies of 'ch'.

    s     -- text to indent; lines are separated by "\\n"
    steps -- number of indentation units to add to each line
    ch    -- the indentation unit (a tab by default)

    A trailing newline in 's' stays the final character of the result: no
    indentation is left dangling after it.  An empty string, or an empty
    indentation (steps of 0 or an empty 'ch'), returns 's' unchanged.
    """
    tabs = ch * steps
    #Nothing to indent, or nothing to indent with
    if not s or not tabs:
        return s

    indented = tabs + s.replace("\n", "\n" + tabs)
    if s.endswith("\n"):
        #The replace() also indented the empty "line" after the final
        #newline; drop that dangling indentation
        indented = indented[:-len(tabs)]
    return indented
201 return saxutils.escape(s)