1 # support for DTD validation
2 from lxml.includes cimport dtdvalid
cdef class DTDError(LxmlError):
    """Common base class of the DTD exception types declared in this module.
    """
cdef class DTDParseError(DTDError):
    """Error raised when a DTD cannot be parsed.
    """
cdef class DTDValidateError(DTDError):
    """Error raised while validating an XML document against a DTD.
    """
cdef inline int _assertValidDTDNode(node, void *c_node) except -1:
    """Assert that the C pointer backing the proxy object ``node`` is set.

    Fails with an AssertionError that names ``id(node)`` when ``c_node`` is
    NULL.  Otherwise returns 0; -1 is reserved for signalling the exception.
    """
    assert c_node is not NULL, u"invalid DTD proxy at %s" % id(node)
    return 0
24 cdef class _DTDElementContentDecl:
26 cdef tree.xmlElementContent* _c_node
29 return "<%s.%s object name=%r type=%r occur=%r at 0x%x>" % (self.__class__.__module__, self.__class__.__name__, self.name, self.type, self.occur, id(self))
33 _assertValidDTDNode(self, self._c_node)
34 return funicodeOrNone(self._c_node.name)
38 _assertValidDTDNode(self, self._c_node)
39 cdef int type = self._c_node.type
40 if type == tree.XML_ELEMENT_CONTENT_PCDATA:
42 elif type == tree.XML_ELEMENT_CONTENT_ELEMENT:
44 elif type == tree.XML_ELEMENT_CONTENT_SEQ:
46 elif type == tree.XML_ELEMENT_CONTENT_OR:
53 _assertValidDTDNode(self, self._c_node)
54 cdef int occur = self._c_node.ocur
55 if occur == tree.XML_ELEMENT_CONTENT_ONCE:
57 elif occur == tree.XML_ELEMENT_CONTENT_OPT:
59 elif occur == tree.XML_ELEMENT_CONTENT_MULT:
61 elif occur == tree.XML_ELEMENT_CONTENT_PLUS:
68 _assertValidDTDNode(self, self._c_node)
71 node = <_DTDElementContentDecl>_DTDElementContentDecl.__new__(_DTDElementContentDecl)
73 node._c_node = <tree.xmlElementContent*>c1
80 _assertValidDTDNode(self, self._c_node)
83 node = <_DTDElementContentDecl>_DTDElementContentDecl.__new__(_DTDElementContentDecl)
85 node._c_node = <tree.xmlElementContent*>c2
94 cdef class _DTDAttributeDecl:
96 cdef tree.xmlAttribute* _c_node
99 return "<%s.%s object name=%r elemname=%r prefix=%r type=%r default=%r default_value=%r at 0x%x>" % (self.__class__.__module__, self.__class__.__name__, self.name, self.elemname, self.prefix, self.type, self.default, self.default_value, id(self))
103 _assertValidDTDNode(self, self._c_node)
104 return funicodeOrNone(self._c_node.name)
108 _assertValidDTDNode(self, self._c_node)
109 return funicodeOrNone(self._c_node.elem)
113 _assertValidDTDNode(self, self._c_node)
114 return funicodeOrNone(self._c_node.prefix)
118 _assertValidDTDNode(self, self._c_node)
119 cdef int type = self._c_node.atype
120 if type == tree.XML_ATTRIBUTE_CDATA:
122 elif type == tree.XML_ATTRIBUTE_ID:
124 elif type == tree.XML_ATTRIBUTE_IDREF:
126 elif type == tree.XML_ATTRIBUTE_IDREFS:
128 elif type == tree.XML_ATTRIBUTE_ENTITY:
130 elif type == tree.XML_ATTRIBUTE_ENTITIES:
132 elif type == tree.XML_ATTRIBUTE_NMTOKEN:
134 elif type == tree.XML_ATTRIBUTE_NMTOKENS:
136 elif type == tree.XML_ATTRIBUTE_ENUMERATION:
138 elif type == tree.XML_ATTRIBUTE_NOTATION:
145 _assertValidDTDNode(self, self._c_node)
146 cdef int default = self._c_node.def_
147 if default == tree.XML_ATTRIBUTE_NONE:
149 elif default == tree.XML_ATTRIBUTE_REQUIRED:
151 elif default == tree.XML_ATTRIBUTE_IMPLIED:
153 elif default == tree.XML_ATTRIBUTE_FIXED:
def default_value(self):
    """Return the declaration's ``defaultValue`` field, converted by
    ``funicodeOrNone()``.

    The proxy's C node is checked with ``_assertValidDTDNode()`` first.
    """
    _assertValidDTDNode(self, self._c_node)
    cdef const_xmlChar* c_default = self._c_node.defaultValue
    return funicodeOrNone(c_default)
163 def itervalues(self):
164 _assertValidDTDNode(self, self._c_node)
165 cdef tree.xmlEnumeration *c_node = self._c_node.tree
166 while c_node is not NULL:
167 yield funicode(c_node.name)
171 return list(self.itervalues())
177 cdef class _DTDElementDecl:
179 cdef tree.xmlElement* _c_node
182 return "<%s.%s object name=%r prefix=%r type=%r at 0x%x>" % (self.__class__.__module__, self.__class__.__name__, self.name, self.prefix, self.type, id(self))
186 _assertValidDTDNode(self, self._c_node)
187 return funicodeOrNone(self._c_node.name)
191 _assertValidDTDNode(self, self._c_node)
192 return funicodeOrNone(self._c_node.prefix)
196 _assertValidDTDNode(self, self._c_node)
197 cdef int type = self._c_node.etype
198 if type == tree.XML_ELEMENT_TYPE_UNDEFINED:
200 elif type == tree.XML_ELEMENT_TYPE_EMPTY:
202 elif type == tree.XML_ELEMENT_TYPE_ANY:
204 elif type == tree.XML_ELEMENT_TYPE_MIXED:
206 elif type == tree.XML_ELEMENT_TYPE_ELEMENT:
213 _assertValidDTDNode(self, self._c_node)
214 cdef tree.xmlElementContent *content = self._c_node.content
216 node = <_DTDElementContentDecl>_DTDElementContentDecl.__new__(_DTDElementContentDecl)
217 node._dtd = self._dtd
218 node._c_node = content
223 def iterattributes(self):
224 _assertValidDTDNode(self, self._c_node)
225 cdef tree.xmlAttribute *c_node = self._c_node.attributes
227 node = <_DTDAttributeDecl>_DTDAttributeDecl.__new__(_DTDAttributeDecl)
228 node._dtd = self._dtd
229 node._c_node = c_node
231 c_node = c_node.nexth
def attributes(self):
    """Return everything produced by ``iterattributes()`` collected in a list.
    """
    declarations = list(self.iterattributes())
    return declarations
240 cdef class _DTDEntityDecl:
242 cdef tree.xmlEntity* _c_node
244 return "<%s.%s object name=%r at 0x%x>" % (self.__class__.__module__, self.__class__.__name__, self.name, id(self))
248 _assertValidDTDNode(self, self._c_node)
249 return funicodeOrNone(self._c_node.name)
253 _assertValidDTDNode(self, self._c_node)
254 return funicodeOrNone(self._c_node.orig)
258 _assertValidDTDNode(self, self._c_node)
259 return funicodeOrNone(self._c_node.content)
262 ################################################################################
265 cdef class DTD(_Validator):
266 u"""DTD(self, file=None, external_id=None)
269 Can load from filesystem directly given a filename or file-like object.
270 Alternatively, pass the keyword parameter ``external_id`` to load from a
273 cdef tree.xmlDtd* _c_dtd
274 def __init__(self, file=None, *, external_id=None):
275 _Validator.__init__(self)
278 file = _encodeFilename(file)
279 with self._error_log:
280 self._c_dtd = xmlparser.xmlParseDTD(NULL, _xcstr(file))
281 elif hasattr(file, 'read'):
282 self._c_dtd = _parseDtdFromFilelike(file)
284 raise DTDParseError, u"file must be a filename or file-like object"
285 elif external_id is not None:
286 with self._error_log:
287 self._c_dtd = xmlparser.xmlParseDTD(<const_xmlChar*>external_id, NULL)
289 raise DTDParseError, u"either filename or external ID required"
291 if self._c_dtd is NULL:
293 self._error_log._buildExceptionMessage(u"error parsing DTD"),
298 if self._c_dtd is NULL:
300 return funicodeOrNone(self._c_dtd.name)
303 def external_id(self):
304 if self._c_dtd is NULL:
306 return funicodeOrNone(self._c_dtd.ExternalID)
309 def system_url(self):
310 if self._c_dtd is NULL:
312 return funicodeOrNone(self._c_dtd.SystemID)
314 def iterelements(self):
315 cdef tree.xmlNode *c_node = self._c_dtd.children if self._c_dtd is not NULL else NULL
316 while c_node is not NULL:
317 if c_node.type == tree.XML_ELEMENT_DECL:
318 node = _DTDElementDecl()
320 node._c_node = <tree.xmlElement*>c_node
325 return list(self.iterelements())
327 def iterentities(self):
328 cdef tree.xmlNode *c_node = self._c_dtd.children if self._c_dtd is not NULL else NULL
329 while c_node is not NULL:
330 if c_node.type == tree.XML_ENTITY_DECL:
331 node = _DTDEntityDecl()
333 node._c_node = <tree.xmlEntity*>c_node
338 return list(self.iterentities())
def __dealloc__(self):
    # Hand the libxml2 DTD structure referenced by this object back to
    # libxml2 when the Python object is deallocated.
    cdef tree.xmlDtd* c_dtd = self._c_dtd
    tree.xmlFreeDtd(c_dtd)
343 def __call__(self, etree):
344 u"""__call__(self, etree)
346 Validate doc using the DTD.
348 Returns true if the document is valid, false if not.
351 cdef _Element root_node
353 cdef dtdvalid.xmlValidCtxt* valid_ctxt
356 assert self._c_dtd is not NULL, "DTD not initialised"
357 doc = _documentOrRaise(etree)
358 root_node = _rootNodeOrRaise(etree)
360 valid_ctxt = dtdvalid.xmlNewValidCtxt()
361 if valid_ctxt is NULL:
362 raise DTDError(u"Failed to create validation context")
364 # work around error reporting bug in libxml2 <= 2.9.1 (and later?)
365 # https://bugzilla.gnome.org/show_bug.cgi?id=724903
366 valid_ctxt.error = <dtdvalid.xmlValidityErrorFunc>_nullGenericErrorFunc
367 valid_ctxt.userData = NULL
370 with self._error_log:
371 c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node)
372 ret = dtdvalid.xmlValidateDtd(valid_ctxt, c_doc, self._c_dtd)
373 _destroyFakeDoc(doc._c_doc, c_doc)
375 dtdvalid.xmlFreeValidCtxt(valid_ctxt)
378 raise DTDValidateError(u"Internal error in DTD validation",
383 cdef tree.xmlDtd* _parseDtdFromFilelike(file) except NULL:
384 cdef _ExceptionContext exc_context
385 cdef _FileReaderContext dtd_parser
386 cdef _ErrorLog error_log
387 cdef tree.xmlDtd* c_dtd = NULL
388 exc_context = _ExceptionContext()
389 dtd_parser = _FileReaderContext(file, exc_context, None)
390 error_log = _ErrorLog()
393 c_dtd = dtd_parser._readDtd()
395 exc_context._raise_if_stored()
397 raise DTDParseError(u"error parsing DTD", error_log)
400 cdef DTD _dtdFactory(tree.xmlDtd* c_dtd):
401 # do not run through DTD.__init__()!
405 dtd = DTD.__new__(DTD)
406 dtd._c_dtd = _copyDtd(c_dtd)
407 _Validator.__init__(dtd)
411 cdef tree.xmlDtd* _copyDtd(tree.xmlDtd* c_orig_dtd) except NULL:
413 Copy a DTD. libxml2 (currently) fails to set up the element->attributes
414 links when copying DTDs, so we have to rebuild them here.
416 c_dtd = tree.xmlCopyDtd(c_orig_dtd)
419 cdef tree.xmlNode* c_node = c_dtd.children
421 if c_node.type == tree.XML_ATTRIBUTE_DECL:
422 _linkDtdAttribute(c_dtd, <tree.xmlAttribute*>c_node)
427 cdef void _linkDtdAttribute(tree.xmlDtd* c_dtd, tree.xmlAttribute* c_attr):
429 Create the link to the DTD attribute declaration from the corresponding
432 c_elem = dtdvalid.xmlGetDtdElementDesc(c_dtd, c_attr.elem)
434 # no such element? something is wrong with the DTD ...
436 c_pos = c_elem.attributes
438 c_elem.attributes = c_attr
441 # libxml2 keeps namespace declarations first, and we need to make
442 # sure we don't re-insert attributes that are already there
443 if _isDtdNsDecl(c_attr):
444 if not _isDtdNsDecl(c_pos):
445 c_elem.attributes = c_attr
448 while c_pos != c_attr and c_pos.nexth and _isDtdNsDecl(c_pos.nexth):
452 while c_pos != c_attr and c_pos.nexth:
456 c_attr.nexth = c_pos.nexth
460 cdef bint _isDtdNsDecl(tree.xmlAttribute* c_attr):
461 if cstring_h.strcmp(<const_char*>c_attr.name, "xmlns") == 0:
463 if (c_attr.prefix is not NULL and
464 cstring_h.strcmp(<const_char*>c_attr.prefix, "xmlns") == 0):