1 # support for Schematron validation
2 from lxml.includes cimport schematron
cdef class SchematronError(LxmlError):
    """Base class of all Schematron errors.

    Raised directly by ``Schematron.__init__`` when lxml.etree was compiled
    without Schematron support.
    """
cdef class SchematronParseError(SchematronError):
    """Error while parsing an XML document as Schematron schema.

    Raised by ``Schematron.__init__`` when neither a tree nor a file is
    given, or when the parsed document is not a valid Schematron schema.
    """
cdef class SchematronValidateError(SchematronError):
    """Error while validating an XML document with a Schematron schema.

    Raised by ``Schematron.__call__`` with the message "Internal error in
    Schematron validation".
    """
18 ################################################################################
21 cdef class Schematron(_Validator):
22 u"""Schematron(self, etree=None, file=None)
23 A Schematron validator.
25 Pass a root Element or an ElementTree to turn it into a validator.
26 Alternatively, pass a filename as keyword argument 'file' to parse from
29 Schematron is a less well known, but very powerful schema language. The main
30 idea is to use the capabilities of XPath to put restrictions on the structure
31 and the content of XML documents. Here is a simple example::
33 >>> schematron = Schematron(XML('''
34 ... <schema xmlns="http://www.ascc.net/xml/schematron" >
35 ... <pattern name="id is the only permitted attribute name">
36 ... <rule context="*">
37 ... <report test="@*[not(name()='id')]">Attribute
38 ... <name path="@*[not(name()='id')]"/> is forbidden<name/>
48 ... <CCC color="ccc"/>
52 >>> schematron.validate(xml)
62 >>> schematron.validate(xml)
65 Schematron was added to libxml2 in version 2.6.21. Before version 2.6.32,
66 however, Schematron lacked support for error reporting other than to stderr.
67 This version is therefore required to retrieve validation warnings and
70 cdef schematron.xmlSchematron* _c_schema
71 cdef xmlDoc* _c_schema_doc
74 self._c_schema_doc = NULL
76 def __init__(self, etree=None, *, file=None):
78 cdef _Element root_node
81 cdef schematron.xmlSchematronParserCtxt* parser_ctxt = NULL
82 _Validator.__init__(self)
83 if not config.ENABLE_SCHEMATRON:
84 raise SchematronError, \
85 u"lxml.etree was compiled without Schematron support."
87 doc = _documentOrRaise(etree)
88 root_node = _rootNodeOrRaise(etree)
89 self._c_schema_doc = _copyDocRoot(doc._c_doc, root_node._c_node)
90 parser_ctxt = schematron.xmlSchematronNewDocParserCtxt(self._c_schema_doc)
91 elif file is not None:
92 filename = _getFilenameForFile(file)
94 # XXX assume a string object
96 filename = _encodeFilename(filename)
98 parser_ctxt = schematron.xmlSchematronNewParserCtxt(_cstr(filename))
100 raise SchematronParseError, u"No tree or file given"
102 if parser_ctxt is NULL:
103 if self._c_schema_doc is not NULL:
104 tree.xmlFreeDoc(self._c_schema_doc)
105 self._c_schema_doc = NULL
109 with self._error_log:
110 self._c_schema = schematron.xmlSchematronParse(parser_ctxt)
112 schematron.xmlSchematronFreeParserCtxt(parser_ctxt)
114 if self._c_schema is NULL:
115 raise SchematronParseError(
116 u"Document is not a valid Schematron schema",
def __dealloc__(self):
    # Free the compiled schema held in _c_schema.  This is called without a
    # NULL check -- NOTE(review): presumably xmlSchematronFree() tolerates
    # NULL; confirm against libxml2.
    schematron.xmlSchematronFree(self._c_schema)
    if self._c_schema_doc is NULL:
        # No schema document copy is held, so nothing more to release.
        return
    # _c_schema_doc is the document copy created in __init__ via
    # _copyDocRoot(); it is freed after the schema.
    tree.xmlFreeDoc(self._c_schema_doc)
124 def __call__(self, etree):
125 u"""__call__(self, etree)
127 Validate doc using Schematron.
129 Returns true if document is valid, false if not."""
131 cdef _Element root_node
133 cdef schematron.xmlSchematronValidCtxt* valid_ctxt
136 assert self._c_schema is not NULL, "Schematron instance not initialised"
137 doc = _documentOrRaise(etree)
138 root_node = _rootNodeOrRaise(etree)
140 valid_ctxt = schematron.xmlSchematronNewValidCtxt(
141 self._c_schema, schematron.XML_SCHEMATRON_OUT_ERROR)
142 if valid_ctxt is NULL:
146 self._error_log.clear()
147 schematron.xmlSchematronSetValidStructuredErrors(
148 valid_ctxt, _receiveError, <void*>self._error_log)
149 c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node)
151 ret = schematron.xmlSchematronValidateDoc(valid_ctxt, c_doc)
152 _destroyFakeDoc(doc._c_doc, c_doc)
154 schematron.xmlSchematronFreeValidCtxt(valid_ctxt)
157 raise SchematronValidateError(
158 u"Internal error in Schematron validation",