Imported Upstream version 4.5.2
[platform/upstream/python-lxml.git] / doc / html / validation.html
1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
2 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
3 <head>
4 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
5 <meta name="generator" content="Docutils 0.14: http://docutils.sourceforge.net/" />
6 <title>Validation with lxml</title>
7 <link rel="stylesheet" href="style.css" type="text/css" />
8 <script type="text/javascript">
9 function trigger_menu(event) {  // Click handler for the menu trigger: toggles the " visible" class marker on the #sidemenu element.
10     var sidemenu = document.getElementById("sidemenu");
11     var classes = sidemenu.getAttribute("class");
12     classes = (classes.indexOf(" visible") === -1) ? classes + " visible" : classes.replace(" visible", "");  // append the marker if absent; otherwise strip its first occurrence
13     sidemenu.setAttribute("class", classes);
14     event.preventDefault();
15     event.stopPropagation();  // stop the click from bubbling to the body element, whose onclick handler (hide_menu) would hide the menu again
16 }
17 function hide_menu() {  // Hides the side menu by removing the " visible" class marker from #sidemenu, if present.
18     var sidemenu = document.getElementById("sidemenu");
19     var classes = sidemenu.getAttribute("class");
20     if (classes.indexOf(" visible") !== -1) {  // only rewrite the attribute when the menu is currently marked visible
21         sidemenu.setAttribute("class", classes.replace(" visible", ""));
22     }
23 }
24 </script><meta content="width=device-width, initial-scale=1" name="viewport" /></head>
25 <body onclick="hide_menu()">
26 <div class="document" id="validation-with-lxml">
27 <div class="sidemenu" id="sidemenu"><div class="menutrigger" onclick="trigger_menu(event)">Menu</div><div class="menu"><ul id="lxml-section"><li><span class="section title">lxml</span><ul class="menu foreign" id="index-menu"><li class="menu title"><a href="index.html">lxml</a><ul class="submenu"><li class="menu item"><a href="index.html#introduction">Introduction</a></li><li class="menu item"><a href="index.html#support-the-project">Support the project</a></li><li class="menu item"><a href="index.html#documentation">Documentation</a></li><li class="menu item"><a href="index.html#download">Download</a></li><li class="menu item"><a href="index.html#mailing-list">Mailing list</a></li><li class="menu item"><a href="index.html#bug-tracker">Bug tracker</a></li><li class="menu item"><a href="index.html#license">License</a></li><li class="menu item"><a href="index.html#old-versions">Old Versions</a></li><li class="menu item"><a href="index.html#project-income-report">Project income report</a></li><li class="menu item"><a href="index.html#legal-notice-for-donations">Legal Notice for Donations</a></li></ul></li></ul><ul class="menu foreign" id="intro-menu"><li class="menu title"><a href="intro.html">Why lxml?</a><ul class="submenu"><li class="menu item"><a href="intro.html#motto">Motto</a></li><li class="menu item"><a href="intro.html#aims">Aims</a></li></ul></li></ul><ul class="menu foreign" id="installation-menu"><li class="menu title"><a href="installation.html">Installing lxml</a><ul class="submenu"><li class="menu item"><a href="installation.html#where-to-get-it">Where to get it</a></li><li class="menu item"><a href="installation.html#requirements">Requirements</a></li><li class="menu item"><a href="installation.html#installation">Installation</a></li><li class="menu item"><a href="installation.html#building-lxml-from-dev-sources">Building lxml from dev sources</a></li><li class="menu item"><a href="installation.html#using-lxml-with-python-libxml2">Using lxml with 
python-libxml2</a></li><li class="menu item"><a href="installation.html#source-builds-on-ms-windows">Source builds on MS Windows</a></li><li class="menu item"><a href="installation.html#source-builds-on-macos-x">Source builds on MacOS-X</a></li></ul></li></ul><ul class="menu foreign" id="performance-menu"><li class="menu title"><a href="performance.html">Benchmarks and Speed</a><ul class="submenu"><li class="menu item"><a href="performance.html#general-notes">General notes</a></li><li class="menu item"><a href="performance.html#how-to-read-the-timings">How to read the timings</a></li><li class="menu item"><a href="performance.html#parsing-and-serialising">Parsing and Serialising</a></li><li class="menu item"><a href="performance.html#the-elementtree-api">The ElementTree API</a></li><li class="menu item"><a href="performance.html#xpath">XPath</a></li><li class="menu item"><a href="performance.html#a-longer-example">A longer example</a></li><li class="menu item"><a href="performance.html#lxml-objectify">lxml.objectify</a></li></ul></li></ul><ul class="menu foreign" id="compatibility-menu"><li class="menu title"><a href="compatibility.html">ElementTree compatibility of lxml.etree</a></li></ul><ul class="menu foreign" id="FAQ-menu"><li class="menu title"><a href="FAQ.html">lxml FAQ - Frequently Asked Questions</a><ul class="submenu"><li class="menu item"><a href="FAQ.html#general-questions">General Questions</a></li><li class="menu item"><a href="FAQ.html#installation">Installation</a></li><li class="menu item"><a href="FAQ.html#contributing">Contributing</a></li><li class="menu item"><a href="FAQ.html#bugs">Bugs</a></li><li class="menu item"><a href="FAQ.html#id1">Threading</a></li><li class="menu item"><a href="FAQ.html#parsing-and-serialisation">Parsing and Serialisation</a></li><li class="menu item"><a href="FAQ.html#xpath-and-document-traversal">XPath and Document Traversal</a></li></ul></li></ul></li></ul><ul id="Developing with lxml-section"><li><span 
class="section title">Developing with lxml</span><ul class="menu foreign" id="tutorial-menu"><li class="menu title"><a href="tutorial.html">The lxml.etree Tutorial</a><ul class="submenu"><li class="menu item"><a href="tutorial.html#the-element-class">The Element class</a></li><li class="menu item"><a href="tutorial.html#the-elementtree-class">The ElementTree class</a></li><li class="menu item"><a href="tutorial.html#parsing-from-strings-and-files">Parsing from strings and files</a></li><li class="menu item"><a href="tutorial.html#namespaces">Namespaces</a></li><li class="menu item"><a href="tutorial.html#the-e-factory">The E-factory</a></li><li class="menu item"><a href="tutorial.html#elementpath">ElementPath</a></li></ul></li></ul><ul class="menu foreign" id="api index-menu"><li class="menu title"><a href="api/index.html">API reference</a></li></ul><ul class="menu foreign" id="api-menu"><li class="menu title"><a href="api.html">APIs specific to lxml.etree</a><ul class="submenu"><li class="menu item"><a href="api.html#lxml-etree">lxml.etree</a></li><li class="menu item"><a href="api.html#other-element-apis">Other Element APIs</a></li><li class="menu item"><a href="api.html#trees-and-documents">Trees and Documents</a></li><li class="menu item"><a href="api.html#iteration">Iteration</a></li><li class="menu item"><a href="api.html#error-handling-on-exceptions">Error handling on exceptions</a></li><li class="menu item"><a href="api.html#error-logging">Error logging</a></li><li class="menu item"><a href="api.html#serialisation">Serialisation</a></li><li class="menu item"><a href="api.html#incremental-xml-generation">Incremental XML generation</a></li><li class="menu item"><a href="api.html#cdata">CDATA</a></li><li class="menu item"><a href="api.html#xinclude-and-elementinclude">XInclude and ElementInclude</a></li></ul></li></ul><ul class="menu foreign" id="parsing-menu"><li class="menu title"><a href="parsing.html">Parsing XML and HTML with lxml</a><ul 
class="submenu"><li class="menu item"><a href="parsing.html#parsers">Parsers</a></li><li class="menu item"><a href="parsing.html#the-target-parser-interface">The target parser interface</a></li><li class="menu item"><a href="parsing.html#the-feed-parser-interface">The feed parser interface</a></li><li class="menu item"><a href="parsing.html#incremental-event-parsing">Incremental event parsing</a></li><li class="menu item"><a href="parsing.html#iterparse-and-iterwalk">iterparse and iterwalk</a></li><li class="menu item"><a href="parsing.html#python-unicode-strings">Python unicode strings</a></li></ul></li></ul><ul class="menu current" id="validation-menu"><li class="menu title"><a href="validation.html">Validation with lxml</a><ul class="submenu"><li class="menu item"><a href="validation.html#validation-at-parse-time">Validation at parse time</a></li><li class="menu item"><a href="validation.html#id1">DTD</a></li><li class="menu item"><a href="validation.html#relaxng">RelaxNG</a></li><li class="menu item"><a href="validation.html#xmlschema">XMLSchema</a></li><li class="menu item"><a href="validation.html#id2">Schematron</a></li><li class="menu item"><a href="validation.html#id3">(Pre-ISO-Schematron)</a></li></ul></li></ul><ul class="menu foreign" id="xpathxslt-menu"><li class="menu title"><a href="xpathxslt.html">XPath and XSLT with lxml</a><ul class="submenu"><li class="menu item"><a href="xpathxslt.html#xpath">XPath</a></li><li class="menu item"><a href="xpathxslt.html#xslt">XSLT</a></li></ul></li></ul><ul class="menu foreign" id="objectify-menu"><li class="menu title"><a href="objectify.html">lxml.objectify</a><ul class="submenu"><li class="menu item"><a href="objectify.html#the-lxml-objectify-api">The lxml.objectify API</a></li><li class="menu item"><a href="objectify.html#asserting-a-schema">Asserting a Schema</a></li><li class="menu item"><a href="objectify.html#objectpath">ObjectPath</a></li><li class="menu item"><a 
href="objectify.html#python-data-types">Python data types</a></li><li class="menu item"><a href="objectify.html#how-data-types-are-matched">How data types are matched</a></li><li class="menu item"><a href="objectify.html#what-is-different-from-lxml-etree">What is different from lxml.etree?</a></li></ul></li></ul><ul class="menu foreign" id="lxmlhtml-menu"><li class="menu title"><a href="lxmlhtml.html">lxml.html</a><ul class="submenu"><li class="menu item"><a href="lxmlhtml.html#parsing-html">Parsing HTML</a></li><li class="menu item"><a href="lxmlhtml.html#html-element-methods">HTML Element Methods</a></li><li class="menu item"><a href="lxmlhtml.html#running-html-doctests">Running HTML doctests</a></li><li class="menu item"><a href="lxmlhtml.html#creating-html-with-the-e-factory">Creating HTML with the E-factory</a></li><li class="menu item"><a href="lxmlhtml.html#working-with-links">Working with links</a></li><li class="menu item"><a href="lxmlhtml.html#forms">Forms</a></li><li class="menu item"><a href="lxmlhtml.html#cleaning-up-html">Cleaning up HTML</a></li><li class="menu item"><a href="lxmlhtml.html#html-diff">HTML Diff</a></li><li class="menu item"><a href="lxmlhtml.html#examples">Examples</a></li></ul></li></ul><ul class="menu foreign" id="cssselect-menu"><li class="menu title"><a href="cssselect.html">lxml.cssselect</a><ul class="submenu"><li class="menu item"><a href="cssselect.html#the-cssselector-class">The CSSSelector class</a></li><li class="menu item"><a href="cssselect.html#the-cssselect-method">The cssselect method</a></li><li class="menu item"><a href="cssselect.html#supported-selectors">Supported Selectors</a></li><li class="menu item"><a href="cssselect.html#namespaces">Namespaces</a></li></ul></li></ul><ul class="menu foreign" id="elementsoup-menu"><li class="menu title"><a href="elementsoup.html">BeautifulSoup Parser</a><ul class="submenu"><li class="menu item"><a href="elementsoup.html#parsing-with-the-soupparser">Parsing with the 
soupparser</a></li><li class="menu item"><a href="elementsoup.html#entity-handling">Entity handling</a></li><li class="menu item"><a href="elementsoup.html#using-soupparser-as-a-fallback">Using soupparser as a fallback</a></li><li class="menu item"><a href="elementsoup.html#using-only-the-encoding-detection">Using only the encoding detection</a></li></ul></li></ul><ul class="menu foreign" id="html5parser-menu"><li class="menu title"><a href="html5parser.html">html5lib Parser</a><ul class="submenu"><li class="menu item"><a href="html5parser.html#differences-to-regular-html-parsing">Differences to regular HTML parsing</a></li><li class="menu item"><a href="html5parser.html#function-reference">Function Reference</a></li></ul></li></ul></li></ul><ul id="Extending lxml-section"><li><span class="section title">Extending lxml</span><ul class="menu foreign" id="resolvers-menu"><li class="menu title"><a href="resolvers.html">Document loading and URL resolving</a><ul class="submenu"><li class="menu item"><a href="resolvers.html#xml-catalogs">XML Catalogs</a></li><li class="menu item"><a href="resolvers.html#uri-resolvers">URI Resolvers</a></li><li class="menu item"><a href="resolvers.html#document-loading-in-context">Document loading in context</a></li><li class="menu item"><a href="resolvers.html#i-o-access-control-in-xslt">I/O access control in XSLT</a></li></ul></li></ul><ul class="menu foreign" id="extensions-menu"><li class="menu title"><a href="extensions.html">Python extensions for XPath and XSLT</a><ul class="submenu"><li class="menu item"><a href="extensions.html#xpath-extension-functions">XPath Extension functions</a></li><li class="menu item"><a href="extensions.html#xslt-extension-elements">XSLT extension elements</a></li></ul></li></ul><ul class="menu foreign" id="element classes-menu"><li class="menu title"><a href="element_classes.html">Using custom Element classes in lxml</a><ul class="submenu"><li class="menu item"><a 
href="element_classes.html#background-on-element-proxies">Background on Element proxies</a></li><li class="menu item"><a href="element_classes.html#element-initialization">Element initialization</a></li><li class="menu item"><a href="element_classes.html#setting-up-a-class-lookup-scheme">Setting up a class lookup scheme</a></li><li class="menu item"><a href="element_classes.html#generating-xml-with-custom-classes">Generating XML with custom classes</a></li><li class="menu item"><a href="element_classes.html#id1">Implementing namespaces</a></li></ul></li></ul><ul class="menu foreign" id="sax-menu"><li class="menu title"><a href="sax.html">Sax support</a><ul class="submenu"><li class="menu item"><a href="sax.html#building-a-tree-from-sax-events">Building a tree from SAX events</a></li><li class="menu item"><a href="sax.html#producing-sax-events-from-an-elementtree-or-element">Producing SAX events from an ElementTree or Element</a></li><li class="menu item"><a href="sax.html#interfacing-with-pulldom-minidom">Interfacing with pulldom/minidom</a></li></ul></li></ul><ul class="menu foreign" id="capi-menu"><li class="menu title"><a href="capi.html">The public C-API of lxml.etree</a><ul class="submenu"><li class="menu item"><a href="capi.html#passing-generated-trees-through-python">Passing generated trees through Python</a></li><li class="menu item"><a href="capi.html#writing-external-modules-in-cython">Writing external modules in Cython</a></li><li class="menu item"><a href="capi.html#writing-external-modules-in-c">Writing external modules in C</a></li></ul></li></ul></li></ul><ul id="Developing lxml-section"><li><span class="section title">Developing lxml</span><ul class="menu foreign" id="build-menu"><li class="menu title"><a href="build.html">How to build lxml from source</a><ul class="submenu"><li class="menu item"><a href="build.html#cython">Cython</a></li><li class="menu item"><a href="build.html#github-git-and-hg">Github, git and hg</a></li><li class="menu item"><a 
href="build.html#building-the-sources">Building the sources</a></li><li class="menu item"><a href="build.html#running-the-tests-and-reporting-errors">Running the tests and reporting errors</a></li><li class="menu item"><a href="build.html#building-an-egg-or-wheel">Building an egg or wheel</a></li><li class="menu item"><a href="build.html#building-lxml-on-macos-x">Building lxml on MacOS-X</a></li><li class="menu item"><a href="build.html#static-linking-on-windows">Static linking on Windows</a></li><li class="menu item"><a href="build.html#building-debian-packages-from-svn-sources">Building Debian packages from SVN sources</a></li></ul></li></ul><ul class="menu foreign" id="lxml source howto-menu"><li class="menu title"><a href="lxml-source-howto.html">How to read the source of lxml</a><ul class="submenu"><li class="menu item"><a href="lxml-source-howto.html#what-is-cython">What is Cython?</a></li><li class="menu item"><a href="lxml-source-howto.html#where-to-start">Where to start?</a></li><li class="menu item"><a href="lxml-source-howto.html#lxml-etree">lxml.etree</a></li><li class="menu item"><a href="lxml-source-howto.html#python-modules">Python modules</a></li><li class="menu item"><a href="lxml-source-howto.html#lxml-objectify">lxml.objectify</a></li><li class="menu item"><a href="lxml-source-howto.html#lxml-html">lxml.html</a></li></ul></li></ul><ul class="menu foreign" id="changes 4 5 2-menu"><li class="menu title"><a href="changes-4.5.2.html">Release Changelog</a></li></ul><ul class="menu foreign" id="credits-menu"><li class="menu title"><a href="credits.html">Credits</a><ul class="submenu"><li class="menu item"><a href="credits.html#main-contributors">Main contributors</a></li><li class="menu item"><a href="credits.html#special-thanks-goes-to">Special thanks goes to:</a></li></ul></li></ul></li><li><a href="/sitemap.html">Sitemap</a></li></ul></div></div><h1 class="title">Validation with lxml</h1>
28
29 <p>Apart from the built-in DTD support in parsers, lxml currently supports three
30 schema languages: <a class="reference external" href="http://en.wikipedia.org/wiki/Document_Type_Definition">DTD</a>, <a class="reference external" href="http://www.relaxng.org/">Relax NG</a> and <a class="reference external" href="http://www.w3.org/XML/Schema">XML Schema</a>.  All three provide
31 identical APIs in lxml, represented by validator classes with the obvious
32 names.</p>
33 <p>lxml also provides support for ISO-<a class="reference external" href="http://www.schematron.com">Schematron</a>, based on the pure-XSLT
34 <a class="reference external" href="http://www.schematron.com/implementation.html">skeleton implementation</a> of Schematron.</p>
35 <p>There is also basic support for <cite>pre-ISO-Schematron</cite> through the libxml2
36 Schematron features. However, this does not currently support error reporting
37 in the validation phase due to insufficiencies in the implementation as of
38 libxml2 2.6.30.</p>
39 <div class="contents topic" id="contents">
40 <p class="topic-title first">Contents</p>
41 <ul class="simple">
42 <li><a class="reference internal" href="#validation-at-parse-time" id="id4">Validation at parse time</a></li>
43 <li><a class="reference internal" href="#id1" id="id5">DTD</a></li>
44 <li><a class="reference internal" href="#relaxng" id="id6">RelaxNG</a></li>
45 <li><a class="reference internal" href="#xmlschema" id="id7">XMLSchema</a></li>
46 <li><a class="reference internal" href="#id2" id="id8">Schematron</a></li>
47 <li><a class="reference internal" href="#id3" id="id9">(Pre-ISO-Schematron)</a></li>
48 </ul>
49 </div>
50 <p>The usual setup procedure:</p>
51 <div class="syntax"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">etree</span>
52 </pre></div>
53 <div class="section" id="validation-at-parse-time">
54 <h1>Validation at parse time</h1>
55 <p>The parser in lxml can do on-the-fly validation of a document against
56 a DTD or an XML schema.  The DTD is retrieved automatically based on
57 the DOCTYPE of the parsed document.  All you have to do is use a
58 parser that has DTD validation enabled:</p>
59 <div class="syntax"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">parser</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">XMLParser</span><span class="p">(</span><span class="n">dtd_validation</span><span class="o">=</span><span class="bp">True</span><span class="p">)</span>
60 </pre></div>
61 <p>Obviously, a request for validation enables the DTD loading feature.
62 There are two other options that enable loading the DTD, but that do
63 not perform any validation.  The first is the <tt class="docutils literal">load_dtd</tt> keyword
64 option, which simply loads the DTD into the parser and makes it
65 available to the document as the external subset.  You can retrieve the
66 DTD from the parsed document using the <tt class="docutils literal">docinfo</tt> property of the
67 resulting ElementTree object.  The internal subset is available as
68 <tt class="docutils literal">internalDTD</tt>, the external subset is provided as <tt class="docutils literal">externalDTD</tt>.</p>
69 <p>The third way to activate DTD loading is with the
70 <tt class="docutils literal">attribute_defaults</tt> option, which loads the DTD and weaves
71 attribute default values into the document.  Again, no validation is
72 performed unless explicitly requested.</p>
73 <p>XML schema is supported in a similar way, but requires an explicit
74 schema to be provided:</p>
75 <div class="syntax"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">schema_root</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">XML</span><span class="p">(</span><span class="s1">'''</span><span class="se">\</span>
76 <span class="gp">... </span><span class="s1">  &lt;xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"&gt;</span>
77 <span class="gp">... </span><span class="s1">    &lt;xsd:element name="a" type="xsd:integer"/&gt;</span>
78 <span class="gp">... </span><span class="s1">  &lt;/xsd:schema&gt;</span>
79 <span class="gp">... </span><span class="s1">'''</span><span class="p">)</span>
80 <span class="gp">&gt;&gt;&gt; </span><span class="n">schema</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">XMLSchema</span><span class="p">(</span><span class="n">schema_root</span><span class="p">)</span>
81
82 <span class="gp">&gt;&gt;&gt; </span><span class="n">parser</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">XMLParser</span><span class="p">(</span><span class="n">schema</span> <span class="o">=</span> <span class="n">schema</span><span class="p">)</span>
83 <span class="gp">&gt;&gt;&gt; </span><span class="n">root</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="s2">"&lt;a&gt;5&lt;/a&gt;"</span><span class="p">,</span> <span class="n">parser</span><span class="p">)</span>
84 </pre></div>
85 <p>If the validation fails (be it for a DTD or an XML schema), the parser
86 will raise an exception:</p>
87 <div class="syntax"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">root</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">fromstring</span><span class="p">(</span><span class="s2">"&lt;a&gt;no int&lt;/a&gt;"</span><span class="p">,</span> <span class="n">parser</span><span class="p">)</span>  <span class="c1"># doctest: +ELLIPSIS</span>
88 <span class="gt">Traceback (most recent call last):</span>
89 <span class="gr">lxml.etree.XMLSyntaxError</span>: <span class="n">Element 'a': 'no int' is not a valid value of the atomic type 'xs:integer'...</span>
90 </pre></div>
91 <p>If you want the parser to succeed regardless of the outcome of the
92 validation, you should use a non-validating parser and run the
93 validation separately after parsing the document.</p>
94 </div>
95 <div class="section" id="id1">
96 <h1>DTD</h1>
97 <p>As described above, the parser support for DTDs depends on internal or
98 external subsets of the XML file.  This means that the XML file itself
99 must either contain a DTD or must reference a DTD to make this work.
100 If you want to validate an XML document against a DTD that is not
101 referenced by the document itself, you can use the <tt class="docutils literal">DTD</tt> class.</p>
102 <p>To use the <tt class="docutils literal">DTD</tt> class, you must first pass a filename or file-like object
103 into the constructor to parse a DTD:</p>
104 <div class="syntax"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">f</span> <span class="o">=</span> <span class="n">StringIO</span><span class="p">(</span><span class="s2">"&lt;!ELEMENT b EMPTY&gt;"</span><span class="p">)</span>
105 <span class="gp">&gt;&gt;&gt; </span><span class="n">dtd</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">DTD</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
106 </pre></div>
107 <p>Now you can use it to validate documents:</p>
108 <div class="syntax"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">root</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">XML</span><span class="p">(</span><span class="s2">"&lt;b/&gt;"</span><span class="p">)</span>
109 <span class="gp">&gt;&gt;&gt; </span><span class="k">print</span><span class="p">(</span><span class="n">dtd</span><span class="o">.</span><span class="n">validate</span><span class="p">(</span><span class="n">root</span><span class="p">))</span>
110 <span class="go">True</span>
111
112 <span class="gp">&gt;&gt;&gt; </span><span class="n">root</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">XML</span><span class="p">(</span><span class="s2">"&lt;b&gt;&lt;a/&gt;&lt;/b&gt;"</span><span class="p">)</span>
113 <span class="gp">&gt;&gt;&gt; </span><span class="k">print</span><span class="p">(</span><span class="n">dtd</span><span class="o">.</span><span class="n">validate</span><span class="p">(</span><span class="n">root</span><span class="p">))</span>
114 <span class="go">False</span>
115 </pre></div>
116 <p>The reason for the validation failure can be found in the error log:</p>
117 <div class="syntax"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="k">print</span><span class="p">(</span><span class="n">dtd</span><span class="o">.</span><span class="n">error_log</span><span class="o">.</span><span class="n">filter_from_errors</span><span class="p">()[</span><span class="mi">0</span><span class="p">])</span>
118 <span class="go">&lt;string&gt;:1:0:ERROR:VALID:DTD_NOT_EMPTY: Element b was declared EMPTY this one has content</span>
119 </pre></div>
120 <p>As an alternative to parsing from a file, you can use the
121 <tt class="docutils literal">external_id</tt> keyword argument to parse from a catalog.  The
122 following example reads the DocBook DTD in version 4.2, if available
123 in the system catalog:</p>
124 <div class="syntax"><pre><span></span><span class="n">dtd</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">DTD</span><span class="p">(</span><span class="n">external_id</span> <span class="o">=</span> <span class="s2">"-//OASIS//DTD DocBook XML V4.2//EN"</span><span class="p">)</span>
125 </pre></div>
126 <p>The DTD information is available as attributes on the DTD object. The method
127 <tt class="docutils literal">iterelements</tt> provides an iterator over the element declarations:</p>
128 <div class="syntax"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">dtd</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">DTD</span><span class="p">(</span><span class="n">StringIO</span><span class="p">(</span><span class="s1">'&lt;!ELEMENT a EMPTY&gt;&lt;!ELEMENT b EMPTY&gt;'</span><span class="p">))</span>
129 <span class="gp">&gt;&gt;&gt; </span><span class="k">for</span> <span class="n">el</span> <span class="ow">in</span> <span class="n">dtd</span><span class="o">.</span><span class="n">iterelements</span><span class="p">():</span>
130 <span class="gp">... </span>    <span class="k">print</span><span class="p">(</span><span class="n">el</span><span class="o">.</span><span class="n">name</span><span class="p">)</span>
131 <span class="go">a</span>
132 <span class="go">b</span>
133 </pre></div>
134 <p>The method <tt class="docutils literal">elements</tt> returns the element declarations as a list:</p>
135 <div class="syntax"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">dtd</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">DTD</span><span class="p">(</span><span class="n">StringIO</span><span class="p">(</span><span class="s1">'&lt;!ELEMENT a EMPTY&gt;&lt;!ELEMENT b EMPTY&gt;'</span><span class="p">))</span>
136 <span class="gp">&gt;&gt;&gt; </span><span class="nb">len</span><span class="p">(</span><span class="n">dtd</span><span class="o">.</span><span class="n">elements</span><span class="p">())</span>
137 <span class="go">2</span>
138 </pre></div>
139 <p>An element declaration object provides the following attributes/methods:</p>
140 <blockquote>
141 <ul class="simple">
142 <li><tt class="docutils literal">name</tt>: The name of the element;</li>
143 <li><tt class="docutils literal">type</tt>: The element type, one of "undefined", "empty", "any", "mixed", or "element";</li>
144 <li><tt class="docutils literal">content</tt>: Element content declaration (see below);</li>
145 <li><tt class="docutils literal">iterattributes()</tt>: Return an iterator over attribute declarations (see below);</li>
146 <li><tt class="docutils literal">attributes()</tt>: Return a list of attribute declarations.</li>
147 </ul>
148 </blockquote>
149 <p>The <tt class="docutils literal">content</tt> attribute contains information about the content model of the element.
150 These element content declaration objects form a binary tree (via the <tt class="docutils literal">left</tt> and <tt class="docutils literal">right</tt>
151 attributes), which makes it possible to reconstruct the content model expression. Here's a
152 list of all attributes:</p>
153 <blockquote>
154 <ul class="simple">
155 <li><tt class="docutils literal">name</tt>: If this object represents an element in the content model expression,
156 <tt class="docutils literal">name</tt> is the name of the element, otherwise it is <tt class="docutils literal">None</tt>;</li>
157 <li><tt class="docutils literal">type</tt>: The type of the node: one of "pcdata", "element", "seq", or "or";</li>
158 <li><tt class="docutils literal">occur</tt>: How often this element (or this combination of elements) may occur:
159 one of "once", "opt", "mult", or "plus";</li>
160 <li><tt class="docutils literal">left</tt>: The left-hand subexpression;</li>
161 <li><tt class="docutils literal">right</tt>: The right-hand subexpression.</li>
162 </ul>
163 </blockquote>
164 <p>For example, the element declaration <tt class="docutils literal">&lt;!ELEMENT a <span class="pre">(a|b)+&gt;</span></tt> results
165 in the following element content declaration objects:</p>
166 <div class="syntax"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">dtd</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">DTD</span><span class="p">(</span><span class="n">StringIO</span><span class="p">(</span><span class="s1">'&lt;!ELEMENT a (a|b)+&gt;'</span><span class="p">))</span>
167 <span class="gp">&gt;&gt;&gt; </span><span class="n">content</span> <span class="o">=</span> <span class="n">dtd</span><span class="o">.</span><span class="n">elements</span><span class="p">()[</span><span class="mi">0</span><span class="p">]</span><span class="o">.</span><span class="n">content</span>
168 <span class="gp">&gt;&gt;&gt; </span><span class="n">content</span><span class="o">.</span><span class="n">type</span><span class="p">,</span> <span class="n">content</span><span class="o">.</span><span class="n">occur</span><span class="p">,</span> <span class="n">content</span><span class="o">.</span><span class="n">name</span>
169 <span class="go">('or', 'plus', None)</span>
170
171 <span class="gp">&gt;&gt;&gt; </span><span class="n">left</span><span class="p">,</span> <span class="n">right</span> <span class="o">=</span> <span class="n">content</span><span class="o">.</span><span class="n">left</span><span class="p">,</span> <span class="n">content</span><span class="o">.</span><span class="n">right</span>
172 <span class="gp">&gt;&gt;&gt; </span><span class="n">left</span><span class="o">.</span><span class="n">type</span><span class="p">,</span> <span class="n">left</span><span class="o">.</span><span class="n">occur</span><span class="p">,</span> <span class="n">left</span><span class="o">.</span><span class="n">name</span>
173 <span class="go">('element', 'once', 'a')</span>
174 <span class="gp">&gt;&gt;&gt; </span><span class="n">right</span><span class="o">.</span><span class="n">type</span><span class="p">,</span> <span class="n">right</span><span class="o">.</span><span class="n">occur</span><span class="p">,</span> <span class="n">right</span><span class="o">.</span><span class="n">name</span>
175 <span class="go">('element', 'once', 'b')</span>
176 </pre></div>
177 <p>Attribute declarations have the following attributes/methods:</p>
178 <blockquote>
179 <ul class="simple">
180 <li><tt class="docutils literal">name</tt>: The name of the attribute;</li>
181 <li><tt class="docutils literal">elemname</tt>: The name of the element the attribute belongs to;</li>
182 <li><tt class="docutils literal">type</tt>: The attribute type, one of "cdata", "id", "idref", "idrefs", "entity",
183 "entities", "nmtoken", "nmtokens", "enumeration", or "notation";</li>
184 <li><tt class="docutils literal">default</tt>: The type of the default value, one of "none", "required", "implied",
185 or "fixed";</li>
186 <li><tt class="docutils literal">defaultValue</tt>: The default value;</li>
187 <li><tt class="docutils literal">itervalues()</tt>: Return an iterator over the allowed attribute values (if the attribute
188 is of type "enumeration");</li>
189 <li><tt class="docutils literal">values()</tt>: Return a list of allowed attribute values.</li>
190 </ul>
191 </blockquote>
192 <p>Entity declarations are available via the <tt class="docutils literal">iterentities</tt> and <tt class="docutils literal">entities</tt> methods:</p>
193 <blockquote>
194 <pre class="doctest-block">
195 &gt;&gt;&gt; dtd = etree.DTD(StringIO('&lt;!ENTITY hurz "&amp;#x40;"&gt;'))
196 &gt;&gt;&gt; entity = dtd.entities()[0]
197 &gt;&gt;&gt; entity.name, entity.orig, entity.content
198 ('hurz', '&amp;#x40;', '@')
199 </pre>
200 </blockquote>
201 </div>
202 <div class="section" id="relaxng">
203 <h1>RelaxNG</h1>
204 <p>The <tt class="docutils literal">RelaxNG</tt> class takes an ElementTree object to construct a Relax NG
205 validator:</p>
206 <div class="syntax"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">f</span> <span class="o">=</span> <span class="n">StringIO</span><span class="p">(</span><span class="s1">'''</span><span class="se">\</span>
207 <span class="gp">... </span><span class="s1">&lt;element name="a" xmlns="http://relaxng.org/ns/structure/1.0"&gt;</span>
208 <span class="gp">... </span><span class="s1"> &lt;zeroOrMore&gt;</span>
209 <span class="gp">... </span><span class="s1">    &lt;element name="b"&gt;</span>
210 <span class="gp">... </span><span class="s1">      &lt;text /&gt;</span>
211 <span class="gp">... </span><span class="s1">    &lt;/element&gt;</span>
212 <span class="gp">... </span><span class="s1"> &lt;/zeroOrMore&gt;</span>
213 <span class="gp">... </span><span class="s1">&lt;/element&gt;</span>
214 <span class="gp">... </span><span class="s1">'''</span><span class="p">)</span>
215 <span class="gp">&gt;&gt;&gt; </span><span class="n">relaxng_doc</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
216 <span class="gp">&gt;&gt;&gt; </span><span class="n">relaxng</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">RelaxNG</span><span class="p">(</span><span class="n">relaxng_doc</span><span class="p">)</span>
217 </pre></div>
218 <p>Alternatively, pass a filename to the <tt class="docutils literal">file</tt> keyword argument to parse from
219 a file.  This also enables correct handling of include files from within the
220 RelaxNG parser.</p>
221 <p>You can then validate some ElementTree document against the schema. You'll get
222 back True if the document is valid against the Relax NG schema, and False if
223 not:</p>
224 <div class="syntax"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">valid</span> <span class="o">=</span> <span class="n">StringIO</span><span class="p">(</span><span class="s1">'&lt;a&gt;&lt;b&gt;&lt;/b&gt;&lt;/a&gt;'</span><span class="p">)</span>
225 <span class="gp">&gt;&gt;&gt; </span><span class="n">doc</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">valid</span><span class="p">)</span>
226 <span class="gp">&gt;&gt;&gt; </span><span class="n">relaxng</span><span class="o">.</span><span class="n">validate</span><span class="p">(</span><span class="n">doc</span><span class="p">)</span>
227 <span class="go">True</span>
228
229 <span class="gp">&gt;&gt;&gt; </span><span class="n">invalid</span> <span class="o">=</span> <span class="n">StringIO</span><span class="p">(</span><span class="s1">'&lt;a&gt;&lt;c&gt;&lt;/c&gt;&lt;/a&gt;'</span><span class="p">)</span>
230 <span class="gp">&gt;&gt;&gt; </span><span class="n">doc2</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">invalid</span><span class="p">)</span>
231 <span class="gp">&gt;&gt;&gt; </span><span class="n">relaxng</span><span class="o">.</span><span class="n">validate</span><span class="p">(</span><span class="n">doc2</span><span class="p">)</span>
232 <span class="go">False</span>
233 </pre></div>
234 <p>Calling the schema object has the same effect as calling its validate
235 method. This is sometimes used in conditional statements:</p>
236 <div class="syntax"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">invalid</span> <span class="o">=</span> <span class="n">StringIO</span><span class="p">(</span><span class="s1">'&lt;a&gt;&lt;c&gt;&lt;/c&gt;&lt;/a&gt;'</span><span class="p">)</span>
237 <span class="gp">&gt;&gt;&gt; </span><span class="n">doc2</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">invalid</span><span class="p">)</span>
238 <span class="gp">&gt;&gt;&gt; </span><span class="k">if</span> <span class="ow">not</span> <span class="n">relaxng</span><span class="p">(</span><span class="n">doc2</span><span class="p">):</span>
239 <span class="gp">... </span>    <span class="k">print</span><span class="p">(</span><span class="s2">"invalid!"</span><span class="p">)</span>
240 <span class="go">invalid!</span>
241 </pre></div>
242 <p>If you prefer getting an exception when validating, you can use the
243 <tt class="docutils literal">assert_</tt> or <tt class="docutils literal">assertValid</tt> methods:</p>
244 <div class="syntax"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">relaxng</span><span class="o">.</span><span class="n">assertValid</span><span class="p">(</span><span class="n">doc2</span><span class="p">)</span>
245 <span class="gt">Traceback (most recent call last):</span>
246   <span class="c">...</span>
247 <span class="gr">lxml.etree.DocumentInvalid</span>: <span class="n">Did not expect element c there, line 1</span>
248
249 <span class="gp">&gt;&gt;&gt; </span><span class="n">relaxng</span><span class="o">.</span><span class="n">assert_</span><span class="p">(</span><span class="n">doc2</span><span class="p">)</span>
250 <span class="gt">Traceback (most recent call last):</span>
251   <span class="c">...</span>
252 <span class="gr">AssertionError</span>: <span class="n">Did not expect element c there, line 1</span>
253 </pre></div>
254 <p>If you want to find out why the validation failed in the second case, you can
255 look up the error log of the validation process and check it for relevant
256 messages:</p>
257 <div class="syntax"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">log</span> <span class="o">=</span> <span class="n">relaxng</span><span class="o">.</span><span class="n">error_log</span>
258 <span class="gp">&gt;&gt;&gt; </span><span class="k">print</span><span class="p">(</span><span class="n">log</span><span class="o">.</span><span class="n">last_error</span><span class="p">)</span>
259 <span class="go">&lt;string&gt;:1:0:ERROR:RELAXNGV:RELAXNG_ERR_ELEMWRONG: Did not expect element c there</span>
260 </pre></div>
261 <p>You can see that the error (ERROR) happened during RelaxNG validation
262 (RELAXNGV).  The message then tells you what went wrong.  You can also
263 look at the error domain and its type directly:</p>
264 <div class="syntax"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">error</span> <span class="o">=</span> <span class="n">log</span><span class="o">.</span><span class="n">last_error</span>
265 <span class="gp">&gt;&gt;&gt; </span><span class="k">print</span><span class="p">(</span><span class="n">error</span><span class="o">.</span><span class="n">domain_name</span><span class="p">)</span>
266 <span class="go">RELAXNGV</span>
267 <span class="gp">&gt;&gt;&gt; </span><span class="k">print</span><span class="p">(</span><span class="n">error</span><span class="o">.</span><span class="n">type_name</span><span class="p">)</span>
268 <span class="go">RELAXNG_ERR_ELEMWRONG</span>
269 </pre></div>
270 <p>Note that this error log is local to the RelaxNG object.  It will only
271 contain log entries that appeared during the validation.</p>
272 <p>Similar to XSLT, there's also a less efficient but easier shortcut method to
273 do one-shot RelaxNG validation:</p>
274 <div class="syntax"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">doc</span><span class="o">.</span><span class="n">relaxng</span><span class="p">(</span><span class="n">relaxng_doc</span><span class="p">)</span>
275 <span class="go">True</span>
276 <span class="gp">&gt;&gt;&gt; </span><span class="n">doc2</span><span class="o">.</span><span class="n">relaxng</span><span class="p">(</span><span class="n">relaxng_doc</span><span class="p">)</span>
277 <span class="go">False</span>
278 </pre></div>
279 <p>libxml2 does not currently support the <a class="reference external" href="http://relaxng.org/compact-tutorial.html">RelaxNG Compact Syntax</a>.
280 However, if <a class="reference external" href="https://pypi.python.org/pypi/rnc2rng">rnc2rng</a> is installed, lxml 3.6 and later can use it
281 internally to parse the input schema.  It recognises the <tt class="docutils literal">.rnc</tt> file
282 extension and also allows parsing an RNC schema from a string using
283 <tt class="docutils literal">RelaxNG.from_rnc_string()</tt>.</p>
284 <p>Alternatively, the <a class="reference external" href="http://www.thaiopensource.com/relaxng/trang.html">trang</a> translator can convert the compact syntax
285 to the XML syntax, which can then be used with lxml.</p>
286 </div>
287 <div class="section" id="xmlschema">
288 <h1>XMLSchema</h1>
289 <p>lxml.etree also has XML Schema (XSD) support, using the class
290 lxml.etree.XMLSchema.  The API is very similar to the Relax NG and DTD
291 classes.  Pass an ElementTree object to construct an XMLSchema validator:</p>
292 <div class="syntax"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">f</span> <span class="o">=</span> <span class="n">StringIO</span><span class="p">(</span><span class="s1">'''</span><span class="se">\</span>
293 <span class="gp">... </span><span class="s1">&lt;xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"&gt;</span>
294 <span class="gp">... </span><span class="s1">&lt;xsd:element name="a" type="AType"/&gt;</span>
295 <span class="gp">... </span><span class="s1">&lt;xsd:complexType name="AType"&gt;</span>
296 <span class="gp">... </span><span class="s1">  &lt;xsd:sequence&gt;</span>
297 <span class="gp">... </span><span class="s1">    &lt;xsd:element name="b" type="xsd:string" /&gt;</span>
298 <span class="gp">... </span><span class="s1">  &lt;/xsd:sequence&gt;</span>
299 <span class="gp">... </span><span class="s1">&lt;/xsd:complexType&gt;</span>
300 <span class="gp">... </span><span class="s1">&lt;/xsd:schema&gt;</span>
301 <span class="gp">... </span><span class="s1">'''</span><span class="p">)</span>
302 <span class="gp">&gt;&gt;&gt; </span><span class="n">xmlschema_doc</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
303 <span class="gp">&gt;&gt;&gt; </span><span class="n">xmlschema</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">XMLSchema</span><span class="p">(</span><span class="n">xmlschema_doc</span><span class="p">)</span>
304 </pre></div>
305 <p>You can then validate some ElementTree document with this.  Like with RelaxNG,
306 you'll get back True if the document is valid against the XML schema, and
307 False if not:</p>
308 <div class="syntax"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">valid</span> <span class="o">=</span> <span class="n">StringIO</span><span class="p">(</span><span class="s1">'&lt;a&gt;&lt;b&gt;&lt;/b&gt;&lt;/a&gt;'</span><span class="p">)</span>
309 <span class="gp">&gt;&gt;&gt; </span><span class="n">doc</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">valid</span><span class="p">)</span>
310 <span class="gp">&gt;&gt;&gt; </span><span class="n">xmlschema</span><span class="o">.</span><span class="n">validate</span><span class="p">(</span><span class="n">doc</span><span class="p">)</span>
311 <span class="go">True</span>
312
313 <span class="gp">&gt;&gt;&gt; </span><span class="n">invalid</span> <span class="o">=</span> <span class="n">StringIO</span><span class="p">(</span><span class="s1">'&lt;a&gt;&lt;c&gt;&lt;/c&gt;&lt;/a&gt;'</span><span class="p">)</span>
314 <span class="gp">&gt;&gt;&gt; </span><span class="n">doc2</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">invalid</span><span class="p">)</span>
315 <span class="gp">&gt;&gt;&gt; </span><span class="n">xmlschema</span><span class="o">.</span><span class="n">validate</span><span class="p">(</span><span class="n">doc2</span><span class="p">)</span>
316 <span class="go">False</span>
317 </pre></div>
318 <p>Calling the schema object has the same effect as calling its validate method.
319 This is sometimes used in conditional statements:</p>
320 <div class="syntax"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">invalid</span> <span class="o">=</span> <span class="n">StringIO</span><span class="p">(</span><span class="s1">'&lt;a&gt;&lt;c&gt;&lt;/c&gt;&lt;/a&gt;'</span><span class="p">)</span>
321 <span class="gp">&gt;&gt;&gt; </span><span class="n">doc2</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">invalid</span><span class="p">)</span>
322 <span class="gp">&gt;&gt;&gt; </span><span class="k">if</span> <span class="ow">not</span> <span class="n">xmlschema</span><span class="p">(</span><span class="n">doc2</span><span class="p">):</span>
323 <span class="gp">... </span>    <span class="k">print</span><span class="p">(</span><span class="s2">"invalid!"</span><span class="p">)</span>
324 <span class="go">invalid!</span>
325 </pre></div>
326 <p>If you prefer getting an exception when validating, you can use the
327 <tt class="docutils literal">assert_</tt> or <tt class="docutils literal">assertValid</tt> methods:</p>
328 <div class="syntax"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">xmlschema</span><span class="o">.</span><span class="n">assertValid</span><span class="p">(</span><span class="n">doc2</span><span class="p">)</span>
329 <span class="gt">Traceback (most recent call last):</span>
330   <span class="c">...</span>
331 <span class="gr">lxml.etree.DocumentInvalid</span>: <span class="n">Element 'c': This element is not expected. Expected is ( b )., line 1</span>
332
333 <span class="gp">&gt;&gt;&gt; </span><span class="n">xmlschema</span><span class="o">.</span><span class="n">assert_</span><span class="p">(</span><span class="n">doc2</span><span class="p">)</span>
334 <span class="gt">Traceback (most recent call last):</span>
335   <span class="c">...</span>
336 <span class="gr">AssertionError</span>: <span class="n">Element 'c': This element is not expected. Expected is ( b )., line 1</span>
337 </pre></div>
338 <p>Error reporting works as for the RelaxNG class:</p>
339 <div class="syntax"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">log</span> <span class="o">=</span> <span class="n">xmlschema</span><span class="o">.</span><span class="n">error_log</span>
340 <span class="gp">&gt;&gt;&gt; </span><span class="n">error</span> <span class="o">=</span> <span class="n">log</span><span class="o">.</span><span class="n">last_error</span>
341 <span class="gp">&gt;&gt;&gt; </span><span class="k">print</span><span class="p">(</span><span class="n">error</span><span class="o">.</span><span class="n">domain_name</span><span class="p">)</span>
342 <span class="go">SCHEMASV</span>
343 <span class="gp">&gt;&gt;&gt; </span><span class="k">print</span><span class="p">(</span><span class="n">error</span><span class="o">.</span><span class="n">type_name</span><span class="p">)</span>
344 <span class="go">SCHEMAV_ELEMENT_CONTENT</span>
345 </pre></div>
346 <p>If you were to print this log entry, you would get something like the
347 following.  Note that the error message depends on the libxml2 version in
348 use:</p>
349 <pre class="literal-block">
350 &lt;string&gt;:1:ERROR::SCHEMAV_ELEMENT_CONTENT: Element 'c': This element is not expected. Expected is ( b ).
351 </pre>
352 <p>Similar to XSLT and RelaxNG, there's also a less efficient but easier shortcut
353 method to do XML Schema validation:</p>
354 <div class="syntax"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">doc</span><span class="o">.</span><span class="n">xmlschema</span><span class="p">(</span><span class="n">xmlschema_doc</span><span class="p">)</span>
355 <span class="go">True</span>
356 <span class="gp">&gt;&gt;&gt; </span><span class="n">doc2</span><span class="o">.</span><span class="n">xmlschema</span><span class="p">(</span><span class="n">xmlschema_doc</span><span class="p">)</span>
357 <span class="go">False</span>
358 </pre></div>
359 </div>
360 <div class="section" id="id2">
361 <h1>Schematron</h1>
362 <p>From version 2.3 on, lxml features ISO-<a class="reference external" href="http://www.schematron.com">Schematron</a> support built on the
363 de-facto reference implementation of Schematron, the pure-XSLT-1.0
364 <a class="reference external" href="http://www.schematron.com/implementation.html">skeleton implementation</a>. This is provided by the lxml.isoschematron package
365 that implements the Schematron class, with an API compatible with the other
366 validators'.  Pass an Element or ElementTree object to construct a Schematron
367 validator:</p>
368 <div class="syntax"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="kn">from</span> <span class="nn">lxml</span> <span class="kn">import</span> <span class="n">isoschematron</span>
369 <span class="gp">&gt;&gt;&gt; </span><span class="n">f</span> <span class="o">=</span> <span class="n">StringIO</span><span class="p">(</span><span class="s1">'''</span><span class="se">\</span>
370 <span class="gp">... </span><span class="s1">&lt;schema xmlns="http://purl.oclc.org/dsdl/schematron" &gt;</span>
371 <span class="gp">... </span><span class="s1">  &lt;pattern id="sum_equals_100_percent"&gt;</span>
372 <span class="gp">... </span><span class="s1">    &lt;title&gt;Sum equals 100%.&lt;/title&gt;</span>
373 <span class="gp">... </span><span class="s1">    &lt;rule context="Total"&gt;</span>
374 <span class="gp">... </span><span class="s1">      &lt;assert test="sum(//Percent)=100"&gt;Sum is not 100%.&lt;/assert&gt;</span>
375 <span class="gp">... </span><span class="s1">    &lt;/rule&gt;</span>
376 <span class="gp">... </span><span class="s1">  &lt;/pattern&gt;</span>
377 <span class="gp">... </span><span class="s1">&lt;/schema&gt;</span>
378 <span class="gp">... </span><span class="s1">'''</span><span class="p">)</span>
379
380 <span class="gp">&gt;&gt;&gt; </span><span class="n">sct_doc</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
381 <span class="gp">&gt;&gt;&gt; </span><span class="n">schematron</span> <span class="o">=</span> <span class="n">isoschematron</span><span class="o">.</span><span class="n">Schematron</span><span class="p">(</span><span class="n">sct_doc</span><span class="p">)</span>
382 </pre></div>
383 <p>You can then validate some ElementTree document with this. Just like with
384 XMLSchema or RelaxNG, you'll get back True if the document is valid against the
385 schema, and False if not:</p>
386 <div class="syntax"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">valid</span> <span class="o">=</span> <span class="n">StringIO</span><span class="p">(</span><span class="s1">'''</span><span class="se">\</span>
387 <span class="gp">... </span><span class="s1">&lt;Total&gt;</span>
388 <span class="gp">... </span><span class="s1">  &lt;Percent&gt;20&lt;/Percent&gt;</span>
389 <span class="gp">... </span><span class="s1">  &lt;Percent&gt;30&lt;/Percent&gt;</span>
390 <span class="gp">... </span><span class="s1">  &lt;Percent&gt;50&lt;/Percent&gt;</span>
391 <span class="gp">... </span><span class="s1">&lt;/Total&gt;</span>
392 <span class="gp">... </span><span class="s1">'''</span><span class="p">)</span>
393
394 <span class="gp">&gt;&gt;&gt; </span><span class="n">doc</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">valid</span><span class="p">)</span>
395 <span class="gp">&gt;&gt;&gt; </span><span class="n">schematron</span><span class="o">.</span><span class="n">validate</span><span class="p">(</span><span class="n">doc</span><span class="p">)</span>
396 <span class="go">True</span>
397
398 <span class="gp">&gt;&gt;&gt; </span><span class="n">etree</span><span class="o">.</span><span class="n">SubElement</span><span class="p">(</span><span class="n">doc</span><span class="o">.</span><span class="n">getroot</span><span class="p">(),</span> <span class="s2">"Percent"</span><span class="p">)</span><span class="o">.</span><span class="n">text</span> <span class="o">=</span> <span class="s2">"10"</span>
399
400 <span class="gp">&gt;&gt;&gt; </span><span class="n">schematron</span><span class="o">.</span><span class="n">validate</span><span class="p">(</span><span class="n">doc</span><span class="p">)</span>
401 <span class="go">False</span>
402 </pre></div>
403 <p>Calling the schema object has the same effect as calling its validate method.
404 This can be useful for conditional statements:</p>
405 <div class="syntax"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">is_valid</span> <span class="o">=</span> <span class="n">isoschematron</span><span class="o">.</span><span class="n">Schematron</span><span class="p">(</span><span class="n">sct_doc</span><span class="p">)</span>
406
407 <span class="gp">&gt;&gt;&gt; </span><span class="k">if</span> <span class="ow">not</span> <span class="n">is_valid</span><span class="p">(</span><span class="n">doc</span><span class="p">):</span>
408 <span class="gp">... </span>    <span class="k">print</span><span class="p">(</span><span class="s2">"invalid!"</span><span class="p">)</span>
409 <span class="go">invalid!</span>
410 </pre></div>
411 <p>Built on a pure-XSLT implementation, the actual validator is created as an
412 XSLT 1.0 stylesheet using these steps:</p>
413 <ol class="arabic simple" start="0">
414 <li>(Extract embedded Schematron from XML Schema or RelaxNG schema)</li>
415 <li>Process inclusions</li>
416 <li>Process abstract patterns</li>
417 <li>Compile the schematron schema to XSLT</li>
418 </ol>
419 <p>To allow more control over the individual steps, isoschematron.Schematron
420 supports an extended API:</p>
421 <p>The <tt class="docutils literal">include</tt> and <tt class="docutils literal">expand</tt> keyword arguments can be used to switch off
422 steps 1) and 2).</p>
423 <p>To set parameters for steps 1), 2) and 3), dictionaries containing parameters
424 for XSLT can be provided using the keyword arguments <tt class="docutils literal">include_params</tt>,
425 <tt class="docutils literal">expand_params</tt> or <tt class="docutils literal">compile_params</tt>. Schematron automatically converts these
426 parameters to stylesheet parameters, so you need not worry about quoting string
427 parameters or using XSLT.strparam(). If you ever need to pass an
428 XPath as argument to the XSLT stylesheet you can pass in an etree.XPath object
429 (see XPath and XSLT with lxml: <a class="reference external" href="xpathxslt.html#stylesheet-parameters">Stylesheet-parameters</a> for background on this).</p>
430 <p>The <tt class="docutils literal">phase</tt> parameter of the compile step is additionally exposed as a keyword
431 argument. If set, it overrides any occurrence in <tt class="docutils literal">compile_params</tt>. Note that
432 isoschematron.Schematron might expose more common parameters as additional keyword
433 args in the future.</p>
434 <p>By setting <tt class="docutils literal">store_schematron</tt> to True, the (included-and-expanded) schematron
435 document tree is stored and made available through the <tt class="docutils literal">schematron</tt> property.</p>
436 <p>Similarly, setting <tt class="docutils literal">store_xslt</tt> to True will result in the validation XSLT
437 document tree being kept; it can be retrieved through the <tt class="docutils literal">validator_xslt</tt>
438 property.</p>
439 <p>Finally, with <tt class="docutils literal">store_report</tt> set to True (default: False), the resulting
440 validation report document gets stored and can be accessed as the
441 <tt class="docutils literal">validation_report</tt> property.</p>
442 <p>Using the <tt class="docutils literal">phase</tt> parameter of isoschematron.Schematron allows for selective
443 validation of predefined pattern groups:</p>
444 <div class="syntax"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">f</span> <span class="o">=</span> <span class="n">StringIO</span><span class="p">(</span><span class="s1">'''</span><span class="se">\</span>
445 <span class="gp">... </span><span class="s1">&lt;schema xmlns="http://purl.oclc.org/dsdl/schematron" &gt;</span>
446 <span class="gp">... </span><span class="s1">  &lt;phase id="phase.sum_check"&gt;</span>
447 <span class="gp">... </span><span class="s1">    &lt;active pattern="sum_equals_100_percent"/&gt;</span>
448 <span class="gp">... </span><span class="s1">  &lt;/phase&gt;</span>
449 <span class="gp">... </span><span class="s1">  &lt;phase id="phase.entries_check"&gt;</span>
450 <span class="gp">... </span><span class="s1">    &lt;active pattern="all_positive"/&gt;</span>
451 <span class="gp">... </span><span class="s1">  &lt;/phase&gt;</span>
452 <span class="gp">... </span><span class="s1">  &lt;pattern id="sum_equals_100_percent"&gt;</span>
453 <span class="gp">... </span><span class="s1">    &lt;title&gt;Sum equals 100%.&lt;/title&gt;</span>
454 <span class="gp">... </span><span class="s1">    &lt;rule context="Total"&gt;</span>
455 <span class="gp">... </span><span class="s1">      &lt;assert test="sum(//Percent)=100"&gt;Sum is not 100%.&lt;/assert&gt;</span>
456 <span class="gp">... </span><span class="s1">    &lt;/rule&gt;</span>
457 <span class="gp">... </span><span class="s1">  &lt;/pattern&gt;</span>
458 <span class="gp">... </span><span class="s1">  &lt;pattern id="all_positive"&gt;</span>
459 <span class="gp">... </span><span class="s1">    &lt;title&gt;All entries must be positive.&lt;/title&gt;</span>
460 <span class="gp">... </span><span class="s1">    &lt;rule context="Percent"&gt;</span>
461 <span class="gp">... </span><span class="s1">      &lt;assert test="number(.)&gt;0"&gt;Number (&lt;value-of select="."/&gt;) not positive&lt;/assert&gt;</span>
462 <span class="gp">... </span><span class="s1">    &lt;/rule&gt;</span>
463 <span class="gp">... </span><span class="s1">  &lt;/pattern&gt;</span>
464 <span class="gp">... </span><span class="s1">&lt;/schema&gt;</span>
465 <span class="gp">... </span><span class="s1">'''</span><span class="p">)</span>
466
467 <span class="gp">&gt;&gt;&gt; </span><span class="n">sct_doc</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
468 <span class="gp">&gt;&gt;&gt; </span><span class="n">schematron</span> <span class="o">=</span> <span class="n">isoschematron</span><span class="o">.</span><span class="n">Schematron</span><span class="p">(</span><span class="n">sct_doc</span><span class="p">)</span>
469
470 <span class="gp">&gt;&gt;&gt; </span><span class="n">valid</span> <span class="o">=</span> <span class="n">StringIO</span><span class="p">(</span><span class="s1">'''</span><span class="se">\</span>
471 <span class="gp">... </span><span class="s1">&lt;Total&gt;</span>
472 <span class="gp">... </span><span class="s1">  &lt;Percent&gt;20&lt;/Percent&gt;</span>
473 <span class="gp">... </span><span class="s1">  &lt;Percent&gt;30&lt;/Percent&gt;</span>
474 <span class="gp">... </span><span class="s1">  &lt;Percent&gt;50&lt;/Percent&gt;</span>
475 <span class="gp">... </span><span class="s1">&lt;/Total&gt;</span>
476 <span class="gp">... </span><span class="s1">'''</span><span class="p">)</span>
477
478 <span class="gp">&gt;&gt;&gt; </span><span class="n">doc</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">valid</span><span class="p">)</span>
479 <span class="gp">&gt;&gt;&gt; </span><span class="n">schematron</span><span class="o">.</span><span class="n">validate</span><span class="p">(</span><span class="n">doc</span><span class="p">)</span>
480 <span class="go">True</span>
481
482 <span class="gp">&gt;&gt;&gt; </span><span class="n">invalid_positive</span> <span class="o">=</span> <span class="n">StringIO</span><span class="p">(</span><span class="s1">'''</span><span class="se">\</span>
483 <span class="gp">... </span><span class="s1">&lt;Total&gt;</span>
484 <span class="gp">... </span><span class="s1">  &lt;Percent&gt;0&lt;/Percent&gt;</span>
485 <span class="gp">... </span><span class="s1">  &lt;Percent&gt;50&lt;/Percent&gt;</span>
486 <span class="gp">... </span><span class="s1">  &lt;Percent&gt;50&lt;/Percent&gt;</span>
487 <span class="gp">... </span><span class="s1">&lt;/Total&gt;</span>
488 <span class="gp">... </span><span class="s1">'''</span><span class="p">)</span>
489
490 <span class="gp">&gt;&gt;&gt; </span><span class="n">doc</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">invalid_positive</span><span class="p">)</span>
491
492 <span class="gp">&gt;&gt;&gt; </span><span class="n">schematron</span><span class="o">.</span><span class="n">validate</span><span class="p">(</span><span class="n">doc</span><span class="p">)</span>
493 <span class="go">False</span>
494 </pre></div>
495 <p>If the constraint of Percent entries being positive is not of interest in a
496 certain validation scenario, it can now be disabled:</p>
497 <div class="syntax"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">selective</span> <span class="o">=</span> <span class="n">isoschematron</span><span class="o">.</span><span class="n">Schematron</span><span class="p">(</span><span class="n">sct_doc</span><span class="p">,</span> <span class="n">phase</span><span class="o">=</span><span class="s2">"phase.sum_check"</span><span class="p">)</span>
498 <span class="gp">&gt;&gt;&gt; </span><span class="n">selective</span><span class="o">.</span><span class="n">validate</span><span class="p">(</span><span class="n">doc</span><span class="p">)</span>
499 <span class="go">True</span>
500 </pre></div>
501 <p>The usage of validation phases is a unique feature of ISO-Schematron and can be
502 a very powerful tool, e.g. for establishing validation stages or for providing
503 different validators for different "validation audiences".</p>
504 </div>
505 <div class="section" id="id3">
506 <h1>(Pre-ISO-Schematron)</h1>
507 <p>Since version 2.0, lxml.etree features <a class="reference external" href="http://www.ascc.net/xml/schematron">pre-ISO-Schematron</a> support, using the
508 class lxml.etree.Schematron.  It requires at least libxml2 2.6.21 to
509 work.  The API is the same as for the other validators.  Pass an
510 ElementTree object to construct a Schematron validator:</p>
511 <div class="syntax"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">f</span> <span class="o">=</span> <span class="n">StringIO</span><span class="p">(</span><span class="s1">'''</span><span class="se">\</span>
512 <span class="gp">... </span><span class="s1">&lt;schema xmlns="http://www.ascc.net/xml/schematron" &gt;</span>
513 <span class="gp">... </span><span class="s1">  &lt;pattern name="Sum equals 100%."&gt;</span>
514 <span class="gp">... </span><span class="s1">    &lt;rule context="Total"&gt;</span>
515 <span class="gp">... </span><span class="s1">      &lt;assert test="sum(//Percent)=100"&gt;Sum is not 100%.&lt;/assert&gt;</span>
516 <span class="gp">... </span><span class="s1">    &lt;/rule&gt;</span>
517 <span class="gp">... </span><span class="s1">  &lt;/pattern&gt;</span>
518 <span class="gp">... </span><span class="s1">&lt;/schema&gt;</span>
519 <span class="gp">... </span><span class="s1">'''</span><span class="p">)</span>
520
521 <span class="gp">&gt;&gt;&gt; </span><span class="n">sct_doc</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">f</span><span class="p">)</span>
522 <span class="gp">&gt;&gt;&gt; </span><span class="n">schematron</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">Schematron</span><span class="p">(</span><span class="n">sct_doc</span><span class="p">)</span>
523 </pre></div>
524 <p>You can then validate some ElementTree document with this.  As with RelaxNG,
525 you'll get back True if the document is valid against the schema, and False if
526 not:</p>
527 <div class="syntax"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">valid</span> <span class="o">=</span> <span class="n">StringIO</span><span class="p">(</span><span class="s1">'''</span><span class="se">\</span>
528 <span class="gp">... </span><span class="s1">&lt;Total&gt;</span>
529 <span class="gp">... </span><span class="s1">  &lt;Percent&gt;20&lt;/Percent&gt;</span>
530 <span class="gp">... </span><span class="s1">  &lt;Percent&gt;30&lt;/Percent&gt;</span>
531 <span class="gp">... </span><span class="s1">  &lt;Percent&gt;50&lt;/Percent&gt;</span>
532 <span class="gp">... </span><span class="s1">&lt;/Total&gt;</span>
533 <span class="gp">... </span><span class="s1">'''</span><span class="p">)</span>
534
535 <span class="gp">&gt;&gt;&gt; </span><span class="n">doc</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">parse</span><span class="p">(</span><span class="n">valid</span><span class="p">)</span>
536 <span class="gp">&gt;&gt;&gt; </span><span class="n">schematron</span><span class="o">.</span><span class="n">validate</span><span class="p">(</span><span class="n">doc</span><span class="p">)</span>
537 <span class="go">True</span>
538
539 <span class="gp">&gt;&gt;&gt; </span><span class="n">etree</span><span class="o">.</span><span class="n">SubElement</span><span class="p">(</span><span class="n">doc</span><span class="o">.</span><span class="n">getroot</span><span class="p">(),</span> <span class="s2">"Percent"</span><span class="p">)</span><span class="o">.</span><span class="n">text</span> <span class="o">=</span> <span class="s2">"10"</span>
540
541 <span class="gp">&gt;&gt;&gt; </span><span class="n">schematron</span><span class="o">.</span><span class="n">validate</span><span class="p">(</span><span class="n">doc</span><span class="p">)</span>
542 <span class="go">False</span>
543 </pre></div>
544 <p>Calling the schema object has the same effect as calling its validate method.
545 This is sometimes used in conditional statements:</p>
546 <div class="syntax"><pre><span></span><span class="gp">&gt;&gt;&gt; </span><span class="n">is_valid</span> <span class="o">=</span> <span class="n">etree</span><span class="o">.</span><span class="n">Schematron</span><span class="p">(</span><span class="n">sct_doc</span><span class="p">)</span>
547
548 <span class="gp">&gt;&gt;&gt; </span><span class="k">if</span> <span class="ow">not</span> <span class="n">is_valid</span><span class="p">(</span><span class="n">doc</span><span class="p">):</span>
549 <span class="gp">... </span>    <span class="k">print</span><span class="p">(</span><span class="s2">"invalid!"</span><span class="p">)</span>
550 <span class="go">invalid!</span>
551 </pre></div>
552 <p>Note that libxml2 restricts error reporting to the parsing step (when creating
553 the Schematron instance).  There is currently no support for error
554 reporting during validation.</p>
555 </div>
556 </div>
557 <div class="footer">
558 <hr class="footer" />
559 Generated on: 2020-07-09.
560
561 </div>
562 </body>
563 </html>