Imported Upstream version 4.6.0
[platform/upstream/python-lxml.git] / doc / html / capi.html
1 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
2 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
3 <head>
4 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
5 <meta name="generator" content="Docutils 0.16: http://docutils.sourceforge.net/" />
6 <title>The public C-API of lxml.etree</title>
7 <link rel="stylesheet" href="style.css" type="text/css" />
8 <script type="text/javascript">
/**
 * Toggle the side menu between its shown and hidden states.
 *
 * Adds the "visible" class to the element with id "sidemenu" when it is
 * absent and removes it when it is present.  The triggering event is then
 * cancelled and stopped from propagating, so that a click on the menu
 * trigger does not bubble up to the body click handler (hide_menu), which
 * would immediately hide the menu again.
 *
 * @param {Event} event - The click event dispatched on the menu trigger.
 */
function trigger_menu(event) {
    var sidemenu = document.getElementById("sidemenu");
    if (sidemenu) {
        // classList operates on whole class tokens, so unlike substring
        // matching on the raw attribute it cannot be confused by classes
        // that merely contain "visible", and it works when the element
        // has no class attribute at all.
        sidemenu.classList.toggle("visible");
    }
    event.preventDefault();
    event.stopPropagation();
}
/**
 * Hide the side menu if it is currently shown.
 *
 * Removes the "visible" class from the element with id "sidemenu".  Does
 * nothing when the class is not set or when the element does not exist.
 */
function hide_menu() {
    var sidemenu = document.getElementById("sidemenu");
    if (sidemenu) {
        // classList.remove() is a no-op when the token is absent, so no
        // explicit membership check is needed, and it matches the whole
        // token rather than a substring of the class attribute.
        sidemenu.classList.remove("visible");
    }
}
24 </script><meta name="viewport" content="width=device-width, initial-scale=1" /></head>
25 <body onclick="hide_menu()">
26 <div class="document" id="the-public-c-api-of-lxml-etree">
27 <div class="sidemenu" id="sidemenu"><div class="menutrigger" onclick="trigger_menu(event)">Menu</div><div class="menu"><div class="banner"><div class="banner_image"><img src="python-xml-title.png" /></div><div class="banner_link"><a href="index.html#support-the-project">Like the tool? <br class="first" />Help making it better! <br class="second" />Your donation helps!</a></div></div><ul id="lxml-section"><li><span class="section title">lxml</span><ul class="menu foreign" id="index-menu"><li class="menu title"><a href="index.html">lxml</a><ul class="submenu"><li class="menu item"><a href="index.html#introduction">Introduction</a></li><li class="menu item"><a href="index.html#support-the-project">Support the project</a></li><li class="menu item"><a href="index.html#documentation">Documentation</a></li><li class="menu item"><a href="index.html#download">Download</a></li><li class="menu item"><a href="index.html#mailing-list">Mailing list</a></li><li class="menu item"><a href="index.html#bug-tracker">Bug tracker</a></li><li class="menu item"><a href="index.html#license">License</a></li><li class="menu item"><a href="index.html#old-versions">Old Versions</a></li><li class="menu item"><a href="index.html#project-income-report">Project income report</a></li><li class="menu item"><a href="index.html#legal-notice-for-donations">Legal Notice for Donations</a></li></ul></li></ul><ul class="menu foreign" id="intro-menu"><li class="menu title"><a href="intro.html">Why lxml?</a><ul class="submenu"><li class="menu item"><a href="intro.html#motto">Motto</a></li><li class="menu item"><a href="intro.html#aims">Aims</a></li></ul></li></ul><ul class="menu foreign" id="installation-menu"><li class="menu title"><a href="installation.html">Installing lxml</a><ul class="submenu"><li class="menu item"><a href="installation.html#where-to-get-it">Where to get it</a></li><li class="menu item"><a href="installation.html#requirements">Requirements</a></li><li class="menu item"><a 
href="installation.html#installation">Installation</a></li><li class="menu item"><a href="installation.html#building-lxml-from-dev-sources">Building lxml from dev sources</a></li><li class="menu item"><a href="installation.html#using-lxml-with-python-libxml2">Using lxml with python-libxml2</a></li><li class="menu item"><a href="installation.html#source-builds-on-ms-windows">Source builds on MS Windows</a></li><li class="menu item"><a href="installation.html#source-builds-on-macos-x">Source builds on MacOS-X</a></li></ul></li></ul><ul class="menu foreign" id="performance-menu"><li class="menu title"><a href="performance.html">Benchmarks and Speed</a><ul class="submenu"><li class="menu item"><a href="performance.html#general-notes">General notes</a></li><li class="menu item"><a href="performance.html#how-to-read-the-timings">How to read the timings</a></li><li class="menu item"><a href="performance.html#parsing-and-serialising">Parsing and Serialising</a></li><li class="menu item"><a href="performance.html#the-elementtree-api">The ElementTree API</a></li><li class="menu item"><a href="performance.html#xpath">XPath</a></li><li class="menu item"><a href="performance.html#a-longer-example">A longer example</a></li><li class="menu item"><a href="performance.html#lxml-objectify">lxml.objectify</a></li></ul></li></ul><ul class="menu foreign" id="compatibility-menu"><li class="menu title"><a href="compatibility.html">ElementTree compatibility of lxml.etree</a></li></ul><ul class="menu foreign" id="FAQ-menu"><li class="menu title"><a href="FAQ.html">lxml FAQ - Frequently Asked Questions</a><ul class="submenu"><li class="menu item"><a href="FAQ.html#general-questions">General Questions</a></li><li class="menu item"><a href="FAQ.html#installation">Installation</a></li><li class="menu item"><a href="FAQ.html#contributing">Contributing</a></li><li class="menu item"><a href="FAQ.html#bugs">Bugs</a></li><li class="menu item"><a href="FAQ.html#id1">Threading</a></li><li class="menu 
item"><a href="FAQ.html#parsing-and-serialisation">Parsing and Serialisation</a></li><li class="menu item"><a href="FAQ.html#xpath-and-document-traversal">XPath and Document Traversal</a></li></ul></li></ul></li></ul><ul id="Developing with lxml-section"><li><span class="section title">Developing with lxml</span><ul class="menu foreign" id="tutorial-menu"><li class="menu title"><a href="tutorial.html">The lxml.etree Tutorial</a><ul class="submenu"><li class="menu item"><a href="tutorial.html#the-element-class">The Element class</a></li><li class="menu item"><a href="tutorial.html#the-elementtree-class">The ElementTree class</a></li><li class="menu item"><a href="tutorial.html#parsing-from-strings-and-files">Parsing from strings and files</a></li><li class="menu item"><a href="tutorial.html#namespaces">Namespaces</a></li><li class="menu item"><a href="tutorial.html#the-e-factory">The E-factory</a></li><li class="menu item"><a href="tutorial.html#elementpath">ElementPath</a></li></ul></li></ul><ul class="menu foreign" id="apidoc lxml-menu"><li class="menu title"><a href="apidoc/lxml.html">API reference</a></li></ul><ul class="menu foreign" id="api-menu"><li class="menu title"><a href="api.html">APIs specific to lxml.etree</a><ul class="submenu"><li class="menu item"><a href="api.html#lxml-etree">lxml.etree</a></li><li class="menu item"><a href="api.html#other-element-apis">Other Element APIs</a></li><li class="menu item"><a href="api.html#trees-and-documents">Trees and Documents</a></li><li class="menu item"><a href="api.html#iteration">Iteration</a></li><li class="menu item"><a href="api.html#error-handling-on-exceptions">Error handling on exceptions</a></li><li class="menu item"><a href="api.html#error-logging">Error logging</a></li><li class="menu item"><a href="api.html#serialisation">Serialisation</a></li><li class="menu item"><a href="api.html#incremental-xml-generation">Incremental XML generation</a></li><li class="menu item"><a 
href="api.html#cdata">CDATA</a></li><li class="menu item"><a href="api.html#xinclude-and-elementinclude">XInclude and ElementInclude</a></li></ul></li></ul><ul class="menu foreign" id="parsing-menu"><li class="menu title"><a href="parsing.html">Parsing XML and HTML with lxml</a><ul class="submenu"><li class="menu item"><a href="parsing.html#parsers">Parsers</a></li><li class="menu item"><a href="parsing.html#the-target-parser-interface">The target parser interface</a></li><li class="menu item"><a href="parsing.html#the-feed-parser-interface">The feed parser interface</a></li><li class="menu item"><a href="parsing.html#incremental-event-parsing">Incremental event parsing</a></li><li class="menu item"><a href="parsing.html#iterparse-and-iterwalk">iterparse and iterwalk</a></li><li class="menu item"><a href="parsing.html#python-unicode-strings">Python unicode strings</a></li></ul></li></ul><ul class="menu foreign" id="validation-menu"><li class="menu title"><a href="validation.html">Validation with lxml</a><ul class="submenu"><li class="menu item"><a href="validation.html#validation-at-parse-time">Validation at parse time</a></li><li class="menu item"><a href="validation.html#id1">DTD</a></li><li class="menu item"><a href="validation.html#relaxng">RelaxNG</a></li><li class="menu item"><a href="validation.html#xmlschema">XMLSchema</a></li><li class="menu item"><a href="validation.html#id2">Schematron</a></li><li class="menu item"><a href="validation.html#id3">(Pre-ISO-Schematron)</a></li></ul></li></ul><ul class="menu foreign" id="xpathxslt-menu"><li class="menu title"><a href="xpathxslt.html">XPath and XSLT with lxml</a><ul class="submenu"><li class="menu item"><a href="xpathxslt.html#xpath">XPath</a></li><li class="menu item"><a href="xpathxslt.html#xslt">XSLT</a></li></ul></li></ul><ul class="menu foreign" id="objectify-menu"><li class="menu title"><a href="objectify.html">lxml.objectify</a><ul class="submenu"><li class="menu item"><a 
href="objectify.html#the-lxml-objectify-api">The lxml.objectify API</a></li><li class="menu item"><a href="objectify.html#asserting-a-schema">Asserting a Schema</a></li><li class="menu item"><a href="objectify.html#objectpath">ObjectPath</a></li><li class="menu item"><a href="objectify.html#python-data-types">Python data types</a></li><li class="menu item"><a href="objectify.html#how-data-types-are-matched">How data types are matched</a></li><li class="menu item"><a href="objectify.html#what-is-different-from-lxml-etree">What is different from lxml.etree?</a></li></ul></li></ul><ul class="menu foreign" id="lxmlhtml-menu"><li class="menu title"><a href="lxmlhtml.html">lxml.html</a><ul class="submenu"><li class="menu item"><a href="lxmlhtml.html#parsing-html">Parsing HTML</a></li><li class="menu item"><a href="lxmlhtml.html#html-element-methods">HTML Element Methods</a></li><li class="menu item"><a href="lxmlhtml.html#running-html-doctests">Running HTML doctests</a></li><li class="menu item"><a href="lxmlhtml.html#creating-html-with-the-e-factory">Creating HTML with the E-factory</a></li><li class="menu item"><a href="lxmlhtml.html#working-with-links">Working with links</a></li><li class="menu item"><a href="lxmlhtml.html#forms">Forms</a></li><li class="menu item"><a href="lxmlhtml.html#cleaning-up-html">Cleaning up HTML</a></li><li class="menu item"><a href="lxmlhtml.html#html-diff">HTML Diff</a></li><li class="menu item"><a href="lxmlhtml.html#examples">Examples</a></li></ul></li></ul><ul class="menu foreign" id="cssselect-menu"><li class="menu title"><a href="cssselect.html">lxml.cssselect</a><ul class="submenu"><li class="menu item"><a href="cssselect.html#the-cssselector-class">The CSSSelector class</a></li><li class="menu item"><a href="cssselect.html#the-cssselect-method">The cssselect method</a></li><li class="menu item"><a href="cssselect.html#supported-selectors">Supported Selectors</a></li><li class="menu item"><a 
href="cssselect.html#namespaces">Namespaces</a></li></ul></li></ul><ul class="menu foreign" id="elementsoup-menu"><li class="menu title"><a href="elementsoup.html">BeautifulSoup Parser</a><ul class="submenu"><li class="menu item"><a href="elementsoup.html#parsing-with-the-soupparser">Parsing with the soupparser</a></li><li class="menu item"><a href="elementsoup.html#entity-handling">Entity handling</a></li><li class="menu item"><a href="elementsoup.html#using-soupparser-as-a-fallback">Using soupparser as a fallback</a></li><li class="menu item"><a href="elementsoup.html#using-only-the-encoding-detection">Using only the encoding detection</a></li></ul></li></ul><ul class="menu foreign" id="html5parser-menu"><li class="menu title"><a href="html5parser.html">html5lib Parser</a><ul class="submenu"><li class="menu item"><a href="html5parser.html#differences-to-regular-html-parsing">Differences to regular HTML parsing</a></li><li class="menu item"><a href="html5parser.html#function-reference">Function Reference</a></li></ul></li></ul></li></ul><ul id="Extending lxml-section"><li><span class="section title">Extending lxml</span><ul class="menu foreign" id="resolvers-menu"><li class="menu title"><a href="resolvers.html">Document loading and URL resolving</a><ul class="submenu"><li class="menu item"><a href="resolvers.html#xml-catalogs">XML Catalogs</a></li><li class="menu item"><a href="resolvers.html#uri-resolvers">URI Resolvers</a></li><li class="menu item"><a href="resolvers.html#document-loading-in-context">Document loading in context</a></li><li class="menu item"><a href="resolvers.html#i-o-access-control-in-xslt">I/O access control in XSLT</a></li></ul></li></ul><ul class="menu foreign" id="extensions-menu"><li class="menu title"><a href="extensions.html">Python extensions for XPath and XSLT</a><ul class="submenu"><li class="menu item"><a href="extensions.html#xpath-extension-functions">XPath Extension functions</a></li><li class="menu item"><a 
href="extensions.html#xslt-extension-elements">XSLT extension elements</a></li></ul></li></ul><ul class="menu foreign" id="element classes-menu"><li class="menu title"><a href="element_classes.html">Using custom Element classes in lxml</a><ul class="submenu"><li class="menu item"><a href="element_classes.html#background-on-element-proxies">Background on Element proxies</a></li><li class="menu item"><a href="element_classes.html#element-initialization">Element initialization</a></li><li class="menu item"><a href="element_classes.html#setting-up-a-class-lookup-scheme">Setting up a class lookup scheme</a></li><li class="menu item"><a href="element_classes.html#generating-xml-with-custom-classes">Generating XML with custom classes</a></li><li class="menu item"><a href="element_classes.html#id1">Implementing namespaces</a></li></ul></li></ul><ul class="menu foreign" id="sax-menu"><li class="menu title"><a href="sax.html">Sax support</a><ul class="submenu"><li class="menu item"><a href="sax.html#building-a-tree-from-sax-events">Building a tree from SAX events</a></li><li class="menu item"><a href="sax.html#producing-sax-events-from-an-elementtree-or-element">Producing SAX events from an ElementTree or Element</a></li><li class="menu item"><a href="sax.html#interfacing-with-pulldom-minidom">Interfacing with pulldom/minidom</a></li></ul></li></ul><ul class="menu current" id="capi-menu"><li class="menu title"><a href="capi.html">The public C-API of lxml.etree</a><ul class="submenu"><li class="menu item"><a href="capi.html#passing-generated-trees-through-python">Passing generated trees through Python</a></li><li class="menu item"><a href="capi.html#writing-external-modules-in-cython">Writing external modules in Cython</a></li><li class="menu item"><a href="capi.html#writing-external-modules-in-c">Writing external modules in C</a></li></ul></li></ul></li></ul><ul id="Developing lxml-section"><li><span class="section title">Developing lxml</span><ul class="menu foreign" 
id="build-menu"><li class="menu title"><a href="build.html">How to build lxml from source</a><ul class="submenu"><li class="menu item"><a href="build.html#cython">Cython</a></li><li class="menu item"><a href="build.html#github-git-and-hg">Github, git and hg</a></li><li class="menu item"><a href="build.html#building-the-sources">Building the sources</a></li><li class="menu item"><a href="build.html#running-the-tests-and-reporting-errors">Running the tests and reporting errors</a></li><li class="menu item"><a href="build.html#building-an-egg-or-wheel">Building an egg or wheel</a></li><li class="menu item"><a href="build.html#building-lxml-on-macos-x">Building lxml on MacOS-X</a></li><li class="menu item"><a href="build.html#static-linking-on-windows">Static linking on Windows</a></li><li class="menu item"><a href="build.html#building-debian-packages-from-svn-sources">Building Debian packages from SVN sources</a></li></ul></li></ul><ul class="menu foreign" id="lxml source howto-menu"><li class="menu title"><a href="lxml-source-howto.html">How to read the source of lxml</a><ul class="submenu"><li class="menu item"><a href="lxml-source-howto.html#what-is-cython">What is Cython?</a></li><li class="menu item"><a href="lxml-source-howto.html#where-to-start">Where to start?</a></li><li class="menu item"><a href="lxml-source-howto.html#lxml-etree">lxml.etree</a></li><li class="menu item"><a href="lxml-source-howto.html#python-modules">Python modules</a></li><li class="menu item"><a href="lxml-source-howto.html#lxml-objectify">lxml.objectify</a></li><li class="menu item"><a href="lxml-source-howto.html#lxml-html">lxml.html</a></li></ul></li></ul><ul class="menu foreign" id="changes 4 6 0-menu"><li class="menu title"><a href="changes-4.6.0.html">Release Changelog</a></li></ul><ul class="menu foreign" id="credits-menu"><li class="menu title"><a href="credits.html">Credits</a><ul class="submenu"><li class="menu item"><a href="credits.html#main-contributors">Main 
contributors</a></li><li class="menu item"><a href="credits.html#special-thanks-goes-to">Special thanks goes to:</a></li></ul></li></ul></li><li><a href="/sitemap.html">Sitemap</a></li></ul></div></div><div class="banner"><div class="banner_image"><img src="python-xml-title.png" /></div><div class="banner_link"><a href="index.html#support-the-project">Like the tool? <br class="first" />Help making it better! <br class="second" />Your donation helps!</a></div></div><h1 class="title">The public C-API of lxml.etree</h1>
28
29 <p>As of version 1.1, lxml.etree provides a public C-API.  This allows external
30 C extensions to efficiently access public functions and classes of lxml,
31 without going through the Python API.</p>
32 <p>The API is described in the file <a class="reference external" href="https://github.com/lxml/lxml/blob/master/src/lxml/includes/etreepublic.pxd">etreepublic.pxd</a>, which is directly
33 c-importable by extension modules implemented in <a class="reference external" href="http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/">Pyrex</a> or <a class="reference external" href="http://cython.org">Cython</a>.</p>
34 <div class="contents topic" id="contents">
35 <p class="topic-title">Contents</p>
36 <ul class="simple">
37 <li><a class="reference internal" href="#passing-generated-trees-through-python" id="id1">Passing generated trees through Python</a></li>
38 <li><a class="reference internal" href="#writing-external-modules-in-cython" id="id2">Writing external modules in Cython</a></li>
39 <li><a class="reference internal" href="#writing-external-modules-in-c" id="id3">Writing external modules in C</a></li>
40 </ul>
41 </div>
42 <div class="section" id="passing-generated-trees-through-python">
43 <h1>Passing generated trees through Python</h1>
44 <p>This is the simplest way to integrate with lxml.  It does not require
45 any C-level integration but uses a Python function to wrap an externally
46 generated libxml2 document in lxml.</p>
47 <p>The external module that creates the libxml2 tree must pack the document
48 pointer into a <a class="reference external" href="https://docs.python.org/3/c-api/capsule.html">PyCapsule</a>
49 object.  This can then be passed into lxml with the function
50 <tt class="docutils literal">lxml.etree.adopt_external_document()</tt>.  It also takes an optional lxml
51 parser instance to associate with the document, in order to configure the
52 Element class lookup, relative URL lookups, etc.</p>
53 <p>See the <a class="reference external" href="api/lxml.etree-module.html#adopt_external_document">API reference</a>
54 for further details.</p>
55 <p>The same functionality is available as part of the public C-API in the form
56 of the C function <tt class="docutils literal">adoptExternalDocument()</tt>.</p>
57 </div>
58 <div class="section" id="writing-external-modules-in-cython">
59 <h1>Writing external modules in Cython</h1>
60 <p>This is the easiest way of extending lxml at the C level.  A <a class="reference external" href="http://cython.org">Cython</a>
61 (or <a class="reference external" href="http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/">Pyrex</a>) module should start like this:</p>
62 <pre class="literal-block">
63 # My Cython extension
64
65 # directive pointing compiler to lxml header files;
66 # use ``aliases={"LXML_PACKAGE_DIR": lxml.__path__}``
67 # argument to cythonize in setup.py to dynamically
68 # determine dir at compile time
69 # distutils: include_dirs = LXML_PACKAGE_DIR
70
71 # import the public functions and classes of lxml.etree
72 cimport lxml.includes.etreepublic as cetree
73
74 # import the lxml.etree module in Python
75 cdef object etree
76 from lxml import etree
77
78 # initialize the access to the C-API of lxml.etree
79 cetree.import_lxml__etree()
80 </pre>
81 <p>From this line on, you can access all public functions of lxml.etree
82 from the <tt class="docutils literal">cetree</tt> namespace like this:</p>
83 <pre class="literal-block">
84 # build a tag name from namespace and element name
85 py_tag = cetree.namespacedNameFromNsName("http://some/url", "myelement")
86 </pre>
87 <p>Public lxml classes are easily subclassed.  For example, to implement
88 and set a new default element class, you can write Cython code like
89 the following:</p>
90 <pre class="literal-block">
91 from lxml.includes.etreepublic cimport ElementBase
92 cdef class NewElementClass(ElementBase):
93      def set_value(self, myval):
94          self.set("my_attribute", myval)
95
96 etree.set_element_class_lookup(
97      etree.ElementDefaultClassLookup(element=NewElementClass))
98 </pre>
99 </div>
100 <div class="section" id="writing-external-modules-in-c">
101 <h1>Writing external modules in C</h1>
102 <p>If you really feel like it, you can also interface with lxml.etree straight
103 from C code.  All you have to do is include the header file for the public
104 API, import the <tt class="docutils literal">lxml.etree</tt> module and then call the import function:</p>
105 <div class="syntax"><pre><span></span><span class="cm">/* My C extension */</span>
106
107 <span class="cm">/* common includes */</span>
108 <span class="cp">#include</span> <span class="cpf">"Python.h"</span><span class="cp"></span>
109 <span class="cp">#include</span> <span class="cpf">"stdio.h"</span><span class="cp"></span>
110 <span class="cp">#include</span> <span class="cpf">"string.h"</span><span class="cp"></span>
111 <span class="cp">#include</span> <span class="cpf">"stdarg.h"</span><span class="cp"></span>
112 <span class="cp">#include</span> <span class="cpf">"libxml/xmlversion.h"</span><span class="cp"></span>
113 <span class="cp">#include</span> <span class="cpf">"libxml/encoding.h"</span><span class="cp"></span>
114 <span class="cp">#include</span> <span class="cpf">"libxml/hash.h"</span><span class="cp"></span>
115 <span class="cp">#include</span> <span class="cpf">"libxml/tree.h"</span><span class="cp"></span>
116 <span class="cp">#include</span> <span class="cpf">"libxml/xmlIO.h"</span><span class="cp"></span>
117 <span class="cp">#include</span> <span class="cpf">"libxml/xmlsave.h"</span><span class="cp"></span>
118 <span class="cp">#include</span> <span class="cpf">"libxml/globals.h"</span><span class="cp"></span>
119 <span class="cp">#include</span> <span class="cpf">"libxml/xmlstring.h"</span><span class="cp"></span>
120
121 <span class="cm">/* lxml.etree specific includes */</span>
122 <span class="cp">#include</span> <span class="cpf">"lxml-version.h"</span><span class="cp"></span>
123 <span class="cp">#include</span> <span class="cpf">"etree_defs.h"</span><span class="cp"></span>
124 <span class="cp">#include</span> <span class="cpf">"etree.h"</span><span class="cp"></span>
125
126 <span class="cm">/* setup code */</span>
127 <span class="n">import_lxml__etree</span><span class="p">()</span>
128 </pre></div>
129 <p>Note that including <tt class="docutils literal">etree.h</tt> does not automatically include the
130 header files it requires.  Note also that the above list of common
131 includes may not be sufficient.</p>
132 </div>
133 </div>
134 <div class="footer">
135 <hr class="footer" />
136 Generated on: 2020-10-17.
137
138 </div>
139 </body>
140 </html>