1 <?xml version="1.0" encoding="ascii"?>
2 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
3 "DTD/xhtml1-transitional.dtd">
4 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
6 <title>lxml.etree.HTMLParser</title>
7 <link rel="stylesheet" href="epydoc.css" type="text/css" />
8 <script type="text/javascript" src="epydoc.js"></script>
11 <body bgcolor="white" text="black" link="blue" vlink="#204080"
13 <!-- ==================== NAVIGATION BAR ==================== -->
14 <table class="navbar" border="0" width="100%" cellpadding="0"
15 bgcolor="#a0c0ff" cellspacing="0">
18 <th> <a
19 href="lxml-module.html">Home</a> </th>
22 <th> <a
23 href="module-tree.html">Trees</a> </th>
26 <th> <a
27 href="identifier-index.html">Indices</a> </th>
30 <th> <a
31 href="help.html">Help</a> </th>
33 <!-- Project homepage -->
34 <th class="navbar" align="right" width="100%">
35 <table border="0" cellpadding="0" cellspacing="0">
36 <tr><th class="navbar" align="center"
37 ><a class="navbar" target="_top" href="/">lxml API</a></th>
41 <table width="100%" cellpadding="0" cellspacing="0">
44 <span class="breadcrumbs">
45 <a href="lxml-module.html">Package lxml</a> ::
46 <a href="lxml.etree-module.html">Module etree</a> ::
51 <table cellpadding="0" cellspacing="0">
52 <!-- hide/show private -->
53 <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
54 onclick="toggle_private();">hide private</a>]</span></td></tr>
55 <tr><td align="right"><span class="options"
56 >[<a href="frames.html" target="_top">frames</a
57 >] | <a href="lxml.etree.HTMLParser-class.html"
58 target="_top">no frames</a>]</span></td></tr>
63 <!-- ==================== CLASS DESCRIPTION ==================== -->
64 <h1 class="epydoc">Class HTMLParser</h1><p class="nomargin-top"></p>
65 <pre class="base-tree">
68 <a href="lxml.etree._BaseParser-class.html" onclick="show_private();">_BaseParser</a> --+
70 <a href="lxml.etree._FeedParser-class.html" onclick="show_private();">_FeedParser</a> --+
72 <strong class="uidshort">HTMLParser</strong>
75 <dl><dt>Known Subclasses:</dt>
77 <ul class="subclass-list">
78 <li class="private"><a href="lxml.html.HTMLParser-class.html" onclick="show_private();">html.HTMLParser</a></li> </ul>
82 <p>HTMLParser(self, encoding=None, remove_blank_text=False, remove_comments=False, remove_pis=False, strip_cdata=True, no_network=True, target=None, XMLSchema schema=None, recover=True, compact=True)</p>
83 <p>The HTML parser.</p>
84 <p>This parser allows reading HTML into a normal XML tree. By
85 default, it can read broken (non well-formed) HTML, depending on
86 the capabilities of libxml2. Use the 'recover' option to switch
88 <p>Available boolean keyword arguments:</p>
89 <ul class="rst-simple">
90 <li>recover - try hard to parse through broken HTML (default: True)</li>
91 <li>no_network - prevent network access for related files (default: True)</li>
92 <li>remove_blank_text - discard empty text nodes</li>
93 <li>remove_comments - discard comments</li>
94 <li>remove_pis - discard processing instructions</li>
95 <li>strip_cdata - replace CDATA sections by normal text content (default: True)</li>
96 <li>compact - save memory for short text content (default: True)</li>
98 <p>Other keyword arguments:</p>
99 <ul class="rst-simple">
100 <li>encoding - override the document encoding</li>
101 <li>target - a parser target object that will receive the parse events</li>
102 <li>schema - an XMLSchema to validate against</li>
104 <p>Note that you should avoid sharing parsers between threads for performance
107 <!-- ==================== INSTANCE METHODS ==================== -->
108 <a name="section-InstanceMethods"></a>
109 <table class="summary" border="1" cellpadding="3"
110 cellspacing="0" width="100%" bgcolor="white">
111 <tr bgcolor="#70b0f0" class="table-header">
112 <td colspan="2" class="table-header">
113 <table border="0" cellpadding="0" cellspacing="0" width="100%">
115 <td align="left"><span class="table-header">Instance Methods</span></td>
116 <td align="right" valign="top"
117 ><span class="options">[<a href="#section-InstanceMethods"
118 class="privatelink" onclick="toggle_private();"
119 >hide private</a>]</span></td>
125 <td width="15%" align="right" valign="top" class="summary">
126 <span class="summary-type"> </span>
127 </td><td class="summary">
128 <table width="100%" cellpadding="0" cellspacing="0" border="0">
130 <td><span class="summary-sig"><a href="lxml.etree.HTMLParser-class.html#__init__" class="summary-sig-name">__init__</a>(<span class="summary-sig-arg">self</span>,
131 <span class="summary-sig-arg">encoding</span>=<span class="summary-sig-default">None</span>,
132 <span class="summary-sig-arg">remove_blank_text</span>=<span class="summary-sig-default">False</span>,
133 <span class="summary-sig-arg">remove_comments</span>=<span class="summary-sig-default">False</span>,
134 <span class="summary-sig-arg">remove_pis</span>=<span class="summary-sig-default">False</span>,
135 <span class="summary-sig-arg">strip_cdata</span>=<span class="summary-sig-default">True</span>,
136 <span class="summary-sig-arg">no_network</span>=<span class="summary-sig-default">True</span>,
137 <span class="summary-sig-arg">target</span>=<span class="summary-sig-default">None</span>,
138 <span class="summary-sig-arg">XMLSchema schema</span>=<span class="summary-sig-default">None</span>,
139 <span class="summary-sig-arg">recover</span>=<span class="summary-sig-default">True</span>,
140 <span class="summary-sig-arg">compact</span>=<span class="summary-sig-default">True</span>)</span><br />
141 x.__init__(...) initializes x; see help(type(x)) for signature</td>
142 <td align="right" valign="top">
152 <td width="15%" align="right" valign="top" class="summary">
153 <span class="summary-type">a new object with type S, a subtype of T</span>
154 </td><td class="summary">
155 <table width="100%" cellpadding="0" cellspacing="0" border="0">
157 <td><span class="summary-sig"><a href="lxml.etree.HTMLParser-class.html#__new__" class="summary-sig-name">__new__</a>(<span class="summary-sig-arg">T</span>,
158 <span class="summary-sig-arg">S</span>,
159 <span class="summary-sig-arg">...</span>)</span></td>
160 <td align="right" valign="top">
170 <td colspan="2" class="summary">
171 <p class="indent-wrapped-lines"><b>Inherited from <code><a href="lxml.etree._FeedParser-class.html" onclick="show_private();">_FeedParser</a></code></b>:
172 <code><a href="lxml.etree._FeedParser-class.html#close">close</a></code>,
173 <code><a href="lxml.etree._FeedParser-class.html#feed">feed</a></code>
175 <p class="indent-wrapped-lines"><b>Inherited from <code><a href="lxml.etree._BaseParser-class.html" onclick="show_private();">_BaseParser</a></code></b>:
176 <code><a href="lxml.etree._BaseParser-class.html#copy">copy</a></code>,
177 <code><a href="lxml.etree._BaseParser-class.html#makeelement">makeelement</a></code>,
178 <code><a href="lxml.etree._BaseParser-class.html#setElementClassLookup">setElementClassLookup</a></code>,
179 <code><a href="lxml.etree._BaseParser-class.html#set_element_class_lookup">set_element_class_lookup</a></code>
181 <p class="indent-wrapped-lines"><b>Inherited from <code>object</code></b>:
182 <code>__delattr__</code>,
183 <code>__format__</code>,
184 <code>__getattribute__</code>,
185 <code>__hash__</code>,
186 <code>__reduce__</code>,
187 <code>__reduce_ex__</code>,
188 <code>__repr__</code>,
189 <code>__setattr__</code>,
190 <code>__sizeof__</code>,
191 <code>__str__</code>,
192 <code>__subclasshook__</code>
197 <!-- ==================== PROPERTIES ==================== -->
198 <a name="section-Properties"></a>
199 <table class="summary" border="1" cellpadding="3"
200 cellspacing="0" width="100%" bgcolor="white">
201 <tr bgcolor="#70b0f0" class="table-header">
202 <td colspan="2" class="table-header">
203 <table border="0" cellpadding="0" cellspacing="0" width="100%">
205 <td align="left"><span class="table-header">Properties</span></td>
206 <td align="right" valign="top"
207 ><span class="options">[<a href="#section-Properties"
208 class="privatelink" onclick="toggle_private();"
209 >hide private</a>]</span></td>
215 <td colspan="2" class="summary">
216 <p class="indent-wrapped-lines"><b>Inherited from <code><a href="lxml.etree._FeedParser-class.html" onclick="show_private();">_FeedParser</a></code></b>:
217 <code><a href="lxml.etree._FeedParser-class.html#feed_error_log">feed_error_log</a></code>
219 <p class="indent-wrapped-lines"><b>Inherited from <code><a href="lxml.etree._BaseParser-class.html" onclick="show_private();">_BaseParser</a></code></b>:
220 <code><a href="lxml.etree._BaseParser-class.html#error_log">error_log</a></code>,
221 <code><a href="lxml.etree._BaseParser-class.html#resolvers">resolvers</a></code>,
222 <code><a href="lxml.etree._BaseParser-class.html#target">target</a></code>,
223 <code><a href="lxml.etree._BaseParser-class.html#version">version</a></code>
225 <p class="indent-wrapped-lines"><b>Inherited from <code>object</code></b>:
226 <code>__class__</code>
231 <!-- ==================== METHOD DETAILS ==================== -->
232 <a name="section-MethodDetails"></a>
233 <table class="details" border="1" cellpadding="3"
234 cellspacing="0" width="100%" bgcolor="white">
235 <tr bgcolor="#70b0f0" class="table-header">
236 <td colspan="2" class="table-header">
237 <table border="0" cellpadding="0" cellspacing="0" width="100%">
239 <td align="left"><span class="table-header">Method Details</span></td>
240 <td align="right" valign="top"
241 ><span class="options">[<a href="#section-MethodDetails"
242 class="privatelink" onclick="toggle_private();"
243 >hide private</a>]</span></td>
249 <a name="__init__"></a>
251 <table class="details" border="1" cellpadding="3"
252 cellspacing="0" width="100%" bgcolor="white">
254 <table width="100%" cellpadding="0" cellspacing="0" border="0">
255 <tr valign="top"><td>
256 <h3 class="epydoc"><span class="sig"><span class="sig-name">__init__</span>(<span class="sig-arg">self</span>,
257 <span class="sig-arg">encoding</span>=<span class="sig-default">None</span>,
258 <span class="sig-arg">remove_blank_text</span>=<span class="sig-default">False</span>,
259 <span class="sig-arg">remove_comments</span>=<span class="sig-default">False</span>,
260 <span class="sig-arg">remove_pis</span>=<span class="sig-default">False</span>,
261 <span class="sig-arg">strip_cdata</span>=<span class="sig-default">True</span>,
262 <span class="sig-arg">no_network</span>=<span class="sig-default">True</span>,
263 <span class="sig-arg">target</span>=<span class="sig-default">None</span>,
264 <span class="sig-arg">XMLSchema schema</span>=<span class="sig-default">None</span>,
265 <span class="sig-arg">recover</span>=<span class="sig-default">True</span>,
266 <span class="sig-arg">compact</span>=<span class="sig-default">True</span>)</span>
267 <br /><em class="fname">(Constructor)</em>
269 </td><td align="right" valign="top"
274 x.__init__(...) initializes x; see help(type(x)) for signature
282 <a name="__new__"></a>
284 <table class="details" border="1" cellpadding="3"
285 cellspacing="0" width="100%" bgcolor="white">
287 <table width="100%" cellpadding="0" cellspacing="0" border="0">
288 <tr valign="top"><td>
289 <h3 class="epydoc"><span class="sig"><span class="sig-name">__new__</span>(<span class="sig-arg">T</span>,
290 <span class="sig-arg">S</span>,
291 <span class="sig-arg">...</span>)</span>
293 </td><td align="right" valign="top"
300 <dt>Returns: a new object with type S, a subtype of T</dt>
308 <!-- ==================== NAVIGATION BAR ==================== -->
309 <table class="navbar" border="0" width="100%" cellpadding="0"
310 bgcolor="#a0c0ff" cellspacing="0">
313 <th> <a
314 href="lxml-module.html">Home</a> </th>
317 <th> <a
318 href="module-tree.html">Trees</a> </th>
321 <th> <a
322 href="identifier-index.html">Indices</a> </th>
325 <th> <a
326 href="help.html">Help</a> </th>
328 <!-- Project homepage -->
329 <th class="navbar" align="right" width="100%">
330 <table border="0" cellpadding="0" cellspacing="0">
331 <tr><th class="navbar" align="center"
332 ><a class="navbar" target="_top" href="/">lxml API</a></th>
336 <table border="0" cellpadding="0" cellspacing="0" width="100%">
338 <td align="left" class="footer">
339 Generated by Epydoc 3.0.1 on Tue Jul 31 10:14:18 2012
341 <td align="right" class="footer">
342 <a target="mainFrame" href="http://epydoc.sourceforge.net"
343 >http://epydoc.sourceforge.net</a>
348 <script type="text/javascript">
350 // Private objects are initially displayed (because if
351 // javascript is turned off then we want them to be
352 // visible); but by default, we want to hide them. So hide
353 // them unless we have a cookie that says to show them.