Imported Upstream version 2.3.5
[platform/upstream/python-lxml.git] / doc / html / api / lxml.etree.HTMLParser-class.html
1 <?xml version="1.0" encoding="ascii"?>
2 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
3           "DTD/xhtml1-transitional.dtd">
4 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
5 <head>
6   <title>lxml.etree.HTMLParser</title>
7   <link rel="stylesheet" href="epydoc.css" type="text/css" />
8   <script type="text/javascript" src="epydoc.js"></script>
9 </head>
10
11 <body bgcolor="white" text="black" link="blue" vlink="#204080"
12       alink="#204080">
13 <!-- ==================== NAVIGATION BAR ==================== -->
14 <table class="navbar" border="0" width="100%" cellpadding="0"
15        bgcolor="#a0c0ff" cellspacing="0">
16   <tr valign="middle">
17   <!-- Home link -->
18       <th>&nbsp;&nbsp;&nbsp;<a
19         href="lxml-module.html">Home</a>&nbsp;&nbsp;&nbsp;</th>
20
21   <!-- Tree link -->
22       <th>&nbsp;&nbsp;&nbsp;<a
23         href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
24
25   <!-- Index link -->
26       <th>&nbsp;&nbsp;&nbsp;<a
27         href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
28
29   <!-- Help link -->
30       <th>&nbsp;&nbsp;&nbsp;<a
31         href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
32
33   <!-- Project homepage -->
34       <th class="navbar" align="right" width="100%">
35         <table border="0" cellpadding="0" cellspacing="0">
36           <tr><th class="navbar" align="center"
37             ><a class="navbar" target="_top" href="/">lxml API</a></th>
38           </tr></table></th>
39   </tr>
40 </table>
41 <table width="100%" cellpadding="0" cellspacing="0">
42   <tr valign="top">
43     <td width="100%">
44       <span class="breadcrumbs">
45         <a href="lxml-module.html">Package&nbsp;lxml</a> ::
46         <a href="lxml.etree-module.html">Module&nbsp;etree</a> ::
47         Class&nbsp;HTMLParser
48       </span>
49     </td>
50     <td>
51       <table cellpadding="0" cellspacing="0">
52         <!-- hide/show private -->
53         <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
54     onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>
55         <tr><td align="right"><span class="options"
56             >[<a href="frames.html" target="_top">frames</a
57             >]&nbsp;|&nbsp;[<a href="lxml.etree.HTMLParser-class.html"
58             target="_top">no&nbsp;frames</a>]</span></td></tr>
59       </table>
60     </td>
61   </tr>
62 </table>
63 <!-- ==================== CLASS DESCRIPTION ==================== -->
64 <h1 class="epydoc">Class HTMLParser</h1><p class="nomargin-top"></p>
65 <pre class="base-tree">
66  object --+        
67           |        
68 <a href="lxml.etree._BaseParser-class.html" onclick="show_private();">_BaseParser</a> --+    
69               |    
70     <a href="lxml.etree._FeedParser-class.html" onclick="show_private();">_FeedParser</a> --+
71                   |
72                  <strong class="uidshort">HTMLParser</strong>
73 </pre>
74
75 <dl><dt>Known Subclasses:</dt>
76 <dd>
77       <ul class="subclass-list">
78 <li class="private"><a href="lxml.html.HTMLParser-class.html" onclick="show_private();">html.HTMLParser</a></li>  </ul>
79 </dd></dl>
80
81 <hr />
82 <p>HTMLParser(self, encoding=None, remove_blank_text=False,                    remove_comments=False, remove_pis=False, strip_cdata=True,                    no_network=True, target=None, XMLSchema schema=None,                    recover=True, compact=True)</p>
83 <p>The HTML parser.</p>
84 <p>This parser allows reading HTML into a normal XML tree.  By
85 default, it can read broken (non well-formed) HTML, depending on
86 the capabilities of libxml2.  Use the 'recover' option to switch
87 this off.</p>
88 <p>Available boolean keyword arguments:</p>
89 <ul class="rst-simple">
90 <li>recover            - try hard to parse through broken HTML (default: True)</li>
91 <li>no_network         - prevent network access for related files (default: True)</li>
92 <li>remove_blank_text  - discard empty text nodes</li>
93 <li>remove_comments    - discard comments</li>
94 <li>remove_pis         - discard processing instructions</li>
95 <li>strip_cdata        - replace CDATA sections by normal text content (default: True)</li>
96 <li>compact            - save memory for short text content (default: True)</li>
97 </ul>
98 <p>Other keyword arguments:</p>
99 <ul class="rst-simple">
100 <li>encoding - override the document encoding</li>
101 <li>target   - a parser target object that will receive the parse events</li>
102 <li>schema   - an XMLSchema to validate against</li>
103 </ul>
104 <p>Note that you should avoid sharing parsers between threads for performance
105 reasons.</p>
106
107 <!-- ==================== INSTANCE METHODS ==================== -->
108 <a name="section-InstanceMethods"></a>
109 <table class="summary" border="1" cellpadding="3"
110        cellspacing="0" width="100%" bgcolor="white">
111 <tr bgcolor="#70b0f0" class="table-header">
112   <td colspan="2" class="table-header">
113     <table border="0" cellpadding="0" cellspacing="0" width="100%">
114       <tr valign="top">
115         <td align="left"><span class="table-header">Instance Methods</span></td>
116         <td align="right" valign="top"
117          ><span class="options">[<a href="#section-InstanceMethods"
118          class="privatelink" onclick="toggle_private();"
119          >hide private</a>]</span></td>
120       </tr>
121     </table>
122   </td>
123 </tr>
124 <tr>
125     <td width="15%" align="right" valign="top" class="summary">
126       <span class="summary-type">&nbsp;</span>
127     </td><td class="summary">
128       <table width="100%" cellpadding="0" cellspacing="0" border="0">
129         <tr>
130           <td><span class="summary-sig"><a href="lxml.etree.HTMLParser-class.html#__init__" class="summary-sig-name">__init__</a>(<span class="summary-sig-arg">self</span>,
131         <span class="summary-sig-arg">encoding</span>=<span class="summary-sig-default">None</span>,
132         <span class="summary-sig-arg">remove_blank_text</span>=<span class="summary-sig-default">False</span>,
133         <span class="summary-sig-arg">remove_comments</span>=<span class="summary-sig-default">False</span>,
134         <span class="summary-sig-arg">remove_pis</span>=<span class="summary-sig-default">False</span>,
135         <span class="summary-sig-arg">strip_cdata</span>=<span class="summary-sig-default">True</span>,
136         <span class="summary-sig-arg">no_network</span>=<span class="summary-sig-default">True</span>,
137         <span class="summary-sig-arg">target</span>=<span class="summary-sig-default">None</span>,
138         <span class="summary-sig-arg">XMLSchema schema</span>=<span class="summary-sig-default">None</span>,
139         <span class="summary-sig-arg">recover</span>=<span class="summary-sig-default">True</span>,
140         <span class="summary-sig-arg">compact</span>=<span class="summary-sig-default">True</span>)</span><br />
141       x.__init__(...) initializes x; see help(type(x)) for signature</td>
142           <td align="right" valign="top">
143             
144             
145           </td>
146         </tr>
147       </table>
148       
149     </td>
150   </tr>
151 <tr>
152     <td width="15%" align="right" valign="top" class="summary">
153       <span class="summary-type">a new object with type S, a subtype of T</span>
154     </td><td class="summary">
155       <table width="100%" cellpadding="0" cellspacing="0" border="0">
156         <tr>
157           <td><span class="summary-sig"><a href="lxml.etree.HTMLParser-class.html#__new__" class="summary-sig-name">__new__</a>(<span class="summary-sig-arg">T</span>,
158         <span class="summary-sig-arg">S</span>,
159         <span class="summary-sig-arg">...</span>)</span></td>
160           <td align="right" valign="top">
161             
162             
163           </td>
164         </tr>
165       </table>
166       
167     </td>
168   </tr>
169   <tr>
170     <td colspan="2" class="summary">
171     <p class="indent-wrapped-lines"><b>Inherited from <code><a href="lxml.etree._FeedParser-class.html" onclick="show_private();">_FeedParser</a></code></b>:
172       <code><a href="lxml.etree._FeedParser-class.html#close">close</a></code>,
173       <code><a href="lxml.etree._FeedParser-class.html#feed">feed</a></code>
174       </p>
175     <p class="indent-wrapped-lines"><b>Inherited from <code><a href="lxml.etree._BaseParser-class.html" onclick="show_private();">_BaseParser</a></code></b>:
176       <code><a href="lxml.etree._BaseParser-class.html#copy">copy</a></code>,
177       <code><a href="lxml.etree._BaseParser-class.html#makeelement">makeelement</a></code>,
178       <code><a href="lxml.etree._BaseParser-class.html#setElementClassLookup">setElementClassLookup</a></code>,
179       <code><a href="lxml.etree._BaseParser-class.html#set_element_class_lookup">set_element_class_lookup</a></code>
180       </p>
181     <p class="indent-wrapped-lines"><b>Inherited from <code>object</code></b>:
182       <code>__delattr__</code>,
183       <code>__format__</code>,
184       <code>__getattribute__</code>,
185       <code>__hash__</code>,
186       <code>__reduce__</code>,
187       <code>__reduce_ex__</code>,
188       <code>__repr__</code>,
189       <code>__setattr__</code>,
190       <code>__sizeof__</code>,
191       <code>__str__</code>,
192       <code>__subclasshook__</code>
193       </p>
194     </td>
195   </tr>
196 </table>
197 <!-- ==================== PROPERTIES ==================== -->
198 <a name="section-Properties"></a>
199 <table class="summary" border="1" cellpadding="3"
200        cellspacing="0" width="100%" bgcolor="white">
201 <tr bgcolor="#70b0f0" class="table-header">
202   <td colspan="2" class="table-header">
203     <table border="0" cellpadding="0" cellspacing="0" width="100%">
204       <tr valign="top">
205         <td align="left"><span class="table-header">Properties</span></td>
206         <td align="right" valign="top"
207          ><span class="options">[<a href="#section-Properties"
208          class="privatelink" onclick="toggle_private();"
209          >hide private</a>]</span></td>
210       </tr>
211     </table>
212   </td>
213 </tr>
214   <tr>
215     <td colspan="2" class="summary">
216     <p class="indent-wrapped-lines"><b>Inherited from <code><a href="lxml.etree._FeedParser-class.html" onclick="show_private();">_FeedParser</a></code></b>:
217       <code><a href="lxml.etree._FeedParser-class.html#feed_error_log">feed_error_log</a></code>
218       </p>
219     <p class="indent-wrapped-lines"><b>Inherited from <code><a href="lxml.etree._BaseParser-class.html" onclick="show_private();">_BaseParser</a></code></b>:
220       <code><a href="lxml.etree._BaseParser-class.html#error_log">error_log</a></code>,
221       <code><a href="lxml.etree._BaseParser-class.html#resolvers">resolvers</a></code>,
222       <code><a href="lxml.etree._BaseParser-class.html#target">target</a></code>,
223       <code><a href="lxml.etree._BaseParser-class.html#version">version</a></code>
224       </p>
225     <p class="indent-wrapped-lines"><b>Inherited from <code>object</code></b>:
226       <code>__class__</code>
227       </p>
228     </td>
229   </tr>
230 </table>
231 <!-- ==================== METHOD DETAILS ==================== -->
232 <a name="section-MethodDetails"></a>
233 <table class="details" border="1" cellpadding="3"
234        cellspacing="0" width="100%" bgcolor="white">
235 <tr bgcolor="#70b0f0" class="table-header">
236   <td colspan="2" class="table-header">
237     <table border="0" cellpadding="0" cellspacing="0" width="100%">
238       <tr valign="top">
239         <td align="left"><span class="table-header">Method Details</span></td>
240         <td align="right" valign="top"
241          ><span class="options">[<a href="#section-MethodDetails"
242          class="privatelink" onclick="toggle_private();"
243          >hide private</a>]</span></td>
244       </tr>
245     </table>
246   </td>
247 </tr>
248 </table>
249 <a name="__init__"></a>
250 <div>
251 <table class="details" border="1" cellpadding="3"
252        cellspacing="0" width="100%" bgcolor="white">
253 <tr><td>
254   <table width="100%" cellpadding="0" cellspacing="0" border="0">
255   <tr valign="top"><td>
256   <h3 class="epydoc"><span class="sig"><span class="sig-name">__init__</span>(<span class="sig-arg">self</span>,
257         <span class="sig-arg">encoding</span>=<span class="sig-default">None</span>,
258         <span class="sig-arg">remove_blank_text</span>=<span class="sig-default">False</span>,
259         <span class="sig-arg">remove_comments</span>=<span class="sig-default">False</span>,
260         <span class="sig-arg">remove_pis</span>=<span class="sig-default">False</span>,
261         <span class="sig-arg">strip_cdata</span>=<span class="sig-default">True</span>,
262         <span class="sig-arg">no_network</span>=<span class="sig-default">True</span>,
263         <span class="sig-arg">target</span>=<span class="sig-default">None</span>,
264         <span class="sig-arg">XMLSchema schema</span>=<span class="sig-default">None</span>,
265         <span class="sig-arg">recover</span>=<span class="sig-default">True</span>,
266         <span class="sig-arg">compact</span>=<span class="sig-default">True</span>)</span>
267     <br /><em class="fname">(Constructor)</em>
268   </h3>
269   </td><td align="right" valign="top"
270     >&nbsp;
271     </td>
272   </tr></table>
273   
274   x.__init__(...) initializes x; see help(type(x)) for signature
275   <dl class="fields">
276     <dt>Overrides:
277         object.__init__
278     </dt>
279   </dl>
280 </td></tr></table>
281 </div>
282 <a name="__new__"></a>
283 <div>
284 <table class="details" border="1" cellpadding="3"
285        cellspacing="0" width="100%" bgcolor="white">
286 <tr><td>
287   <table width="100%" cellpadding="0" cellspacing="0" border="0">
288   <tr valign="top"><td>
289   <h3 class="epydoc"><span class="sig"><span class="sig-name">__new__</span>(<span class="sig-arg">T</span>,
290         <span class="sig-arg">S</span>,
291         <span class="sig-arg">...</span>)</span>
292   </h3>
293   </td><td align="right" valign="top"
294     >&nbsp;
295     </td>
296   </tr></table>
297   
298   
299   <dl class="fields">
300     <dt>Returns: a new object with type S, a subtype of T</dt>
301     <dt>Overrides:
302         object.__new__
303     </dt>
304   </dl>
305 </td></tr></table>
306 </div>
307 <br />
308 <!-- ==================== NAVIGATION BAR ==================== -->
309 <table class="navbar" border="0" width="100%" cellpadding="0"
310        bgcolor="#a0c0ff" cellspacing="0">
311   <tr valign="middle">
312   <!-- Home link -->
313       <th>&nbsp;&nbsp;&nbsp;<a
314         href="lxml-module.html">Home</a>&nbsp;&nbsp;&nbsp;</th>
315
316   <!-- Tree link -->
317       <th>&nbsp;&nbsp;&nbsp;<a
318         href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
319
320   <!-- Index link -->
321       <th>&nbsp;&nbsp;&nbsp;<a
322         href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
323
324   <!-- Help link -->
325       <th>&nbsp;&nbsp;&nbsp;<a
326         href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
327
328   <!-- Project homepage -->
329       <th class="navbar" align="right" width="100%">
330         <table border="0" cellpadding="0" cellspacing="0">
331           <tr><th class="navbar" align="center"
332             ><a class="navbar" target="_top" href="/">lxml API</a></th>
333           </tr></table></th>
334   </tr>
335 </table>
336 <table border="0" cellpadding="0" cellspacing="0" width="100%">
337   <tr>
338     <td align="left" class="footer">
339     Generated by Epydoc 3.0.1 on Tue Jul 31 10:14:18 2012
340     </td>
341     <td align="right" class="footer">
342       <a target="mainFrame" href="http://epydoc.sourceforge.net"
343         >http://epydoc.sourceforge.net</a>
344     </td>
345   </tr>
346 </table>
347
348 <script type="text/javascript">
349   <!--
350   // Private objects are initially displayed (because if
351   // javascript is turned off then we want them to be
352   // visible); but by default, we want to hide them.  So hide
353   // them unless we have a cookie that says to show them.
354   checkCookie();
355   // -->
356 </script>
357 </body>
358 </html>