Imported Upstream version 4.5.2
[platform/upstream/python-lxml.git] / doc / html / api / lxml.etree.HTMLParser-class.html
1 <?xml version="1.0" encoding="ascii"?>
2 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
3           "DTD/xhtml1-transitional.dtd">
4 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
5 <head>
6   <title>lxml.etree.HTMLParser</title>
7   <link rel="stylesheet" href="epydoc.css" type="text/css" />
8   <script type="text/javascript" src="epydoc.js"></script>
9 </head>
10
11 <body bgcolor="white" text="black" link="blue" vlink="#204080"
12       alink="#204080">
13 <!-- ==================== NAVIGATION BAR ==================== -->
14 <table class="navbar" border="0" width="100%" cellpadding="0"
15        bgcolor="#a0c0ff" cellspacing="0">
16   <tr valign="middle">
17   <!-- Home link -->
18       <th>&nbsp;&nbsp;&nbsp;<a
19         href="lxml-module.html">Home</a>&nbsp;&nbsp;&nbsp;</th>
20
21   <!-- Tree link -->
22       <th>&nbsp;&nbsp;&nbsp;<a
23         href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
24
25   <!-- Index link -->
26       <th>&nbsp;&nbsp;&nbsp;<a
27         href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
28
29   <!-- Help link -->
30       <th>&nbsp;&nbsp;&nbsp;<a
31         href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
32
33   <!-- Project homepage -->
34       <th class="navbar" align="right" width="100%">
35         <table border="0" cellpadding="0" cellspacing="0">
36           <tr><th class="navbar" align="center"
37             ><a class="navbar" target="_top" href="/">lxml API</a></th>
38           </tr></table></th>
39   </tr>
40 </table>
41 <table width="100%" cellpadding="0" cellspacing="0">
42   <tr valign="top">
43     <td width="100%">
44       <span class="breadcrumbs">
45         <a href="lxml-module.html">Package&nbsp;lxml</a> ::
46         <a href="lxml.etree-module.html">Module&nbsp;etree</a> ::
47         Class&nbsp;HTMLParser
48       </span>
49     </td>
50     <td>
51       <table cellpadding="0" cellspacing="0">
52         <!-- hide/show private -->
53         <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
54     onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>
55         <tr><td align="right"><span class="options"
56             >[<a href="frames.html" target="_top">frames</a
57             >]&nbsp;|&nbsp;<a href="lxml.etree.HTMLParser-class.html"
58             target="_top">no&nbsp;frames</a>]</span></td></tr>
59       </table>
60     </td>
61   </tr>
62 </table>
63 <!-- ==================== CLASS DESCRIPTION ==================== -->
64 <h1 class="epydoc">Class HTMLParser</h1><p class="nomargin-top"></p>
65 <pre class="base-tree">
66     object --+        
67              |        
68 ??._BaseParser --+    
69                  |    
70        <a href="lxml.etree._FeedParser-class.html" onclick="show_private();">_FeedParser</a> --+
71                      |
72                     <strong class="uidshort">HTMLParser</strong>
73 </pre>
74
75 <dl><dt>Known Subclasses:</dt>
76 <dd>
77       <ul class="subclass-list">
78 <li><a href="lxml.html.HTMLParser-class.html">html.HTMLParser</a></li><li class="private">, <a href="lxml.etree.HTMLPullParser-class.html" onclick="show_private();">HTMLPullParser</a></li>  </ul>
79 </dd></dl>
80
81 <hr />
82 <p>HTMLParser(self, encoding=None, remove_blank_text=False, remove_comments=False, remove_pis=False, strip_cdata=True, no_network=True, target=None, schema: XMLSchema =None, recover=True, compact=True, default_doctype=True, collect_ids=True, huge_tree=False)</p>
83 <p>The HTML parser.</p>
84 <p>This parser allows reading HTML into a normal XML tree.  By
85 default, it can read broken (non well-formed) HTML, depending on
86 the capabilities of libxml2.  Use the 'recover' option to switch
87 this off.</p>
88 <p>Available boolean keyword arguments:</p>
89 <ul class="rst-simple">
90 <li>recover            - try hard to parse through broken HTML (default: True)</li>
91 <li>no_network         - prevent network access for related files (default: True)</li>
92 <li>remove_blank_text  - discard empty text nodes that are ignorable (i.e. not actual text content)</li>
93 <li>remove_comments    - discard comments</li>
94 <li>remove_pis         - discard processing instructions</li>
95 <li>strip_cdata        - replace CDATA sections by normal text content (default: True)</li>
96 <li>compact            - save memory for short text content (default: True)</li>
97 <li>default_doctype    - add a default doctype even if it is not found in the HTML (default: True)</li>
98 <li>collect_ids        - use a hash table of XML IDs for fast access (default: True)</li>
99 <li>huge_tree          - disable security restrictions and support very deep trees
100 and very long text content (only affects libxml2 2.7+)</li>
104 </ul>
105 <p>Other keyword arguments:</p>
106 <ul class="rst-simple">
107 <li>encoding - override the document encoding</li>
108 <li>target   - a parser target object that will receive the parse events</li>
109 <li>schema   - an XMLSchema to validate against</li>
110 </ul>
111 <p>Note that you should avoid sharing parsers between threads for performance
112 reasons.</p>
113
114 <!-- ==================== INSTANCE METHODS ==================== -->
115 <a name="section-InstanceMethods"></a>
116 <table class="summary" border="1" cellpadding="3"
117        cellspacing="0" width="100%" bgcolor="white">
118 <tr bgcolor="#70b0f0" class="table-header">
119   <td colspan="2" class="table-header">
120     <table border="0" cellpadding="0" cellspacing="0" width="100%">
121       <tr valign="top">
122         <td align="left"><span class="table-header">Instance Methods</span></td>
123         <td align="right" valign="top"
124          ><span class="options">[<a href="#section-InstanceMethods"
125          class="privatelink" onclick="toggle_private();"
126          >hide private</a>]</span></td>
127       </tr>
128     </table>
129   </td>
130 </tr>
131 <tr>
132     <td width="15%" align="right" valign="top" class="summary">
133       <span class="summary-type">&nbsp;</span>
134     </td><td class="summary">
135       <table width="100%" cellpadding="0" cellspacing="0" border="0">
136         <tr>
137           <td><span class="summary-sig"><a href="lxml.etree.HTMLParser-class.html#__init__" class="summary-sig-name">__init__</a>(<span class="summary-sig-arg">self</span>,
138         <span class="summary-sig-arg">encoding</span>=<span class="summary-sig-default">None</span>,
139         <span class="summary-sig-arg">remove_blank_text</span>=<span class="summary-sig-default">False</span>,
140         <span class="summary-sig-arg">remove_comments</span>=<span class="summary-sig-default">False</span>,
141         <span class="summary-sig-arg">remove_pis</span>=<span class="summary-sig-default">False</span>,
142         <span class="summary-sig-arg">strip_cdata</span>=<span class="summary-sig-default">True</span>,
143         <span class="summary-sig-arg">no_network</span>=<span class="summary-sig-default">True</span>,
144         <span class="summary-sig-arg">target</span>=<span class="summary-sig-default">None</span>,
145         <span class="summary-sig-arg">schema: XMLSchema</span>=<span class="summary-sig-default">None</span>,
146         <span class="summary-sig-arg">recover</span>=<span class="summary-sig-default">True</span>,
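147         <span class="summary-sig-arg">compact</span>=<span class="summary-sig-default">True</span>, <span class="summary-sig-arg">default_doctype</span>=<span class="summary-sig-default">True</span>,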
148         <span class="summary-sig-arg">collect_ids</span>=<span class="summary-sig-default">True</span>,
149         <span class="summary-sig-arg">huge_tree</span>=<span class="summary-sig-default">False</span>)</span><br />
150       x.__init__(...) initializes x; see help(type(x)) for signature</td>
151           <td align="right" valign="top">
152             
153             
154           </td>
155         </tr>
156       </table>
157       
158     </td>
159   </tr>
160 <tr>
161     <td width="15%" align="right" valign="top" class="summary">
162       <span class="summary-type">a new object with type S, a subtype of T</span>
163     </td><td class="summary">
164       <table width="100%" cellpadding="0" cellspacing="0" border="0">
165         <tr>
166           <td><span class="summary-sig"><a href="lxml.etree.HTMLParser-class.html#__new__" class="summary-sig-name">__new__</a>(<span class="summary-sig-arg">T</span>,
167         <span class="summary-sig-arg">S</span>,
168         <span class="summary-sig-arg">...</span>)</span></td>
169           <td align="right" valign="top">
170             
171             
172           </td>
173         </tr>
174       </table>
175       
176     </td>
177   </tr>
178   <tr>
179     <td colspan="2" class="summary">
180     <p class="indent-wrapped-lines"><b>Inherited from <code><a href="lxml.etree._FeedParser-class.html" onclick="show_private();">_FeedParser</a></code></b>:
181       <code><a href="lxml.etree._FeedParser-class.html#close">close</a></code>,
182       <code><a href="lxml.etree._FeedParser-class.html#feed">feed</a></code>
183       </p>
184     <p class="indent-wrapped-lines"><b>Inherited from <code><i>unreachable</i>._BaseParser</code></b>:
185       <code>copy</code>,
186       <code>makeelement</code>,
187       <code>setElementClassLookup</code>,
188       <code>set_element_class_lookup</code>
189       </p>
190     <p class="indent-wrapped-lines"><b>Inherited from <code>object</code></b>:
191       <code>__delattr__</code>,
192       <code>__format__</code>,
193       <code>__getattribute__</code>,
194       <code>__hash__</code>,
195       <code>__reduce__</code>,
196       <code>__reduce_ex__</code>,
197       <code>__repr__</code>,
198       <code>__setattr__</code>,
199       <code>__sizeof__</code>,
200       <code>__str__</code>,
201       <code>__subclasshook__</code>
202       </p>
203     </td>
204   </tr>
205 </table>
206 <!-- ==================== PROPERTIES ==================== -->
207 <a name="section-Properties"></a>
208 <table class="summary" border="1" cellpadding="3"
209        cellspacing="0" width="100%" bgcolor="white">
210 <tr bgcolor="#70b0f0" class="table-header">
211   <td colspan="2" class="table-header">
212     <table border="0" cellpadding="0" cellspacing="0" width="100%">
213       <tr valign="top">
214         <td align="left"><span class="table-header">Properties</span></td>
215         <td align="right" valign="top"
216          ><span class="options">[<a href="#section-Properties"
217          class="privatelink" onclick="toggle_private();"
218          >hide private</a>]</span></td>
219       </tr>
220     </table>
221   </td>
222 </tr>
223   <tr>
224     <td colspan="2" class="summary">
225     <p class="indent-wrapped-lines"><b>Inherited from <code><a href="lxml.etree._FeedParser-class.html" onclick="show_private();">_FeedParser</a></code></b>:
226       <code><a href="lxml.etree._FeedParser-class.html#feed_error_log">feed_error_log</a></code>
227       </p>
228     <p class="indent-wrapped-lines"><b>Inherited from <code><i>unreachable</i>._BaseParser</code></b>:
229       <code>error_log</code>,
230       <code>resolvers</code>,
231       <code>target</code>,
232       <code>version</code>
233       </p>
234     <p class="indent-wrapped-lines"><b>Inherited from <code>object</code></b>:
235       <code>__class__</code>
236       </p>
237     </td>
238   </tr>
239 </table>
240 <!-- ==================== METHOD DETAILS ==================== -->
241 <a name="section-MethodDetails"></a>
242 <table class="details" border="1" cellpadding="3"
243        cellspacing="0" width="100%" bgcolor="white">
244 <tr bgcolor="#70b0f0" class="table-header">
245   <td colspan="2" class="table-header">
246     <table border="0" cellpadding="0" cellspacing="0" width="100%">
247       <tr valign="top">
248         <td align="left"><span class="table-header">Method Details</span></td>
249         <td align="right" valign="top"
250          ><span class="options">[<a href="#section-MethodDetails"
251          class="privatelink" onclick="toggle_private();"
252          >hide private</a>]</span></td>
253       </tr>
254     </table>
255   </td>
256 </tr>
257 </table>
258 <a name="__init__"></a>
259 <div>
260 <table class="details" border="1" cellpadding="3"
261        cellspacing="0" width="100%" bgcolor="white">
262 <tr><td>
263   <table width="100%" cellpadding="0" cellspacing="0" border="0">
264   <tr valign="top"><td>
265   <h3 class="epydoc"><span class="sig"><span class="sig-name">__init__</span>(<span class="sig-arg">self</span>,
266         <span class="sig-arg">encoding</span>=<span class="sig-default">None</span>,
267         <span class="sig-arg">remove_blank_text</span>=<span class="sig-default">False</span>,
268         <span class="sig-arg">remove_comments</span>=<span class="sig-default">False</span>,
269         <span class="sig-arg">remove_pis</span>=<span class="sig-default">False</span>,
270         <span class="sig-arg">strip_cdata</span>=<span class="sig-default">True</span>,
271         <span class="sig-arg">no_network</span>=<span class="sig-default">True</span>,
272         <span class="sig-arg">target</span>=<span class="sig-default">None</span>,
273         <span class="sig-arg">schema: XMLSchema</span>=<span class="sig-default">None</span>,
274         <span class="sig-arg">recover</span>=<span class="sig-default">True</span>,
275         <span class="sig-arg">compact</span>=<span class="sig-default">True</span>, <span class="sig-arg">default_doctype</span>=<span class="sig-default">True</span>,
276         <span class="sig-arg">collect_ids</span>=<span class="sig-default">True</span>,
277         <span class="sig-arg">huge_tree</span>=<span class="sig-default">False</span>)</span>
278     <br /><em class="fname">(Constructor)</em>
279   </h3>
280   </td><td align="right" valign="top"
281     >&nbsp;
282     </td>
283   </tr></table>
284   
285   x.__init__(...) initializes x; see help(type(x)) for signature
286   <dl class="fields">
287     <dt>Overrides:
288         object.__init__
289     </dt>
290   </dl>
291 </td></tr></table>
292 </div>
293 <a name="__new__"></a>
294 <div>
295 <table class="details" border="1" cellpadding="3"
296        cellspacing="0" width="100%" bgcolor="white">
297 <tr><td>
298   <table width="100%" cellpadding="0" cellspacing="0" border="0">
299   <tr valign="top"><td>
300   <h3 class="epydoc"><span class="sig"><span class="sig-name">__new__</span>(<span class="sig-arg">T</span>,
301         <span class="sig-arg">S</span>,
302         <span class="sig-arg">...</span>)</span>
303   </h3>
304   </td><td align="right" valign="top"
305     >&nbsp;
306     </td>
307   </tr></table>
308   
309   
310   <dl class="fields">
311     <dt>Returns: a new object with type S, a subtype of T</dt>
312     <dt>Overrides:
313         object.__new__
314     </dt>
315   </dl>
316 </td></tr></table>
317 </div>
318 <br />
319 <!-- ==================== NAVIGATION BAR ==================== -->
320 <table class="navbar" border="0" width="100%" cellpadding="0"
321        bgcolor="#a0c0ff" cellspacing="0">
322   <tr valign="middle">
323   <!-- Home link -->
324       <th>&nbsp;&nbsp;&nbsp;<a
325         href="lxml-module.html">Home</a>&nbsp;&nbsp;&nbsp;</th>
326
327   <!-- Tree link -->
328       <th>&nbsp;&nbsp;&nbsp;<a
329         href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
330
331   <!-- Index link -->
332       <th>&nbsp;&nbsp;&nbsp;<a
333         href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
334
335   <!-- Help link -->
336       <th>&nbsp;&nbsp;&nbsp;<a
337         href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
338
339   <!-- Project homepage -->
340       <th class="navbar" align="right" width="100%">
341         <table border="0" cellpadding="0" cellspacing="0">
342           <tr><th class="navbar" align="center"
343             ><a class="navbar" target="_top" href="/">lxml API</a></th>
344           </tr></table></th>
345   </tr>
346 </table>
347 <table border="0" cellpadding="0" cellspacing="0" width="100%">
348   <tr>
349     <td align="left" class="footer">
350     Generated by Epydoc 3.0.1
351     on Thu Jul  9 18:29:53 2020
352     </td>
353     <td align="right" class="footer">
354       <a target="mainFrame" href="http://epydoc.sourceforge.net"
355         >http://epydoc.sourceforge.net</a>
356     </td>
357   </tr>
358 </table>
359
360 <script type="text/javascript">
361   <!--
362   // Private objects are initially displayed (because if
363   // javascript is turned off then we want them to be
364   // visible); but by default, we want to hide them.  So hide
365   // them unless we have a cookie that says to show them.
366   checkCookie();
367   // -->
368 </script>
369 </body>
370 </html>