Imported Upstream version 2.3.5
[platform/upstream/python-lxml.git] / doc / html / api / lxml.html.html5parser-module.html
1 <?xml version="1.0" encoding="ascii"?>
2 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
3           "DTD/xhtml1-transitional.dtd">
4 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
5 <head>
6   <title>lxml.html.html5parser</title>
7   <link rel="stylesheet" href="epydoc.css" type="text/css" />
8   <script type="text/javascript" src="epydoc.js"></script>
9 </head>
10
11 <body bgcolor="white" text="black" link="blue" vlink="#204080"
12       alink="#204080">
13 <!-- ==================== NAVIGATION BAR ==================== -->
14 <table class="navbar" border="0" width="100%" cellpadding="0"
15        bgcolor="#a0c0ff" cellspacing="0">
16   <tr valign="middle">
17   <!-- Home link -->
18       <th>&nbsp;&nbsp;&nbsp;<a
19         href="lxml-module.html">Home</a>&nbsp;&nbsp;&nbsp;</th>
20
21   <!-- Tree link -->
22       <th>&nbsp;&nbsp;&nbsp;<a
23         href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
24
25   <!-- Index link -->
26       <th>&nbsp;&nbsp;&nbsp;<a
27         href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
28
29   <!-- Help link -->
30       <th>&nbsp;&nbsp;&nbsp;<a
31         href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
32
33   <!-- Project homepage -->
34       <th class="navbar" align="right" width="100%">
35         <table border="0" cellpadding="0" cellspacing="0">
36           <tr><th class="navbar" align="center"
37             ><a class="navbar" target="_top" href="/">lxml API</a></th>
38           </tr></table></th>
39   </tr>
40 </table>
41 <table width="100%" cellpadding="0" cellspacing="0">
42   <tr valign="top">
43     <td width="100%">
44       <span class="breadcrumbs">
45         <a href="lxml-module.html">Package&nbsp;lxml</a> ::
46         <a href="lxml.html-module.html">Package&nbsp;html</a> ::
47         Module&nbsp;html5parser
48       </span>
49     </td>
50     <td>
51       <table cellpadding="0" cellspacing="0">
52         <!-- hide/show private -->
53         <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
54     onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>
55         <tr><td align="right"><span class="options"
56             >[<a href="frames.html" target="_top">frames</a
57             >]&nbsp;|&nbsp;[<a href="lxml.html.html5parser-module.html"
58             target="_top">no&nbsp;frames</a>]</span></td></tr>
59       </table>
60     </td>
61   </tr>
62 </table>
63 <!-- ==================== MODULE DESCRIPTION ==================== -->
64 <h1 class="epydoc">Module html5parser</h1><p class="nomargin-top"><span class="codelink"><a href="lxml.html.html5parser-pysrc.html">source&nbsp;code</a></span></p>
65 An interface to html5lib that mimics the lxml.html interface.
66
67 <!-- ==================== CLASSES ==================== -->
68 <a name="section-Classes"></a>
69 <table class="summary" border="1" cellpadding="3"
70        cellspacing="0" width="100%" bgcolor="white">
71 <tr bgcolor="#70b0f0" class="table-header">
72   <td colspan="2" class="table-header">
73     <table border="0" cellpadding="0" cellspacing="0" width="100%">
74       <tr valign="top">
75         <td align="left"><span class="table-header">Classes</span></td>
76         <td align="right" valign="top"
77          ><span class="options">[<a href="#section-Classes"
78          class="privatelink" onclick="toggle_private();"
79          >hide private</a>]</span></td>
80       </tr>
81     </table>
82   </td>
83 </tr>
84 <tr>
85     <td width="15%" align="right" valign="top" class="summary">
86       <span class="summary-type">&nbsp;</span>
87     </td><td class="summary">
88         <a href="lxml.html.html5parser.HTMLParser-class.html" class="summary-name">HTMLParser</a><br />
89       An html5lib HTML parser with lxml as tree.
90     </td>
91   </tr>
92 <tr>
93     <td width="15%" align="right" valign="top" class="summary">
94       <span class="summary-type">&nbsp;</span>
95     </td><td class="summary">
96         <a href="lxml.html.html5parser.XHTMLParser-class.html" class="summary-name">XHTMLParser</a><br />
97       An html5lib XHTML parser with lxml as tree.
98     </td>
99   </tr>
100 </table>
101 <!-- ==================== FUNCTIONS ==================== -->
102 <a name="section-Functions"></a>
103 <table class="summary" border="1" cellpadding="3"
104        cellspacing="0" width="100%" bgcolor="white">
105 <tr bgcolor="#70b0f0" class="table-header">
106   <td colspan="2" class="table-header">
107     <table border="0" cellpadding="0" cellspacing="0" width="100%">
108       <tr valign="top">
109         <td align="left"><span class="table-header">Functions</span></td>
110         <td align="right" valign="top"
111          ><span class="options">[<a href="#section-Functions"
112          class="privatelink" onclick="toggle_private();"
113          >hide private</a>]</span></td>
114       </tr>
115     </table>
116   </td>
117 </tr>
118 <tr class="private">
119     <td width="15%" align="right" valign="top" class="summary">
120       <span class="summary-type">&nbsp;</span>
121     </td><td class="summary">
122       <table width="100%" cellpadding="0" cellspacing="0" border="0">
123         <tr>
124           <td><span class="summary-sig"><a name="_find_tag"></a><span class="summary-sig-name">_find_tag</span>(<span class="summary-sig-arg">tree</span>,
125         <span class="summary-sig-arg">tag</span>)</span></td>
126           <td align="right" valign="top">
127             <span class="codelink"><a href="lxml.html.html5parser-pysrc.html#_find_tag">source&nbsp;code</a></span>
128             
129           </td>
130         </tr>
131       </table>
132       
133     </td>
134   </tr>
135 <tr>
136     <td width="15%" align="right" valign="top" class="summary">
137       <span class="summary-type">&nbsp;</span>
138     </td><td class="summary">
139       <table width="100%" cellpadding="0" cellspacing="0" border="0">
140         <tr>
141           <td><span class="summary-sig"><a name="document_fromstring"></a><span class="summary-sig-name">document_fromstring</span>(<span class="summary-sig-arg">html</span>,
142         <span class="summary-sig-arg">guess_charset</span>=<span class="summary-sig-default">True</span>,
143         <span class="summary-sig-arg">parser</span>=<span class="summary-sig-default">None</span>)</span><br />
144       Parse a whole document from a string.</td>
145           <td align="right" valign="top">
146             <span class="codelink"><a href="lxml.html.html5parser-pysrc.html#document_fromstring">source&nbsp;code</a></span>
147             
148           </td>
149         </tr>
150       </table>
151       
152     </td>
153   </tr>
154 <tr>
155     <td width="15%" align="right" valign="top" class="summary">
156       <span class="summary-type">&nbsp;</span>
157     </td><td class="summary">
158       <table width="100%" cellpadding="0" cellspacing="0" border="0">
159         <tr>
160           <td><span class="summary-sig"><a href="lxml.html.html5parser-module.html#fragments_fromstring" class="summary-sig-name">fragments_fromstring</a>(<span class="summary-sig-arg">html</span>,
161         <span class="summary-sig-arg">no_leading_text</span>=<span class="summary-sig-default">False</span>,
162         <span class="summary-sig-arg">guess_charset</span>=<span class="summary-sig-default">False</span>,
163         <span class="summary-sig-arg">parser</span>=<span class="summary-sig-default">None</span>)</span><br />
164       Parses several HTML elements, returning a list of elements.</td>
165           <td align="right" valign="top">
166             <span class="codelink"><a href="lxml.html.html5parser-pysrc.html#fragments_fromstring">source&nbsp;code</a></span>
167             
168           </td>
169         </tr>
170       </table>
171       
172     </td>
173   </tr>
174 <tr>
175     <td width="15%" align="right" valign="top" class="summary">
176       <span class="summary-type">&nbsp;</span>
177     </td><td class="summary">
178       <table width="100%" cellpadding="0" cellspacing="0" border="0">
179         <tr>
180           <td><span class="summary-sig"><a href="lxml.html.html5parser-module.html#fragment_fromstring" class="summary-sig-name">fragment_fromstring</a>(<span class="summary-sig-arg">html</span>,
181         <span class="summary-sig-arg">create_parent</span>=<span class="summary-sig-default">False</span>,
182         <span class="summary-sig-arg">guess_charset</span>=<span class="summary-sig-default">False</span>,
183         <span class="summary-sig-arg">parser</span>=<span class="summary-sig-default">None</span>)</span><br />
184       Parses a single HTML element; it is an error if there is more than
185 one element, or if anything but whitespace precedes or follows the
186 element.</td>
187           <td align="right" valign="top">
188             <span class="codelink"><a href="lxml.html.html5parser-pysrc.html#fragment_fromstring">source&nbsp;code</a></span>
189             
190           </td>
191         </tr>
192       </table>
193       
194     </td>
195   </tr>
196 <tr>
197     <td width="15%" align="right" valign="top" class="summary">
198       <span class="summary-type">&nbsp;</span>
199     </td><td class="summary">
200       <table width="100%" cellpadding="0" cellspacing="0" border="0">
201         <tr>
202           <td><span class="summary-sig"><a href="lxml.html.html5parser-module.html#fromstring" class="summary-sig-name">fromstring</a>(<span class="summary-sig-arg">html</span>,
203         <span class="summary-sig-arg">guess_charset</span>=<span class="summary-sig-default">True</span>,
204         <span class="summary-sig-arg">parser</span>=<span class="summary-sig-default">None</span>)</span><br />
205       Parse the html, returning a single element/document.</td>
206           <td align="right" valign="top">
207             <span class="codelink"><a href="lxml.html.html5parser-pysrc.html#fromstring">source&nbsp;code</a></span>
208             
209           </td>
210         </tr>
211       </table>
212       
213     </td>
214   </tr>
215 <tr>
216     <td width="15%" align="right" valign="top" class="summary">
217       <span class="summary-type">&nbsp;</span>
218     </td><td class="summary">
219       <table width="100%" cellpadding="0" cellspacing="0" border="0">
220         <tr>
221           <td><span class="summary-sig"><a name="parse"></a><span class="summary-sig-name">parse</span>(<span class="summary-sig-arg">filename_url_or_file</span>,
222         <span class="summary-sig-arg">guess_charset</span>=<span class="summary-sig-default">True</span>,
223         <span class="summary-sig-arg">parser</span>=<span class="summary-sig-default">None</span>)</span><br />
224       Parse a filename, URL, or file-like object into an HTML document
225 tree.  Note: this returns a tree, not an element.  Use
226 <tt class="rst-docutils literal"><span class="pre">parse(...).getroot()</span></tt> to get the document root.</td>
227           <td align="right" valign="top">
228             <span class="codelink"><a href="lxml.html.html5parser-pysrc.html#parse">source&nbsp;code</a></span>
229             
230           </td>
231         </tr>
232       </table>
233       
234     </td>
235   </tr>
236 </table>
237 <!-- ==================== VARIABLES ==================== -->
238 <a name="section-Variables"></a>
239 <table class="summary" border="1" cellpadding="3"
240        cellspacing="0" width="100%" bgcolor="white">
241 <tr bgcolor="#70b0f0" class="table-header">
242   <td colspan="2" class="table-header">
243     <table border="0" cellpadding="0" cellspacing="0" width="100%">
244       <tr valign="top">
245         <td align="left"><span class="table-header">Variables</span></td>
246         <td align="right" valign="top"
247          ><span class="options">[<a href="#section-Variables"
248          class="privatelink" onclick="toggle_private();"
249          >hide private</a>]</span></td>
250       </tr>
251     </table>
252   </td>
253 </tr>
254 <tr>
255     <td width="15%" align="right" valign="top" class="summary">
256       <span class="summary-type">&nbsp;</span>
257     </td><td class="summary">
258         <a name="xhtml_parser"></a><span class="summary-name">xhtml_parser</span> = <code title="XHTMLParser()">XHTMLParser()</code>
259     </td>
260   </tr>
261 <tr>
262     <td width="15%" align="right" valign="top" class="summary">
263       <span class="summary-type">&nbsp;</span>
264     </td><td class="summary">
265         <a name="html_parser"></a><span class="summary-name">html_parser</span> = <code title="HTMLParser()">HTMLParser()</code>
266     </td>
267   </tr>
268 <tr>
269     <td width="15%" align="right" valign="top" class="summary">
270       <span class="summary-type">&nbsp;</span>
271     </td><td class="summary">
272         <a name="__package__"></a><span class="summary-name">__package__</span> = <code title="'lxml.html'"><code class="variable-quote">'</code><code class="variable-string">lxml.html</code><code class="variable-quote">'</code></code>
273     </td>
274   </tr>
275 </table>
276 <!-- ==================== FUNCTION DETAILS ==================== -->
277 <a name="section-FunctionDetails"></a>
278 <table class="details" border="1" cellpadding="3"
279        cellspacing="0" width="100%" bgcolor="white">
280 <tr bgcolor="#70b0f0" class="table-header">
281   <td colspan="2" class="table-header">
282     <table border="0" cellpadding="0" cellspacing="0" width="100%">
283       <tr valign="top">
284         <td align="left"><span class="table-header">Function Details</span></td>
285         <td align="right" valign="top"
286          ><span class="options">[<a href="#section-FunctionDetails"
287          class="privatelink" onclick="toggle_private();"
288          >hide private</a>]</span></td>
289       </tr>
290     </table>
291   </td>
292 </tr>
293 </table>
294 <a name="fragments_fromstring"></a>
295 <div>
296 <table class="details" border="1" cellpadding="3"
297        cellspacing="0" width="100%" bgcolor="white">
298 <tr><td>
299   <table width="100%" cellpadding="0" cellspacing="0" border="0">
300   <tr valign="top"><td>
301   <h3 class="epydoc"><span class="sig"><span class="sig-name">fragments_fromstring</span>(<span class="sig-arg">html</span>,
302         <span class="sig-arg">no_leading_text</span>=<span class="sig-default">False</span>,
303         <span class="sig-arg">guess_charset</span>=<span class="sig-default">False</span>,
304         <span class="sig-arg">parser</span>=<span class="sig-default">None</span>)</span>
305   </h3>
306   </td><td align="right" valign="top"
307     ><span class="codelink"><a href="lxml.html.html5parser-pysrc.html#fragments_fromstring">source&nbsp;code</a></span>&nbsp;
308     </td>
309   </tr></table>
310   
311   <p>Parses several HTML elements, returning a list of elements.</p>
312 <p>The first item in the list may be a string.  If no_leading_text is true,
313 then it will be an error if there is leading text, and it will always be
314 a list of only elements.</p>
315 <p>If <code class="link">guess_charset</code> is <code class="link">True</code> and the text was not unicode but a
316 bytestring, the <code class="link">chardet</code> library will perform charset guessing on the
317 string.</p>
318   <dl class="fields">
319   </dl>
320 </td></tr></table>
321 </div>
322 <a name="fragment_fromstring"></a>
323 <div>
324 <table class="details" border="1" cellpadding="3"
325        cellspacing="0" width="100%" bgcolor="white">
326 <tr><td>
327   <table width="100%" cellpadding="0" cellspacing="0" border="0">
328   <tr valign="top"><td>
329   <h3 class="epydoc"><span class="sig"><span class="sig-name">fragment_fromstring</span>(<span class="sig-arg">html</span>,
330         <span class="sig-arg">create_parent</span>=<span class="sig-default">False</span>,
331         <span class="sig-arg">guess_charset</span>=<span class="sig-default">False</span>,
332         <span class="sig-arg">parser</span>=<span class="sig-default">None</span>)</span>
333   </h3>
334   </td><td align="right" valign="top"
335     ><span class="codelink"><a href="lxml.html.html5parser-pysrc.html#fragment_fromstring">source&nbsp;code</a></span>&nbsp;
336     </td>
337   </tr></table>
338   
339   <p>Parses a single HTML element; it is an error if there is more than
340 one element, or if anything but whitespace precedes or follows the
341 element.</p>
342 <p>If create_parent is true (or is a tag name) then a parent node
343 will be created to encapsulate the HTML in a single element.  In
344 this case, leading or trailing text is allowed.</p>
345   <dl class="fields">
346   </dl>
347 </td></tr></table>
348 </div>
349 <a name="fromstring"></a>
350 <div>
351 <table class="details" border="1" cellpadding="3"
352        cellspacing="0" width="100%" bgcolor="white">
353 <tr><td>
354   <table width="100%" cellpadding="0" cellspacing="0" border="0">
355   <tr valign="top"><td>
356   <h3 class="epydoc"><span class="sig"><span class="sig-name">fromstring</span>(<span class="sig-arg">html</span>,
357         <span class="sig-arg">guess_charset</span>=<span class="sig-default">True</span>,
358         <span class="sig-arg">parser</span>=<span class="sig-default">None</span>)</span>
359   </h3>
360   </td><td align="right" valign="top"
361     ><span class="codelink"><a href="lxml.html.html5parser-pysrc.html#fromstring">source&nbsp;code</a></span>&nbsp;
362     </td>
363   </tr></table>
364   
365   <p>Parse the html, returning a single element/document.</p>
366 <p>This tries to minimally parse the chunk of text, without knowing if it
367 is a fragment or a document.</p>
368 <p>base_url will set the document's base_url attribute (and the tree's docinfo.URL)</p>
369   <dl class="fields">
370   </dl>
371 </td></tr></table>
372 </div>
373 <br />
374 <!-- ==================== NAVIGATION BAR ==================== -->
375 <table class="navbar" border="0" width="100%" cellpadding="0"
376        bgcolor="#a0c0ff" cellspacing="0">
377   <tr valign="middle">
378   <!-- Home link -->
379       <th>&nbsp;&nbsp;&nbsp;<a
380         href="lxml-module.html">Home</a>&nbsp;&nbsp;&nbsp;</th>
381
382   <!-- Tree link -->
383       <th>&nbsp;&nbsp;&nbsp;<a
384         href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
385
386   <!-- Index link -->
387       <th>&nbsp;&nbsp;&nbsp;<a
388         href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
389
390   <!-- Help link -->
391       <th>&nbsp;&nbsp;&nbsp;<a
392         href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
393
394   <!-- Project homepage -->
395       <th class="navbar" align="right" width="100%">
396         <table border="0" cellpadding="0" cellspacing="0">
397           <tr><th class="navbar" align="center"
398             ><a class="navbar" target="_top" href="/">lxml API</a></th>
399           </tr></table></th>
400   </tr>
401 </table>
402 <table border="0" cellpadding="0" cellspacing="0" width="100%">
403   <tr>
404     <td align="left" class="footer">
405     Generated by Epydoc 3.0.1 on Tue Jul 31 10:14:17 2012
406     </td>
407     <td align="right" class="footer">
408       <a target="mainFrame" href="http://epydoc.sourceforge.net"
409         >http://epydoc.sourceforge.net</a>
410     </td>
411   </tr>
412 </table>
413
414 <script type="text/javascript">
415   <!--
416   // Private objects are initially displayed (because if
417   // javascript is turned off then we want them to be
418   // visible); but by default, we want to hide them.  So hide
419   // them unless we have a cookie that says to show them.
420   checkCookie();
421   // -->
422 </script>
423 </body>
424 </html>