Imported Upstream version 2.3.5
[platform/upstream/python-lxml.git] / doc / html / api / lxml.html.soupparser-module.html
1 <?xml version="1.0" encoding="ascii"?>
2 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
3           "DTD/xhtml1-transitional.dtd">
4 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
5 <head>
6   <title>lxml.html.soupparser</title>
7   <link rel="stylesheet" href="epydoc.css" type="text/css" />
8   <script type="text/javascript" src="epydoc.js"></script>
9 </head>
10
11 <body bgcolor="white" text="black" link="blue" vlink="#204080"
12       alink="#204080">
13 <!-- ==================== NAVIGATION BAR ==================== -->
14 <table class="navbar" border="0" width="100%" cellpadding="0"
15        bgcolor="#a0c0ff" cellspacing="0">
16   <tr valign="middle">
17   <!-- Home link -->
18       <th>&nbsp;&nbsp;&nbsp;<a
19         href="lxml-module.html">Home</a>&nbsp;&nbsp;&nbsp;</th>
20
21   <!-- Tree link -->
22       <th>&nbsp;&nbsp;&nbsp;<a
23         href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
24
25   <!-- Index link -->
26       <th>&nbsp;&nbsp;&nbsp;<a
27         href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
28
29   <!-- Help link -->
30       <th>&nbsp;&nbsp;&nbsp;<a
31         href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
32
33   <!-- Project homepage -->
34       <th class="navbar" align="right" width="100%">
35         <table border="0" cellpadding="0" cellspacing="0">
36           <tr><th class="navbar" align="center"
37             ><a class="navbar" target="_top" href="/">lxml API</a></th>
38           </tr></table></th>
39   </tr>
40 </table>
41 <table width="100%" cellpadding="0" cellspacing="0">
42   <tr valign="top">
43     <td width="100%">
44       <span class="breadcrumbs">
45         <a href="lxml-module.html">Package&nbsp;lxml</a> ::
46         <a href="lxml.html-module.html">Package&nbsp;html</a> ::
47         Module&nbsp;soupparser
48       </span>
49     </td>
50     <td>
51       <table cellpadding="0" cellspacing="0">
52         <!-- hide/show private -->
53         <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
54     onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>
55         <tr><td align="right"><span class="options"
56             >[<a href="frames.html" target="_top">frames</a
57             >]&nbsp;|&nbsp;<a href="lxml.html.soupparser-module.html"
58             target="_top">no&nbsp;frames</a>]</span></td></tr>
59       </table>
60     </td>
61   </tr>
62 </table>
63 <!-- ==================== MODULE DESCRIPTION ==================== -->
64 <h1 class="epydoc">Module soupparser</h1><p class="nomargin-top"><span class="codelink"><a href="lxml.html.soupparser-pysrc.html">source&nbsp;code</a></span></p>
65 External interface to the BeautifulSoup HTML parser.
66
67 <!-- ==================== FUNCTIONS ==================== -->
68 <a name="section-Functions"></a>
69 <table class="summary" border="1" cellpadding="3"
70        cellspacing="0" width="100%" bgcolor="white">
71 <tr bgcolor="#70b0f0" class="table-header">
72   <td colspan="2" class="table-header">
73     <table border="0" cellpadding="0" cellspacing="0" width="100%">
74       <tr valign="top">
75         <td align="left"><span class="table-header">Functions</span></td>
76         <td align="right" valign="top"
77          ><span class="options">[<a href="#section-Functions"
78          class="privatelink" onclick="toggle_private();"
79          >hide private</a>]</span></td>
80       </tr>
81     </table>
82   </td>
83 </tr>
84 <tr>
85     <td width="15%" align="right" valign="top" class="summary">
86       <span class="summary-type">&nbsp;</span>
87     </td><td class="summary">
88       <table width="100%" cellpadding="0" cellspacing="0" border="0">
89         <tr>
90           <td><span class="summary-sig"><a href="lxml.html.soupparser-module.html#fromstring" class="summary-sig-name">fromstring</a>(<span class="summary-sig-arg">data</span>,
91         <span class="summary-sig-arg">beautifulsoup</span>=<span class="summary-sig-default">None</span>,
92         <span class="summary-sig-arg">makeelement</span>=<span class="summary-sig-default">None</span>,
93         <span class="summary-sig-arg">**bsargs</span>)</span><br />
94       Parse a string of HTML data into an Element tree using the
95 BeautifulSoup parser.</td>
96           <td align="right" valign="top">
97             <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#fromstring">source&nbsp;code</a></span>
98             
99           </td>
100         </tr>
101       </table>
102       
103     </td>
104   </tr>
105 <tr>
106     <td width="15%" align="right" valign="top" class="summary">
107       <span class="summary-type">&nbsp;</span>
108     </td><td class="summary">
109       <table width="100%" cellpadding="0" cellspacing="0" border="0">
110         <tr>
111           <td><span class="summary-sig"><a href="lxml.html.soupparser-module.html#parse" class="summary-sig-name">parse</a>(<span class="summary-sig-arg">file</span>,
112         <span class="summary-sig-arg">beautifulsoup</span>=<span class="summary-sig-default">None</span>,
113         <span class="summary-sig-arg">makeelement</span>=<span class="summary-sig-default">None</span>,
114         <span class="summary-sig-arg">**bsargs</span>)</span><br />
115       Parse a file into an ElementTree using the BeautifulSoup parser.</td>
116           <td align="right" valign="top">
117             <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#parse">source&nbsp;code</a></span>
118             
119           </td>
120         </tr>
121       </table>
122       
123     </td>
124   </tr>
125 <tr>
126     <td width="15%" align="right" valign="top" class="summary">
127       <span class="summary-type">&nbsp;</span>
128     </td><td class="summary">
129       <table width="100%" cellpadding="0" cellspacing="0" border="0">
130         <tr>
131           <td><span class="summary-sig"><a href="lxml.html.soupparser-module.html#convert_tree" class="summary-sig-name">convert_tree</a>(<span class="summary-sig-arg">beautiful_soup_tree</span>,
132         <span class="summary-sig-arg">makeelement</span>=<span class="summary-sig-default">None</span>)</span><br />
133       Convert a BeautifulSoup tree to a list of Element trees.</td>
134           <td align="right" valign="top">
135             <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#convert_tree">source&nbsp;code</a></span>
136             
137           </td>
138         </tr>
139       </table>
140       
141     </td>
142   </tr>
143 <tr class="private">
144     <td width="15%" align="right" valign="top" class="summary">
145       <span class="summary-type">&nbsp;</span>
146     </td><td class="summary">
147       <table width="100%" cellpadding="0" cellspacing="0" border="0">
148         <tr>
149           <td><span class="summary-sig"><a name="_parse"></a><span class="summary-sig-name">_parse</span>(<span class="summary-sig-arg">source</span>,
150         <span class="summary-sig-arg">beautifulsoup</span>,
151         <span class="summary-sig-arg">makeelement</span>,
152         <span class="summary-sig-arg">**bsargs</span>)</span></td>
153           <td align="right" valign="top">
154             <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#_parse">source&nbsp;code</a></span>
155             
156           </td>
157         </tr>
158       </table>
159       
160     </td>
161   </tr>
162 <tr class="private">
163     <td width="15%" align="right" valign="top" class="summary">
164       <span class="summary-type">&nbsp;</span>
165     </td><td class="summary">
166       <table width="100%" cellpadding="0" cellspacing="0" border="0">
167         <tr>
168           <td><span class="summary-sig"><a name="_convert_tree"></a><span class="summary-sig-name">_convert_tree</span>(<span class="summary-sig-arg">beautiful_soup_tree</span>,
169         <span class="summary-sig-arg">makeelement</span>)</span></td>
170           <td align="right" valign="top">
171             <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#_convert_tree">source&nbsp;code</a></span>
172             
173           </td>
174         </tr>
175       </table>
176       
177     </td>
178   </tr>
179 <tr class="private">
180     <td width="15%" align="right" valign="top" class="summary">
181       <span class="summary-type">&nbsp;</span>
182     </td><td class="summary">
183       <table width="100%" cellpadding="0" cellspacing="0" border="0">
184         <tr>
185           <td><span class="summary-sig"><a name="_convert_children"></a><span class="summary-sig-name">_convert_children</span>(<span class="summary-sig-arg">parent</span>,
186         <span class="summary-sig-arg">beautiful_soup_tree</span>,
187         <span class="summary-sig-arg">makeelement</span>)</span></td>
188           <td align="right" valign="top">
189             <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#_convert_children">source&nbsp;code</a></span>
190             
191           </td>
192         </tr>
193       </table>
194       
195     </td>
196   </tr>
197 <tr class="private">
198     <td width="15%" align="right" valign="top" class="summary">
199       <span class="summary-type">&nbsp;</span>
200     </td><td class="summary">
201       <table width="100%" cellpadding="0" cellspacing="0" border="0">
202         <tr>
203           <td><span class="summary-sig"><a name="_append_text"></a><span class="summary-sig-name">_append_text</span>(<span class="summary-sig-arg">parent</span>,
204         <span class="summary-sig-arg">element</span>,
205         <span class="summary-sig-arg">text</span>)</span></td>
206           <td align="right" valign="top">
207             <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#_append_text">source&nbsp;code</a></span>
208             
209           </td>
210         </tr>
211       </table>
212       
213     </td>
214   </tr>
215 <tr class="private">
216     <td width="15%" align="right" valign="top" class="summary">
217       <span class="summary-type">&nbsp;</span>
218     </td><td class="summary">
219       <table width="100%" cellpadding="0" cellspacing="0" border="0">
220         <tr>
221           <td><span class="summary-sig"><a name="handle_entities"></a><span class="summary-sig-name">handle_entities</span>(<span class="summary-sig-arg">...</span>)</span><br />
222       sub(repl, string[, count = 0]) --&gt; newstring
223 Return the string obtained by replacing the leftmost non-overlapping
224 occurrences of pattern in string by the replacement repl.</td>
225           <td align="right" valign="top">
226             <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#handle_entities">source&nbsp;code</a></span>
227             
228           </td>
229         </tr>
230       </table>
231       
232     </td>
233   </tr>
234 <tr class="private">
235     <td width="15%" align="right" valign="top" class="summary">
236       <span class="summary-type">&nbsp;</span>
237     </td><td class="summary">
238       <table width="100%" cellpadding="0" cellspacing="0" border="0">
239         <tr>
240           <td><span class="summary-sig"><a name="unescape"></a><span class="summary-sig-name">unescape</span>(<span class="summary-sig-arg">string</span>)</span></td>
241           <td align="right" valign="top">
242             <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#unescape">source&nbsp;code</a></span>
243             
244           </td>
245         </tr>
246       </table>
247       
248     </td>
249   </tr>
250 </table>
251 <!-- ==================== VARIABLES ==================== -->
252 <a name="section-Variables"></a>
253 <table class="summary" border="1" cellpadding="3"
254        cellspacing="0" width="100%" bgcolor="white">
255 <tr bgcolor="#70b0f0" class="table-header">
256   <td colspan="2" class="table-header">
257     <table border="0" cellpadding="0" cellspacing="0" width="100%">
258       <tr valign="top">
259         <td align="left"><span class="table-header">Variables</span></td>
260         <td align="right" valign="top"
261          ><span class="options">[<a href="#section-Variables"
262          class="privatelink" onclick="toggle_private();"
263          >hide private</a>]</span></td>
264       </tr>
265     </table>
266   </td>
267 </tr>
268 <tr class="private">
269     <td width="15%" align="right" valign="top" class="summary">
270       <span class="summary-type">&nbsp;</span>
271     </td><td class="summary">
272         <a href="lxml.html.soupparser-module.html#__doc__" class="summary-name" onclick="show_private();">__doc__</a> = <code title="&quot;&quot;&quot;External interface to the BeautifulSoup HTML parser.
273 &quot;&quot;&quot;">&quot;&quot;&quot;External interface to the BeautifulSoup HTML pars<code class="variable-ellipsis">...</code></code>
274     </td>
275   </tr>
276 <tr class="private">
277     <td width="15%" align="right" valign="top" class="summary">
278       <span class="summary-type">&nbsp;</span>
279     </td><td class="summary">
280         <a name="__package__"></a><span class="summary-name">__package__</span> = <code title="'lxml.html'"><code class="variable-quote">'</code><code class="variable-string">lxml.html</code><code class="variable-quote">'</code></code>
281     </td>
282   </tr>
283 </table>
284 <!-- ==================== FUNCTION DETAILS ==================== -->
285 <a name="section-FunctionDetails"></a>
286 <table class="details" border="1" cellpadding="3"
287        cellspacing="0" width="100%" bgcolor="white">
288 <tr bgcolor="#70b0f0" class="table-header">
289   <td colspan="2" class="table-header">
290     <table border="0" cellpadding="0" cellspacing="0" width="100%">
291       <tr valign="top">
292         <td align="left"><span class="table-header">Function Details</span></td>
293         <td align="right" valign="top"
294          ><span class="options">[<a href="#section-FunctionDetails"
295          class="privatelink" onclick="toggle_private();"
296          >hide private</a>]</span></td>
297       </tr>
298     </table>
299   </td>
300 </tr>
301 </table>
302 <a name="fromstring"></a>
303 <div>
304 <table class="details" border="1" cellpadding="3"
305        cellspacing="0" width="100%" bgcolor="white">
306 <tr><td>
307   <table width="100%" cellpadding="0" cellspacing="0" border="0">
308   <tr valign="top"><td>
309   <h3 class="epydoc"><span class="sig"><span class="sig-name">fromstring</span>(<span class="sig-arg">data</span>,
310         <span class="sig-arg">beautifulsoup</span>=<span class="sig-default">None</span>,
311         <span class="sig-arg">makeelement</span>=<span class="sig-default">None</span>,
312         <span class="sig-arg">**bsargs</span>)</span>
313   </h3>
314   </td><td align="right" valign="top"
315     ><span class="codelink"><a href="lxml.html.soupparser-pysrc.html#fromstring">source&nbsp;code</a></span>&nbsp;
316     </td>
317   </tr></table>
318   
319   <p>Parse a string of HTML data into an Element tree using the
320 BeautifulSoup parser.</p>
321 <p>Returns the root <tt class="rst-docutils literal">&lt;html&gt;</tt> Element of the tree.</p>
322 <p>You can pass a different BeautifulSoup parser through the
323 <code class="link">beautifulsoup</code> keyword, and a different Element factory function
324 through the <code class="link">makeelement</code> keyword.  By default, the standard
325 <tt class="rst-docutils literal">BeautifulSoup</tt> class and the default factory of <a href="lxml.html-module.html" class="link">lxml.html</a> are
326 used.</p>
327   <dl class="fields">
328   </dl>
329 </td></tr></table>
330 </div>
331 <a name="parse"></a>
332 <div>
333 <table class="details" border="1" cellpadding="3"
334        cellspacing="0" width="100%" bgcolor="white">
335 <tr><td>
336   <table width="100%" cellpadding="0" cellspacing="0" border="0">
337   <tr valign="top"><td>
338   <h3 class="epydoc"><span class="sig"><span class="sig-name">parse</span>(<span class="sig-arg">file</span>,
339         <span class="sig-arg">beautifulsoup</span>=<span class="sig-default">None</span>,
340         <span class="sig-arg">makeelement</span>=<span class="sig-default">None</span>,
341         <span class="sig-arg">**bsargs</span>)</span>
342   </h3>
343   </td><td align="right" valign="top"
344     ><span class="codelink"><a href="lxml.html.soupparser-pysrc.html#parse">source&nbsp;code</a></span>&nbsp;
345     </td>
346   </tr></table>
347   
348   <p>Parse a file into an ElementTree using the BeautifulSoup parser.</p>
349 <p>You can pass a different BeautifulSoup parser through the
350 <code class="link">beautifulsoup</code> keyword, and a different Element factory function
351 through the <code class="link">makeelement</code> keyword.  By default, the standard
352 <tt class="rst-docutils literal">BeautifulSoup</tt> class and the default factory of <a href="lxml.html-module.html" class="link">lxml.html</a> are
353 used.</p>
354   <dl class="fields">
355   </dl>
356 </td></tr></table>
357 </div>
358 <a name="convert_tree"></a>
359 <div>
360 <table class="details" border="1" cellpadding="3"
361        cellspacing="0" width="100%" bgcolor="white">
362 <tr><td>
363   <table width="100%" cellpadding="0" cellspacing="0" border="0">
364   <tr valign="top"><td>
365   <h3 class="epydoc"><span class="sig"><span class="sig-name">convert_tree</span>(<span class="sig-arg">beautiful_soup_tree</span>,
366         <span class="sig-arg">makeelement</span>=<span class="sig-default">None</span>)</span>
367   </h3>
368   </td><td align="right" valign="top"
369     ><span class="codelink"><a href="lxml.html.soupparser-pysrc.html#convert_tree">source&nbsp;code</a></span>&nbsp;
370     </td>
371   </tr></table>
372   
373   <p>Convert a BeautifulSoup tree to a list of Element trees.</p>
374 <p>Returns a list instead of a single root Element to support
375 HTML-like soup with more than one root element.</p>
376 <p>You can pass a different Element factory through the <code class="link">makeelement</code>
377 keyword.</p>
378   <dl class="fields">
379   </dl>
380 </td></tr></table>
381 </div>
382 <br />
383 <!-- ==================== VARIABLES DETAILS ==================== -->
384 <a name="section-VariablesDetails"></a>
385 <table class="details" border="1" cellpadding="3"
386        cellspacing="0" width="100%" bgcolor="white">
387 <tr bgcolor="#70b0f0" class="table-header">
388   <td colspan="2" class="table-header">
389     <table border="0" cellpadding="0" cellspacing="0" width="100%">
390       <tr valign="top">
391         <td align="left"><span class="table-header">Variables Details</span></td>
392         <td align="right" valign="top"
393          ><span class="options">[<a href="#section-VariablesDetails"
394          class="privatelink" onclick="toggle_private();"
395          >hide private</a>]</span></td>
396       </tr>
397     </table>
398   </td>
399 </tr>
400 </table>
401 <a name="__doc__"></a>
402 <div class="private">
403 <table class="details" border="1" cellpadding="3"
404        cellspacing="0" width="100%" bgcolor="white">
405 <tr><td>
406   <h3 class="epydoc">__doc__</h3>
407   
408   <dl class="fields">
409   </dl>
410   <dl class="fields">
411     <dt>Value:</dt>
412       <dd><table><tr><td><pre class="variable">
413 &quot;&quot;&quot;External interface to the BeautifulSoup HTML parser.
414 &quot;&quot;&quot;
415 </pre></td></tr></table>
416 </dd>
417   </dl>
418 </td></tr></table>
419 </div>
420 <br />
421 <!-- ==================== NAVIGATION BAR ==================== -->
422 <table class="navbar" border="0" width="100%" cellpadding="0"
423        bgcolor="#a0c0ff" cellspacing="0">
424   <tr valign="middle">
425   <!-- Home link -->
426       <th>&nbsp;&nbsp;&nbsp;<a
427         href="lxml-module.html">Home</a>&nbsp;&nbsp;&nbsp;</th>
428
429   <!-- Tree link -->
430       <th>&nbsp;&nbsp;&nbsp;<a
431         href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
432
433   <!-- Index link -->
434       <th>&nbsp;&nbsp;&nbsp;<a
435         href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
436
437   <!-- Help link -->
438       <th>&nbsp;&nbsp;&nbsp;<a
439         href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
440
441   <!-- Project homepage -->
442       <th class="navbar" align="right" width="100%">
443         <table border="0" cellpadding="0" cellspacing="0">
444           <tr><th class="navbar" align="center"
445             ><a class="navbar" target="_top" href="/">lxml API</a></th>
446           </tr></table></th>
447   </tr>
448 </table>
449 <table border="0" cellpadding="0" cellspacing="0" width="100%">
450   <tr>
451     <td align="left" class="footer">
452     Generated by Epydoc 3.0.1 on Tue Jul 31 10:14:17 2012
453     </td>
454     <td align="right" class="footer">
455       <a target="mainFrame" href="http://epydoc.sourceforge.net"
456         >http://epydoc.sourceforge.net</a>
457     </td>
458   </tr>
459 </table>
460
461 <script type="text/javascript">
462   <!--
463   // Private objects are initially displayed (because if
464   // javascript is turned off then we want them to be
465   // visible); but by default, we want to hide them.  So hide
466   // them unless we have a cookie that says to show them.
467   checkCookie();
468   // -->
469 </script>
470 </body>
471 </html>