1 <?xml version="1.0" encoding="ascii"?>
2 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
3 "DTD/xhtml1-transitional.dtd">
4 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
6 <title>lxml.html.soupparser</title>
7 <link rel="stylesheet" href="epydoc.css" type="text/css" />
8 <script type="text/javascript" src="epydoc.js"></script>
11 <body bgcolor="white" text="black" link="blue" vlink="#204080"
13 <!-- ==================== NAVIGATION BAR ==================== -->
14 <table class="navbar" border="0" width="100%" cellpadding="0"
15 bgcolor="#a0c0ff" cellspacing="0">
18 <th> <a
19 href="lxml-module.html">Home</a> </th>
22 <th> <a
23 href="module-tree.html">Trees</a> </th>
26 <th> <a
27 href="identifier-index.html">Indices</a> </th>
30 <th> <a
31 href="help.html">Help</a> </th>
33 <!-- Project homepage -->
34 <th class="navbar" align="right" width="100%">
35 <table border="0" cellpadding="0" cellspacing="0">
36 <tr><th class="navbar" align="center"
37 ><a class="navbar" target="_top" href="/">lxml API</a></th>
41 <table width="100%" cellpadding="0" cellspacing="0">
44 <span class="breadcrumbs">
45 <a href="lxml-module.html">Package lxml</a> ::
46 <a href="lxml.html-module.html">Package html</a> ::
47 Module soupparser
51 <table cellpadding="0" cellspacing="0">
52 <!-- hide/show private -->
53 <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
54 onclick="toggle_private();">hide private</a>]</span></td></tr>
55 <tr><td align="right"><span class="options"
56 >[<a href="frames.html" target="_top">frames</a
57 >] | <a href="lxml.html.soupparser-module.html"
58 target="_top">no frames</a>]</span></td></tr>
63 <!-- ==================== MODULE DESCRIPTION ==================== -->
64 <h1 class="epydoc">Module soupparser</h1><p class="nomargin-top"><span class="codelink"><a href="lxml.html.soupparser-pysrc.html">source code</a></span></p>
65 External interface to the BeautifulSoup HTML parser.
67 <!-- ==================== FUNCTIONS ==================== -->
68 <a name="section-Functions"></a>
69 <table class="summary" border="1" cellpadding="3"
70 cellspacing="0" width="100%" bgcolor="white">
71 <tr bgcolor="#70b0f0" class="table-header">
72 <td colspan="2" class="table-header">
73 <table border="0" cellpadding="0" cellspacing="0" width="100%">
75 <td align="left"><span class="table-header">Functions</span></td>
76 <td align="right" valign="top"
77 ><span class="options">[<a href="#section-Functions"
78 class="privatelink" onclick="toggle_private();"
79 >hide private</a>]</span></td>
85 <td width="15%" align="right" valign="top" class="summary">
86 <span class="summary-type"> </span>
87 </td><td class="summary">
88 <table width="100%" cellpadding="0" cellspacing="0" border="0">
90 <td><span class="summary-sig"><a href="lxml.html.soupparser-module.html#fromstring" class="summary-sig-name">fromstring</a>(<span class="summary-sig-arg">data</span>,
91 <span class="summary-sig-arg">beautifulsoup</span>=<span class="summary-sig-default">None</span>,
92 <span class="summary-sig-arg">makeelement</span>=<span class="summary-sig-default">None</span>,
93 <span class="summary-sig-arg">**bsargs</span>)</span><br />
94 Parse a string of HTML data into an Element tree using the
95 BeautifulSoup parser.</td>
96 <td align="right" valign="top">
97 <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#fromstring">source code</a></span>
106 <td width="15%" align="right" valign="top" class="summary">
107 <span class="summary-type"> </span>
108 </td><td class="summary">
109 <table width="100%" cellpadding="0" cellspacing="0" border="0">
111 <td><span class="summary-sig"><a href="lxml.html.soupparser-module.html#parse" class="summary-sig-name">parse</a>(<span class="summary-sig-arg">file</span>,
112 <span class="summary-sig-arg">beautifulsoup</span>=<span class="summary-sig-default">None</span>,
113 <span class="summary-sig-arg">makeelement</span>=<span class="summary-sig-default">None</span>,
114 <span class="summary-sig-arg">**bsargs</span>)</span><br />
115 Parse a file into an ElementTree using the BeautifulSoup parser.</td>
116 <td align="right" valign="top">
117 <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#parse">source code</a></span>
126 <td width="15%" align="right" valign="top" class="summary">
127 <span class="summary-type"> </span>
128 </td><td class="summary">
129 <table width="100%" cellpadding="0" cellspacing="0" border="0">
131 <td><span class="summary-sig"><a href="lxml.html.soupparser-module.html#convert_tree" class="summary-sig-name">convert_tree</a>(<span class="summary-sig-arg">beautiful_soup_tree</span>,
132 <span class="summary-sig-arg">makeelement</span>=<span class="summary-sig-default">None</span>)</span><br />
133 Convert a BeautifulSoup tree to a list of Element trees.</td>
134 <td align="right" valign="top">
135 <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#convert_tree">source code</a></span>
144 <td width="15%" align="right" valign="top" class="summary">
145 <span class="summary-type"> </span>
146 </td><td class="summary">
147 <table width="100%" cellpadding="0" cellspacing="0" border="0">
149 <td><span class="summary-sig"><a name="_parse"></a><span class="summary-sig-name">_parse</span>(<span class="summary-sig-arg">source</span>,
150 <span class="summary-sig-arg">beautifulsoup</span>,
151 <span class="summary-sig-arg">makeelement</span>,
152 <span class="summary-sig-arg">**bsargs</span>)</span></td>
153 <td align="right" valign="top">
154 <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#_parse">source code</a></span>
163 <td width="15%" align="right" valign="top" class="summary">
164 <span class="summary-type"> </span>
165 </td><td class="summary">
166 <table width="100%" cellpadding="0" cellspacing="0" border="0">
168 <td><span class="summary-sig"><a name="_convert_tree"></a><span class="summary-sig-name">_convert_tree</span>(<span class="summary-sig-arg">beautiful_soup_tree</span>,
169 <span class="summary-sig-arg">makeelement</span>)</span></td>
170 <td align="right" valign="top">
171 <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#_convert_tree">source code</a></span>
180 <td width="15%" align="right" valign="top" class="summary">
181 <span class="summary-type"> </span>
182 </td><td class="summary">
183 <table width="100%" cellpadding="0" cellspacing="0" border="0">
185 <td><span class="summary-sig"><a name="_convert_children"></a><span class="summary-sig-name">_convert_children</span>(<span class="summary-sig-arg">parent</span>,
186 <span class="summary-sig-arg">beautiful_soup_tree</span>,
187 <span class="summary-sig-arg">makeelement</span>)</span></td>
188 <td align="right" valign="top">
189 <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#_convert_children">source code</a></span>
198 <td width="15%" align="right" valign="top" class="summary">
199 <span class="summary-type"> </span>
200 </td><td class="summary">
201 <table width="100%" cellpadding="0" cellspacing="0" border="0">
203 <td><span class="summary-sig"><a name="_append_text"></a><span class="summary-sig-name">_append_text</span>(<span class="summary-sig-arg">parent</span>,
204 <span class="summary-sig-arg">element</span>,
205 <span class="summary-sig-arg">text</span>)</span></td>
206 <td align="right" valign="top">
207 <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#_append_text">source code</a></span>
216 <td width="15%" align="right" valign="top" class="summary">
217 <span class="summary-type"> </span>
218 </td><td class="summary">
219 <table width="100%" cellpadding="0" cellspacing="0" border="0">
221 <td><span class="summary-sig"><a name="handle_entities"></a><span class="summary-sig-name">handle_entities</span>(<span class="summary-sig-arg">...</span>)</span><br />
222 sub(repl, string[, count = 0]) --&gt; newstring
223 Return the string obtained by replacing the leftmost non-overlapping
224 occurrences of pattern in string by the replacement repl.</td>
225 <td align="right" valign="top">
226 <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#handle_entities">source code</a></span>
235 <td width="15%" align="right" valign="top" class="summary">
236 <span class="summary-type"> </span>
237 </td><td class="summary">
238 <table width="100%" cellpadding="0" cellspacing="0" border="0">
240 <td><span class="summary-sig"><a name="unescape"></a><span class="summary-sig-name">unescape</span>(<span class="summary-sig-arg">string</span>)</span></td>
241 <td align="right" valign="top">
242 <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#unescape">source code</a></span>
251 <!-- ==================== VARIABLES ==================== -->
252 <a name="section-Variables"></a>
253 <table class="summary" border="1" cellpadding="3"
254 cellspacing="0" width="100%" bgcolor="white">
255 <tr bgcolor="#70b0f0" class="table-header">
256 <td colspan="2" class="table-header">
257 <table border="0" cellpadding="0" cellspacing="0" width="100%">
259 <td align="left"><span class="table-header">Variables</span></td>
260 <td align="right" valign="top"
261 ><span class="options">[<a href="#section-Variables"
262 class="privatelink" onclick="toggle_private();"
263 >hide private</a>]</span></td>
269 <td width="15%" align="right" valign="top" class="summary">
270 <span class="summary-type"> </span>
271 </td><td class="summary">
272 <a href="lxml.html.soupparser-module.html#__doc__" class="summary-name" onclick="show_private();">__doc__</a> = <code title="&quot;&quot;&quot;External interface to the BeautifulSoup HTML parser.
273 &quot;&quot;&quot;">"""External interface to the BeautifulSoup HTML pars<code class="variable-ellipsis">...</code></code>
277 <td width="15%" align="right" valign="top" class="summary">
278 <span class="summary-type"> </span>
279 </td><td class="summary">
280 <a name="__package__"></a><span class="summary-name">__package__</span> = <code title="'lxml.html'"><code class="variable-quote">'</code><code class="variable-string">lxml.html</code><code class="variable-quote">'</code></code>
284 <!-- ==================== FUNCTION DETAILS ==================== -->
285 <a name="section-FunctionDetails"></a>
286 <table class="details" border="1" cellpadding="3"
287 cellspacing="0" width="100%" bgcolor="white">
288 <tr bgcolor="#70b0f0" class="table-header">
289 <td colspan="2" class="table-header">
290 <table border="0" cellpadding="0" cellspacing="0" width="100%">
292 <td align="left"><span class="table-header">Function Details</span></td>
293 <td align="right" valign="top"
294 ><span class="options">[<a href="#section-FunctionDetails"
295 class="privatelink" onclick="toggle_private();"
296 >hide private</a>]</span></td>
302 <a name="fromstring"></a>
304 <table class="details" border="1" cellpadding="3"
305 cellspacing="0" width="100%" bgcolor="white">
307 <table width="100%" cellpadding="0" cellspacing="0" border="0">
308 <tr valign="top"><td>
309 <h3 class="epydoc"><span class="sig"><span class="sig-name">fromstring</span>(<span class="sig-arg">data</span>,
310 <span class="sig-arg">beautifulsoup</span>=<span class="sig-default">None</span>,
311 <span class="sig-arg">makeelement</span>=<span class="sig-default">None</span>,
312 <span class="sig-arg">**bsargs</span>)</span>
314 </td><td align="right" valign="top"
315 ><span class="codelink"><a href="lxml.html.soupparser-pysrc.html#fromstring">source code</a></span>
319 <p>Parse a string of HTML data into an Element tree using the
320 BeautifulSoup parser.</p>
321 <p>Returns the root <tt class="rst-docutils literal">&lt;html&gt;</tt> Element of the tree.</p>
322 <p>You can pass a different BeautifulSoup parser through the
323 <code class="link">beautifulsoup</code> keyword, and a different Element factory function
324 through the <code class="link">makeelement</code> keyword. By default, the standard
325 <tt class="rst-docutils literal">BeautifulSoup</tt> class and the default factory of <a href="lxml.html-module.html" class="link">lxml.html</a> are
333 <table class="details" border="1" cellpadding="3"
334 cellspacing="0" width="100%" bgcolor="white">
336 <table width="100%" cellpadding="0" cellspacing="0" border="0">
337 <tr valign="top"><td>
338 <h3 class="epydoc"><span class="sig"><span class="sig-name">parse</span>(<span class="sig-arg">file</span>,
339 <span class="sig-arg">beautifulsoup</span>=<span class="sig-default">None</span>,
340 <span class="sig-arg">makeelement</span>=<span class="sig-default">None</span>,
341 <span class="sig-arg">**bsargs</span>)</span>
343 </td><td align="right" valign="top"
344 ><span class="codelink"><a href="lxml.html.soupparser-pysrc.html#parse">source code</a></span>
348 <p>Parse a file into an ElementTree using the BeautifulSoup parser.</p>
349 <p>You can pass a different BeautifulSoup parser through the
350 <code class="link">beautifulsoup</code> keyword, and a different Element factory function
351 through the <code class="link">makeelement</code> keyword. By default, the standard
352 <tt class="rst-docutils literal">BeautifulSoup</tt> class and the default factory of <a href="lxml.html-module.html" class="link">lxml.html</a> are
358 <a name="convert_tree"></a>
360 <table class="details" border="1" cellpadding="3"
361 cellspacing="0" width="100%" bgcolor="white">
363 <table width="100%" cellpadding="0" cellspacing="0" border="0">
364 <tr valign="top"><td>
365 <h3 class="epydoc"><span class="sig"><span class="sig-name">convert_tree</span>(<span class="sig-arg">beautiful_soup_tree</span>,
366 <span class="sig-arg">makeelement</span>=<span class="sig-default">None</span>)</span>
368 </td><td align="right" valign="top"
369 ><span class="codelink"><a href="lxml.html.soupparser-pysrc.html#convert_tree">source code</a></span>
373 <p>Convert a BeautifulSoup tree to a list of Element trees.</p>
374 <p>Returns a list instead of a single root Element to support
375 HTML-like soup with more than one root element.</p>
376 <p>You can pass a different Element factory through the <code class="link">makeelement</code>
383 <!-- ==================== VARIABLES DETAILS ==================== -->
384 <a name="section-VariablesDetails"></a>
385 <table class="details" border="1" cellpadding="3"
386 cellspacing="0" width="100%" bgcolor="white">
387 <tr bgcolor="#70b0f0" class="table-header">
388 <td colspan="2" class="table-header">
389 <table border="0" cellpadding="0" cellspacing="0" width="100%">
391 <td align="left"><span class="table-header">Variables Details</span></td>
392 <td align="right" valign="top"
393 ><span class="options">[<a href="#section-VariablesDetails"
394 class="privatelink" onclick="toggle_private();"
395 >hide private</a>]</span></td>
401 <a name="__doc__"></a>
402 <div class="private">
403 <table class="details" border="1" cellpadding="3"
404 cellspacing="0" width="100%" bgcolor="white">
406 <h3 class="epydoc">__doc__</h3>
412 <dd><table><tr><td><pre class="variable">
413 """External interface to the BeautifulSoup HTML parser.
415 </pre></td></tr></table>
421 <!-- ==================== NAVIGATION BAR ==================== -->
422 <table class="navbar" border="0" width="100%" cellpadding="0"
423 bgcolor="#a0c0ff" cellspacing="0">
426 <th> <a
427 href="lxml-module.html">Home</a> </th>
430 <th> <a
431 href="module-tree.html">Trees</a> </th>
434 <th> <a
435 href="identifier-index.html">Indices</a> </th>
438 <th> <a
439 href="help.html">Help</a> </th>
441 <!-- Project homepage -->
442 <th class="navbar" align="right" width="100%">
443 <table border="0" cellpadding="0" cellspacing="0">
444 <tr><th class="navbar" align="center"
445 ><a class="navbar" target="_top" href="/">lxml API</a></th>
449 <table border="0" cellpadding="0" cellspacing="0" width="100%">
451 <td align="left" class="footer">
452 Generated by Epydoc 3.0.1 on Tue Jul 31 10:14:17 2012
454 <td align="right" class="footer">
455 <a target="mainFrame" href="http://epydoc.sourceforge.net"
456 >http://epydoc.sourceforge.net</a>
461 <script type="text/javascript">
463 // Private objects are initially displayed (because if
464 // javascript is turned off then we want them to be
465 // visible); but by default, we want to hide them. So hide
466 // them unless we have a cookie that says to show them.