1 <?xml version="1.0" encoding="ascii"?>
2 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
3 "DTD/xhtml1-transitional.dtd">
4 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
6 <title>lxml.html.soupparser</title>
7 <link rel="stylesheet" href="epydoc.css" type="text/css" />
8 <script type="text/javascript" src="epydoc.js"></script>
11 <body bgcolor="white" text="black" link="blue" vlink="#204080"
13 <!-- ==================== NAVIGATION BAR ==================== -->
14 <table class="navbar" border="0" width="100%" cellpadding="0"
15 bgcolor="#a0c0ff" cellspacing="0">
18 <th> <a
19 href="lxml-module.html">Home</a> </th>
22 <th> <a
23 href="module-tree.html">Trees</a> </th>
26 <th> <a
27 href="identifier-index.html">Indices</a> </th>
30 <th> <a
31 href="help.html">Help</a> </th>
33 <!-- Project homepage -->
34 <th class="navbar" align="right" width="100%">
35 <table border="0" cellpadding="0" cellspacing="0">
36 <tr><th class="navbar" align="center"
37 ><a class="navbar" target="_top" href="/">lxml API</a></th>
41 <table width="100%" cellpadding="0" cellspacing="0">
44 <span class="breadcrumbs">
45 <a href="lxml-module.html">Package lxml</a> ::
46 <a href="lxml.html-module.html">Package html</a> ::
47 Module soupparser
51 <table cellpadding="0" cellspacing="0">
52 <!-- hide/show private -->
53 <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
54 onclick="toggle_private();">hide private</a>]</span></td></tr>
55 <tr><td align="right"><span class="options"
56 >[<a href="frames.html" target="_top">frames</a
57 >] | <a href="lxml.html.soupparser-module.html"
58 target="_top">no frames</a>]</span></td></tr>
63 <!-- ==================== MODULE DESCRIPTION ==================== -->
64 <h1 class="epydoc">Module soupparser</h1><p class="nomargin-top"><span class="codelink"><a href="lxml.html.soupparser-pysrc.html">source code</a></span></p>
65 External interface to the BeautifulSoup HTML parser.
67 <!-- ==================== CLASSES ==================== -->
68 <a name="section-Classes"></a>
69 <table class="summary" border="1" cellpadding="3"
70 cellspacing="0" width="100%" bgcolor="white">
71 <tr bgcolor="#70b0f0" class="table-header">
72 <td colspan="2" class="table-header">
73 <table border="0" cellpadding="0" cellspacing="0" width="100%">
75 <td align="left"><span class="table-header">Classes</span></td>
76 <td align="right" valign="top"
77 ><span class="options">[<a href="#section-Classes"
78 class="privatelink" onclick="toggle_private();"
79 >hide private</a>]</span></td>
85 <td width="15%" align="right" valign="top" class="summary">
86 <span class="summary-type"> </span>
87 </td><td class="summary">
88 <a href="lxml.html.soupparser._PseudoTag-class.html" class="summary-name" onclick="show_private();">_PseudoTag</a>
92 <!-- ==================== FUNCTIONS ==================== -->
93 <a name="section-Functions"></a>
94 <table class="summary" border="1" cellpadding="3"
95 cellspacing="0" width="100%" bgcolor="white">
96 <tr bgcolor="#70b0f0" class="table-header">
97 <td colspan="2" class="table-header">
98 <table border="0" cellpadding="0" cellspacing="0" width="100%">
100 <td align="left"><span class="table-header">Functions</span></td>
101 <td align="right" valign="top"
102 ><span class="options">[<a href="#section-Functions"
103 class="privatelink" onclick="toggle_private();"
104 >hide private</a>]</span></td>
110 <td width="15%" align="right" valign="top" class="summary">
111 <span class="summary-type"> </span>
112 </td><td class="summary">
113 <table width="100%" cellpadding="0" cellspacing="0" border="0">
115 <td><span class="summary-sig"><a href="lxml.html.soupparser-module.html#fromstring" class="summary-sig-name">fromstring</a>(<span class="summary-sig-arg">data</span>,
116 <span class="summary-sig-arg">beautifulsoup</span>=<span class="summary-sig-default">None</span>,
117 <span class="summary-sig-arg">makeelement</span>=<span class="summary-sig-default">None</span>,
118 <span class="summary-sig-arg">**bsargs</span>)</span><br />
119 Parse a string of HTML data into an Element tree using the
120 BeautifulSoup parser.</td>
121 <td align="right" valign="top">
122 <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#fromstring">source code</a></span>
131 <td width="15%" align="right" valign="top" class="summary">
132 <span class="summary-type"> </span>
133 </td><td class="summary">
134 <table width="100%" cellpadding="0" cellspacing="0" border="0">
136 <td><span class="summary-sig"><a href="lxml.html.soupparser-module.html#parse" class="summary-sig-name">parse</a>(<span class="summary-sig-arg">file</span>,
137 <span class="summary-sig-arg">beautifulsoup</span>=<span class="summary-sig-default">None</span>,
138 <span class="summary-sig-arg">makeelement</span>=<span class="summary-sig-default">None</span>,
139 <span class="summary-sig-arg">**bsargs</span>)</span><br />
140 Parse a file into an ElementTree using the BeautifulSoup parser.</td>
141 <td align="right" valign="top">
142 <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#parse">source code</a></span>
151 <td width="15%" align="right" valign="top" class="summary">
152 <span class="summary-type"> </span>
153 </td><td class="summary">
154 <table width="100%" cellpadding="0" cellspacing="0" border="0">
156 <td><span class="summary-sig"><a href="lxml.html.soupparser-module.html#convert_tree" class="summary-sig-name">convert_tree</a>(<span class="summary-sig-arg">beautiful_soup_tree</span>,
157 <span class="summary-sig-arg">makeelement</span>=<span class="summary-sig-default">None</span>)</span><br />
158 Convert a BeautifulSoup tree to a list of Element trees.</td>
159 <td align="right" valign="top">
160 <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#convert_tree">source code</a></span>
169 <td width="15%" align="right" valign="top" class="summary">
170 <span class="summary-type"> </span>
171 </td><td class="summary">
172 <table width="100%" cellpadding="0" cellspacing="0" border="0">
174 <td><span class="summary-sig"><a name="_parse"></a><span class="summary-sig-name">_parse</span>(<span class="summary-sig-arg">source</span>,
175 <span class="summary-sig-arg">beautifulsoup</span>,
176 <span class="summary-sig-arg">makeelement</span>,
177 <span class="summary-sig-arg">**bsargs</span>)</span></td>
178 <td align="right" valign="top">
179 <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#_parse">source code</a></span>
188 <td width="15%" align="right" valign="top" class="summary">
189 <span class="summary-type"> </span>
190 </td><td class="summary">
191 <table width="100%" cellpadding="0" cellspacing="0" border="0">
193 <td><span class="summary-sig"><a name="_parse_doctype_declaration"></a><span class="summary-sig-name">_parse_doctype_declaration</span>(<span class="summary-sig-arg">...</span>)</span><br />
194 match(string[, pos[, endpos]]) --&gt; match object or None.
195 Matches zero or more characters at the beginning of the string</td>
196 <td align="right" valign="top">
197 <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#_parse_doctype_declaration">source code</a></span>
206 <td width="15%" align="right" valign="top" class="summary">
207 <span class="summary-type"> </span>
208 </td><td class="summary">
209 <table width="100%" cellpadding="0" cellspacing="0" border="0">
211 <td><span class="summary-sig"><a name="_convert_tree"></a><span class="summary-sig-name">_convert_tree</span>(<span class="summary-sig-arg">beautiful_soup_tree</span>,
212 <span class="summary-sig-arg">makeelement</span>)</span></td>
213 <td align="right" valign="top">
214 <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#_convert_tree">source code</a></span>
223 <td width="15%" align="right" valign="top" class="summary">
224 <span class="summary-type"> </span>
225 </td><td class="summary">
226 <table width="100%" cellpadding="0" cellspacing="0" border="0">
228 <td><span class="summary-sig"><a name="_init_node_converters"></a><span class="summary-sig-name">_init_node_converters</span>(<span class="summary-sig-arg">makeelement</span>)</span></td>
229 <td align="right" valign="top">
230 <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#_init_node_converters">source code</a></span>
239 <td width="15%" align="right" valign="top" class="summary">
240 <span class="summary-type"> </span>
241 </td><td class="summary">
242 <table width="100%" cellpadding="0" cellspacing="0" border="0">
244 <td><span class="summary-sig"><a name="handle_entities"></a><span class="summary-sig-name">handle_entities</span>(<span class="summary-sig-arg">...</span>)</span><br />
245 sub(repl, string[, count = 0]) --&gt; newstring
246 Return the string obtained by replacing the leftmost non-overlapping
247 occurrences of pattern in string by the replacement repl.</td>
248 <td align="right" valign="top">
249 <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#handle_entities">source code</a></span>
258 <td width="15%" align="right" valign="top" class="summary">
259 <span class="summary-type">character</span>
260 </td><td class="summary">
261 <table width="100%" cellpadding="0" cellspacing="0" border="0">
263 <td><span class="summary-sig"><a name="unichr"></a><span class="summary-sig-name">unichr</span>(<span class="summary-sig-arg">i</span>)</span><br />
264 Return a string of one character with ordinal i; 0 &lt;= i &lt; 256.</td>
265 <td align="right" valign="top">
275 <td width="15%" align="right" valign="top" class="summary">
276 <span class="summary-type"> </span>
277 </td><td class="summary">
278 <table width="100%" cellpadding="0" cellspacing="0" border="0">
280 <td><span class="summary-sig"><a name="unescape"></a><span class="summary-sig-name">unescape</span>(<span class="summary-sig-arg">string</span>)</span></td>
281 <td align="right" valign="top">
282 <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#unescape">source code</a></span>
291 <!-- ==================== VARIABLES ==================== -->
292 <a name="section-Variables"></a>
293 <table class="summary" border="1" cellpadding="3"
294 cellspacing="0" width="100%" bgcolor="white">
295 <tr bgcolor="#70b0f0" class="table-header">
296 <td colspan="2" class="table-header">
297 <table border="0" cellpadding="0" cellspacing="0" width="100%">
299 <td align="left"><span class="table-header">Variables</span></td>
300 <td align="right" valign="top"
301 ><span class="options">[<a href="#section-Variables"
302 class="privatelink" onclick="toggle_private();"
303 >hide private</a>]</span></td>
309 <td width="15%" align="right" valign="top" class="summary">
310 <span class="summary-type"> </span>
311 </td><td class="summary">
312 <a href="lxml.html.soupparser-module.html#_DECLARATION_OR_DOCTYPE" class="summary-name" onclick="show_private();">_DECLARATION_OR_DOCTYPE</a> = <code title="(&lt;class 'bs4.element.Declaration'&gt;, &lt;class 'bs4.element.Doctype'&gt;)"><code class="variable-group">(</code>&lt;class 'bs4.element.Declaration'&gt;<code class="variable-op">, </code><code class="variable-ellipsis">...</code></code>
316 <td width="15%" align="right" valign="top" class="summary">
317 <span class="summary-type"> </span>
318 </td><td class="summary">
319 <a name="__package__"></a><span class="summary-name">__package__</span> = <code title="'lxml.html'"><code class="variable-quote">'</code><code class="variable-string">lxml.html</code><code class="variable-quote">'</code></code>
323 <!-- ==================== FUNCTION DETAILS ==================== -->
324 <a name="section-FunctionDetails"></a>
325 <table class="details" border="1" cellpadding="3"
326 cellspacing="0" width="100%" bgcolor="white">
327 <tr bgcolor="#70b0f0" class="table-header">
328 <td colspan="2" class="table-header">
329 <table border="0" cellpadding="0" cellspacing="0" width="100%">
331 <td align="left"><span class="table-header">Function Details</span></td>
332 <td align="right" valign="top"
333 ><span class="options">[<a href="#section-FunctionDetails"
334 class="privatelink" onclick="toggle_private();"
335 >hide private</a>]</span></td>
341 <a name="fromstring"></a>
343 <table class="details" border="1" cellpadding="3"
344 cellspacing="0" width="100%" bgcolor="white">
346 <table width="100%" cellpadding="0" cellspacing="0" border="0">
347 <tr valign="top"><td>
348 <h3 class="epydoc"><span class="sig"><span class="sig-name">fromstring</span>(<span class="sig-arg">data</span>,
349 <span class="sig-arg">beautifulsoup</span>=<span class="sig-default">None</span>,
350 <span class="sig-arg">makeelement</span>=<span class="sig-default">None</span>,
351 <span class="sig-arg">**bsargs</span>)</span>
353 </td><td align="right" valign="top"
354 ><span class="codelink"><a href="lxml.html.soupparser-pysrc.html#fromstring">source code</a></span>
358 <p>Parse a string of HTML data into an Element tree using the
359 BeautifulSoup parser.</p>
360 <p>Returns the root <tt class="rst-docutils literal">&lt;html&gt;</tt> Element of the tree.</p>
361 <p>You can pass a different BeautifulSoup parser through the
362 <code class="link">beautifulsoup</code> keyword, and a different Element factory function
363 through the <code class="link">makeelement</code> keyword. By default, the standard
364 <tt class="rst-docutils literal">BeautifulSoup</tt> class and the default factory of lxml.html are
372 <table class="details" border="1" cellpadding="3"
373 cellspacing="0" width="100%" bgcolor="white">
375 <table width="100%" cellpadding="0" cellspacing="0" border="0">
376 <tr valign="top"><td>
377 <h3 class="epydoc"><span class="sig"><span class="sig-name">parse</span>(<span class="sig-arg">file</span>,
378 <span class="sig-arg">beautifulsoup</span>=<span class="sig-default">None</span>,
379 <span class="sig-arg">makeelement</span>=<span class="sig-default">None</span>,
380 <span class="sig-arg">**bsargs</span>)</span>
382 </td><td align="right" valign="top"
383 ><span class="codelink"><a href="lxml.html.soupparser-pysrc.html#parse">source code</a></span>
387 <p>Parse a file into an ElementTree using the BeautifulSoup parser.</p>
388 <p>You can pass a different BeautifulSoup parser through the
389 <code class="link">beautifulsoup</code> keyword, and a different Element factory function
390 through the <code class="link">makeelement</code> keyword. By default, the standard
391 <tt class="rst-docutils literal">BeautifulSoup</tt> class and the default factory of lxml.html are
397 <a name="convert_tree"></a>
399 <table class="details" border="1" cellpadding="3"
400 cellspacing="0" width="100%" bgcolor="white">
402 <table width="100%" cellpadding="0" cellspacing="0" border="0">
403 <tr valign="top"><td>
404 <h3 class="epydoc"><span class="sig"><span class="sig-name">convert_tree</span>(<span class="sig-arg">beautiful_soup_tree</span>,
405 <span class="sig-arg">makeelement</span>=<span class="sig-default">None</span>)</span>
407 </td><td align="right" valign="top"
408 ><span class="codelink"><a href="lxml.html.soupparser-pysrc.html#convert_tree">source code</a></span>
412 <p>Convert a BeautifulSoup tree to a list of Element trees.</p>
413 <p>Returns a list instead of a single root Element to support
414 HTML-like soup with more than one root element.</p>
415 <p>You can pass a different Element factory through the <code class="link">makeelement</code>
422 <!-- ==================== VARIABLES DETAILS ==================== -->
423 <a name="section-VariablesDetails"></a>
424 <table class="details" border="1" cellpadding="3"
425 cellspacing="0" width="100%" bgcolor="white">
426 <tr bgcolor="#70b0f0" class="table-header">
427 <td colspan="2" class="table-header">
428 <table border="0" cellpadding="0" cellspacing="0" width="100%">
430 <td align="left"><span class="table-header">Variables Details</span></td>
431 <td align="right" valign="top"
432 ><span class="options">[<a href="#section-VariablesDetails"
433 class="privatelink" onclick="toggle_private();"
434 >hide private</a>]</span></td>
440 <a name="_DECLARATION_OR_DOCTYPE"></a>
441 <div class="private">
442 <table class="details" border="1" cellpadding="3"
443 cellspacing="0" width="100%" bgcolor="white">
445 <h3 class="epydoc">_DECLARATION_OR_DOCTYPE</h3>
451 <dd><table><tr><td><pre class="variable">
452 <code class="variable-group">(</code>&lt;class 'bs4.element.Declaration'&gt;<code class="variable-op">, </code>&lt;class 'bs4.element.Doctype'&gt;<code class="variable-group">)</code>
453 </pre></td></tr></table>
459 <!-- ==================== NAVIGATION BAR ==================== -->
460 <table class="navbar" border="0" width="100%" cellpadding="0"
461 bgcolor="#a0c0ff" cellspacing="0">
464 <th> <a
465 href="lxml-module.html">Home</a> </th>
468 <th> <a
469 href="module-tree.html">Trees</a> </th>
472 <th> <a
473 href="identifier-index.html">Indices</a> </th>
476 <th> <a
477 href="help.html">Help</a> </th>
479 <!-- Project homepage -->
480 <th class="navbar" align="right" width="100%">
481 <table border="0" cellpadding="0" cellspacing="0">
482 <tr><th class="navbar" align="center"
483 ><a class="navbar" target="_top" href="/">lxml API</a></th>
487 <table border="0" cellpadding="0" cellspacing="0" width="100%">
489 <td align="left" class="footer">
490 Generated by Epydoc 3.0.1
491 on Wed Jan 29 12:26:21 2020
493 <td align="right" class="footer">
494 <a target="mainFrame" href="http://epydoc.sourceforge.net"
495 >http://epydoc.sourceforge.net</a>
500 <script type="text/javascript">
502 // Private objects are initially displayed (because if
503 // javascript is turned off then we want them to be
504 // visible); but by default, we want to hide them. So hide
505 // them unless we have a cookie that says to show them.