4e819b7229cfbe1401618556fa345968ca55d13f
[platform/upstream/python-lxml.git] / doc / html / api / lxml.html.soupparser-module.html
1 <?xml version="1.0" encoding="ascii"?>
2 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
3           "DTD/xhtml1-transitional.dtd">
4 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
5 <head>
6   <title>lxml.html.soupparser</title>
7   <link rel="stylesheet" href="epydoc.css" type="text/css" />
8   <script type="text/javascript" src="epydoc.js"></script>
9 </head>
10
11 <body bgcolor="white" text="black" link="blue" vlink="#204080"
12       alink="#204080">
13 <!-- ==================== NAVIGATION BAR ==================== -->
14 <table class="navbar" border="0" width="100%" cellpadding="0"
15        bgcolor="#a0c0ff" cellspacing="0">
16   <tr valign="middle">
17   <!-- Home link -->
18       <th>&nbsp;&nbsp;&nbsp;<a
19         href="lxml-module.html">Home</a>&nbsp;&nbsp;&nbsp;</th>
20
21   <!-- Tree link -->
22       <th>&nbsp;&nbsp;&nbsp;<a
23         href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
24
25   <!-- Index link -->
26       <th>&nbsp;&nbsp;&nbsp;<a
27         href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
28
29   <!-- Help link -->
30       <th>&nbsp;&nbsp;&nbsp;<a
31         href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
32
33   <!-- Project homepage -->
34       <th class="navbar" align="right" width="100%">
35         <table border="0" cellpadding="0" cellspacing="0">
36           <tr><th class="navbar" align="center"
37             ><a class="navbar" target="_top" href="/">lxml API</a></th>
38           </tr></table></th>
39   </tr>
40 </table>
41 <table width="100%" cellpadding="0" cellspacing="0">
42   <tr valign="top">
43     <td width="100%">
44       <span class="breadcrumbs">
45         <a href="lxml-module.html">Package&nbsp;lxml</a> ::
46         <a href="lxml.html-module.html">Package&nbsp;html</a> ::
47         Module&nbsp;soupparser
48       </span>
49     </td>
50     <td>
51       <table cellpadding="0" cellspacing="0">
52         <!-- hide/show private -->
53         <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
54     onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>
55         <tr><td align="right"><span class="options"
56             >[<a href="frames.html" target="_top">frames</a
57             >]&nbsp;|&nbsp;<a href="lxml.html.soupparser-module.html"
58             target="_top">no&nbsp;frames</a>]</span></td></tr>
59       </table>
60     </td>
61   </tr>
62 </table>
63 <!-- ==================== MODULE DESCRIPTION ==================== -->
64 <h1 class="epydoc">Module soupparser</h1><p class="nomargin-top"><span class="codelink"><a href="lxml.html.soupparser-pysrc.html">source&nbsp;code</a></span></p>
65 External interface to the BeautifulSoup HTML parser.
66
67 <!-- ==================== CLASSES ==================== -->
68 <a name="section-Classes"></a>
69 <table class="summary" border="1" cellpadding="3"
70        cellspacing="0" width="100%" bgcolor="white">
71 <tr bgcolor="#70b0f0" class="table-header">
72   <td colspan="2" class="table-header">
73     <table border="0" cellpadding="0" cellspacing="0" width="100%">
74       <tr valign="top">
75         <td align="left"><span class="table-header">Classes</span></td>
76         <td align="right" valign="top"
77          ><span class="options">[<a href="#section-Classes"
78          class="privatelink" onclick="toggle_private();"
79          >hide private</a>]</span></td>
80       </tr>
81     </table>
82   </td>
83 </tr>
84 <tr class="private">
85     <td width="15%" align="right" valign="top" class="summary">
86       <span class="summary-type">&nbsp;</span>
87     </td><td class="summary">
88         <a href="lxml.html.soupparser._PseudoTag-class.html" class="summary-name" onclick="show_private();">_PseudoTag</a>
89     </td>
90   </tr>
91 </table>
92 <!-- ==================== FUNCTIONS ==================== -->
93 <a name="section-Functions"></a>
94 <table class="summary" border="1" cellpadding="3"
95        cellspacing="0" width="100%" bgcolor="white">
96 <tr bgcolor="#70b0f0" class="table-header">
97   <td colspan="2" class="table-header">
98     <table border="0" cellpadding="0" cellspacing="0" width="100%">
99       <tr valign="top">
100         <td align="left"><span class="table-header">Functions</span></td>
101         <td align="right" valign="top"
102          ><span class="options">[<a href="#section-Functions"
103          class="privatelink" onclick="toggle_private();"
104          >hide private</a>]</span></td>
105       </tr>
106     </table>
107   </td>
108 </tr>
109 <tr>
110     <td width="15%" align="right" valign="top" class="summary">
111       <span class="summary-type">&nbsp;</span>
112     </td><td class="summary">
113       <table width="100%" cellpadding="0" cellspacing="0" border="0">
114         <tr>
115           <td><span class="summary-sig"><a href="lxml.html.soupparser-module.html#fromstring" class="summary-sig-name">fromstring</a>(<span class="summary-sig-arg">data</span>,
116         <span class="summary-sig-arg">beautifulsoup</span>=<span class="summary-sig-default">None</span>,
117         <span class="summary-sig-arg">makeelement</span>=<span class="summary-sig-default">None</span>,
118         <span class="summary-sig-arg">**bsargs</span>)</span><br />
119       Parse a string of HTML data into an Element tree using the
120 BeautifulSoup parser.</td>
121           <td align="right" valign="top">
122             <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#fromstring">source&nbsp;code</a></span>
123             
124           </td>
125         </tr>
126       </table>
127       
128     </td>
129   </tr>
130 <tr>
131     <td width="15%" align="right" valign="top" class="summary">
132       <span class="summary-type">&nbsp;</span>
133     </td><td class="summary">
134       <table width="100%" cellpadding="0" cellspacing="0" border="0">
135         <tr>
136           <td><span class="summary-sig"><a href="lxml.html.soupparser-module.html#parse" class="summary-sig-name">parse</a>(<span class="summary-sig-arg">file</span>,
137         <span class="summary-sig-arg">beautifulsoup</span>=<span class="summary-sig-default">None</span>,
138         <span class="summary-sig-arg">makeelement</span>=<span class="summary-sig-default">None</span>,
139         <span class="summary-sig-arg">**bsargs</span>)</span><br />
140       Parse a file into an ElementTree using the BeautifulSoup parser.</td>
141           <td align="right" valign="top">
142             <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#parse">source&nbsp;code</a></span>
143             
144           </td>
145         </tr>
146       </table>
147       
148     </td>
149   </tr>
150 <tr>
151     <td width="15%" align="right" valign="top" class="summary">
152       <span class="summary-type">&nbsp;</span>
153     </td><td class="summary">
154       <table width="100%" cellpadding="0" cellspacing="0" border="0">
155         <tr>
156           <td><span class="summary-sig"><a href="lxml.html.soupparser-module.html#convert_tree" class="summary-sig-name">convert_tree</a>(<span class="summary-sig-arg">beautiful_soup_tree</span>,
157         <span class="summary-sig-arg">makeelement</span>=<span class="summary-sig-default">None</span>)</span><br />
158       Convert a BeautifulSoup tree to a list of Element trees.</td>
159           <td align="right" valign="top">
160             <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#convert_tree">source&nbsp;code</a></span>
161             
162           </td>
163         </tr>
164       </table>
165       
166     </td>
167   </tr>
168 <tr class="private">
169     <td width="15%" align="right" valign="top" class="summary">
170       <span class="summary-type">&nbsp;</span>
171     </td><td class="summary">
172       <table width="100%" cellpadding="0" cellspacing="0" border="0">
173         <tr>
174           <td><span class="summary-sig"><a name="_parse"></a><span class="summary-sig-name">_parse</span>(<span class="summary-sig-arg">source</span>,
175         <span class="summary-sig-arg">beautifulsoup</span>,
176         <span class="summary-sig-arg">makeelement</span>,
177         <span class="summary-sig-arg">**bsargs</span>)</span></td>
178           <td align="right" valign="top">
179             <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#_parse">source&nbsp;code</a></span>
180             
181           </td>
182         </tr>
183       </table>
184       
185     </td>
186   </tr>
187 <tr class="private">
188     <td width="15%" align="right" valign="top" class="summary">
189       <span class="summary-type">&nbsp;</span>
190     </td><td class="summary">
191       <table width="100%" cellpadding="0" cellspacing="0" border="0">
192         <tr>
193           <td><span class="summary-sig"><a name="_parse_doctype_declaration"></a><span class="summary-sig-name">_parse_doctype_declaration</span>(<span class="summary-sig-arg">...</span>)</span><br />
194       match(string[, pos[, endpos]]) --&gt; match object or None.
195 Matches zero or more characters at the beginning of the string</td>
196           <td align="right" valign="top">
197             <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#_parse_doctype_declaration">source&nbsp;code</a></span>
198             
199           </td>
200         </tr>
201       </table>
202       
203     </td>
204   </tr>
205 <tr class="private">
206     <td width="15%" align="right" valign="top" class="summary">
207       <span class="summary-type">&nbsp;</span>
208     </td><td class="summary">
209       <table width="100%" cellpadding="0" cellspacing="0" border="0">
210         <tr>
211           <td><span class="summary-sig"><a name="_convert_tree"></a><span class="summary-sig-name">_convert_tree</span>(<span class="summary-sig-arg">beautiful_soup_tree</span>,
212         <span class="summary-sig-arg">makeelement</span>)</span></td>
213           <td align="right" valign="top">
214             <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#_convert_tree">source&nbsp;code</a></span>
215             
216           </td>
217         </tr>
218       </table>
219       
220     </td>
221   </tr>
222 <tr class="private">
223     <td width="15%" align="right" valign="top" class="summary">
224       <span class="summary-type">&nbsp;</span>
225     </td><td class="summary">
226       <table width="100%" cellpadding="0" cellspacing="0" border="0">
227         <tr>
228           <td><span class="summary-sig"><a name="_init_node_converters"></a><span class="summary-sig-name">_init_node_converters</span>(<span class="summary-sig-arg">makeelement</span>)</span></td>
229           <td align="right" valign="top">
230             <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#_init_node_converters">source&nbsp;code</a></span>
231             
232           </td>
233         </tr>
234       </table>
235       
236     </td>
237   </tr>
238 <tr class="private">
239     <td width="15%" align="right" valign="top" class="summary">
240       <span class="summary-type">&nbsp;</span>
241     </td><td class="summary">
242       <table width="100%" cellpadding="0" cellspacing="0" border="0">
243         <tr>
244           <td><span class="summary-sig"><a name="handle_entities"></a><span class="summary-sig-name">handle_entities</span>(<span class="summary-sig-arg">...</span>)</span><br />
245       sub(repl, string[, count = 0]) --&gt; newstring
246 Return the string obtained by replacing the leftmost non-overlapping
247 occurrences of pattern in string by the replacement repl.</td>
248           <td align="right" valign="top">
249             <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#handle_entities">source&nbsp;code</a></span>
250             
251           </td>
252         </tr>
253       </table>
254       
255     </td>
256   </tr>
257 <tr class="private">
258     <td width="15%" align="right" valign="top" class="summary">
259       <span class="summary-type">character</span>
260     </td><td class="summary">
261       <table width="100%" cellpadding="0" cellspacing="0" border="0">
262         <tr>
263           <td><span class="summary-sig"><a name="unichr"></a><span class="summary-sig-name">unichr</span>(<span class="summary-sig-arg">i</span>)</span><br />
264       Return a string of one character with ordinal i; 0 &lt;= i &lt; 256.</td>
265           <td align="right" valign="top">
266             
267             
268           </td>
269         </tr>
270       </table>
271       
272     </td>
273   </tr>
274 <tr class="private">
275     <td width="15%" align="right" valign="top" class="summary">
276       <span class="summary-type">&nbsp;</span>
277     </td><td class="summary">
278       <table width="100%" cellpadding="0" cellspacing="0" border="0">
279         <tr>
280           <td><span class="summary-sig"><a name="unescape"></a><span class="summary-sig-name">unescape</span>(<span class="summary-sig-arg">string</span>)</span></td>
281           <td align="right" valign="top">
282             <span class="codelink"><a href="lxml.html.soupparser-pysrc.html#unescape">source&nbsp;code</a></span>
283             
284           </td>
285         </tr>
286       </table>
287       
288     </td>
289   </tr>
290 </table>
291 <!-- ==================== VARIABLES ==================== -->
292 <a name="section-Variables"></a>
293 <table class="summary" border="1" cellpadding="3"
294        cellspacing="0" width="100%" bgcolor="white">
295 <tr bgcolor="#70b0f0" class="table-header">
296   <td colspan="2" class="table-header">
297     <table border="0" cellpadding="0" cellspacing="0" width="100%">
298       <tr valign="top">
299         <td align="left"><span class="table-header">Variables</span></td>
300         <td align="right" valign="top"
301          ><span class="options">[<a href="#section-Variables"
302          class="privatelink" onclick="toggle_private();"
303          >hide private</a>]</span></td>
304       </tr>
305     </table>
306   </td>
307 </tr>
308 <tr class="private">
309     <td width="15%" align="right" valign="top" class="summary">
310       <span class="summary-type">&nbsp;</span>
311     </td><td class="summary">
312         <a href="lxml.html.soupparser-module.html#_DECLARATION_OR_DOCTYPE" class="summary-name" onclick="show_private();">_DECLARATION_OR_DOCTYPE</a> = <code title="(&lt;class 'bs4.element.Declaration'&gt;, &lt;class 'bs4.element.Doctype'&gt;)"><code class="variable-group">(</code>&lt;class 'bs4.element.Declaration'&gt;<code class="variable-op">, </code><code class="variable-ellipsis">...</code></code>
313     </td>
314   </tr>
315 <tr class="private">
316     <td width="15%" align="right" valign="top" class="summary">
317       <span class="summary-type">&nbsp;</span>
318     </td><td class="summary">
319         <a name="__package__"></a><span class="summary-name">__package__</span> = <code title="'lxml.html'"><code class="variable-quote">'</code><code class="variable-string">lxml.html</code><code class="variable-quote">'</code></code>
320     </td>
321   </tr>
322 </table>
323 <!-- ==================== FUNCTION DETAILS ==================== -->
324 <a name="section-FunctionDetails"></a>
325 <table class="details" border="1" cellpadding="3"
326        cellspacing="0" width="100%" bgcolor="white">
327 <tr bgcolor="#70b0f0" class="table-header">
328   <td colspan="2" class="table-header">
329     <table border="0" cellpadding="0" cellspacing="0" width="100%">
330       <tr valign="top">
331         <td align="left"><span class="table-header">Function Details</span></td>
332         <td align="right" valign="top"
333          ><span class="options">[<a href="#section-FunctionDetails"
334          class="privatelink" onclick="toggle_private();"
335          >hide private</a>]</span></td>
336       </tr>
337     </table>
338   </td>
339 </tr>
340 </table>
341 <a name="fromstring"></a>
342 <div>
343 <table class="details" border="1" cellpadding="3"
344        cellspacing="0" width="100%" bgcolor="white">
345 <tr><td>
346   <table width="100%" cellpadding="0" cellspacing="0" border="0">
347   <tr valign="top"><td>
348   <h3 class="epydoc"><span class="sig"><span class="sig-name">fromstring</span>(<span class="sig-arg">data</span>,
349         <span class="sig-arg">beautifulsoup</span>=<span class="sig-default">None</span>,
350         <span class="sig-arg">makeelement</span>=<span class="sig-default">None</span>,
351         <span class="sig-arg">**bsargs</span>)</span>
352   </h3>
353   </td><td align="right" valign="top"
354     ><span class="codelink"><a href="lxml.html.soupparser-pysrc.html#fromstring">source&nbsp;code</a></span>&nbsp;
355     </td>
356   </tr></table>
357   
358   <p>Parse a string of HTML data into an Element tree using the
359 BeautifulSoup parser.</p>
360 <p>Returns the root <tt class="rst-docutils literal">&lt;html&gt;</tt> Element of the tree.</p>
361 <p>You can pass a different BeautifulSoup parser through the
362 <code class="link">beautifulsoup</code> keyword, and a different Element factory function
363 through the <code class="link">makeelement</code> keyword.  By default, the standard
364 <tt class="rst-docutils literal">BeautifulSoup</tt> class and the default factory of lxml.html are
365 used.</p>
366   <dl class="fields">
367   </dl>
368 </td></tr></table>
369 </div>
370 <a name="parse"></a>
371 <div>
372 <table class="details" border="1" cellpadding="3"
373        cellspacing="0" width="100%" bgcolor="white">
374 <tr><td>
375   <table width="100%" cellpadding="0" cellspacing="0" border="0">
376   <tr valign="top"><td>
377   <h3 class="epydoc"><span class="sig"><span class="sig-name">parse</span>(<span class="sig-arg">file</span>,
378         <span class="sig-arg">beautifulsoup</span>=<span class="sig-default">None</span>,
379         <span class="sig-arg">makeelement</span>=<span class="sig-default">None</span>,
380         <span class="sig-arg">**bsargs</span>)</span>
381   </h3>
382   </td><td align="right" valign="top"
383     ><span class="codelink"><a href="lxml.html.soupparser-pysrc.html#parse">source&nbsp;code</a></span>&nbsp;
384     </td>
385   </tr></table>
386   
387   <p>Parse a file into an ElementTree using the BeautifulSoup parser.</p>
388 <p>You can pass a different BeautifulSoup parser through the
389 <code class="link">beautifulsoup</code> keyword, and a different Element factory function
390 through the <code class="link">makeelement</code> keyword.  By default, the standard
391 <tt class="rst-docutils literal">BeautifulSoup</tt> class and the default factory of lxml.html are
392 used.</p>
393   <dl class="fields">
394   </dl>
395 </td></tr></table>
396 </div>
397 <a name="convert_tree"></a>
398 <div>
399 <table class="details" border="1" cellpadding="3"
400        cellspacing="0" width="100%" bgcolor="white">
401 <tr><td>
402   <table width="100%" cellpadding="0" cellspacing="0" border="0">
403   <tr valign="top"><td>
404   <h3 class="epydoc"><span class="sig"><span class="sig-name">convert_tree</span>(<span class="sig-arg">beautiful_soup_tree</span>,
405         <span class="sig-arg">makeelement</span>=<span class="sig-default">None</span>)</span>
406   </h3>
407   </td><td align="right" valign="top"
408     ><span class="codelink"><a href="lxml.html.soupparser-pysrc.html#convert_tree">source&nbsp;code</a></span>&nbsp;
409     </td>
410   </tr></table>
411   
412   <p>Convert a BeautifulSoup tree to a list of Element trees.</p>
413 <p>Returns a list instead of a single root Element to support
414 HTML-like soup with more than one root element.</p>
415 <p>You can pass a different Element factory through the <code class="link">makeelement</code>
416 keyword.</p>
417   <dl class="fields">
418   </dl>
419 </td></tr></table>
420 </div>
421 <br />
422 <!-- ==================== VARIABLES DETAILS ==================== -->
423 <a name="section-VariablesDetails"></a>
424 <table class="details" border="1" cellpadding="3"
425        cellspacing="0" width="100%" bgcolor="white">
426 <tr bgcolor="#70b0f0" class="table-header">
427   <td colspan="2" class="table-header">
428     <table border="0" cellpadding="0" cellspacing="0" width="100%">
429       <tr valign="top">
430         <td align="left"><span class="table-header">Variables Details</span></td>
431         <td align="right" valign="top"
432          ><span class="options">[<a href="#section-VariablesDetails"
433          class="privatelink" onclick="toggle_private();"
434          >hide private</a>]</span></td>
435       </tr>
436     </table>
437   </td>
438 </tr>
439 </table>
440 <a name="_DECLARATION_OR_DOCTYPE"></a>
441 <div class="private">
442 <table class="details" border="1" cellpadding="3"
443        cellspacing="0" width="100%" bgcolor="white">
444 <tr><td>
445   <h3 class="epydoc">_DECLARATION_OR_DOCTYPE</h3>
446   
447   <dl class="fields">
448   </dl>
449   <dl class="fields">
450     <dt>Value:</dt>
451       <dd><table><tr><td><pre class="variable">
452 <code class="variable-group">(</code>&lt;class 'bs4.element.Declaration'&gt;<code class="variable-op">, </code>&lt;class 'bs4.element.Doctype'&gt;<code class="variable-group">)</code>
453 </pre></td></tr></table>
454 </dd>
455   </dl>
456 </td></tr></table>
457 </div>
458 <br />
459 <!-- ==================== NAVIGATION BAR ==================== -->
460 <table class="navbar" border="0" width="100%" cellpadding="0"
461        bgcolor="#a0c0ff" cellspacing="0">
462   <tr valign="middle">
463   <!-- Home link -->
464       <th>&nbsp;&nbsp;&nbsp;<a
465         href="lxml-module.html">Home</a>&nbsp;&nbsp;&nbsp;</th>
466
467   <!-- Tree link -->
468       <th>&nbsp;&nbsp;&nbsp;<a
469         href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
470
471   <!-- Index link -->
472       <th>&nbsp;&nbsp;&nbsp;<a
473         href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
474
475   <!-- Help link -->
476       <th>&nbsp;&nbsp;&nbsp;<a
477         href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
478
479   <!-- Project homepage -->
480       <th class="navbar" align="right" width="100%">
481         <table border="0" cellpadding="0" cellspacing="0">
482           <tr><th class="navbar" align="center"
483             ><a class="navbar" target="_top" href="/">lxml API</a></th>
484           </tr></table></th>
485   </tr>
486 </table>
487 <table border="0" cellpadding="0" cellspacing="0" width="100%">
488   <tr>
489     <td align="left" class="footer">
490     Generated by Epydoc 3.0.1
491     on Wed Jan 29 12:26:21 2020
492     </td>
493     <td align="right" class="footer">
494       <a target="mainFrame" href="http://epydoc.sourceforge.net"
495         >http://epydoc.sourceforge.net</a>
496     </td>
497   </tr>
498 </table>
499
500 <script type="text/javascript">
501   <!--
502   // Private objects are initially displayed (because if
503   // javascript is turned off then we want them to be
504   // visible); but by default, we want to hide them.  So hide
505   // them unless we have a cookie that says to show them.
506   checkCookie();
507   // -->
508 </script>
509 </body>
510 </html>