f9000c9a472d81ed1ba260b8f88d083b1b89d21d
[platform/upstream/python-lxml.git] / doc / html / api / lxml.html.clean-module.html
1 <?xml version="1.0" encoding="ascii"?>
2 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
3           "DTD/xhtml1-transitional.dtd">
4 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
5 <head>
6   <title>lxml.html.clean</title>
7   <link rel="stylesheet" href="epydoc.css" type="text/css" />
8   <script type="text/javascript" src="epydoc.js"></script>
9 </head>
10
11 <body bgcolor="white" text="black" link="blue" vlink="#204080"
12       alink="#204080">
13 <!-- ==================== NAVIGATION BAR ==================== -->
14 <table class="navbar" border="0" width="100%" cellpadding="0"
15        bgcolor="#a0c0ff" cellspacing="0">
16   <tr valign="middle">
17   <!-- Home link -->
18       <th>&nbsp;&nbsp;&nbsp;<a
19         href="lxml-module.html">Home</a>&nbsp;&nbsp;&nbsp;</th>
20
21   <!-- Tree link -->
22       <th>&nbsp;&nbsp;&nbsp;<a
23         href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
24
25   <!-- Index link -->
26       <th>&nbsp;&nbsp;&nbsp;<a
27         href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
28
29   <!-- Help link -->
30       <th>&nbsp;&nbsp;&nbsp;<a
31         href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
32
33   <!-- Project homepage -->
34       <th class="navbar" align="right" width="100%">
35         <table border="0" cellpadding="0" cellspacing="0">
36           <tr><th class="navbar" align="center"
37             ><a class="navbar" target="_top" href="/">lxml API</a></th>
38           </tr></table></th>
39   </tr>
40 </table>
41 <table width="100%" cellpadding="0" cellspacing="0">
42   <tr valign="top">
43     <td width="100%">
44       <span class="breadcrumbs">
45         <a href="lxml-module.html">Package&nbsp;lxml</a> ::
46         <a href="lxml.html-module.html">Package&nbsp;html</a> ::
47         Module&nbsp;clean
48       </span>
49     </td>
50     <td>
51       <table cellpadding="0" cellspacing="0">
52         <!-- hide/show private -->
53         <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
54     onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>
55         <tr><td align="right"><span class="options"
56             >[<a href="frames.html" target="_top">frames</a
57             >]&nbsp;|&nbsp;<a href="lxml.html.clean-module.html"
58             target="_top">no&nbsp;frames</a>]</span></td></tr>
59       </table>
60     </td>
61   </tr>
62 </table>
63 <!-- ==================== MODULE DESCRIPTION ==================== -->
64 <h1 class="epydoc">Module clean</h1><p class="nomargin-top"><span class="codelink"><a href="lxml.html.clean-pysrc.html">source&nbsp;code</a></span></p>
65 <p>A cleanup tool for HTML.</p>
66 <p>Removes unwanted tags and content.  See the <a href="lxml.html.clean.Cleaner-class.html" class="link">Cleaner</a> class for
67 details.</p>
68
69 <!-- ==================== CLASSES ==================== -->
70 <a name="section-Classes"></a>
71 <table class="summary" border="1" cellpadding="3"
72        cellspacing="0" width="100%" bgcolor="white">
73 <tr bgcolor="#70b0f0" class="table-header">
74   <td colspan="2" class="table-header">
75     <table border="0" cellpadding="0" cellspacing="0" width="100%">
76       <tr valign="top">
77         <td align="left"><span class="table-header">Classes</span></td>
78         <td align="right" valign="top"
79          ><span class="options">[<a href="#section-Classes"
80          class="privatelink" onclick="toggle_private();"
81          >hide private</a>]</span></td>
82       </tr>
83     </table>
84   </td>
85 </tr>
86 <tr class="private">
87     <td width="15%" align="right" valign="top" class="summary">
88       <span class="summary-type">&nbsp;</span>
89     </td><td class="summary">
90         <a href="str-class.html" class="summary-name" onclick="show_private();">unicode</a><br />
91       str(object='') -&gt; string
92     </td>
93   </tr>
94 <tr>
95     <td width="15%" align="right" valign="top" class="summary">
96       <span class="summary-type">&nbsp;</span>
97     </td><td class="summary">
98         <a href="lxml.html.clean.Cleaner-class.html" class="summary-name">Cleaner</a><br />
99       Instances clean the document of each of the possible offending
100 elements.  The cleaning is controlled by attributes; you can
101 override attributes in a subclass, or set them in the constructor.
102     </td>
103   </tr>
104 </table>
105 <!-- ==================== FUNCTIONS ==================== -->
106 <a name="section-Functions"></a>
107 <table class="summary" border="1" cellpadding="3"
108        cellspacing="0" width="100%" bgcolor="white">
109 <tr bgcolor="#70b0f0" class="table-header">
110   <td colspan="2" class="table-header">
111     <table border="0" cellpadding="0" cellspacing="0" width="100%">
112       <tr valign="top">
113         <td align="left"><span class="table-header">Functions</span></td>
114         <td align="right" valign="top"
115          ><span class="options">[<a href="#section-Functions"
116          class="privatelink" onclick="toggle_private();"
117          >hide private</a>]</span></td>
118       </tr>
119     </table>
120   </td>
121 </tr>
122 <tr class="private">
123     <td width="15%" align="right" valign="top" class="summary">
124       <span class="summary-type">character</span>
125     </td><td class="summary">
126       <table width="100%" cellpadding="0" cellspacing="0" border="0">
127         <tr>
128           <td><span class="summary-sig"><a name="unichr"></a><span class="summary-sig-name">unichr</span>(<span class="summary-sig-arg">i</span>)</span><br />
129       Return a string of one character with ordinal i; 0 &lt;= i &lt; 256.</td>
130           <td align="right" valign="top">
131             
132             
133           </td>
134         </tr>
135       </table>
136       
137     </td>
138   </tr>
139 <tr class="private">
140     <td width="15%" align="right" valign="top" class="summary">
141       <span class="summary-type">&nbsp;</span>
142     </td><td class="summary">
143       <table width="100%" cellpadding="0" cellspacing="0" border="0">
144         <tr>
145           <td><span class="summary-sig"><a name="_is_image_dataurl"></a><span class="summary-sig-name">_is_image_dataurl</span>(<span class="summary-sig-arg">...</span>)</span><br />
146       search(string[, pos[, endpos]]) --&gt; match object or None.
147 Scan through string looking for a match, and return a corresponding
148 match object instance. Return None if no position in the string matches.</td>
149           <td align="right" valign="top">
150             <span class="codelink"><a href="lxml.html.clean-pysrc.html#_is_image_dataurl">source&nbsp;code</a></span>
151             
152           </td>
153         </tr>
154       </table>
155       
156     </td>
157   </tr>
158 <tr class="private">
159     <td width="15%" align="right" valign="top" class="summary">
160       <span class="summary-type">&nbsp;</span>
161     </td><td class="summary">
162       <table width="100%" cellpadding="0" cellspacing="0" border="0">
163         <tr>
164           <td><span class="summary-sig"><a name="_is_possibly_malicious_scheme"></a><span class="summary-sig-name">_is_possibly_malicious_scheme</span>(<span class="summary-sig-arg">...</span>)</span><br />
165       search(string[, pos[, endpos]]) --&gt; match object or None.
166 Scan through string looking for a match, and return a corresponding
167 match object instance. Return None if no position in the string matches.</td>
168           <td align="right" valign="top">
169             <span class="codelink"><a href="lxml.html.clean-pysrc.html#_is_possibly_malicious_scheme">source&nbsp;code</a></span>
170             
171           </td>
172         </tr>
173       </table>
174       
175     </td>
176   </tr>
177 <tr class="private">
178     <td width="15%" align="right" valign="top" class="summary">
179       <span class="summary-type">&nbsp;</span>
180     </td><td class="summary">
181       <table width="100%" cellpadding="0" cellspacing="0" border="0">
182         <tr>
183           <td><span class="summary-sig"><a name="_is_javascript_scheme"></a><span class="summary-sig-name">_is_javascript_scheme</span>(<span class="summary-sig-arg">s</span>)</span></td>
184           <td align="right" valign="top">
185             <span class="codelink"><a href="lxml.html.clean-pysrc.html#_is_javascript_scheme">source&nbsp;code</a></span>
186             
187           </td>
188         </tr>
189       </table>
190       
191     </td>
192   </tr>
193 <tr class="private">
194     <td width="15%" align="right" valign="top" class="summary">
195       <span class="summary-type">&nbsp;</span>
196     </td><td class="summary">
197       <table width="100%" cellpadding="0" cellspacing="0" border="0">
198         <tr>
199           <td><span class="summary-sig"><a name="_substitute_whitespace"></a><span class="summary-sig-name">_substitute_whitespace</span>(<span class="summary-sig-arg">...</span>)</span><br />
200       sub(repl, string[, count = 0]) --&gt; newstring
201 Return the string obtained by replacing the leftmost non-overlapping
202 occurrences of pattern in string by the replacement repl.</td>
203           <td align="right" valign="top">
204             <span class="codelink"><a href="lxml.html.clean-pysrc.html#_substitute_whitespace">source&nbsp;code</a></span>
205             
206           </td>
207         </tr>
208       </table>
209       
210     </td>
211   </tr>
212 <tr>
213     <td width="15%" align="right" valign="top" class="summary">
214       <span class="summary-type">&nbsp;</span>
215     </td><td class="summary">
216       <table width="100%" cellpadding="0" cellspacing="0" border="0">
217         <tr>
218           <td><span class="summary-sig"><a name="clean_html"></a><span class="summary-sig-name">clean_html</span>(<span class="summary-sig-arg">...</span>)</span></td>
219           <td align="right" valign="top">
220             <span class="codelink"><a href="lxml.html.clean-pysrc.html#clean_html">source&nbsp;code</a></span>
221             
222           </td>
223         </tr>
224       </table>
225       
226     </td>
227   </tr>
228 <tr>
229     <td width="15%" align="right" valign="top" class="summary">
230       <span class="summary-type">&nbsp;</span>
231     </td><td class="summary">
232       <table width="100%" cellpadding="0" cellspacing="0" border="0">
233         <tr>
234           <td><span class="summary-sig"><a href="lxml.html.clean-module.html#autolink" class="summary-sig-name">autolink</a>(<span class="summary-sig-arg">el</span>,
235         <span class="summary-sig-arg">link_regexes</span>=<span class="summary-sig-default">_link_regexes</span>,
236         <span class="summary-sig-arg">avoid_elements</span>=<span class="summary-sig-default">_avoid_elements</span>,
237         <span class="summary-sig-arg">avoid_hosts</span>=<span class="summary-sig-default">_avoid_hosts</span>,
238         <span class="summary-sig-arg">avoid_classes</span>=<span class="summary-sig-default">_avoid_classes</span>)</span><br />
239       Turn any URLs into links.</td>
240           <td align="right" valign="top">
241             <span class="codelink"><a href="lxml.html.clean-pysrc.html#autolink">source&nbsp;code</a></span>
242             
243           </td>
244         </tr>
245       </table>
246       
247     </td>
248   </tr>
249 <tr class="private">
250     <td width="15%" align="right" valign="top" class="summary">
251       <span class="summary-type">&nbsp;</span>
252     </td><td class="summary">
253       <table width="100%" cellpadding="0" cellspacing="0" border="0">
254         <tr>
255           <td><span class="summary-sig"><a name="_link_text"></a><span class="summary-sig-name">_link_text</span>(<span class="summary-sig-arg">text</span>,
256         <span class="summary-sig-arg">link_regexes</span>,
257         <span class="summary-sig-arg">avoid_hosts</span>,
258         <span class="summary-sig-arg">factory</span>)</span></td>
259           <td align="right" valign="top">
260             <span class="codelink"><a href="lxml.html.clean-pysrc.html#_link_text">source&nbsp;code</a></span>
261             
262           </td>
263         </tr>
264       </table>
265       
266     </td>
267   </tr>
268 <tr>
269     <td width="15%" align="right" valign="top" class="summary">
270       <span class="summary-type">&nbsp;</span>
271     </td><td class="summary">
272       <table width="100%" cellpadding="0" cellspacing="0" border="0">
273         <tr>
274           <td><span class="summary-sig"><a href="lxml.html.clean-module.html#autolink_html" class="summary-sig-name">autolink_html</a>(<span class="summary-sig-arg">html</span>)</span><br />
275       Turn any URLs into links.</td>
276           <td align="right" valign="top">
277             <span class="codelink"><a href="lxml.html.clean-pysrc.html#autolink_html">source&nbsp;code</a></span>
278             
279           </td>
280         </tr>
281       </table>
282       
283     </td>
284   </tr>
285 <tr>
286     <td width="15%" align="right" valign="top" class="summary">
287       <span class="summary-type">&nbsp;</span>
288     </td><td class="summary">
289       <table width="100%" cellpadding="0" cellspacing="0" border="0">
290         <tr>
291           <td><span class="summary-sig"><a href="lxml.html.clean-module.html#word_break" class="summary-sig-name">word_break</a>(<span class="summary-sig-arg">el</span>,
292         <span class="summary-sig-arg">max_width</span>=<span class="summary-sig-default">40</span>,
293         <span class="summary-sig-arg">avoid_elements</span>=<span class="summary-sig-default">_avoid_word_break_elements</span>,
294         <span class="summary-sig-arg">avoid_classes</span>=<span class="summary-sig-default">_avoid_word_break_classes</span>,
295         <span class="summary-sig-arg">break_character</span>=<span class="summary-sig-default">unichr(0x200b)</span>)</span><br />
296       Breaks any long words found in the body of the text (not attributes).</td>
297           <td align="right" valign="top">
298             <span class="codelink"><a href="lxml.html.clean-pysrc.html#word_break">source&nbsp;code</a></span>
299             
300           </td>
301         </tr>
302       </table>
303       
304     </td>
305   </tr>
306 <tr>
307     <td width="15%" align="right" valign="top" class="summary">
308       <span class="summary-type">&nbsp;</span>
309     </td><td class="summary">
310       <table width="100%" cellpadding="0" cellspacing="0" border="0">
311         <tr>
312           <td><span class="summary-sig"><a name="word_break_html"></a><span class="summary-sig-name">word_break_html</span>(<span class="summary-sig-arg">html</span>)</span></td>
313           <td align="right" valign="top">
314             <span class="codelink"><a href="lxml.html.clean-pysrc.html#word_break_html">source&nbsp;code</a></span>
315             
316           </td>
317         </tr>
318       </table>
319       
320     </td>
321   </tr>
322 <tr class="private">
323     <td width="15%" align="right" valign="top" class="summary">
324       <span class="summary-type">&nbsp;</span>
325     </td><td class="summary">
326       <table width="100%" cellpadding="0" cellspacing="0" border="0">
327         <tr>
328           <td><span class="summary-sig"><a name="_break_text"></a><span class="summary-sig-name">_break_text</span>(<span class="summary-sig-arg">text</span>,
329         <span class="summary-sig-arg">max_width</span>,
330         <span class="summary-sig-arg">break_character</span>)</span></td>
331           <td align="right" valign="top">
332             <span class="codelink"><a href="lxml.html.clean-pysrc.html#_break_text">source&nbsp;code</a></span>
333             
334           </td>
335         </tr>
336       </table>
337       
338     </td>
339   </tr>
340 <tr class="private">
341     <td width="15%" align="right" valign="top" class="summary">
342       <span class="summary-type">&nbsp;</span>
343     </td><td class="summary">
344       <table width="100%" cellpadding="0" cellspacing="0" border="0">
345         <tr>
346           <td><span class="summary-sig"><a name="_insert_break"></a><span class="summary-sig-name">_insert_break</span>(<span class="summary-sig-arg">word</span>,
347         <span class="summary-sig-arg">width</span>,
348         <span class="summary-sig-arg">break_character</span>)</span></td>
349           <td align="right" valign="top">
350             <span class="codelink"><a href="lxml.html.clean-pysrc.html#_insert_break">source&nbsp;code</a></span>
351             
352           </td>
353         </tr>
354       </table>
355       
356     </td>
357   </tr>
358 </table>
359 <!-- ==================== VARIABLES ==================== -->
360 <a name="section-Variables"></a>
361 <table class="summary" border="1" cellpadding="3"
362        cellspacing="0" width="100%" bgcolor="white">
363 <tr bgcolor="#70b0f0" class="table-header">
364   <td colspan="2" class="table-header">
365     <table border="0" cellpadding="0" cellspacing="0" width="100%">
366       <tr valign="top">
367         <td align="left"><span class="table-header">Variables</span></td>
368         <td align="right" valign="top"
369          ><span class="options">[<a href="#section-Variables"
370          class="privatelink" onclick="toggle_private();"
371          >hide private</a>]</span></td>
372       </tr>
373     </table>
374   </td>
375 </tr>
376 <tr class="private">
377     <td width="15%" align="right" valign="top" class="summary">
378       <span class="summary-type">&nbsp;</span>
379     </td><td class="summary">
380         <a name="basestring"></a><span class="summary-name">basestring</span> = <code title="str, bytes">str, bytes</code>
381     </td>
382   </tr>
383 <tr class="private">
384     <td width="15%" align="right" valign="top" class="summary">
385       <span class="summary-type">&nbsp;</span>
386     </td><td class="summary">
387         <a name="_css_javascript_re"></a><span class="summary-name">_css_javascript_re</span> = <code title="re.compile(r'(?is)expression\s*\(.*?\)')">re.compile(r'<code class="re-flags">(?is)</code>expression\s<code class="re-op">*</code>\(.<code class="re-op">*?</code>\)')</code>
388     </td>
389   </tr>
390 <tr class="private">
391     <td width="15%" align="right" valign="top" class="summary">
392       <span class="summary-type">&nbsp;</span>
393     </td><td class="summary">
394         <a name="_css_import_re"></a><span class="summary-name">_css_import_re</span> = <code title="re.compile(r'(?i)@\s*import')">re.compile(r'<code class="re-flags">(?i)</code>@\s<code class="re-op">*</code>import')</code>
395     </td>
396   </tr>
397 <tr class="private">
398     <td width="15%" align="right" valign="top" class="summary">
399       <span class="summary-type">&nbsp;</span>
400     </td><td class="summary">
401         <a href="lxml.html.clean-module.html#_conditional_comment_re" class="summary-name" onclick="show_private();">_conditional_comment_re</a> = <code title="re.compile(r'(?is)\[if[\s\n\r]+.*?\][\s\n\r]*&gt;')">re.compile(r'<code class="re-flags">(?is)</code>\[if<code class="re-group">[</code>\s\n\r<code class="re-group">]</code><code class="re-op">+</code>.<code class="re-op">*?</code>\]<code class="variable-ellipsis">...</code></code>
402     </td>
403   </tr>
404 <tr class="private">
405     <td width="15%" align="right" valign="top" class="summary">
406       <span class="summary-type">&nbsp;</span>
407     </td><td class="summary">
408         <a name="_find_styled_elements"></a><span class="summary-name">_find_styled_elements</span> = <code title="descendant-or-self::*[@style]">descendant-or-self::*[@style]</code>
409     </td>
410   </tr>
411 <tr class="private">
412     <td width="15%" align="right" valign="top" class="summary">
413       <span class="summary-type">&nbsp;</span>
414     </td><td class="summary">
415         <a href="lxml.html.clean-module.html#_find_external_links" class="summary-name" onclick="show_private();">_find_external_links</a> = <code title="descendant-or-self::a  [normalize-space(@href) and substring(normalize\
416 -space(@href),1,1) != '#'] |descendant-or-self::x:a[normalize-space(@h\
417 ref) and substring(normalize-space(@href),1,1) != '#']">descendant-or-self::a  [normalize-space<code class="variable-ellipsis">...</code></code>
418     </td>
419   </tr>
420 <tr>
421     <td width="15%" align="right" valign="top" class="summary">
422       <span class="summary-type">&nbsp;</span>
423     </td><td class="summary">
424         <a name="clean"></a><span class="summary-name">clean</span> = <code title="&lt;lxml.html.clean.Cleaner object&gt;">&lt;lxml.html.clean.Cleaner object&gt;</code>
425     </td>
426   </tr>
427 <tr class="private">
428     <td width="15%" align="right" valign="top" class="summary">
429       <span class="summary-type">&nbsp;</span>
430     </td><td class="summary">
431         <a href="lxml.html.clean-module.html#_link_regexes" class="summary-name" onclick="show_private();">_link_regexes</a> = <code title="[re.compile(r'(?i)(?P&lt;body&gt;https?://(?P&lt;host&gt;[a-z0-9\._-]+)(?:/[/-_\.,\
432 a-z0-9%&amp;\?;=~]*)?(?:\([/-_\.,a-z0-9%&amp;\?;=~]*\))?)'),
433  re.compile(r'(?i)mailto:(?P&lt;body&gt;[a-z0-9\._-]+@(?P&lt;host&gt;[a-z0-9_\.-]+\
434 [a-z]))')]"><code class="variable-group">[</code>re.compile(r'<code class="re-flags">(?i)</code><code class="re-group">(?P&lt;</code><code class="re-ref">body</code><code class="re-group">&gt;</code>https<code class="re-op">?</code>://<code class="re-group">(?P&lt;</code><code class="re-ref">host</code><code class="re-group">&gt;</code><code class="re-group">[</code><code class="variable-ellipsis">...</code></code>
435     </td>
436   </tr>
437 <tr class="private">
438     <td width="15%" align="right" valign="top" class="summary">
439       <span class="summary-type">&nbsp;</span>
440     </td><td class="summary">
441         <a href="lxml.html.clean-module.html#_avoid_elements" class="summary-name" onclick="show_private();">_avoid_elements</a> = <code title="['textarea', 'pre', 'code', 'head', 'select', 'a']"><code class="variable-group">[</code><code class="variable-quote">'</code><code class="variable-string">textarea</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">pre</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">code</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">head</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">select</code><code class="variable-quote">'</code><code class="variable-ellipsis">...</code></code>
442     </td>
443   </tr>
444 <tr class="private">
445     <td width="15%" align="right" valign="top" class="summary">
446       <span class="summary-type">&nbsp;</span>
447     </td><td class="summary">
448         <a href="lxml.html.clean-module.html#_avoid_hosts" class="summary-name" onclick="show_private();">_avoid_hosts</a> = <code title="[re.compile(r'(?i)^localhost'),
449  re.compile(r'(?i)\bexample\.(?:com|org|net)$'),
450  re.compile(r'^127\.0\.0\.1$')]"><code class="variable-group">[</code>re.compile(r'<code class="re-flags">(?i)</code>^localhost')<code class="variable-op">, </code>re.compile(r'<code class="re-flags">(?</code><code class="variable-ellipsis">...</code></code>
451     </td>
452   </tr>
453 <tr class="private">
454     <td width="15%" align="right" valign="top" class="summary">
455       <span class="summary-type">&nbsp;</span>
456     </td><td class="summary">
457         <a name="_avoid_classes"></a><span class="summary-name">_avoid_classes</span> = <code title="['nolink']"><code class="variable-group">[</code><code class="variable-quote">'</code><code class="variable-string">nolink</code><code class="variable-quote">'</code><code class="variable-group">]</code></code>
458     </td>
459   </tr>
460 <tr class="private">
461     <td width="15%" align="right" valign="top" class="summary">
462       <span class="summary-type">&nbsp;</span>
463     </td><td class="summary">
464         <a name="_avoid_word_break_elements"></a><span class="summary-name">_avoid_word_break_elements</span> = <code title="['pre', 'textarea', 'code']"><code class="variable-group">[</code><code class="variable-quote">'</code><code class="variable-string">pre</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">textarea</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">code</code><code class="variable-quote">'</code><code class="variable-group">]</code></code>
465     </td>
466   </tr>
467 <tr class="private">
468     <td width="15%" align="right" valign="top" class="summary">
469       <span class="summary-type">&nbsp;</span>
470     </td><td class="summary">
471         <a name="_avoid_word_break_classes"></a><span class="summary-name">_avoid_word_break_classes</span> = <code title="['nobreak']"><code class="variable-group">[</code><code class="variable-quote">'</code><code class="variable-string">nobreak</code><code class="variable-quote">'</code><code class="variable-group">]</code></code>
472     </td>
473   </tr>
474 <tr class="private">
475     <td width="15%" align="right" valign="top" class="summary">
476       <span class="summary-type">&nbsp;</span>
477     </td><td class="summary">
478         <a name="_break_prefer_re"></a><span class="summary-name">_break_prefer_re</span> = <code title="re.compile(r'(?i)[^a-z]')">re.compile(r'<code class="re-flags">(?i)</code><code class="re-group">[</code><code class="re-op">^</code>a<code class="re-op">-</code>z<code class="re-group">]</code>')</code>
479     </td>
480   </tr>
481 <tr class="private">
482     <td width="15%" align="right" valign="top" class="summary">
483       <span class="summary-type">&nbsp;</span>
484     </td><td class="summary">
485         <a name="__package__"></a><span class="summary-name">__package__</span> = <code title="None">None</code><br />
486       hash(x)
487     </td>
488   </tr>
489 <tr class="private">
490     <td width="15%" align="right" valign="top" class="summary">
491       <span class="summary-type">&nbsp;</span>
492     </td><td class="summary">
493         <a name="__test__"></a><span class="summary-name">__test__</span> = <code title="{}"><code class="variable-group">{</code><code class="variable-group">}</code></code>
494     </td>
495   </tr>
496 </table>
497 <!-- ==================== FUNCTION DETAILS ==================== -->
498 <a name="section-FunctionDetails"></a>
499 <table class="details" border="1" cellpadding="3"
500        cellspacing="0" width="100%" bgcolor="white">
501 <tr bgcolor="#70b0f0" class="table-header">
502   <td colspan="2" class="table-header">
503     <table border="0" cellpadding="0" cellspacing="0" width="100%">
504       <tr valign="top">
505         <td align="left"><span class="table-header">Function Details</span></td>
506         <td align="right" valign="top"
507          ><span class="options">[<a href="#section-FunctionDetails"
508          class="privatelink" onclick="toggle_private();"
509          >hide private</a>]</span></td>
510       </tr>
511     </table>
512   </td>
513 </tr>
514 </table>
515 <a name="autolink"></a>
516 <div>
517 <table class="details" border="1" cellpadding="3"
518        cellspacing="0" width="100%" bgcolor="white">
519 <tr><td>
520   <table width="100%" cellpadding="0" cellspacing="0" border="0">
521   <tr valign="top"><td>
522   <h3 class="epydoc"><span class="sig"><span class="sig-name">autolink</span>(<span class="sig-arg">el</span>,
523         <span class="sig-arg">link_regexes</span>=<span class="sig-default">_link_regexes</span>,
524         <span class="sig-arg">avoid_elements</span>=<span class="sig-default">_avoid_elements</span>,
525         <span class="sig-arg">avoid_hosts</span>=<span class="sig-default">_avoid_hosts</span>,
526         <span class="sig-arg">avoid_classes</span>=<span class="sig-default">_avoid_classes</span>)</span>
527   </h3>
528   </td><td align="right" valign="top"
529     ><span class="codelink"><a href="lxml.html.clean-pysrc.html#autolink">source&nbsp;code</a></span>&nbsp;
530     </td>
531   </tr></table>
532   
533   <p>Turn any URLs into links.</p>
534 <p>It will search for links identified by the given regular
535 expressions (by default mailto and http(s) links).</p>
536 <p>It won't link text in an element in avoid_elements, or an element
537 with a class in avoid_classes.  It won't link to anything with a
538 host that matches one of the regular expressions in avoid_hosts
539 (by default localhost, example.com/org/net and 127.0.0.1).</p>
540 <p>If you pass in an element, the element's tail will not be
541 substituted, only the contents of the element.</p>
542   <dl class="fields">
543   </dl>
544 </td></tr></table>
545 </div>
546 <a name="autolink_html"></a>
547 <div>
548 <table class="details" border="1" cellpadding="3"
549        cellspacing="0" width="100%" bgcolor="white">
550 <tr><td>
551   <table width="100%" cellpadding="0" cellspacing="0" border="0">
552   <tr valign="top"><td>
553   <h3 class="epydoc"><span class="sig"><span class="sig-name">autolink_html</span>(<span class="sig-arg">html</span>)</span>
554   </h3>
555   </td><td align="right" valign="top"
556     ><span class="codelink"><a href="lxml.html.clean-pysrc.html#autolink_html">source&nbsp;code</a></span>&nbsp;
557     </td>
558   </tr></table>
559   
560   <p>Turn any URLs into links.</p>
561 <p>It will search for links identified by the given regular
562 expressions (by default mailto and http(s) links).</p>
563 <p>It won't link text in an element in avoid_elements, or an element
564 with a class in avoid_classes.  It won't link to anything with a
565 host that matches one of the regular expressions in avoid_hosts
566 (by default localhost, example.com/org/net and 127.0.0.1).</p>
567 <p>If you pass in an element, the element's tail will not be
568 substituted, only the contents of the element.</p>
569   <dl class="fields">
570   </dl>
571 </td></tr></table>
572 </div>
573 <a name="word_break"></a>
574 <div>
575 <table class="details" border="1" cellpadding="3"
576        cellspacing="0" width="100%" bgcolor="white">
577 <tr><td>
578   <table width="100%" cellpadding="0" cellspacing="0" border="0">
579   <tr valign="top"><td>
580   <h3 class="epydoc"><span class="sig"><span class="sig-name">word_break</span>(<span class="sig-arg">el</span>,
581         <span class="sig-arg">max_width</span>=<span class="sig-default">40</span>,
582         <span class="sig-arg">avoid_elements</span>=<span class="sig-default">_avoid_word_break_elements</span>,
583         <span class="sig-arg">avoid_classes</span>=<span class="sig-default">_avoid_word_break_classes</span>,
584         <span class="sig-arg">break_character</span>=<span class="sig-default">unichr(0x200b)</span>)</span>
585   </h3>
586   </td><td align="right" valign="top"
587     ><span class="codelink"><a href="lxml.html.clean-pysrc.html#word_break">source&nbsp;code</a></span>&nbsp;
588     </td>
589   </tr></table>
590   
591   <p>Breaks any long words found in the body of the text (not attributes).</p>
592 <p>Doesn't affect any of the tags in avoid_elements, by default
593 <tt class="rst-docutils literal">&lt;pre&gt;</tt>, <tt class="rst-docutils literal">&lt;textarea&gt;</tt> and <tt class="rst-docutils literal">&lt;code&gt;</tt>.</p>
594 <p>Breaks words by inserting &amp;#8203;, the Unicode Zero Width
595 Space character.  This generally takes up no space
596 in rendering, but does copy as a space, and in monospace contexts
597 usually takes up space.</p>
598 <p>See <a class="rst-reference external" href="http://www.cs.tut.fi/~jkorpela/html/nobr.html" target="_top">http://www.cs.tut.fi/~jkorpela/html/nobr.html</a> for a discussion.</p>
599   <dl class="fields">
600   </dl>
601 </td></tr></table>
602 </div>
603 <br />
604 <!-- ==================== VARIABLES DETAILS ==================== -->
605 <a name="section-VariablesDetails"></a>
606 <table class="details" border="1" cellpadding="3"
607        cellspacing="0" width="100%" bgcolor="white">
608 <tr bgcolor="#70b0f0" class="table-header">
609   <td colspan="2" class="table-header">
610     <table border="0" cellpadding="0" cellspacing="0" width="100%">
611       <tr valign="top">
612         <td align="left"><span class="table-header">Variables Details</span></td>
613         <td align="right" valign="top"
614          ><span class="options">[<a href="#section-VariablesDetails"
615          class="privatelink" onclick="toggle_private();"
616          >hide private</a>]</span></td>
617       </tr>
618     </table>
619   </td>
620 </tr>
621 </table>
622 <a name="_conditional_comment_re"></a>
623 <div class="private">
624 <table class="details" border="1" cellpadding="3"
625        cellspacing="0" width="100%" bgcolor="white">
626 <tr><td>
627   <h3 class="epydoc">_conditional_comment_re</h3>
628   
629   <dl class="fields">
630   </dl>
631   <dl class="fields">
632     <dt>Value:</dt>
633       <dd><table><tr><td><pre class="variable">
634 re.compile(r'<code class="re-flags">(?is)</code>\[if<code class="re-group">[</code>\s\n\r<code class="re-group">]</code><code class="re-op">+</code>.<code class="re-op">*?</code>\]<code class="re-group">[</code>\s\n\r<code class="re-group">]</code><code class="re-op">*</code>&gt;')
635 </pre></td></tr></table>
636 </dd>
637   </dl>
638 </td></tr></table>
639 </div>
640 <a name="_find_external_links"></a>
641 <div class="private">
642 <table class="details" border="1" cellpadding="3"
643        cellspacing="0" width="100%" bgcolor="white">
644 <tr><td>
645   <h3 class="epydoc">_find_external_links</h3>
646   
647   <dl class="fields">
648   </dl>
649   <dl class="fields">
650     <dt>Value:</dt>
651       <dd><table><tr><td><pre class="variable">
652 descendant-or-self::a  [normalize-space(@href) and substring(normalize<span class="variable-linewrap"><img src="crarr.png" alt="\" /></span>
653 -space(@href),1,1) != '#'] |descendant-or-self::x:a[normalize-space(@h<span class="variable-linewrap"><img src="crarr.png" alt="\" /></span>
654 ref) and substring(normalize-space(@href),1,1) != '#']
655 </pre></td></tr></table>
656 </dd>
657   </dl>
658 </td></tr></table>
659 </div>
660 <a name="_link_regexes"></a>
661 <div class="private">
662 <table class="details" border="1" cellpadding="3"
663        cellspacing="0" width="100%" bgcolor="white">
664 <tr><td>
665   <h3 class="epydoc">_link_regexes</h3>
666   
667   <dl class="fields">
668   </dl>
669   <dl class="fields">
670     <dt>Value:</dt>
671       <dd><table><tr><td><pre class="variable">
672 <code class="variable-group">[</code>re.compile(r'<code class="re-flags">(?i)</code><code class="re-group">(?P&lt;</code><code class="re-ref">body</code><code class="re-group">&gt;</code>https<code class="re-op">?</code>://<code class="re-group">(?P&lt;</code><code class="re-ref">host</code><code class="re-group">&gt;</code><code class="re-group">[</code>a<code class="re-op">-</code>z0<code class="re-op">-</code>9\._-<code class="re-group">]</code><code class="re-op">+</code><code class="re-group">)</code><code class="re-group">(?:</code>/<code class="re-group">[</code>/-_\.,<span class="variable-linewrap"><img src="crarr.png" alt="\" /></span>
673 a<code class="re-op">-</code>z0<code class="re-op">-</code>9%&amp;\?;=~<code class="re-group">]</code><code class="re-op">*</code><code class="re-group">)</code><code class="re-op">?</code><code class="re-group">(?:</code>\(<code class="re-group">[</code>/-_\.,a<code class="re-op">-</code>z0<code class="re-op">-</code>9%&amp;\?;=~<code class="re-group">]</code><code class="re-op">*</code>\)<code class="re-group">)</code><code class="re-op">?</code><code class="re-group">)</code>')<code class="variable-op">,</code>
674  re.compile(r'<code class="re-flags">(?i)</code>mailto:<code class="re-group">(?P&lt;</code><code class="re-ref">body</code><code class="re-group">&gt;</code><code class="re-group">[</code>a<code class="re-op">-</code>z0<code class="re-op">-</code>9\._-<code class="re-group">]</code><code class="re-op">+</code>@<code class="re-group">(?P&lt;</code><code class="re-ref">host</code><code class="re-group">&gt;</code><code class="re-group">[</code>a<code class="re-op">-</code>z0<code class="re-op">-</code>9_\.-<code class="re-group">]</code><code class="re-op">+</code><code class="re-group"></code><span class="variable-linewrap"><img src="crarr.png" alt="\" /></span>
675 <code class="re-group">[</code>a<code class="re-op">-</code>z<code class="re-group">]</code><code class="re-group">)</code><code class="re-group">)</code>')<code class="variable-group">]</code>
676 </pre></td></tr></table>
677 </dd>
678   </dl>
679 </td></tr></table>
680 </div>
681 <a name="_avoid_elements"></a>
682 <div class="private">
683 <table class="details" border="1" cellpadding="3"
684        cellspacing="0" width="100%" bgcolor="white">
685 <tr><td>
686   <h3 class="epydoc">_avoid_elements</h3>
687   
688   <dl class="fields">
689   </dl>
690   <dl class="fields">
691     <dt>Value:</dt>
692       <dd><table><tr><td><pre class="variable">
693 <code class="variable-group">[</code><code class="variable-quote">'</code><code class="variable-string">textarea</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">pre</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">code</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">head</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">select</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">a</code><code class="variable-quote">'</code><code class="variable-group">]</code>
694 </pre></td></tr></table>
695 </dd>
696   </dl>
697 </td></tr></table>
698 </div>
699 <a name="_avoid_hosts"></a>
700 <div class="private">
701 <table class="details" border="1" cellpadding="3"
702        cellspacing="0" width="100%" bgcolor="white">
703 <tr><td>
704   <h3 class="epydoc">_avoid_hosts</h3>
705   
706   <dl class="fields">
707   </dl>
708   <dl class="fields">
709     <dt>Value:</dt>
710       <dd><table><tr><td><pre class="variable">
711 <code class="variable-group">[</code>re.compile(r'<code class="re-flags">(?i)</code>^localhost')<code class="variable-op">,</code>
712  re.compile(r'<code class="re-flags">(?i)</code>\bexample\.<code class="re-group">(?:</code>com<code class="re-op">|</code>org<code class="re-op">|</code>net<code class="re-group">)</code>$')<code class="variable-op">,</code>
713  re.compile(r'^127\.0\.0\.1$')<code class="variable-group">]</code>
714 </pre></td></tr></table>
715 </dd>
716   </dl>
717 </td></tr></table>
718 </div>
719 <br />
720 <!-- ==================== NAVIGATION BAR ==================== -->
721 <table class="navbar" border="0" width="100%" cellpadding="0"
722        bgcolor="#a0c0ff" cellspacing="0">
723   <tr valign="middle">
724   <!-- Home link -->
725       <th>&nbsp;&nbsp;&nbsp;<a
726         href="lxml-module.html">Home</a>&nbsp;&nbsp;&nbsp;</th>
727
728   <!-- Tree link -->
729       <th>&nbsp;&nbsp;&nbsp;<a
730         href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
731
732   <!-- Index link -->
733       <th>&nbsp;&nbsp;&nbsp;<a
734         href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
735
736   <!-- Help link -->
737       <th>&nbsp;&nbsp;&nbsp;<a
738         href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
739
740   <!-- Project homepage -->
741       <th class="navbar" align="right" width="100%">
742         <table border="0" cellpadding="0" cellspacing="0">
743           <tr><th class="navbar" align="center"
744             ><a class="navbar" target="_top" href="/">lxml API</a></th>
745           </tr></table></th>
746   </tr>
747 </table>
748 <table border="0" cellpadding="0" cellspacing="0" width="100%">
749   <tr>
750     <td align="left" class="footer">
751     Generated by Epydoc 3.0.1
752     on Wed Jan 29 12:26:21 2020
753     </td>
754     <td align="right" class="footer">
755       <a target="mainFrame" href="http://epydoc.sourceforge.net"
756         >http://epydoc.sourceforge.net</a>
757     </td>
758   </tr>
759 </table>
760
761 <script type="text/javascript">
762   <!--
763   // Private objects are initially displayed (because if
764   // javascript is turned off then we want them to be
765   // visible); but by default, we want to hide them.  So hide
766   // them unless we have a cookie that says to show them.
767   checkCookie();
768   // -->
769 </script>
770 </body>
771 </html>