Imported Upstream version 2.3.5
[platform/upstream/python-lxml.git] / doc / html / api / lxml.html.clean-module.html
1 <?xml version="1.0" encoding="ascii"?>
2 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
3           "DTD/xhtml1-transitional.dtd">
4 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
5 <head>
6   <title>lxml.html.clean</title>
7   <link rel="stylesheet" href="epydoc.css" type="text/css" />
8   <script type="text/javascript" src="epydoc.js"></script>
9 </head>
10
11 <body bgcolor="white" text="black" link="blue" vlink="#204080"
12       alink="#204080">
13 <!-- ==================== NAVIGATION BAR ==================== -->
14 <table class="navbar" border="0" width="100%" cellpadding="0"
15        bgcolor="#a0c0ff" cellspacing="0">
16   <tr valign="middle">
17   <!-- Home link -->
18       <th>&nbsp;&nbsp;&nbsp;<a
19         href="lxml-module.html">Home</a>&nbsp;&nbsp;&nbsp;</th>
20
21   <!-- Tree link -->
22       <th>&nbsp;&nbsp;&nbsp;<a
23         href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
24
25   <!-- Index link -->
26       <th>&nbsp;&nbsp;&nbsp;<a
27         href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
28
29   <!-- Help link -->
30       <th>&nbsp;&nbsp;&nbsp;<a
31         href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
32
33   <!-- Project homepage -->
34       <th class="navbar" align="right" width="100%">
35         <table border="0" cellpadding="0" cellspacing="0">
36           <tr><th class="navbar" align="center"
37             ><a class="navbar" target="_top" href="/">lxml API</a></th>
38           </tr></table></th>
39   </tr>
40 </table>
41 <table width="100%" cellpadding="0" cellspacing="0">
42   <tr valign="top">
43     <td width="100%">
44       <span class="breadcrumbs">
45         <a href="lxml-module.html">Package&nbsp;lxml</a> ::
46         <a href="lxml.html-module.html">Package&nbsp;html</a> ::
47         Module&nbsp;clean
48       </span>
49     </td>
50     <td>
51       <table cellpadding="0" cellspacing="0">
52         <!-- hide/show private -->
53         <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
54     onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>
55         <tr><td align="right"><span class="options"
56             >[<a href="frames.html" target="_top">frames</a
57             >]&nbsp;|&nbsp;<a href="lxml.html.clean-module.html"
58             target="_top">no&nbsp;frames</a>]</span></td></tr>
59       </table>
60     </td>
61   </tr>
62 </table>
63 <!-- ==================== MODULE DESCRIPTION ==================== -->
64 <h1 class="epydoc">Module clean</h1><p class="nomargin-top"><span class="codelink"><a href="lxml.html.clean-pysrc.html">source&nbsp;code</a></span></p>
65 <p>A cleanup tool for HTML.</p>
66 <p>Removes unwanted tags and content.  See the <a href="lxml.html.clean.Cleaner-class.html" class="link">Cleaner</a> class for
67 details.</p>
68
69 <!-- ==================== CLASSES ==================== -->
70 <a name="section-Classes"></a>
71 <table class="summary" border="1" cellpadding="3"
72        cellspacing="0" width="100%" bgcolor="white">
73 <tr bgcolor="#70b0f0" class="table-header">
74   <td colspan="2" class="table-header">
75     <table border="0" cellpadding="0" cellspacing="0" width="100%">
76       <tr valign="top">
77         <td align="left"><span class="table-header">Classes</span></td>
78         <td align="right" valign="top"
79          ><span class="options">[<a href="#section-Classes"
80          class="privatelink" onclick="toggle_private();"
81          >hide private</a>]</span></td>
82       </tr>
83     </table>
84   </td>
85 </tr>
86 <tr>
87     <td width="15%" align="right" valign="top" class="summary">
88       <span class="summary-type">&nbsp;</span>
89     </td><td class="summary">
90         <a href="lxml.html.clean.Cleaner-class.html" class="summary-name">Cleaner</a><br />
91       Instances clean the document of each of the possible offending
92 elements.  The cleaning is controlled by attributes; you can
93 override attributes in a subclass, or set them in the constructor.
94     </td>
95   </tr>
96 </table>
97 <!-- ==================== FUNCTIONS ==================== -->
98 <a name="section-Functions"></a>
99 <table class="summary" border="1" cellpadding="3"
100        cellspacing="0" width="100%" bgcolor="white">
101 <tr bgcolor="#70b0f0" class="table-header">
102   <td colspan="2" class="table-header">
103     <table border="0" cellpadding="0" cellspacing="0" width="100%">
104       <tr valign="top">
105         <td align="left"><span class="table-header">Functions</span></td>
106         <td align="right" valign="top"
107          ><span class="options">[<a href="#section-Functions"
108          class="privatelink" onclick="toggle_private();"
109          >hide private</a>]</span></td>
110       </tr>
111     </table>
112   </td>
113 </tr>
114 <tr class="private">
115     <td width="15%" align="right" valign="top" class="summary">
116       <span class="summary-type">&nbsp;</span>
117     </td><td class="summary">
118       <table width="100%" cellpadding="0" cellspacing="0" border="0">
119         <tr>
120           <td><span class="summary-sig"><a name="_substitute_whitespace"></a><span class="summary-sig-name">_substitute_whitespace</span>(<span class="summary-sig-arg">...</span>)</span><br />
121       sub(repl, string[, count = 0]) --&gt; newstring
122 Return the string obtained by replacing the leftmost non-overlapping
123 occurrences of pattern in string by the replacement repl.</td>
124           <td align="right" valign="top">
125             <span class="codelink"><a href="lxml.html.clean-pysrc.html#_substitute_whitespace">source&nbsp;code</a></span>
126             
127           </td>
128         </tr>
129       </table>
130       
131     </td>
132   </tr>
133 <tr>
134     <td width="15%" align="right" valign="top" class="summary">
135       <span class="summary-type">&nbsp;</span>
136     </td><td class="summary">
137       <table width="100%" cellpadding="0" cellspacing="0" border="0">
138         <tr>
139           <td><span class="summary-sig"><a name="clean_html"></a><span class="summary-sig-name">clean_html</span>(<span class="summary-sig-arg">html</span>)</span></td>
140           <td align="right" valign="top">
141             <span class="codelink"><a href="lxml.html.clean-pysrc.html#clean_html">source&nbsp;code</a></span>
142             
143           </td>
144         </tr>
145       </table>
146       
147     </td>
148   </tr>
149 <tr>
150     <td width="15%" align="right" valign="top" class="summary">
151       <span class="summary-type">&nbsp;</span>
152     </td><td class="summary">
153       <table width="100%" cellpadding="0" cellspacing="0" border="0">
154         <tr>
155           <td><span class="summary-sig"><a href="lxml.html.clean-module.html#autolink" class="summary-sig-name">autolink</a>(<span class="summary-sig-arg">el</span>,
156         <span class="summary-sig-arg">link_regexes</span>=<span class="summary-sig-default"><code class="variable-group">[</code>re.compile(r'<code class="re-flags">(?i)</code><code class="re-group">(?P&lt;</code><code class="re-ref">body</code><code class="re-group">&gt;</code>https<code class="re-op">?</code>://<code class="re-group">(?P&lt;</code><code class="re-ref">host</code><code class="re-group">&gt;</code><code class="re-group">[</code>a<code class="re-op">-</code>z0<code class="re-op">-</code>9\._-<code class="re-group">]</code><code class="re-op">+</code><code class="re-group">)</code><code class="re-group">(?:</code><code class="variable-ellipsis">...</code></span>,
157         <span class="summary-sig-arg">avoid_elements</span>=<span class="summary-sig-default"><code class="variable-group">[</code><code class="variable-quote">'</code><code class="variable-string">textarea</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">pre</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">code</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">head</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">select</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">a</code><code class="variable-quote">'</code><code class="variable-group">]</code></span>,
158         <span class="summary-sig-arg">avoid_hosts</span>=<span class="summary-sig-default"><code class="variable-group">[</code>re.compile(r'<code class="re-flags">(?i)</code>^localhost')<code class="variable-op">, </code>re.compile(r'<code class="re-flags">(?i)</code>\bexample\.<code class="re-group">(?</code><code class="variable-ellipsis">...</code></span>,
159         <span class="summary-sig-arg">avoid_classes</span>=<span class="summary-sig-default"><code class="variable-group">[</code><code class="variable-quote">'</code><code class="variable-string">nolink</code><code class="variable-quote">'</code><code class="variable-group">]</code></span>)</span><br />
160       Turn any URLs into links.</td>
161           <td align="right" valign="top">
162             <span class="codelink"><a href="lxml.html.clean-pysrc.html#autolink">source&nbsp;code</a></span>
163             
164           </td>
165         </tr>
166       </table>
167       
168     </td>
169   </tr>
170 <tr class="private">
171     <td width="15%" align="right" valign="top" class="summary">
172       <span class="summary-type">&nbsp;</span>
173     </td><td class="summary">
174       <table width="100%" cellpadding="0" cellspacing="0" border="0">
175         <tr>
176           <td><span class="summary-sig"><a name="_link_text"></a><span class="summary-sig-name">_link_text</span>(<span class="summary-sig-arg">text</span>,
177         <span class="summary-sig-arg">link_regexes</span>,
178         <span class="summary-sig-arg">avoid_hosts</span>,
179         <span class="summary-sig-arg">factory</span>)</span></td>
180           <td align="right" valign="top">
181             <span class="codelink"><a href="lxml.html.clean-pysrc.html#_link_text">source&nbsp;code</a></span>
182             
183           </td>
184         </tr>
185       </table>
186       
187     </td>
188   </tr>
189 <tr>
190     <td width="15%" align="right" valign="top" class="summary">
191       <span class="summary-type">&nbsp;</span>
192     </td><td class="summary">
193       <table width="100%" cellpadding="0" cellspacing="0" border="0">
194         <tr>
195           <td><span class="summary-sig"><a href="lxml.html.clean-module.html#autolink_html" class="summary-sig-name">autolink_html</a>(<span class="summary-sig-arg">html</span>,
196         <span class="summary-sig-arg">*args</span>,
197         <span class="summary-sig-arg">**kw</span>)</span><br />
198       Turn any URLs into links.</td>
199           <td align="right" valign="top">
200             <span class="codelink"><a href="lxml.html.clean-pysrc.html#autolink_html">source&nbsp;code</a></span>
201             
202           </td>
203         </tr>
204       </table>
205       
206     </td>
207   </tr>
208 <tr>
209     <td width="15%" align="right" valign="top" class="summary">
210       <span class="summary-type">&nbsp;</span>
211     </td><td class="summary">
212       <table width="100%" cellpadding="0" cellspacing="0" border="0">
213         <tr>
214           <td><span class="summary-sig"><a href="lxml.html.clean-module.html#word_break" class="summary-sig-name">word_break</a>(<span class="summary-sig-arg">el</span>,
215         <span class="summary-sig-arg">max_width</span>=<span class="summary-sig-default">40</span>,
216         <span class="summary-sig-arg">avoid_elements</span>=<span class="summary-sig-default"><code class="variable-group">[</code><code class="variable-quote">'</code><code class="variable-string">pre</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">textarea</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">code</code><code class="variable-quote">'</code><code class="variable-group">]</code></span>,
217         <span class="summary-sig-arg">avoid_classes</span>=<span class="summary-sig-default"><code class="variable-group">[</code><code class="variable-quote">'</code><code class="variable-string">nobreak</code><code class="variable-quote">'</code><code class="variable-group">]</code></span>,
218         <span class="summary-sig-arg">break_character</span>=<span class="summary-sig-default"><code class="variable-quote">u'</code><code class="variable-string">&#8203;</code><code class="variable-quote">'</code></span>)</span><br />
219       Breaks any long words found in the body of the text (not attributes).</td>
220           <td align="right" valign="top">
221             <span class="codelink"><a href="lxml.html.clean-pysrc.html#word_break">source&nbsp;code</a></span>
222             
223           </td>
224         </tr>
225       </table>
226       
227     </td>
228   </tr>
229 <tr>
230     <td width="15%" align="right" valign="top" class="summary">
231       <span class="summary-type">&nbsp;</span>
232     </td><td class="summary">
233       <table width="100%" cellpadding="0" cellspacing="0" border="0">
234         <tr>
235           <td><span class="summary-sig"><a name="word_break_html"></a><span class="summary-sig-name">word_break_html</span>(<span class="summary-sig-arg">html</span>,
236         <span class="summary-sig-arg">*args</span>,
237         <span class="summary-sig-arg">**kw</span>)</span></td>
238           <td align="right" valign="top">
239             <span class="codelink"><a href="lxml.html.clean-pysrc.html#word_break_html">source&nbsp;code</a></span>
240             
241           </td>
242         </tr>
243       </table>
244       
245     </td>
246   </tr>
247 <tr class="private">
248     <td width="15%" align="right" valign="top" class="summary">
249       <span class="summary-type">&nbsp;</span>
250     </td><td class="summary">
251       <table width="100%" cellpadding="0" cellspacing="0" border="0">
252         <tr>
253           <td><span class="summary-sig"><a name="_break_text"></a><span class="summary-sig-name">_break_text</span>(<span class="summary-sig-arg">text</span>,
254         <span class="summary-sig-arg">max_width</span>,
255         <span class="summary-sig-arg">break_character</span>)</span></td>
256           <td align="right" valign="top">
257             <span class="codelink"><a href="lxml.html.clean-pysrc.html#_break_text">source&nbsp;code</a></span>
258             
259           </td>
260         </tr>
261       </table>
262       
263     </td>
264   </tr>
265 <tr class="private">
266     <td width="15%" align="right" valign="top" class="summary">
267       <span class="summary-type">&nbsp;</span>
268     </td><td class="summary">
269       <table width="100%" cellpadding="0" cellspacing="0" border="0">
270         <tr>
271           <td><span class="summary-sig"><a name="_insert_break"></a><span class="summary-sig-name">_insert_break</span>(<span class="summary-sig-arg">word</span>,
272         <span class="summary-sig-arg">width</span>,
273         <span class="summary-sig-arg">break_character</span>)</span></td>
274           <td align="right" valign="top">
275             <span class="codelink"><a href="lxml.html.clean-pysrc.html#_insert_break">source&nbsp;code</a></span>
276             
277           </td>
278         </tr>
279       </table>
280       
281     </td>
282   </tr>
283 </table>
284 <!-- ==================== VARIABLES ==================== -->
285 <a name="section-Variables"></a>
286 <table class="summary" border="1" cellpadding="3"
287        cellspacing="0" width="100%" bgcolor="white">
288 <tr bgcolor="#70b0f0" class="table-header">
289   <td colspan="2" class="table-header">
290     <table border="0" cellpadding="0" cellspacing="0" width="100%">
291       <tr valign="top">
292         <td align="left"><span class="table-header">Variables</span></td>
293         <td align="right" valign="top"
294          ><span class="options">[<a href="#section-Variables"
295          class="privatelink" onclick="toggle_private();"
296          >hide private</a>]</span></td>
297       </tr>
298     </table>
299   </td>
300 </tr>
301 <tr class="private">
302     <td width="15%" align="right" valign="top" class="summary">
303       <span class="summary-type">&nbsp;</span>
304     </td><td class="summary">
305         <a name="_css_javascript_re"></a><span class="summary-name">_css_javascript_re</span> = <code title="re.compile(r'(?is)expression\s*\(.*?\)')">re.compile(r'<code class="re-flags">(?is)</code>expression\s<code class="re-op">*</code>\(.<code class="re-op">*?</code>\)')</code>
306     </td>
307   </tr>
308 <tr class="private">
309     <td width="15%" align="right" valign="top" class="summary">
310       <span class="summary-type">&nbsp;</span>
311     </td><td class="summary">
312         <a name="_css_import_re"></a><span class="summary-name">_css_import_re</span> = <code title="re.compile(r'(?i)@\s*import')">re.compile(r'<code class="re-flags">(?i)</code>@\s<code class="re-op">*</code>import')</code>
313     </td>
314   </tr>
315 <tr class="private">
316     <td width="15%" align="right" valign="top" class="summary">
317       <span class="summary-type">&nbsp;</span>
318     </td><td class="summary">
319         <a href="lxml.html.clean-module.html#_javascript_scheme_re" class="summary-name" onclick="show_private();">_javascript_scheme_re</a> = <code title="re.compile(r'(?i)\s*(?:javascript|jscript|livescript|vbscript|data|abo\
320 ut|mocha):')">re.compile(r'<code class="re-flags">(?i)</code>\s<code class="re-op">*</code><code class="re-group">(?:</code>javascript<code class="re-op">|</code>jscr<code class="variable-ellipsis">...</code></code>
321     </td>
322   </tr>
323 <tr class="private">
324     <td width="15%" align="right" valign="top" class="summary">
325       <span class="summary-type">&nbsp;</span>
326     </td><td class="summary">
327         <a href="lxml.html.clean-module.html#_conditional_comment_re" class="summary-name" onclick="show_private();">_conditional_comment_re</a> = <code title="re.compile(r'(?is)\[if[\s\n\r]+.*?\][\s\n\r]*&gt;')">re.compile(r'<code class="re-flags">(?is)</code>\[if<code class="re-group">[</code>\s\n\r<code class="re-group">]</code><code class="re-op">+</code>.<code class="re-op">*?</code>\]<code class="variable-ellipsis">...</code></code>
328     </td>
329   </tr>
330 <tr class="private">
331     <td width="15%" align="right" valign="top" class="summary">
332       <span class="summary-type">&nbsp;</span>
333     </td><td class="summary">
334         <a name="_find_styled_elements"></a><span class="summary-name">_find_styled_elements</span> = <code title="descendant-or-self::*[@style]">descendant-or-self::*[@style]</code>
335     </td>
336   </tr>
337 <tr class="private">
338     <td width="15%" align="right" valign="top" class="summary">
339       <span class="summary-type">&nbsp;</span>
340     </td><td class="summary">
341         <a href="lxml.html.clean-module.html#_find_external_links" class="summary-name" onclick="show_private();">_find_external_links</a> = <code title="descendant-or-self::a  [normalize-space(@href) and substring(normalize\
342 -space(@href),1,1) != '#'] |descendant-or-self::x:a[normalize-space(@h\
343 ref) and substring(normalize-space(@href),1,1) != '#']">descendant-or-self::a  [normalize-space<code class="variable-ellipsis">...</code></code>
344     </td>
345   </tr>
346 <tr>
347     <td width="15%" align="right" valign="top" class="summary">
348       <span class="summary-type">&nbsp;</span>
349     </td><td class="summary">
350         <a name="clean"></a><span class="summary-name">clean</span> = <code title="Cleaner()">Cleaner()</code>
351     </td>
352   </tr>
353 <tr class="private">
354     <td width="15%" align="right" valign="top" class="summary">
355       <span class="summary-type">&nbsp;</span>
356     </td><td class="summary">
357         <a href="lxml.html.clean-module.html#_link_regexes" class="summary-name" onclick="show_private();">_link_regexes</a> = <code title="[re.compile(r'(?i)(?P&lt;body&gt;https?://(?P&lt;host&gt;[a-z0-9\._-]+)(?:/[/-_\.,\
358 a-z0-9%&amp;\?;=~]*)?(?:\([/-_\.,a-z0-9%&amp;\?;=~]*\))?)'),
359  re.compile(r'(?i)mailto:(?P&lt;body&gt;[a-z0-9\._-]+@(?P&lt;host&gt;[a-z0-9_\._]+\
360 [a-z]))')]"><code class="variable-group">[</code>re.compile(r'<code class="re-flags">(?i)</code><code class="re-group">(?P&lt;</code><code class="re-ref">body</code><code class="re-group">&gt;</code>https<code class="re-op">?</code>://<code class="re-group">(?P&lt;</code><code class="re-ref">host</code><code class="re-group">&gt;</code><code class="re-group">[</code><code class="variable-ellipsis">...</code></code>
361     </td>
362   </tr>
363 <tr class="private">
364     <td width="15%" align="right" valign="top" class="summary">
365       <span class="summary-type">&nbsp;</span>
366     </td><td class="summary">
367         <a href="lxml.html.clean-module.html#_avoid_elements" class="summary-name" onclick="show_private();">_avoid_elements</a> = <code title="['textarea', 'pre', 'code', 'head', 'select', 'a']"><code class="variable-group">[</code><code class="variable-quote">'</code><code class="variable-string">textarea</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">pre</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">code</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">head</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">select</code><code class="variable-quote">'</code><code class="variable-ellipsis">...</code></code>
368     </td>
369   </tr>
370 <tr class="private">
371     <td width="15%" align="right" valign="top" class="summary">
372       <span class="summary-type">&nbsp;</span>
373     </td><td class="summary">
374         <a href="lxml.html.clean-module.html#_avoid_hosts" class="summary-name" onclick="show_private();">_avoid_hosts</a> = <code title="[re.compile(r'(?i)^localhost'),
375  re.compile(r'(?i)\bexample\.(?:com|org|net)$'),
376  re.compile(r'^127\.0\.0\.1$')]"><code class="variable-group">[</code>re.compile(r'<code class="re-flags">(?i)</code>^localhost')<code class="variable-op">, </code>re.compile(r'<code class="re-flags">(?</code><code class="variable-ellipsis">...</code></code>
377     </td>
378   </tr>
379 <tr class="private">
380     <td width="15%" align="right" valign="top" class="summary">
381       <span class="summary-type">&nbsp;</span>
382     </td><td class="summary">
383         <a name="_avoid_classes"></a><span class="summary-name">_avoid_classes</span> = <code title="['nolink']"><code class="variable-group">[</code><code class="variable-quote">'</code><code class="variable-string">nolink</code><code class="variable-quote">'</code><code class="variable-group">]</code></code>
384     </td>
385   </tr>
386 <tr class="private">
387     <td width="15%" align="right" valign="top" class="summary">
388       <span class="summary-type">&nbsp;</span>
389     </td><td class="summary">
390         <a name="_avoid_word_break_elements"></a><span class="summary-name">_avoid_word_break_elements</span> = <code title="['pre', 'textarea', 'code']"><code class="variable-group">[</code><code class="variable-quote">'</code><code class="variable-string">pre</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">textarea</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">code</code><code class="variable-quote">'</code><code class="variable-group">]</code></code>
391     </td>
392   </tr>
393 <tr class="private">
394     <td width="15%" align="right" valign="top" class="summary">
395       <span class="summary-type">&nbsp;</span>
396     </td><td class="summary">
397         <a name="_avoid_word_break_classes"></a><span class="summary-name">_avoid_word_break_classes</span> = <code title="['nobreak']"><code class="variable-group">[</code><code class="variable-quote">'</code><code class="variable-string">nobreak</code><code class="variable-quote">'</code><code class="variable-group">]</code></code>
398     </td>
399   </tr>
400 <tr class="private">
401     <td width="15%" align="right" valign="top" class="summary">
402       <span class="summary-type">&nbsp;</span>
403     </td><td class="summary">
404         <a name="_break_prefer_re"></a><span class="summary-name">_break_prefer_re</span> = <code title="re.compile(r'(?i)[^a-z]')">re.compile(r'<code class="re-flags">(?i)</code><code class="re-group">[</code><code class="re-op">^</code>a<code class="re-op">-</code>z<code class="re-group">]</code>')</code>
405     </td>
406   </tr>
407 <tr class="private">
408     <td width="15%" align="right" valign="top" class="summary">
409       <span class="summary-type">&nbsp;</span>
410     </td><td class="summary">
411         <a name="__package__"></a><span class="summary-name">__package__</span> = <code title="'lxml.html'"><code class="variable-quote">'</code><code class="variable-string">lxml.html</code><code class="variable-quote">'</code></code>
412     </td>
413   </tr>
414 </table>
415 <!-- ==================== FUNCTION DETAILS ==================== -->
416 <a name="section-FunctionDetails"></a>
417 <table class="details" border="1" cellpadding="3"
418        cellspacing="0" width="100%" bgcolor="white">
419 <tr bgcolor="#70b0f0" class="table-header">
420   <td colspan="2" class="table-header">
421     <table border="0" cellpadding="0" cellspacing="0" width="100%">
422       <tr valign="top">
423         <td align="left"><span class="table-header">Function Details</span></td>
424         <td align="right" valign="top"
425          ><span class="options">[<a href="#section-FunctionDetails"
426          class="privatelink" onclick="toggle_private();"
427          >hide private</a>]</span></td>
428       </tr>
429     </table>
430   </td>
431 </tr>
432 </table>
433 <a name="autolink"></a>
434 <div>
435 <table class="details" border="1" cellpadding="3"
436        cellspacing="0" width="100%" bgcolor="white">
437 <tr><td>
438   <table width="100%" cellpadding="0" cellspacing="0" border="0">
439   <tr valign="top"><td>
440   <h3 class="epydoc"><span class="sig"><span class="sig-name">autolink</span>(<span class="sig-arg">el</span>,
441         <span class="sig-arg">link_regexes</span>=<span class="sig-default"><code class="variable-group">[</code>re.compile(r'<code class="re-flags">(?i)</code><code class="re-group">(?P&lt;</code><code class="re-ref">body</code><code class="re-group">&gt;</code>https<code class="re-op">?</code>://<code class="re-group">(?P&lt;</code><code class="re-ref">host</code><code class="re-group">&gt;</code><code class="re-group">[</code>a<code class="re-op">-</code>z0<code class="re-op">-</code>9\._-<code class="re-group">]</code><code class="re-op">+</code><code class="re-group">)</code><code class="re-group">(?:</code><code class="variable-ellipsis">...</code></span>,
442         <span class="sig-arg">avoid_elements</span>=<span class="sig-default"><code class="variable-group">[</code><code class="variable-quote">'</code><code class="variable-string">textarea</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">pre</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">code</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">head</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">select</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">a</code><code class="variable-quote">'</code><code class="variable-group">]</code></span>,
443         <span class="sig-arg">avoid_hosts</span>=<span class="sig-default"><code class="variable-group">[</code>re.compile(r'<code class="re-flags">(?i)</code>^localhost')<code class="variable-op">, </code>re.compile(r'<code class="re-flags">(?i)</code>\bexample\.<code class="re-group">(?</code><code class="variable-ellipsis">...</code></span>,
444         <span class="sig-arg">avoid_classes</span>=<span class="sig-default"><code class="variable-group">[</code><code class="variable-quote">'</code><code class="variable-string">nolink</code><code class="variable-quote">'</code><code class="variable-group">]</code></span>)</span>
445   </h3>
446   </td><td align="right" valign="top"
447     ><span class="codelink"><a href="lxml.html.clean-pysrc.html#autolink">source&nbsp;code</a></span>&nbsp;
448     </td>
449   </tr></table>
450   
451   <p>Turn any URLs into links.</p>
452 <p>It will search for links identified by the given regular
453 expressions (by default mailto and http(s) links).</p>
454 <p>It won't link text in an element in avoid_elements, or an element
455 with a class in avoid_classes.  It won't link to anything with a
456 host that matches one of the regular expressions in avoid_hosts
457 (default localhost and 127.0.0.1).</p>
458 <p>If you pass in an element, the element's tail will not be
459 substituted, only the contents of the element.</p>
460   <dl class="fields">
461   </dl>
462 </td></tr></table>
463 </div>
464 <a name="autolink_html"></a>
465 <div>
466 <table class="details" border="1" cellpadding="3"
467        cellspacing="0" width="100%" bgcolor="white">
468 <tr><td>
469   <table width="100%" cellpadding="0" cellspacing="0" border="0">
470   <tr valign="top"><td>
471   <h3 class="epydoc"><span class="sig"><span class="sig-name">autolink_html</span>(<span class="sig-arg">html</span>,
472         <span class="sig-arg">*args</span>,
473         <span class="sig-arg">**kw</span>)</span>
474   </h3>
475   </td><td align="right" valign="top"
476     ><span class="codelink"><a href="lxml.html.clean-pysrc.html#autolink_html">source&nbsp;code</a></span>&nbsp;
477     </td>
478   </tr></table>
479   
480   <p>Turn any URLs into links.</p>
481 <p>It will search for links identified by the given regular
482 expressions (by default mailto and http(s) links).</p>
483 <p>It won't link text in an element in avoid_elements, or an element
484 with a class in avoid_classes.  It won't link to anything with a
485 host that matches one of the regular expressions in avoid_hosts
486 (default localhost and 127.0.0.1).</p>
487 <p>If you pass in an element, the element's tail will not be
488 substituted, only the contents of the element.</p>
489   <dl class="fields">
490   </dl>
491 </td></tr></table>
492 </div>
493 <a name="word_break"></a>
494 <div>
495 <table class="details" border="1" cellpadding="3"
496        cellspacing="0" width="100%" bgcolor="white">
497 <tr><td>
498   <table width="100%" cellpadding="0" cellspacing="0" border="0">
499   <tr valign="top"><td>
500   <h3 class="epydoc"><span class="sig"><span class="sig-name">word_break</span>(<span class="sig-arg">el</span>,
501         <span class="sig-arg">max_width</span>=<span class="sig-default">40</span>,
502         <span class="sig-arg">avoid_elements</span>=<span class="sig-default"><code class="variable-group">[</code><code class="variable-quote">'</code><code class="variable-string">pre</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">textarea</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">code</code><code class="variable-quote">'</code><code class="variable-group">]</code></span>,
503         <span class="sig-arg">avoid_classes</span>=<span class="sig-default"><code class="variable-group">[</code><code class="variable-quote">'</code><code class="variable-string">nobreak</code><code class="variable-quote">'</code><code class="variable-group">]</code></span>,
504         <span class="sig-arg">break_character</span>=<span class="sig-default"><code class="variable-quote">u'</code><code class="variable-string">&#8203;</code><code class="variable-quote">'</code></span>)</span>
505   </h3>
506   </td><td align="right" valign="top"
507     ><span class="codelink"><a href="lxml.html.clean-pysrc.html#word_break">source&nbsp;code</a></span>&nbsp;
508     </td>
509   </tr></table>
510   
511   <p>Breaks any long words found in the body of the text (not attributes).</p>
512 <p>Doesn't affect any of the tags in avoid_elements, by default
513 <tt class="rst-docutils literal">&lt;textarea&gt;</tt>, <tt class="rst-docutils literal">&lt;pre&gt;</tt> and <tt class="rst-docutils literal">&lt;code&gt;</tt>.</p>
514 <p>Breaks words by inserting &amp;#8203;, the Unicode character
515 for Zero Width Space.  This generally takes up no space
516 in rendering, but does copy as a space, and in monospace contexts
517 usually takes up space.</p>
518 <p>See <a class="rst-reference external" href="http://www.cs.tut.fi/~jkorpela/html/nobr.html" target="_top">http://www.cs.tut.fi/~jkorpela/html/nobr.html</a> for a discussion.</p>
519   <dl class="fields">
520   </dl>
521 </td></tr></table>
522 </div>
523 <br />
524 <!-- ==================== VARIABLES DETAILS ==================== -->
525 <a name="section-VariablesDetails"></a>
526 <table class="details" border="1" cellpadding="3"
527        cellspacing="0" width="100%" bgcolor="white">
528 <tr bgcolor="#70b0f0" class="table-header">
529   <td colspan="2" class="table-header">
530     <table border="0" cellpadding="0" cellspacing="0" width="100%">
531       <tr valign="top">
532         <td align="left"><span class="table-header">Variables Details</span></td>
533         <td align="right" valign="top"
534          ><span class="options">[<a href="#section-VariablesDetails"
535          class="privatelink" onclick="toggle_private();"
536          >hide private</a>]</span></td>
537       </tr>
538     </table>
539   </td>
540 </tr>
541 </table>
542 <a name="_javascript_scheme_re"></a>
543 <div class="private">
544 <table class="details" border="1" cellpadding="3"
545        cellspacing="0" width="100%" bgcolor="white">
546 <tr><td>
547   <h3 class="epydoc">_javascript_scheme_re</h3>
548   
549   <dl class="fields">
550   </dl>
551   <dl class="fields">
552     <dt>Value:</dt>
553       <dd><table><tr><td><pre class="variable">
554 re.compile(r'<code class="re-flags">(?i)</code>\s<code class="re-op">*</code><code class="re-group">(?:</code>javascript<code class="re-op">|</code>jscript<code class="re-op">|</code>livescript<code class="re-op">|</code>vbscript<code class="re-op">|</code>data<code class="re-op">|</code>abo<span class="variable-linewrap"><img src="crarr.png" alt="\" /></span>
555 ut<code class="re-op">|</code>mocha<code class="re-group">)</code>:')
556 </pre></td></tr></table>
557 </dd>
558   </dl>
559 </td></tr></table>
560 </div>
561 <a name="_conditional_comment_re"></a>
562 <div class="private">
563 <table class="details" border="1" cellpadding="3"
564        cellspacing="0" width="100%" bgcolor="white">
565 <tr><td>
566   <h3 class="epydoc">_conditional_comment_re</h3>
567   
568   <dl class="fields">
569   </dl>
570   <dl class="fields">
571     <dt>Value:</dt>
572       <dd><table><tr><td><pre class="variable">
573 re.compile(r'<code class="re-flags">(?is)</code>\[if<code class="re-group">[</code>\s\n\r<code class="re-group">]</code><code class="re-op">+</code>.<code class="re-op">*?</code>\]<code class="re-group">[</code>\s\n\r<code class="re-group">]</code><code class="re-op">*</code>&gt;')
574 </pre></td></tr></table>
575 </dd>
576   </dl>
577 </td></tr></table>
578 </div>
579 <a name="_find_external_links"></a>
580 <div class="private">
581 <table class="details" border="1" cellpadding="3"
582        cellspacing="0" width="100%" bgcolor="white">
583 <tr><td>
584   <h3 class="epydoc">_find_external_links</h3>
585   
586   <dl class="fields">
587   </dl>
588   <dl class="fields">
589     <dt>Value:</dt>
590       <dd><table><tr><td><pre class="variable">
591 descendant-or-self::a  [normalize-space(@href) and substring(normalize<span class="variable-linewrap"><img src="crarr.png" alt="\" /></span>
592 -space(@href),1,1) != '#'] |descendant-or-self::x:a[normalize-space(@h<span class="variable-linewrap"><img src="crarr.png" alt="\" /></span>
593 ref) and substring(normalize-space(@href),1,1) != '#']
594 </pre></td></tr></table>
595 </dd>
596   </dl>
597 </td></tr></table>
598 </div>
599 <a name="_link_regexes"></a>
600 <div class="private">
601 <table class="details" border="1" cellpadding="3"
602        cellspacing="0" width="100%" bgcolor="white">
603 <tr><td>
604   <h3 class="epydoc">_link_regexes</h3>
605   
606   <dl class="fields">
607   </dl>
608   <dl class="fields">
609     <dt>Value:</dt>
610       <dd><table><tr><td><pre class="variable">
611 <code class="variable-group">[</code>re.compile(r'<code class="re-flags">(?i)</code><code class="re-group">(?P&lt;</code><code class="re-ref">body</code><code class="re-group">&gt;</code>https<code class="re-op">?</code>://<code class="re-group">(?P&lt;</code><code class="re-ref">host</code><code class="re-group">&gt;</code><code class="re-group">[</code>a<code class="re-op">-</code>z0<code class="re-op">-</code>9\._-<code class="re-group">]</code><code class="re-op">+</code><code class="re-group">)</code><code class="re-group">(?:</code>/<code class="re-group">[</code>/-_\.,<span class="variable-linewrap"><img src="crarr.png" alt="\" /></span>
612 a<code class="re-op">-</code>z0<code class="re-op">-</code>9%&amp;\?;=~<code class="re-group">]</code><code class="re-op">*</code><code class="re-group">)</code><code class="re-op">?</code><code class="re-group">(?:</code>\(<code class="re-group">[</code>/-_\.,a<code class="re-op">-</code>z0<code class="re-op">-</code>9%&amp;\?;=~<code class="re-group">]</code><code class="re-op">*</code>\)<code class="re-group">)</code><code class="re-op">?</code><code class="re-group">)</code>')<code class="variable-op">,</code>
613  re.compile(r'<code class="re-flags">(?i)</code>mailto:<code class="re-group">(?P&lt;</code><code class="re-ref">body</code><code class="re-group">&gt;</code><code class="re-group">[</code>a<code class="re-op">-</code>z0<code class="re-op">-</code>9\._-<code class="re-group">]</code><code class="re-op">+</code>@<code class="re-group">(?P&lt;</code><code class="re-ref">host</code><code class="re-group">&gt;</code><code class="re-group">[</code>a<code class="re-op">-</code>z0<code class="re-op">-</code>9_\._<code class="re-group">]</code><code class="re-op">+</code><code class="re-group"></code><span class="variable-linewrap"><img src="crarr.png" alt="\" /></span>
614 <code class="re-group">[</code>a<code class="re-op">-</code>z<code class="re-group">]</code><code class="re-group">)</code><code class="re-group">)</code>')<code class="variable-group">]</code>
615 </pre></td></tr></table>
616 </dd>
617   </dl>
618 </td></tr></table>
619 </div>
620 <a name="_avoid_elements"></a>
621 <div class="private">
622 <table class="details" border="1" cellpadding="3"
623        cellspacing="0" width="100%" bgcolor="white">
624 <tr><td>
625   <h3 class="epydoc">_avoid_elements</h3>
626   
627   <dl class="fields">
628   </dl>
629   <dl class="fields">
630     <dt>Value:</dt>
631       <dd><table><tr><td><pre class="variable">
632 <code class="variable-group">[</code><code class="variable-quote">'</code><code class="variable-string">textarea</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">pre</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">code</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">head</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">select</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">a</code><code class="variable-quote">'</code><code class="variable-group">]</code>
633 </pre></td></tr></table>
634 </dd>
635   </dl>
636 </td></tr></table>
637 </div>
638 <a name="_avoid_hosts"></a>
639 <div class="private">
640 <table class="details" border="1" cellpadding="3"
641        cellspacing="0" width="100%" bgcolor="white">
642 <tr><td>
643   <h3 class="epydoc">_avoid_hosts</h3>
644   
645   <dl class="fields">
646   </dl>
647   <dl class="fields">
648     <dt>Value:</dt>
649       <dd><table><tr><td><pre class="variable">
650 <code class="variable-group">[</code>re.compile(r'<code class="re-flags">(?i)</code>^localhost')<code class="variable-op">,</code>
651  re.compile(r'<code class="re-flags">(?i)</code>\bexample\.<code class="re-group">(?:</code>com<code class="re-op">|</code>org<code class="re-op">|</code>net<code class="re-group">)</code>$')<code class="variable-op">,</code>
652  re.compile(r'^127\.0\.0\.1$')<code class="variable-group">]</code>
653 </pre></td></tr></table>
654 </dd>
655   </dl>
656 </td></tr></table>
657 </div>
658 <br />
659 <!-- ==================== NAVIGATION BAR ==================== -->
660 <table class="navbar" border="0" width="100%" cellpadding="0"
661        bgcolor="#a0c0ff" cellspacing="0">
662   <tr valign="middle">
663   <!-- Home link -->
664       <th>&nbsp;&nbsp;&nbsp;<a
665         href="lxml-module.html">Home</a>&nbsp;&nbsp;&nbsp;</th>
666
667   <!-- Tree link -->
668       <th>&nbsp;&nbsp;&nbsp;<a
669         href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
670
671   <!-- Index link -->
672       <th>&nbsp;&nbsp;&nbsp;<a
673         href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
674
675   <!-- Help link -->
676       <th>&nbsp;&nbsp;&nbsp;<a
677         href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
678
679   <!-- Project homepage -->
680       <th class="navbar" align="right" width="100%">
681         <table border="0" cellpadding="0" cellspacing="0">
682           <tr><th class="navbar" align="center"
683             ><a class="navbar" target="_top" href="/">lxml API</a></th>
684           </tr></table></th>
685   </tr>
686 </table>
687 <table border="0" cellpadding="0" cellspacing="0" width="100%">
688   <tr>
689     <td align="left" class="footer">
690     Generated by Epydoc 3.0.1 on Tue Jul 31 10:14:17 2012
691     </td>
692     <td align="right" class="footer">
693       <a target="mainFrame" href="http://epydoc.sourceforge.net"
694         >http://epydoc.sourceforge.net</a>
695     </td>
696   </tr>
697 </table>
698
699 <script type="text/javascript">
700   <!--
701   // Private objects are initially displayed (because if
702   // javascript is turned off then we want them to be
703   // visible); but by default, we want to hide them.  So hide
704   // them unless we have a cookie that says to show them.
705   checkCookie();
706   // -->
707 </script>
708 </body>
709 </html>