489f84c4d88838acee21869e82404a6dfd7c5a36
[platform/upstream/python-lxml.git] / doc / html / api / lxml.html.clean.Cleaner-class.html
1 <?xml version="1.0" encoding="ascii"?>
2 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
3           "DTD/xhtml1-transitional.dtd">
4 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
5 <head>
6   <title>lxml.html.clean.Cleaner</title>
7   <link rel="stylesheet" href="epydoc.css" type="text/css" />
8   <script type="text/javascript" src="epydoc.js"></script>
9 </head>
10
11 <body bgcolor="white" text="black" link="blue" vlink="#204080"
12       alink="#204080">
13 <!-- ==================== NAVIGATION BAR ==================== -->
14 <table class="navbar" border="0" width="100%" cellpadding="0"
15        bgcolor="#a0c0ff" cellspacing="0">
16   <tr valign="middle">
17   <!-- Home link -->
18       <th>&nbsp;&nbsp;&nbsp;<a
19         href="lxml-module.html">Home</a>&nbsp;&nbsp;&nbsp;</th>
20
21   <!-- Tree link -->
22       <th>&nbsp;&nbsp;&nbsp;<a
23         href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
24
25   <!-- Index link -->
26       <th>&nbsp;&nbsp;&nbsp;<a
27         href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
28
29   <!-- Help link -->
30       <th>&nbsp;&nbsp;&nbsp;<a
31         href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
32
33   <!-- Project homepage -->
34       <th class="navbar" align="right" width="100%">
35         <table border="0" cellpadding="0" cellspacing="0">
36           <tr><th class="navbar" align="center"
37             ><a class="navbar" target="_top" href="/">lxml API</a></th>
38           </tr></table></th>
39   </tr>
40 </table>
41 <table width="100%" cellpadding="0" cellspacing="0">
42   <tr valign="top">
43     <td width="100%">
44       <span class="breadcrumbs">
45         <a href="lxml-module.html">Package&nbsp;lxml</a> ::
46         <a href="lxml.html-module.html">Package&nbsp;html</a> ::
47         <a href="lxml.html.clean-module.html">Module&nbsp;clean</a> ::
48         Class&nbsp;Cleaner
49       </span>
50     </td>
51     <td>
52       <table cellpadding="0" cellspacing="0">
53         <!-- hide/show private -->
54         <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
55     onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>
56         <tr><td align="right"><span class="options"
57             >[<a href="frames.html" target="_top">frames</a
58             >&nbsp;|&nbsp;<a href="lxml.html.clean.Cleaner-class.html"
59             target="_top">no&nbsp;frames</a>]</span></td></tr>
60       </table>
61     </td>
62   </tr>
63 </table>
64 <!-- ==================== CLASS DESCRIPTION ==================== -->
65 <h1 class="epydoc">Class Cleaner</h1><p class="nomargin-top"><span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner">source&nbsp;code</a></span></p>
66 <pre class="base-tree">
67 object --+
68          |
69         <strong class="uidshort">Cleaner</strong>
70 </pre>
71
72 <hr />
73 <p>Instances clean the document of each of the possible offending
74 elements.  The cleaning is controlled by attributes; you can
75 override attributes in a subclass, or set them in the constructor.</p>
76 <dl class="rst-docutils">
77 <dt><tt class="rst-docutils literal">scripts</tt>:</dt>
78 <dd>Removes any <tt class="rst-docutils literal">&lt;script&gt;</tt> tags.</dd>
79 <dt><tt class="rst-docutils literal">javascript</tt>:</dt>
80 <dd>Removes any Javascript, like an <tt class="rst-docutils literal">onclick</tt> attribute. Also removes stylesheets
81 as they could contain Javascript.</dd>
82 <dt><tt class="rst-docutils literal">comments</tt>:</dt>
83 <dd>Removes any comments.</dd>
84 <dt><tt class="rst-docutils literal">style</tt>:</dt>
85 <dd>Removes any style tags.</dd>
86 <dt><tt class="rst-docutils literal">inline_style</tt>:</dt>
87 <dd>Removes any style attributes.  Defaults to the value of the <tt class="rst-docutils literal">style</tt> option.</dd>
88 <dt><tt class="rst-docutils literal">links</tt>:</dt>
89 <dd>Removes any <tt class="rst-docutils literal">&lt;link&gt;</tt> tags.</dd>
90 <dt><tt class="rst-docutils literal">meta</tt>:</dt>
91 <dd>Removes any <tt class="rst-docutils literal">&lt;meta&gt;</tt> tags.</dd>
92 <dt><tt class="rst-docutils literal">page_structure</tt>:</dt>
93 <dd>Removes structural parts of a page: <tt class="rst-docutils literal">&lt;head&gt;</tt>, <tt class="rst-docutils literal">&lt;html&gt;</tt>, <tt class="rst-docutils literal">&lt;title&gt;</tt>.</dd>
94 <dt><tt class="rst-docutils literal">processing_instructions</tt>:</dt>
95 <dd>Removes any processing instructions.</dd>
96 <dt><tt class="rst-docutils literal">embedded</tt>:</dt>
97 <dd>Removes any embedded objects (flash, iframes).</dd>
98 <dt><tt class="rst-docutils literal">frames</tt>:</dt>
99 <dd>Removes any frame-related tags.</dd>
100 <dt><tt class="rst-docutils literal">forms</tt>:</dt>
101 <dd>Removes any form tags.</dd>
102 <dt><tt class="rst-docutils literal">annoying_tags</tt>:</dt>
103 <dd>Removes tags that aren't <em>wrong</em>, but are annoying: <tt class="rst-docutils literal">&lt;blink&gt;</tt> and <tt class="rst-docutils literal">&lt;marquee&gt;</tt>.</dd>
104 <dt><tt class="rst-docutils literal">remove_tags</tt>:</dt>
105 <dd>A list of tags to remove.  Only the tags will be removed,
106 their content will get pulled up into the parent tag.</dd>
107 <dt><tt class="rst-docutils literal">kill_tags</tt>:</dt>
108 <dd>A list of tags to kill.  Killing also removes the tag's content,
109 i.e. the whole subtree, not just the tag itself.</dd>
110 <dt><tt class="rst-docutils literal">allow_tags</tt>:</dt>
111 <dd>A list of tags to include (default include all).</dd>
112 <dt><tt class="rst-docutils literal">remove_unknown_tags</tt>:</dt>
113 <dd>Remove any tags that aren't standard parts of HTML.</dd>
114 <dt><tt class="rst-docutils literal">safe_attrs_only</tt>:</dt>
115 <dd>If true, only include 'safe' attributes (specifically the list
116 from the feedparser HTML sanitisation web site).</dd>
117 <dt><tt class="rst-docutils literal">safe_attrs</tt>:</dt>
118 <dd>A set of attribute names to override the default list of attributes
119 considered 'safe' (when safe_attrs_only=True).</dd>
120 <dt><tt class="rst-docutils literal">add_nofollow</tt>:</dt>
121 <dd>If true, then any &lt;a&gt; tags will have <tt class="rst-docutils literal"><span class="pre">rel=&quot;nofollow&quot;</span></tt> added to them.</dd>
122 <dt><tt class="rst-docutils literal">host_whitelist</tt>:</dt>
123 <dd><p class="rst-first">A list or set of hosts that you can use for embedded content
124 (for content like <tt class="rst-docutils literal">&lt;object&gt;</tt>, <tt class="rst-docutils literal">&lt;link <span class="pre">rel=&quot;stylesheet&quot;&gt;</span></tt>, etc).
125 You can also implement/override the method
126 <tt class="rst-docutils literal">allow_embedded_url(el, url)</tt> or <tt class="rst-docutils literal">allow_element(el)</tt> to
127 implement more complex rules for what can be embedded.
128 Anything that passes this test will be shown, regardless of
129 the value of (for instance) <tt class="rst-docutils literal">embedded</tt>.</p>
130 <p>Note that this parameter might not work as intended if you do not
131 make the links absolute before doing the cleaning.</p>
132 <p class="rst-last">Note that you may also need to set <tt class="rst-docutils literal">whitelist_tags</tt>.</p>
133 </dd>
134 <dt><tt class="rst-docutils literal">whitelist_tags</tt>:</dt>
135 <dd>A set of tags that can be included with <tt class="rst-docutils literal">host_whitelist</tt>.
136 The default is <tt class="rst-docutils literal">iframe</tt> and <tt class="rst-docutils literal">embed</tt>; you may wish to
137 include other tags like <tt class="rst-docutils literal">script</tt>, or you may want to
138 implement <tt class="rst-docutils literal">allow_embedded_url</tt> for more control.  Set to None to
139 include all tags.</dd>
140 </dl>
141 <p>This modifies the document <em>in place</em>.</p>
142
143 <!-- ==================== INSTANCE METHODS ==================== -->
144 <a name="section-InstanceMethods"></a>
145 <table class="summary" border="1" cellpadding="3"
146        cellspacing="0" width="100%" bgcolor="white">
147 <tr bgcolor="#70b0f0" class="table-header">
148   <td colspan="2" class="table-header">
149     <table border="0" cellpadding="0" cellspacing="0" width="100%">
150       <tr valign="top">
151         <td align="left"><span class="table-header">Instance Methods</span></td>
152         <td align="right" valign="top"
153          ><span class="options">[<a href="#section-InstanceMethods"
154          class="privatelink" onclick="toggle_private();"
155          >hide private</a>]</span></td>
156       </tr>
157     </table>
158   </td>
159 </tr>
160 <tr>
161     <td width="15%" align="right" valign="top" class="summary">
162       <span class="summary-type">&nbsp;</span>
163     </td><td class="summary">
164       <table width="100%" cellpadding="0" cellspacing="0" border="0">
165         <tr>
166           <td><span class="summary-sig"><a href="lxml.html.clean.Cleaner-class.html#__init__" class="summary-sig-name">__init__</a>(<span class="summary-sig-arg">self</span>)</span><br />
167       x.__init__(...) initializes x; see help(type(x)) for signature</td>
168           <td align="right" valign="top">
169             <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.__init__">source&nbsp;code</a></span>
170             
171           </td>
172         </tr>
173       </table>
174       
175     </td>
176   </tr>
177 <tr>
178     <td width="15%" align="right" valign="top" class="summary">
179       <span class="summary-type">&nbsp;</span>
180     </td><td class="summary">
181       <table width="100%" cellpadding="0" cellspacing="0" border="0">
182         <tr>
183           <td><span class="summary-sig"><a name="__call__"></a><span class="summary-sig-name">__call__</span>(<span class="summary-sig-arg">self</span>,
184         <span class="summary-sig-arg">doc</span>)</span><br />
185       Cleans the document.</td>
186           <td align="right" valign="top">
187             <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.__call__">source&nbsp;code</a></span>
188             
189           </td>
190         </tr>
191       </table>
192       
193     </td>
194   </tr>
195 <tr>
196     <td width="15%" align="right" valign="top" class="summary">
197       <span class="summary-type">&nbsp;</span>
198     </td><td class="summary">
199       <table width="100%" cellpadding="0" cellspacing="0" border="0">
200         <tr>
201           <td><span class="summary-sig"><a name="allow_follow"></a><span class="summary-sig-name">allow_follow</span>(<span class="summary-sig-arg">self</span>,
202         <span class="summary-sig-arg">anchor</span>)</span><br />
203       Override to suppress rel=&quot;nofollow&quot; on some anchors.</td>
204           <td align="right" valign="top">
205             <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.allow_follow">source&nbsp;code</a></span>
206             
207           </td>
208         </tr>
209       </table>
210       
211     </td>
212   </tr>
213 <tr>
214     <td width="15%" align="right" valign="top" class="summary">
215       <span class="summary-type">&nbsp;</span>
216     </td><td class="summary">
217       <table width="100%" cellpadding="0" cellspacing="0" border="0">
218         <tr>
219           <td><span class="summary-sig"><a href="lxml.html.clean.Cleaner-class.html#allow_element" class="summary-sig-name">allow_element</a>(<span class="summary-sig-arg">self</span>,
220         <span class="summary-sig-arg">el</span>)</span><br />
221       Decide whether an element is configured to be accepted or rejected.</td>
222           <td align="right" valign="top">
223             <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.allow_element">source&nbsp;code</a></span>
224             
225           </td>
226         </tr>
227       </table>
228       
229     </td>
230   </tr>
231 <tr>
232     <td width="15%" align="right" valign="top" class="summary">
233       <span class="summary-type">&nbsp;</span>
234     </td><td class="summary">
235       <table width="100%" cellpadding="0" cellspacing="0" border="0">
236         <tr>
237           <td><span class="summary-sig"><a href="lxml.html.clean.Cleaner-class.html#allow_embedded_url" class="summary-sig-name">allow_embedded_url</a>(<span class="summary-sig-arg">self</span>,
238         <span class="summary-sig-arg">el</span>,
239         <span class="summary-sig-arg">url</span>)</span><br />
240       Decide whether a URL that was found in an element's attributes or text
241 is configured to be accepted or rejected.</td>
242           <td align="right" valign="top">
243             <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.allow_embedded_url">source&nbsp;code</a></span>
244             
245           </td>
246         </tr>
247       </table>
248       
249     </td>
250   </tr>
251 <tr>
252     <td width="15%" align="right" valign="top" class="summary">
253       <span class="summary-type">&nbsp;</span>
254     </td><td class="summary">
255       <table width="100%" cellpadding="0" cellspacing="0" border="0">
256         <tr>
257           <td><span class="summary-sig"><a name="kill_conditional_comments"></a><span class="summary-sig-name">kill_conditional_comments</span>(<span class="summary-sig-arg">self</span>,
258         <span class="summary-sig-arg">doc</span>)</span><br />
259       IE conditional comments basically embed HTML that the parser
260 doesn't normally see.  We can't allow anything like that, so
261 we'll kill any comments that could be conditional.</td>
262           <td align="right" valign="top">
263             <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.kill_conditional_comments">source&nbsp;code</a></span>
264             
265           </td>
266         </tr>
267       </table>
268       
269     </td>
270   </tr>
271 <tr class="private">
272     <td width="15%" align="right" valign="top" class="summary">
273       <span class="summary-type">&nbsp;</span>
274     </td><td class="summary">
275       <table width="100%" cellpadding="0" cellspacing="0" border="0">
276         <tr>
277           <td><span class="summary-sig"><a name="_kill_elements"></a><span class="summary-sig-name">_kill_elements</span>(<span class="summary-sig-arg">self</span>,
278         <span class="summary-sig-arg">doc</span>,
279         <span class="summary-sig-arg">condition</span>,
280         <span class="summary-sig-arg">iterate</span>=<span class="summary-sig-default">None</span>)</span></td>
281           <td align="right" valign="top">
282             <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner._kill_elements">source&nbsp;code</a></span>
283             
284           </td>
285         </tr>
286       </table>
287       
288     </td>
289   </tr>
290 <tr class="private">
291     <td width="15%" align="right" valign="top" class="summary">
292       <span class="summary-type">&nbsp;</span>
293     </td><td class="summary">
294       <table width="100%" cellpadding="0" cellspacing="0" border="0">
295         <tr>
296           <td><span class="summary-sig"><a name="_remove_javascript_link"></a><span class="summary-sig-name">_remove_javascript_link</span>(<span class="summary-sig-arg">self</span>,
297         <span class="summary-sig-arg">link</span>)</span></td>
298           <td align="right" valign="top">
299             <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner._remove_javascript_link">source&nbsp;code</a></span>
300             
301           </td>
302         </tr>
303       </table>
304       
305     </td>
306   </tr>
307 <tr class="private">
308     <td width="15%" align="right" valign="top" class="summary">
309       <span class="summary-type">&nbsp;</span>
310     </td><td class="summary">
311       <table width="100%" cellpadding="0" cellspacing="0" border="0">
312         <tr>
313           <td><span class="summary-sig"><a name="_substitute_comments"></a><span class="summary-sig-name">_substitute_comments</span>(<span class="summary-sig-arg">...</span>)</span><br />
314       sub(repl, string[, count = 0]) --&gt; newstring
315 Return the string obtained by replacing the leftmost non-overlapping
316 occurrences of pattern in string by the replacement repl.</td>
317           <td align="right" valign="top">
318             <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner._substitute_comments">source&nbsp;code</a></span>
319             
320           </td>
321         </tr>
322       </table>
323       
324     </td>
325   </tr>
326 <tr class="private">
327     <td width="15%" align="right" valign="top" class="summary">
328       <span class="summary-type">&nbsp;</span>
329     </td><td class="summary">
330       <table width="100%" cellpadding="0" cellspacing="0" border="0">
331         <tr>
332           <td><span class="summary-sig"><a href="lxml.html.clean.Cleaner-class.html#_has_sneaky_javascript" class="summary-sig-name" onclick="show_private();">_has_sneaky_javascript</a>(<span class="summary-sig-arg">self</span>,
333         <span class="summary-sig-arg">style</span>)</span><br />
334       Depending on the browser, stuff like <tt class="rst-docutils literal">e x p r e s s i o <span class="pre">n(...)</span></tt>
335 can get interpreted, or <tt class="rst-docutils literal">expre/* stuff <span class="pre">*/ssion(...)</span></tt>.  This
336 checks for attempts to do stuff like this.</td>
337           <td align="right" valign="top">
338             <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner._has_sneaky_javascript">source&nbsp;code</a></span>
339             
340           </td>
341         </tr>
342       </table>
343       
344     </td>
345   </tr>
346 <tr>
347     <td width="15%" align="right" valign="top" class="summary">
348       <span class="summary-type">&nbsp;</span>
349     </td><td class="summary">
350       <table width="100%" cellpadding="0" cellspacing="0" border="0">
351         <tr>
352           <td><span class="summary-sig"><a name="clean_html"></a><span class="summary-sig-name">clean_html</span>(<span class="summary-sig-arg">self</span>,
353         <span class="summary-sig-arg">html</span>)</span></td>
354           <td align="right" valign="top">
355             <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.clean_html">source&nbsp;code</a></span>
356             
357           </td>
358         </tr>
359       </table>
360       
361     </td>
362   </tr>
363   <tr>
364     <td colspan="2" class="summary">
365     <p class="indent-wrapped-lines"><b>Inherited from <code>object</code></b>:
366       <code>__delattr__</code>,
367       <code>__format__</code>,
368       <code>__getattribute__</code>,
369       <code>__hash__</code>,
370       <code>__new__</code>,
371       <code>__reduce__</code>,
372       <code>__reduce_ex__</code>,
373       <code>__repr__</code>,
374       <code>__setattr__</code>,
375       <code>__sizeof__</code>,
376       <code>__str__</code>,
377       <code>__subclasshook__</code>
378       </p>
379     </td>
380   </tr>
381 </table>
382 <!-- ==================== CLASS VARIABLES ==================== -->
383 <a name="section-ClassVariables"></a>
384 <table class="summary" border="1" cellpadding="3"
385        cellspacing="0" width="100%" bgcolor="white">
386 <tr bgcolor="#70b0f0" class="table-header">
387   <td colspan="2" class="table-header">
388     <table border="0" cellpadding="0" cellspacing="0" width="100%">
389       <tr valign="top">
390         <td align="left"><span class="table-header">Class Variables</span></td>
391         <td align="right" valign="top"
392          ><span class="options">[<a href="#section-ClassVariables"
393          class="privatelink" onclick="toggle_private();"
394          >hide private</a>]</span></td>
395       </tr>
396     </table>
397   </td>
398 </tr>
399 <tr>
400     <td width="15%" align="right" valign="top" class="summary">
401       <span class="summary-type">&nbsp;</span>
402     </td><td class="summary">
403         <a name="scripts"></a><span class="summary-name">scripts</span> = <code title="True">True</code>
404     </td>
405   </tr>
406 <tr>
407     <td width="15%" align="right" valign="top" class="summary">
408       <span class="summary-type">&nbsp;</span>
409     </td><td class="summary">
410         <a name="javascript"></a><span class="summary-name">javascript</span> = <code title="True">True</code>
411     </td>
412   </tr>
413 <tr>
414     <td width="15%" align="right" valign="top" class="summary">
415       <span class="summary-type">&nbsp;</span>
416     </td><td class="summary">
417         <a name="comments"></a><span class="summary-name">comments</span> = <code title="True">True</code>
418     </td>
419   </tr>
420 <tr>
421     <td width="15%" align="right" valign="top" class="summary">
422       <span class="summary-type">&nbsp;</span>
423     </td><td class="summary">
424         <a name="style"></a><span class="summary-name">style</span> = <code title="False">False</code>
425     </td>
426   </tr>
427 <tr>
428     <td width="15%" align="right" valign="top" class="summary">
429       <span class="summary-type">&nbsp;</span>
430     </td><td class="summary">
431         <a name="inline_style"></a><span class="summary-name">inline_style</span> = <code title="None">None</code><br />
432       hash(x)
433     </td>
434   </tr>
435 <tr>
436     <td width="15%" align="right" valign="top" class="summary">
437       <span class="summary-type">&nbsp;</span>
438     </td><td class="summary">
439         <a name="links"></a><span class="summary-name">links</span> = <code title="True">True</code>
440     </td>
441   </tr>
442 <tr>
443     <td width="15%" align="right" valign="top" class="summary">
444       <span class="summary-type">&nbsp;</span>
445     </td><td class="summary">
446         <a name="meta"></a><span class="summary-name">meta</span> = <code title="True">True</code>
447     </td>
448   </tr>
449 <tr>
450     <td width="15%" align="right" valign="top" class="summary">
451       <span class="summary-type">&nbsp;</span>
452     </td><td class="summary">
453         <a name="page_structure"></a><span class="summary-name">page_structure</span> = <code title="True">True</code>
454     </td>
455   </tr>
456 <tr>
457     <td width="15%" align="right" valign="top" class="summary">
458       <span class="summary-type">&nbsp;</span>
459     </td><td class="summary">
460         <a name="processing_instructions"></a><span class="summary-name">processing_instructions</span> = <code title="True">True</code>
461     </td>
462   </tr>
463 <tr>
464     <td width="15%" align="right" valign="top" class="summary">
465       <span class="summary-type">&nbsp;</span>
466     </td><td class="summary">
467         <a name="embedded"></a><span class="summary-name">embedded</span> = <code title="True">True</code>
468     </td>
469   </tr>
470 <tr>
471     <td width="15%" align="right" valign="top" class="summary">
472       <span class="summary-type">&nbsp;</span>
473     </td><td class="summary">
474         <a name="frames"></a><span class="summary-name">frames</span> = <code title="True">True</code>
475     </td>
476   </tr>
477 <tr>
478     <td width="15%" align="right" valign="top" class="summary">
479       <span class="summary-type">&nbsp;</span>
480     </td><td class="summary">
481         <a name="forms"></a><span class="summary-name">forms</span> = <code title="True">True</code>
482     </td>
483   </tr>
484 <tr>
485     <td width="15%" align="right" valign="top" class="summary">
486       <span class="summary-type">&nbsp;</span>
487     </td><td class="summary">
488         <a name="annoying_tags"></a><span class="summary-name">annoying_tags</span> = <code title="True">True</code>
489     </td>
490   </tr>
491 <tr>
492     <td width="15%" align="right" valign="top" class="summary">
493       <span class="summary-type">&nbsp;</span>
494     </td><td class="summary">
495         <a name="remove_tags"></a><span class="summary-name">remove_tags</span> = <code title="None">None</code><br />
496       hash(x)
497     </td>
498   </tr>
499 <tr>
500     <td width="15%" align="right" valign="top" class="summary">
501       <span class="summary-type">&nbsp;</span>
502     </td><td class="summary">
503         <a name="allow_tags"></a><span class="summary-name">allow_tags</span> = <code title="None">None</code><br />
504       hash(x)
505     </td>
506   </tr>
507 <tr>
508     <td width="15%" align="right" valign="top" class="summary">
509       <span class="summary-type">&nbsp;</span>
510     </td><td class="summary">
511         <a name="kill_tags"></a><span class="summary-name">kill_tags</span> = <code title="None">None</code><br />
512       hash(x)
513     </td>
514   </tr>
515 <tr>
516     <td width="15%" align="right" valign="top" class="summary">
517       <span class="summary-type">&nbsp;</span>
518     </td><td class="summary">
519         <a name="remove_unknown_tags"></a><span class="summary-name">remove_unknown_tags</span> = <code title="True">True</code>
520     </td>
521   </tr>
522 <tr>
523     <td width="15%" align="right" valign="top" class="summary">
524       <span class="summary-type">&nbsp;</span>
525     </td><td class="summary">
526         <a name="safe_attrs_only"></a><span class="summary-name">safe_attrs_only</span> = <code title="True">True</code>
527     </td>
528   </tr>
529 <tr>
530     <td width="15%" align="right" valign="top" class="summary">
531       <span class="summary-type">&nbsp;</span>
532     </td><td class="summary">
533         <a href="lxml.html.clean.Cleaner-class.html#safe_attrs" class="summary-name">safe_attrs</a> = <code title="frozenset(['abbr',
534            'accept',
535            'accept-charset',
536            'accesskey',
537            'action',
538            'align',
539            'alt',
540            'axis',
541 ..."><code class="variable-group">frozenset([</code><code class="variable-quote">'</code><code class="variable-string">abbr</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">accept</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">accept-charset</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">a</code><code class="variable-ellipsis">...</code></code>
542     </td>
543   </tr>
544 <tr>
545     <td width="15%" align="right" valign="top" class="summary">
546       <span class="summary-type">&nbsp;</span>
547     </td><td class="summary">
548         <a name="add_nofollow"></a><span class="summary-name">add_nofollow</span> = <code title="False">False</code>
549     </td>
550   </tr>
551 <tr>
552     <td width="15%" align="right" valign="top" class="summary">
553       <span class="summary-type">&nbsp;</span>
554     </td><td class="summary">
555         <a name="host_whitelist"></a><span class="summary-name">host_whitelist</span> = <code title="()"><code class="variable-group">(</code><code class="variable-group">)</code></code>
556     </td>
557   </tr>
558 <tr>
559     <td width="15%" align="right" valign="top" class="summary">
560       <span class="summary-type">&nbsp;</span>
561     </td><td class="summary">
562         <a name="whitelist_tags"></a><span class="summary-name">whitelist_tags</span> = <code title="set(['embed', 'iframe'])"><code class="variable-group">set([</code><code class="variable-quote">'</code><code class="variable-string">embed</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">iframe</code><code class="variable-quote">'</code><code class="variable-group">])</code></code>
563     </td>
564   </tr>
565 <tr class="private">
566     <td width="15%" align="right" valign="top" class="summary">
567       <span class="summary-type">&nbsp;</span>
568     </td><td class="summary">
569         <a href="lxml.html.clean.Cleaner-class.html#_tag_link_attrs" class="summary-name" onclick="show_private();">_tag_link_attrs</a> = <code title="{'a': 'href',
570  'applet': ['code', 'object'],
571  'embed': 'src',
572  'iframe': 'src',
573  'layer': 'src',
574  'link': 'href',
575  'script': 'src'}"><code class="variable-group">{</code><code class="variable-quote">'</code><code class="variable-string">a</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-quote">'</code><code class="variable-string">href</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">applet</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-group">[</code><code class="variable-quote">'</code><code class="variable-string">code</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">object</code><code class="variable-quote">'</code><code class="variable-group">]</code><code class="variable-op">, </code><code class="variable-ellipsis">...</code></code>
576     </td>
577   </tr>
578 <tr>
579     <td width="15%" align="right" valign="top" class="summary">
580       <span class="summary-type">&nbsp;</span>
581     </td><td class="summary">
582         <a name="__qualname__"></a><span class="summary-name">__qualname__</span> = <code title="'Cleaner'"><code class="variable-quote">'</code><code class="variable-string">Cleaner</code><code class="variable-quote">'</code></code>
583     </td>
584   </tr>
585 </table>
586 <!-- ==================== PROPERTIES ==================== -->
587 <a name="section-Properties"></a>
588 <table class="summary" border="1" cellpadding="3"
589        cellspacing="0" width="100%" bgcolor="white">
590 <tr bgcolor="#70b0f0" class="table-header">
591   <td colspan="2" class="table-header">
592     <table border="0" cellpadding="0" cellspacing="0" width="100%">
593       <tr valign="top">
594         <td align="left"><span class="table-header">Properties</span></td>
595         <td align="right" valign="top"
596          ><span class="options">[<a href="#section-Properties"
597          class="privatelink" onclick="toggle_private();"
598          >hide private</a>]</span></td>
599       </tr>
600     </table>
601   </td>
602 </tr>
603   <tr>
604     <td colspan="2" class="summary">
605     <p class="indent-wrapped-lines"><b>Inherited from <code>object</code></b>:
606       <code>__class__</code>
607       </p>
608     </td>
609   </tr>
610 </table>
611 <!-- ==================== METHOD DETAILS ==================== -->
612 <a name="section-MethodDetails"></a>
613 <table class="details" border="1" cellpadding="3"
614        cellspacing="0" width="100%" bgcolor="white">
615 <tr bgcolor="#70b0f0" class="table-header">
616   <td colspan="2" class="table-header">
617     <table border="0" cellpadding="0" cellspacing="0" width="100%">
618       <tr valign="top">
619         <td align="left"><span class="table-header">Method Details</span></td>
620         <td align="right" valign="top"
621          ><span class="options">[<a href="#section-MethodDetails"
622          class="privatelink" onclick="toggle_private();"
623          >hide private</a>]</span></td>
624       </tr>
625     </table>
626   </td>
627 </tr>
628 </table>
629 <a name="__init__"></a>
630 <div>
631 <table class="details" border="1" cellpadding="3"
632        cellspacing="0" width="100%" bgcolor="white">
633 <tr><td>
634   <table width="100%" cellpadding="0" cellspacing="0" border="0">
635   <tr valign="top"><td>
636   <h3 class="epydoc"><span class="sig"><span class="sig-name">__init__</span>(<span class="sig-arg">self</span>)</span>
637     <br /><em class="fname">(Constructor)</em>
638   </h3>
639   </td><td align="right" valign="top"
640     ><span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.__init__">source&nbsp;code</a></span>&nbsp;
641     </td>
642   </tr></table>
643   
644   x.__init__(...) initializes x; see help(type(x)) for signature
645   <dl class="fields">
646     <dt>Overrides:
647         object.__init__
648     </dt>
649         <dd><em class="note">(inherited documentation)</em></dd>
650   </dl>
651 </td></tr></table>
652 </div>
653 <a name="allow_element"></a>
654 <div>
655 <table class="details" border="1" cellpadding="3"
656        cellspacing="0" width="100%" bgcolor="white">
657 <tr><td>
658   <table width="100%" cellpadding="0" cellspacing="0" border="0">
659   <tr valign="top"><td>
660   <h3 class="epydoc"><span class="sig"><span class="sig-name">allow_element</span>(<span class="sig-arg">self</span>,
661         <span class="sig-arg">el</span>)</span>
662   </h3>
663   </td><td align="right" valign="top"
664     ><span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.allow_element">source&nbsp;code</a></span>&nbsp;
665     </td>
666   </tr></table>
667   
668   Decide whether an element is configured to be accepted or rejected.
669   <dl class="fields">
670     <dt>Parameters:</dt>
671     <dd><ul class="nomargin-top">
672         <li><strong class="pname"><code>el</code></strong> - an element.</li>
673     </ul></dd>
674     <dt>Returns:</dt>
675         <dd>true to accept the element or false to reject/discard it.</dd>
676   </dl>
677 </td></tr></table>
678 </div>
679 <a name="allow_embedded_url"></a>
680 <div>
681 <table class="details" border="1" cellpadding="3"
682        cellspacing="0" width="100%" bgcolor="white">
683 <tr><td>
684   <table width="100%" cellpadding="0" cellspacing="0" border="0">
685   <tr valign="top"><td>
686   <h3 class="epydoc"><span class="sig"><span class="sig-name">allow_embedded_url</span>(<span class="sig-arg">self</span>,
687         <span class="sig-arg">el</span>,
688         <span class="sig-arg">url</span>)</span>
689   </h3>
690   </td><td align="right" valign="top"
691     ><span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.allow_embedded_url">source&nbsp;code</a></span>&nbsp;
692     </td>
693   </tr></table>
694   
695   Decide whether a URL that was found in an element's attributes or text
696 is configured to be accepted or rejected.
697   <dl class="fields">
698     <dt>Parameters:</dt>
699     <dd><ul class="nomargin-top">
700         <li><strong class="pname"><code>el</code></strong> - an element.</li>
701         <li><strong class="pname"><code>url</code></strong> - a URL found on the element.</li>
702     </ul></dd>
703     <dt>Returns:</dt>
704         <dd>true to accept the URL or false to reject it.</dd>
705   </dl>
706 </td></tr></table>
707 </div>
708 <a name="_has_sneaky_javascript"></a>
709 <div class="private">
710 <table class="details" border="1" cellpadding="3"
711        cellspacing="0" width="100%" bgcolor="white">
712 <tr><td>
713   <table width="100%" cellpadding="0" cellspacing="0" border="0">
714   <tr valign="top"><td>
715   <h3 class="epydoc"><span class="sig"><span class="sig-name">_has_sneaky_javascript</span>(<span class="sig-arg">self</span>,
716         <span class="sig-arg">style</span>)</span>
717   </h3>
718   </td><td align="right" valign="top"
719     ><span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner._has_sneaky_javascript">source&nbsp;code</a></span>&nbsp;
720     </td>
721   </tr></table>
722   
723   <p>Depending on the browser, stuff like <tt class="rst-docutils literal">e x p r e s s i o <span class="pre">n(...)</span></tt>
724 can get interpreted, or <tt class="rst-docutils literal">expre/* stuff <span class="pre">*/ssion(...)</span></tt>.  This
725 checks for attempts to do stuff like this.</p>
726 <p>Typically the response will be to kill the entire style; if you
727 have just a bit of Javascript in the style another rule will catch
728 that and remove only the Javascript from the style; this catches
729 more sneaky attempts.</p>
730   <dl class="fields">
731   </dl>
732 </td></tr></table>
733 </div>
734 <br />
735 <!-- ==================== CLASS VARIABLE DETAILS ==================== -->
736 <a name="section-ClassVariableDetails"></a>
737 <table class="details" border="1" cellpadding="3"
738        cellspacing="0" width="100%" bgcolor="white">
739 <tr bgcolor="#70b0f0" class="table-header">
740   <td colspan="2" class="table-header">
741     <table border="0" cellpadding="0" cellspacing="0" width="100%">
742       <tr valign="top">
743         <td align="left"><span class="table-header">Class Variable Details</span></td>
744         <td align="right" valign="top"
745          ><span class="options">[<a href="#section-ClassVariableDetails"
746          class="privatelink" onclick="toggle_private();"
747          >hide private</a>]</span></td>
748       </tr>
749     </table>
750   </td>
751 </tr>
752 </table>
753 <a name="safe_attrs"></a>
754 <div>
755 <table class="details" border="1" cellpadding="3"
756        cellspacing="0" width="100%" bgcolor="white">
757 <tr><td>
758   <h3 class="epydoc">safe_attrs</h3>
759   
760   <dl class="fields">
761   </dl>
762   <dl class="fields">
763     <dt>Value:</dt>
764       <dd><table><tr><td><pre class="variable">
765 <code class="variable-group">frozenset([</code><code class="variable-quote">'</code><code class="variable-string">abbr</code><code class="variable-quote">'</code><code class="variable-op">,</code>
766            <code class="variable-quote">'</code><code class="variable-string">accept</code><code class="variable-quote">'</code><code class="variable-op">,</code>
767            <code class="variable-quote">'</code><code class="variable-string">accept-charset</code><code class="variable-quote">'</code><code class="variable-op">,</code>
768            <code class="variable-quote">'</code><code class="variable-string">accesskey</code><code class="variable-quote">'</code><code class="variable-op">,</code>
769            <code class="variable-quote">'</code><code class="variable-string">action</code><code class="variable-quote">'</code><code class="variable-op">,</code>
770            <code class="variable-quote">'</code><code class="variable-string">align</code><code class="variable-quote">'</code><code class="variable-op">,</code>
771            <code class="variable-quote">'</code><code class="variable-string">alt</code><code class="variable-quote">'</code><code class="variable-op">,</code>
772            <code class="variable-quote">'</code><code class="variable-string">axis</code><code class="variable-quote">'</code><code class="variable-op">,</code>
773 <code class="variable-ellipsis">...</code>
774 </pre></td></tr></table>
775 </dd>
776   </dl>
777 </td></tr></table>
778 </div>
779 <a name="_tag_link_attrs"></a>
780 <div class="private">
781 <table class="details" border="1" cellpadding="3"
782        cellspacing="0" width="100%" bgcolor="white">
783 <tr><td>
784   <h3 class="epydoc">_tag_link_attrs</h3>
785   
786   <dl class="fields">
787   </dl>
788   <dl class="fields">
789     <dt>Value:</dt>
790       <dd><table><tr><td><pre class="variable">
791 <code class="variable-group">{</code><code class="variable-quote">'</code><code class="variable-string">a</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-quote">'</code><code class="variable-string">href</code><code class="variable-quote">'</code><code class="variable-op">,</code>
792  <code class="variable-quote">'</code><code class="variable-string">applet</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-group">[</code><code class="variable-quote">'</code><code class="variable-string">code</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">object</code><code class="variable-quote">'</code><code class="variable-group">]</code><code class="variable-op">,</code>
793  <code class="variable-quote">'</code><code class="variable-string">embed</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-quote">'</code><code class="variable-string">src</code><code class="variable-quote">'</code><code class="variable-op">,</code>
794  <code class="variable-quote">'</code><code class="variable-string">iframe</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-quote">'</code><code class="variable-string">src</code><code class="variable-quote">'</code><code class="variable-op">,</code>
795  <code class="variable-quote">'</code><code class="variable-string">layer</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-quote">'</code><code class="variable-string">src</code><code class="variable-quote">'</code><code class="variable-op">,</code>
796  <code class="variable-quote">'</code><code class="variable-string">link</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-quote">'</code><code class="variable-string">href</code><code class="variable-quote">'</code><code class="variable-op">,</code>
797  <code class="variable-quote">'</code><code class="variable-string">script</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-quote">'</code><code class="variable-string">src</code><code class="variable-quote">'</code><code class="variable-group">}</code>
798 </pre></td></tr></table>
799 </dd>
800   </dl>
801 </td></tr></table>
802 </div>
803 <br />
804 <!-- ==================== NAVIGATION BAR ==================== -->
805 <table class="navbar" border="0" width="100%" cellpadding="0"
806        bgcolor="#a0c0ff" cellspacing="0">
807   <tr valign="middle">
808   <!-- Home link -->
809       <th>&nbsp;&nbsp;&nbsp;<a
810         href="lxml-module.html">Home</a>&nbsp;&nbsp;&nbsp;</th>
811
812   <!-- Tree link -->
813       <th>&nbsp;&nbsp;&nbsp;<a
814         href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
815
816   <!-- Index link -->
817       <th>&nbsp;&nbsp;&nbsp;<a
818         href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
819
820   <!-- Help link -->
821       <th>&nbsp;&nbsp;&nbsp;<a
822         href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
823
824   <!-- Project homepage -->
825       <th class="navbar" align="right" width="100%">
826         <table border="0" cellpadding="0" cellspacing="0">
827           <tr><th class="navbar" align="center"
828             ><a class="navbar" target="_top" href="/">lxml API</a></th>
829           </tr></table></th>
830   </tr>
831 </table>
832 <table border="0" cellpadding="0" cellspacing="0" width="100%">
833   <tr>
834     <td align="left" class="footer">
835     Generated by Epydoc 3.0.1
836     on Wed Jan 29 12:26:21 2020
837     </td>
838     <td align="right" class="footer">
839       <a target="mainFrame" href="http://epydoc.sourceforge.net"
840         >http://epydoc.sourceforge.net</a>
841     </td>
842   </tr>
843 </table>
844
845 <script type="text/javascript">
846   <!--
847   // Private objects are initially displayed (because if
848   // javascript is turned off then we want them to be
849   // visible); but by default, we want to hide them.  So hide
850   // them unless we have a cookie that says to show them.
851   checkCookie();
852   // -->
853 </script>
854 </body>
855 </html>