Imported Upstream version 2.3.5
[platform/upstream/python-lxml.git] / doc / html / api / lxml.html.clean.Cleaner-class.html
1 <?xml version="1.0" encoding="ascii"?>
2 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
3           "DTD/xhtml1-transitional.dtd">
4 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
5 <head>
6   <title>lxml.html.clean.Cleaner</title>
7   <link rel="stylesheet" href="epydoc.css" type="text/css" />
8   <script type="text/javascript" src="epydoc.js"></script>
9 </head>
10
11 <body bgcolor="white" text="black" link="blue" vlink="#204080"
12       alink="#204080">
13 <!-- ==================== NAVIGATION BAR ==================== -->
14 <table class="navbar" border="0" width="100%" cellpadding="0"
15        bgcolor="#a0c0ff" cellspacing="0">
16   <tr valign="middle">
17   <!-- Home link -->
18       <th>&nbsp;&nbsp;&nbsp;<a
19         href="lxml-module.html">Home</a>&nbsp;&nbsp;&nbsp;</th>
20
21   <!-- Tree link -->
22       <th>&nbsp;&nbsp;&nbsp;<a
23         href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
24
25   <!-- Index link -->
26       <th>&nbsp;&nbsp;&nbsp;<a
27         href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
28
29   <!-- Help link -->
30       <th>&nbsp;&nbsp;&nbsp;<a
31         href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
32
33   <!-- Project homepage -->
34       <th class="navbar" align="right" width="100%">
35         <table border="0" cellpadding="0" cellspacing="0">
36           <tr><th class="navbar" align="center"
37             ><a class="navbar" target="_top" href="/">lxml API</a></th>
38           </tr></table></th>
39   </tr>
40 </table>
41 <table width="100%" cellpadding="0" cellspacing="0">
42   <tr valign="top">
43     <td width="100%">
44       <span class="breadcrumbs">
45         <a href="lxml-module.html">Package&nbsp;lxml</a> ::
46         <a href="lxml.html-module.html">Package&nbsp;html</a> ::
47         <a href="lxml.html.clean-module.html">Module&nbsp;clean</a> ::
48         Class&nbsp;Cleaner
49       </span>
50     </td>
51     <td>
52       <table cellpadding="0" cellspacing="0">
53         <!-- hide/show private -->
54         <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
55     onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>
56         <tr><td align="right"><span class="options"
57             >[<a href="frames.html" target="_top">frames</a
58             >]&nbsp;|&nbsp;<a href="lxml.html.clean.Cleaner-class.html"
59             target="_top">no&nbsp;frames</a>]</span></td></tr>
60       </table>
61     </td>
62   </tr>
63 </table>
64 <!-- ==================== CLASS DESCRIPTION ==================== -->
65 <h1 class="epydoc">Class Cleaner</h1><p class="nomargin-top"><span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner">source&nbsp;code</a></span></p>
66 <pre class="base-tree">
67 object --+
68          |
69         <strong class="uidshort">Cleaner</strong>
70 </pre>
71
72 <hr />
73 <p>Instances clean the document of each of the possible offending
74 elements.  The cleaning is controlled by attributes; you can
75 override attributes in a subclass, or set them in the constructor.</p>
76 <dl class="rst-docutils">
77 <dt><tt class="rst-docutils literal">scripts</tt>:</dt>
78 <dd>Removes any <tt class="rst-docutils literal">&lt;script&gt;</tt> tags.</dd>
79 <dt><tt class="rst-docutils literal">javascript</tt>:</dt>
80 <dd>Removes any Javascript, like an <tt class="rst-docutils literal">onclick</tt> attribute.</dd>
81 <dt><tt class="rst-docutils literal">comments</tt>:</dt>
82 <dd>Removes any comments.</dd>
83 <dt><tt class="rst-docutils literal">style</tt>:</dt>
84 <dd>Removes any style tags or attributes.</dd>
85 <dt><tt class="rst-docutils literal">links</tt>:</dt>
86 <dd>Removes any <tt class="rst-docutils literal">&lt;link&gt;</tt> tags.</dd>
87 <dt><tt class="rst-docutils literal">meta</tt>:</dt>
88 <dd>Removes any <tt class="rst-docutils literal">&lt;meta&gt;</tt> tags.</dd>
89 <dt><tt class="rst-docutils literal">page_structure</tt>:</dt>
90 <dd>Structural parts of a page: <tt class="rst-docutils literal">&lt;head&gt;</tt>, <tt class="rst-docutils literal">&lt;html&gt;</tt>, <tt class="rst-docutils literal">&lt;title&gt;</tt>.</dd>
91 <dt><tt class="rst-docutils literal">processing_instructions</tt>:</dt>
92 <dd>Removes any processing instructions.</dd>
93 <dt><tt class="rst-docutils literal">embedded</tt>:</dt>
94 <dd>Removes any embedded objects (flash, iframes).</dd>
95 <dt><tt class="rst-docutils literal">frames</tt>:</dt>
96 <dd>Removes any frame-related tags.</dd>
97 <dt><tt class="rst-docutils literal">forms</tt>:</dt>
98 <dd>Removes any form tags.</dd>
99 <dt><tt class="rst-docutils literal">annoying_tags</tt>:</dt>
100 <dd>Tags that aren't <em>wrong</em>, but are annoying.  <tt class="rst-docutils literal">&lt;blink&gt;</tt> and <tt class="rst-docutils literal">&lt;marquee&gt;</tt>.</dd>
101 <dt><tt class="rst-docutils literal">remove_tags</tt>:</dt>
102 <dd>A list of tags to remove.  Only the tags will be removed,
103 their content will get pulled up into the parent tag.</dd>
104 <dt><tt class="rst-docutils literal">kill_tags</tt>:</dt>
105 <dd>A list of tags to kill.  Killing also removes the tag's content,
106 i.e. the whole subtree, not just the tag itself.</dd>
107 <dt><tt class="rst-docutils literal">allow_tags</tt>:</dt>
108 <dd>A list of tags to include (default include all).</dd>
109 <dt><tt class="rst-docutils literal">remove_unknown_tags</tt>:</dt>
110 <dd>Remove any tags that aren't standard parts of HTML.</dd>
111 <dt><tt class="rst-docutils literal">safe_attrs_only</tt>:</dt>
112 <dd>If true, only include 'safe' attributes (specifically the list
113 from <a class="rst-reference external" href="http://feedparser.org/docs/html-sanitization.html" target="_top">feedparser</a>).</dd>
114 <dt><tt class="rst-docutils literal">add_nofollow</tt>:</dt>
115 <dd>If true, then any &lt;a&gt; tags will have <tt class="rst-docutils literal"><span class="pre">rel=&quot;nofollow&quot;</span></tt> added to them.</dd>
116 <dt><tt class="rst-docutils literal">host_whitelist</tt>:</dt>
117 <dd><p class="rst-first">A list or set of hosts that you can use for embedded content
118 (for content like <tt class="rst-docutils literal">&lt;object&gt;</tt>, <tt class="rst-docutils literal">&lt;link <span class="pre">rel=&quot;stylesheet&quot;&gt;</span></tt>, etc).
119 You can also implement/override the method
120 <tt class="rst-docutils literal">allow_embedded_url(el, url)</tt> or <tt class="rst-docutils literal">allow_element(el)</tt> to
121 implement more complex rules for what can be embedded.
122 Anything that passes this test will be shown, regardless of
123 the value of (for instance) <tt class="rst-docutils literal">embedded</tt>.</p>
124 <p class="rst-last">Note that this parameter might not work as intended if you do not
125 make the links absolute before doing the cleaning.</p>
126 </dd>
127 <dt><tt class="rst-docutils literal">whitelist_tags</tt>:</dt>
128 <dd>A set of tags that can be included with <tt class="rst-docutils literal">host_whitelist</tt>.
129 The default is <tt class="rst-docutils literal">iframe</tt> and <tt class="rst-docutils literal">embed</tt>; you may wish to
130 include other tags like <tt class="rst-docutils literal">script</tt>, or you may want to
131 implement <tt class="rst-docutils literal">allow_embedded_url</tt> for more control.  Set to None to
132 include all tags.</dd>
133 </dl>
134 <p>This modifies the document <em>in place</em>.</p>
135
136 <!-- ==================== INSTANCE METHODS ==================== -->
137 <a name="section-InstanceMethods"></a>
138 <table class="summary" border="1" cellpadding="3"
139        cellspacing="0" width="100%" bgcolor="white">
140 <tr bgcolor="#70b0f0" class="table-header">
141   <td colspan="2" class="table-header">
142     <table border="0" cellpadding="0" cellspacing="0" width="100%">
143       <tr valign="top">
144         <td align="left"><span class="table-header">Instance Methods</span></td>
145         <td align="right" valign="top"
146          ><span class="options">[<a href="#section-InstanceMethods"
147          class="privatelink" onclick="toggle_private();"
148          >hide private</a>]</span></td>
149       </tr>
150     </table>
151   </td>
152 </tr>
153 <tr>
154     <td width="15%" align="right" valign="top" class="summary">
155       <span class="summary-type">&nbsp;</span>
156     </td><td class="summary">
157       <table width="100%" cellpadding="0" cellspacing="0" border="0">
158         <tr>
159           <td><span class="summary-sig"><a href="lxml.html.clean.Cleaner-class.html#__init__" class="summary-sig-name">__init__</a>(<span class="summary-sig-arg">self</span>,
160         <span class="summary-sig-arg">**kw</span>)</span><br />
161       x.__init__(...) initializes x; see help(type(x)) for signature</td>
162           <td align="right" valign="top">
163             <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.__init__">source&nbsp;code</a></span>
164             
165           </td>
166         </tr>
167       </table>
168       
169     </td>
170   </tr>
171 <tr>
172     <td width="15%" align="right" valign="top" class="summary">
173       <span class="summary-type">&nbsp;</span>
174     </td><td class="summary">
175       <table width="100%" cellpadding="0" cellspacing="0" border="0">
176         <tr>
177           <td><span class="summary-sig"><a name="__call__"></a><span class="summary-sig-name">__call__</span>(<span class="summary-sig-arg">self</span>,
178         <span class="summary-sig-arg">doc</span>)</span><br />
179       Cleans the document.</td>
180           <td align="right" valign="top">
181             <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.__call__">source&nbsp;code</a></span>
182             
183           </td>
184         </tr>
185       </table>
186       
187     </td>
188   </tr>
189 <tr>
190     <td width="15%" align="right" valign="top" class="summary">
191       <span class="summary-type">&nbsp;</span>
192     </td><td class="summary">
193       <table width="100%" cellpadding="0" cellspacing="0" border="0">
194         <tr>
195           <td><span class="summary-sig"><a name="allow_follow"></a><span class="summary-sig-name">allow_follow</span>(<span class="summary-sig-arg">self</span>,
196         <span class="summary-sig-arg">anchor</span>)</span><br />
197       Override to suppress rel=&quot;nofollow&quot; on some anchors.</td>
198           <td align="right" valign="top">
199             <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.allow_follow">source&nbsp;code</a></span>
200             
201           </td>
202         </tr>
203       </table>
204       
205     </td>
206   </tr>
207 <tr>
208     <td width="15%" align="right" valign="top" class="summary">
209       <span class="summary-type">&nbsp;</span>
210     </td><td class="summary">
211       <table width="100%" cellpadding="0" cellspacing="0" border="0">
212         <tr>
213           <td><span class="summary-sig"><a name="allow_element"></a><span class="summary-sig-name">allow_element</span>(<span class="summary-sig-arg">self</span>,
214         <span class="summary-sig-arg">el</span>)</span></td>
215           <td align="right" valign="top">
216             <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.allow_element">source&nbsp;code</a></span>
217             
218           </td>
219         </tr>
220       </table>
221       
222     </td>
223   </tr>
224 <tr>
225     <td width="15%" align="right" valign="top" class="summary">
226       <span class="summary-type">&nbsp;</span>
227     </td><td class="summary">
228       <table width="100%" cellpadding="0" cellspacing="0" border="0">
229         <tr>
230           <td><span class="summary-sig"><a name="allow_embedded_url"></a><span class="summary-sig-name">allow_embedded_url</span>(<span class="summary-sig-arg">self</span>,
231         <span class="summary-sig-arg">el</span>,
232         <span class="summary-sig-arg">url</span>)</span></td>
233           <td align="right" valign="top">
234             <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.allow_embedded_url">source&nbsp;code</a></span>
235             
236           </td>
237         </tr>
238       </table>
239       
240     </td>
241   </tr>
242 <tr>
243     <td width="15%" align="right" valign="top" class="summary">
244       <span class="summary-type">&nbsp;</span>
245     </td><td class="summary">
246       <table width="100%" cellpadding="0" cellspacing="0" border="0">
247         <tr>
248           <td><span class="summary-sig"><a name="kill_conditional_comments"></a><span class="summary-sig-name">kill_conditional_comments</span>(<span class="summary-sig-arg">self</span>,
249         <span class="summary-sig-arg">doc</span>)</span><br />
250       IE conditional comments basically embed HTML that the parser
251 doesn't normally see.  We can't allow anything like that, so
252 we'll kill any comments that could be conditional.</td>
253           <td align="right" valign="top">
254             <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.kill_conditional_comments">source&nbsp;code</a></span>
255             
256           </td>
257         </tr>
258       </table>
259       
260     </td>
261   </tr>
262 <tr class="private">
263     <td width="15%" align="right" valign="top" class="summary">
264       <span class="summary-type">&nbsp;</span>
265     </td><td class="summary">
266       <table width="100%" cellpadding="0" cellspacing="0" border="0">
267         <tr>
268           <td><span class="summary-sig"><a name="_kill_elements"></a><span class="summary-sig-name">_kill_elements</span>(<span class="summary-sig-arg">self</span>,
269         <span class="summary-sig-arg">doc</span>,
270         <span class="summary-sig-arg">condition</span>,
271         <span class="summary-sig-arg">iterate</span>=<span class="summary-sig-default">None</span>)</span></td>
272           <td align="right" valign="top">
273             <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner._kill_elements">source&nbsp;code</a></span>
274             
275           </td>
276         </tr>
277       </table>
278       
279     </td>
280   </tr>
281 <tr class="private">
282     <td width="15%" align="right" valign="top" class="summary">
283       <span class="summary-type">&nbsp;</span>
284     </td><td class="summary">
285       <table width="100%" cellpadding="0" cellspacing="0" border="0">
286         <tr>
287           <td><span class="summary-sig"><a name="_remove_javascript_link"></a><span class="summary-sig-name">_remove_javascript_link</span>(<span class="summary-sig-arg">self</span>,
288         <span class="summary-sig-arg">link</span>)</span></td>
289           <td align="right" valign="top">
290             <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner._remove_javascript_link">source&nbsp;code</a></span>
291             
292           </td>
293         </tr>
294       </table>
295       
296     </td>
297   </tr>
298 <tr class="private">
299     <td width="15%" align="right" valign="top" class="summary">
300       <span class="summary-type">&nbsp;</span>
301     </td><td class="summary">
302       <table width="100%" cellpadding="0" cellspacing="0" border="0">
303         <tr>
304           <td><span class="summary-sig"><a name="_substitute_comments"></a><span class="summary-sig-name">_substitute_comments</span>(<span class="summary-sig-arg">...</span>)</span><br />
305       sub(repl, string[, count = 0]) --&gt; newstring
306 Return the string obtained by replacing the leftmost non-overlapping
307 occurrences of pattern in string by the replacement repl.</td>
308           <td align="right" valign="top">
309             <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner._substitute_comments">source&nbsp;code</a></span>
310             
311           </td>
312         </tr>
313       </table>
314       
315     </td>
316   </tr>
317 <tr class="private">
318     <td width="15%" align="right" valign="top" class="summary">
319       <span class="summary-type">&nbsp;</span>
320     </td><td class="summary">
321       <table width="100%" cellpadding="0" cellspacing="0" border="0">
322         <tr>
323           <td><span class="summary-sig"><a href="lxml.html.clean.Cleaner-class.html#_has_sneaky_javascript" class="summary-sig-name" onclick="show_private();">_has_sneaky_javascript</a>(<span class="summary-sig-arg">self</span>,
324         <span class="summary-sig-arg">style</span>)</span><br />
325       Depending on the browser, stuff like <tt class="rst-docutils literal">e x p r e s s i o <span class="pre">n(...)</span></tt>
326 can get interpreted, or <tt class="rst-docutils literal">expre/* stuff <span class="pre">*/ssion(...)</span></tt>.  This
327 checks for attempts to do stuff like this.</td>
328           <td align="right" valign="top">
329             <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner._has_sneaky_javascript">source&nbsp;code</a></span>
330             
331           </td>
332         </tr>
333       </table>
334       
335     </td>
336   </tr>
337 <tr>
338     <td width="15%" align="right" valign="top" class="summary">
339       <span class="summary-type">&nbsp;</span>
340     </td><td class="summary">
341       <table width="100%" cellpadding="0" cellspacing="0" border="0">
342         <tr>
343           <td><span class="summary-sig"><a name="clean_html"></a><span class="summary-sig-name">clean_html</span>(<span class="summary-sig-arg">self</span>,
344         <span class="summary-sig-arg">html</span>)</span></td>
345           <td align="right" valign="top">
346             <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.clean_html">source&nbsp;code</a></span>
347             
348           </td>
349         </tr>
350       </table>
351       
352     </td>
353   </tr>
354   <tr>
355     <td colspan="2" class="summary">
356     <p class="indent-wrapped-lines"><b>Inherited from <code>object</code></b>:
357       <code>__delattr__</code>,
358       <code>__format__</code>,
359       <code>__getattribute__</code>,
360       <code>__hash__</code>,
361       <code>__new__</code>,
362       <code>__reduce__</code>,
363       <code>__reduce_ex__</code>,
364       <code>__repr__</code>,
365       <code>__setattr__</code>,
366       <code>__sizeof__</code>,
367       <code>__str__</code>,
368       <code>__subclasshook__</code>
369       </p>
370     </td>
371   </tr>
372 </table>
373 <!-- ==================== CLASS VARIABLES ==================== -->
374 <a name="section-ClassVariables"></a>
375 <table class="summary" border="1" cellpadding="3"
376        cellspacing="0" width="100%" bgcolor="white">
377 <tr bgcolor="#70b0f0" class="table-header">
378   <td colspan="2" class="table-header">
379     <table border="0" cellpadding="0" cellspacing="0" width="100%">
380       <tr valign="top">
381         <td align="left"><span class="table-header">Class Variables</span></td>
382         <td align="right" valign="top"
383          ><span class="options">[<a href="#section-ClassVariables"
384          class="privatelink" onclick="toggle_private();"
385          >hide private</a>]</span></td>
386       </tr>
387     </table>
388   </td>
389 </tr>
390 <tr>
391     <td width="15%" align="right" valign="top" class="summary">
392       <span class="summary-type">&nbsp;</span>
393     </td><td class="summary">
394         <a name="scripts"></a><span class="summary-name">scripts</span> = <code title="True">True</code>
395     </td>
396   </tr>
397 <tr>
398     <td width="15%" align="right" valign="top" class="summary">
399       <span class="summary-type">&nbsp;</span>
400     </td><td class="summary">
401         <a name="javascript"></a><span class="summary-name">javascript</span> = <code title="True">True</code>
402     </td>
403   </tr>
404 <tr>
405     <td width="15%" align="right" valign="top" class="summary">
406       <span class="summary-type">&nbsp;</span>
407     </td><td class="summary">
408         <a name="comments"></a><span class="summary-name">comments</span> = <code title="True">True</code>
409     </td>
410   </tr>
411 <tr>
412     <td width="15%" align="right" valign="top" class="summary">
413       <span class="summary-type">&nbsp;</span>
414     </td><td class="summary">
415         <a name="style"></a><span class="summary-name">style</span> = <code title="False">False</code>
416     </td>
417   </tr>
418 <tr>
419     <td width="15%" align="right" valign="top" class="summary">
420       <span class="summary-type">&nbsp;</span>
421     </td><td class="summary">
422         <a name="links"></a><span class="summary-name">links</span> = <code title="True">True</code>
423     </td>
424   </tr>
425 <tr>
426     <td width="15%" align="right" valign="top" class="summary">
427       <span class="summary-type">&nbsp;</span>
428     </td><td class="summary">
429         <a name="meta"></a><span class="summary-name">meta</span> = <code title="True">True</code>
430     </td>
431   </tr>
432 <tr>
433     <td width="15%" align="right" valign="top" class="summary">
434       <span class="summary-type">&nbsp;</span>
435     </td><td class="summary">
436         <a name="page_structure"></a><span class="summary-name">page_structure</span> = <code title="True">True</code>
437     </td>
438   </tr>
439 <tr>
440     <td width="15%" align="right" valign="top" class="summary">
441       <span class="summary-type">&nbsp;</span>
442     </td><td class="summary">
443         <a name="processing_instructions"></a><span class="summary-name">processing_instructions</span> = <code title="True">True</code>
444     </td>
445   </tr>
446 <tr>
447     <td width="15%" align="right" valign="top" class="summary">
448       <span class="summary-type">&nbsp;</span>
449     </td><td class="summary">
450         <a name="embedded"></a><span class="summary-name">embedded</span> = <code title="True">True</code>
451     </td>
452   </tr>
453 <tr>
454     <td width="15%" align="right" valign="top" class="summary">
455       <span class="summary-type">&nbsp;</span>
456     </td><td class="summary">
457         <a name="frames"></a><span class="summary-name">frames</span> = <code title="True">True</code>
458     </td>
459   </tr>
460 <tr>
461     <td width="15%" align="right" valign="top" class="summary">
462       <span class="summary-type">&nbsp;</span>
463     </td><td class="summary">
464         <a name="forms"></a><span class="summary-name">forms</span> = <code title="True">True</code>
465     </td>
466   </tr>
467 <tr>
468     <td width="15%" align="right" valign="top" class="summary">
469       <span class="summary-type">&nbsp;</span>
470     </td><td class="summary">
471         <a name="annoying_tags"></a><span class="summary-name">annoying_tags</span> = <code title="True">True</code>
472     </td>
473   </tr>
474 <tr>
475     <td width="15%" align="right" valign="top" class="summary">
476       <span class="summary-type">&nbsp;</span>
477     </td><td class="summary">
478         <a name="remove_tags"></a><span class="summary-name">remove_tags</span> = <code title="None">None</code><br />
479       hash(x)
480     </td>
481   </tr>
482 <tr>
483     <td width="15%" align="right" valign="top" class="summary">
484       <span class="summary-type">&nbsp;</span>
485     </td><td class="summary">
486         <a name="allow_tags"></a><span class="summary-name">allow_tags</span> = <code title="None">None</code><br />
487       hash(x)
488     </td>
489   </tr>
490 <tr>
491     <td width="15%" align="right" valign="top" class="summary">
492       <span class="summary-type">&nbsp;</span>
493     </td><td class="summary">
494         <a name="kill_tags"></a><span class="summary-name">kill_tags</span> = <code title="None">None</code><br />
495       hash(x)
496     </td>
497   </tr>
498 <tr>
499     <td width="15%" align="right" valign="top" class="summary">
500       <span class="summary-type">&nbsp;</span>
501     </td><td class="summary">
502         <a name="remove_unknown_tags"></a><span class="summary-name">remove_unknown_tags</span> = <code title="True">True</code>
503     </td>
504   </tr>
505 <tr>
506     <td width="15%" align="right" valign="top" class="summary">
507       <span class="summary-type">&nbsp;</span>
508     </td><td class="summary">
509         <a name="safe_attrs_only"></a><span class="summary-name">safe_attrs_only</span> = <code title="True">True</code>
510     </td>
511   </tr>
512 <tr>
513     <td width="15%" align="right" valign="top" class="summary">
514       <span class="summary-type">&nbsp;</span>
515     </td><td class="summary">
516         <a name="add_nofollow"></a><span class="summary-name">add_nofollow</span> = <code title="False">False</code>
517     </td>
518   </tr>
519 <tr>
520     <td width="15%" align="right" valign="top" class="summary">
521       <span class="summary-type">&nbsp;</span>
522     </td><td class="summary">
523         <a name="host_whitelist"></a><span class="summary-name">host_whitelist</span> = <code title="()"><code class="variable-group">(</code><code class="variable-group">)</code></code>
524     </td>
525   </tr>
526 <tr>
527     <td width="15%" align="right" valign="top" class="summary">
528       <span class="summary-type">&nbsp;</span>
529     </td><td class="summary">
530         <a name="whitelist_tags"></a><span class="summary-name">whitelist_tags</span> = <code title="set(['embed', 'iframe'])"><code class="variable-group">set([</code><code class="variable-quote">'</code><code class="variable-string">embed</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">iframe</code><code class="variable-quote">'</code><code class="variable-group">])</code></code>
531     </td>
532   </tr>
533 <tr class="private">
534     <td width="15%" align="right" valign="top" class="summary">
535       <span class="summary-type">&nbsp;</span>
536     </td><td class="summary">
537         <a href="lxml.html.clean.Cleaner-class.html#_tag_link_attrs" class="summary-name" onclick="show_private();">_tag_link_attrs</a> = <code title="{'a': 'href',
538  'applet': ['code', 'object'],
539  'embed': 'src',
540  'iframe': 'src',
541  'layer': 'src',
542  'link': 'href',
543  'script': 'src'}"><code class="variable-group">{</code><code class="variable-quote">'</code><code class="variable-string">a</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-quote">'</code><code class="variable-string">href</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">applet</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-group">[</code><code class="variable-quote">'</code><code class="variable-string">code</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">object</code><code class="variable-quote">'</code><code class="variable-group">]</code><code class="variable-op">, </code><code class="variable-ellipsis">...</code></code>
544     </td>
545   </tr>
546 </table>
547 <!-- ==================== PROPERTIES ==================== -->
548 <a name="section-Properties"></a>
549 <table class="summary" border="1" cellpadding="3"
550        cellspacing="0" width="100%" bgcolor="white">
551 <tr bgcolor="#70b0f0" class="table-header">
552   <td colspan="2" class="table-header">
553     <table border="0" cellpadding="0" cellspacing="0" width="100%">
554       <tr valign="top">
555         <td align="left"><span class="table-header">Properties</span></td>
556         <td align="right" valign="top"
557          ><span class="options">[<a href="#section-Properties"
558          class="privatelink" onclick="toggle_private();"
559          >hide private</a>]</span></td>
560       </tr>
561     </table>
562   </td>
563 </tr>
564   <tr>
565     <td colspan="2" class="summary">
566     <p class="indent-wrapped-lines"><b>Inherited from <code>object</code></b>:
567       <code>__class__</code>
568       </p>
569     </td>
570   </tr>
571 </table>
572 <!-- ==================== METHOD DETAILS ==================== -->
573 <a name="section-MethodDetails"></a>
574 <table class="details" border="1" cellpadding="3"
575        cellspacing="0" width="100%" bgcolor="white">
576 <tr bgcolor="#70b0f0" class="table-header">
577   <td colspan="2" class="table-header">
578     <table border="0" cellpadding="0" cellspacing="0" width="100%">
579       <tr valign="top">
580         <td align="left"><span class="table-header">Method Details</span></td>
581         <td align="right" valign="top"
582          ><span class="options">[<a href="#section-MethodDetails"
583          class="privatelink" onclick="toggle_private();"
584          >hide private</a>]</span></td>
585       </tr>
586     </table>
587   </td>
588 </tr>
589 </table>
590 <a name="__init__"></a>
591 <div>
592 <table class="details" border="1" cellpadding="3"
593        cellspacing="0" width="100%" bgcolor="white">
594 <tr><td>
595   <table width="100%" cellpadding="0" cellspacing="0" border="0">
596   <tr valign="top"><td>
597   <h3 class="epydoc"><span class="sig"><span class="sig-name">__init__</span>(<span class="sig-arg">self</span>,
598         <span class="sig-arg">**kw</span>)</span>
599     <br /><em class="fname">(Constructor)</em>
600   </h3>
601   </td><td align="right" valign="top"
602     ><span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.__init__">source&nbsp;code</a></span>&nbsp;
603     </td>
604   </tr></table>
605   
606   x.__init__(...) initializes x; see help(type(x)) for signature
607   <dl class="fields">
608     <dt>Overrides:
609         object.__init__
610     </dt>
611         <dd><em class="note">(inherited documentation)</em></dd>
612   </dl>
613 </td></tr></table>
614 </div>
615 <a name="_has_sneaky_javascript"></a>
616 <div class="private">
617 <table class="details" border="1" cellpadding="3"
618        cellspacing="0" width="100%" bgcolor="white">
619 <tr><td>
620   <table width="100%" cellpadding="0" cellspacing="0" border="0">
621   <tr valign="top"><td>
622   <h3 class="epydoc"><span class="sig"><span class="sig-name">_has_sneaky_javascript</span>(<span class="sig-arg">self</span>,
623         <span class="sig-arg">style</span>)</span>
624   </h3>
625   </td><td align="right" valign="top"
626     ><span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner._has_sneaky_javascript">source&nbsp;code</a></span>&nbsp;
627     </td>
628   </tr></table>
629   
630   <p>Depending on the browser, stuff like <tt class="rst-docutils literal">e x p r e s s i o <span class="pre">n(...)</span></tt>
631 can get interpreted, or <tt class="rst-docutils literal">expre/* stuff <span class="pre">*/ssion(...)</span></tt>.  This
632 checks for attempts to do stuff like this.</p>
633 <p>Typically the response will be to kill the entire style; if you
634 have just a bit of Javascript in the style another rule will catch
635 that and remove only the Javascript from the style; this catches
636 more sneaky attempts.</p>
637   <dl class="fields">
638   </dl>
639 </td></tr></table>
640 </div>
641 <br />
642 <!-- ==================== CLASS VARIABLE DETAILS ==================== -->
643 <a name="section-ClassVariableDetails"></a>
644 <table class="details" border="1" cellpadding="3"
645        cellspacing="0" width="100%" bgcolor="white">
646 <tr bgcolor="#70b0f0" class="table-header">
647   <td colspan="2" class="table-header">
648     <table border="0" cellpadding="0" cellspacing="0" width="100%">
649       <tr valign="top">
650         <td align="left"><span class="table-header">Class Variable Details</span></td>
651         <td align="right" valign="top"
652          ><span class="options">[<a href="#section-ClassVariableDetails"
653          class="privatelink" onclick="toggle_private();"
654          >hide private</a>]</span></td>
655       </tr>
656     </table>
657   </td>
658 </tr>
659 </table>
660 <a name="_tag_link_attrs"></a>
661 <div class="private">
662 <table class="details" border="1" cellpadding="3"
663        cellspacing="0" width="100%" bgcolor="white">
664 <tr><td>
665   <h3 class="epydoc">_tag_link_attrs</h3>
666   
667   <dl class="fields">
668   </dl>
669   <dl class="fields">
670     <dt>Value:</dt>
671       <dd><table><tr><td><pre class="variable">
672 <code class="variable-group">{</code><code class="variable-quote">'</code><code class="variable-string">a</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-quote">'</code><code class="variable-string">href</code><code class="variable-quote">'</code><code class="variable-op">,</code>
673  <code class="variable-quote">'</code><code class="variable-string">applet</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-group">[</code><code class="variable-quote">'</code><code class="variable-string">code</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">object</code><code class="variable-quote">'</code><code class="variable-group">]</code><code class="variable-op">,</code>
674  <code class="variable-quote">'</code><code class="variable-string">embed</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-quote">'</code><code class="variable-string">src</code><code class="variable-quote">'</code><code class="variable-op">,</code>
675  <code class="variable-quote">'</code><code class="variable-string">iframe</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-quote">'</code><code class="variable-string">src</code><code class="variable-quote">'</code><code class="variable-op">,</code>
676  <code class="variable-quote">'</code><code class="variable-string">layer</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-quote">'</code><code class="variable-string">src</code><code class="variable-quote">'</code><code class="variable-op">,</code>
677  <code class="variable-quote">'</code><code class="variable-string">link</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-quote">'</code><code class="variable-string">href</code><code class="variable-quote">'</code><code class="variable-op">,</code>
678  <code class="variable-quote">'</code><code class="variable-string">script</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-quote">'</code><code class="variable-string">src</code><code class="variable-quote">'</code><code class="variable-group">}</code>
679 </pre></td></tr></table>
680 </dd>
681   </dl>
682 </td></tr></table>
683 </div>
684 <br />
685 <!-- ==================== NAVIGATION BAR ==================== -->
686 <table class="navbar" border="0" width="100%" cellpadding="0"
687        bgcolor="#a0c0ff" cellspacing="0">
688   <tr valign="middle">
689   <!-- Home link -->
690       <th>&nbsp;&nbsp;&nbsp;<a
691         href="lxml-module.html">Home</a>&nbsp;&nbsp;&nbsp;</th>
692
693   <!-- Tree link -->
694       <th>&nbsp;&nbsp;&nbsp;<a
695         href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
696
697   <!-- Index link -->
698       <th>&nbsp;&nbsp;&nbsp;<a
699         href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
700
701   <!-- Help link -->
702       <th>&nbsp;&nbsp;&nbsp;<a
703         href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
704
705   <!-- Project homepage -->
706       <th class="navbar" align="right" width="100%">
707         <table border="0" cellpadding="0" cellspacing="0">
708           <tr><th class="navbar" align="center"
709             ><a class="navbar" target="_top" href="/">lxml API</a></th>
710           </tr></table></th>
711   </tr>
712 </table>
713 <table border="0" cellpadding="0" cellspacing="0" width="100%">
714   <tr>
715     <td align="left" class="footer">
716     Generated by Epydoc 3.0.1 on Tue Jul 31 10:14:19 2012
717     </td>
718     <td align="right" class="footer">
719       <a target="mainFrame" href="http://epydoc.sourceforge.net"
720         >http://epydoc.sourceforge.net</a>
721     </td>
722   </tr>
723 </table>
724
725 <script type="text/javascript">
726   <!--
727   // Private objects are initially displayed (because if
728   // javascript is turned off then we want them to be
729   // visible); but by default, we want to hide them.  So hide
730   // them unless we have a cookie that says to show them.
731   checkCookie();
732   // -->
733 </script>
734 </body>
735 </html>