Upload Tizen:Base source
[toolchains/python-lxml.git] / doc / html / api / lxml.html.clean.Cleaner-class.html
1 <?xml version="1.0" encoding="ascii"?>
2 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
3           "DTD/xhtml1-transitional.dtd">
4 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
5 <head>
6   <title>lxml.html.clean.Cleaner</title>
7   <link rel="stylesheet" href="epydoc.css" type="text/css" />
8   <script type="text/javascript" src="epydoc.js"></script>
9 </head>
10
11 <body bgcolor="white" text="black" link="blue" vlink="#204080"
12       alink="#204080">
13 <!-- ==================== NAVIGATION BAR ==================== -->
14 <table class="navbar" border="0" width="100%" cellpadding="0"
15        bgcolor="#a0c0ff" cellspacing="0">
16   <tr valign="middle">
17   <!-- Home link -->
18       <th>&nbsp;&nbsp;&nbsp;<a
19         href="lxml-module.html">Home</a>&nbsp;&nbsp;&nbsp;</th>
20
21   <!-- Tree link -->
22       <th>&nbsp;&nbsp;&nbsp;<a
23         href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
24
25   <!-- Index link -->
26       <th>&nbsp;&nbsp;&nbsp;<a
27         href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
28
29   <!-- Help link -->
30       <th>&nbsp;&nbsp;&nbsp;<a
31         href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
32
33   <!-- Project homepage -->
34       <th class="navbar" align="right" width="100%">
35         <table border="0" cellpadding="0" cellspacing="0">
36           <tr><th class="navbar" align="center"
37             ><a class="navbar" target="_top" href="http://codespeak.net/lxml/">lxml API</a></th>
38           </tr></table></th>
39   </tr>
40 </table>
41 <table width="100%" cellpadding="0" cellspacing="0">
42   <tr valign="top">
43     <td width="100%">
44       <span class="breadcrumbs">
45         <a href="lxml-module.html">Package&nbsp;lxml</a> ::
46         <a href="lxml.html-module.html">Package&nbsp;html</a> ::
47         <a href="lxml.html.clean-module.html">Module&nbsp;clean</a> ::
48         Class&nbsp;Cleaner
49       </span>
50     </td>
51     <td>
52       <table cellpadding="0" cellspacing="0">
53         <!-- hide/show private -->
54         <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
55     onclick="toggle_private();">hide&nbsp;private</a>]</span></td></tr>
56         <tr><td align="right"><span class="options"
57             >[<a href="frames.html" target="_top">frames</a
58             >]&nbsp;|&nbsp;[<a href="lxml.html.clean.Cleaner-class.html"
59             target="_top">no&nbsp;frames</a>]</span></td></tr>
60       </table>
61     </td>
62   </tr>
63 </table>
64 <!-- ==================== CLASS DESCRIPTION ==================== -->
65 <h1 class="epydoc">Class Cleaner</h1><p class="nomargin-top"><span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner">source&nbsp;code</a></span></p>
66 <pre class="base-tree">
67 object --+
68          |
69         <strong class="uidshort">Cleaner</strong>
70 </pre>
71
72 <hr />
73 <p>Instances clean the document of each of the possible offending
74 elements.  The cleaning is controlled by attributes; you can
75 override attributes in a subclass, or set them in the constructor.</p>
76 <dl class="rst-docutils">
77 <dt><tt class="rst-docutils literal"><span class="pre">scripts</span></tt>:</dt>
78 <dd>Removes any <tt class="rst-docutils literal"><span class="pre">&lt;script&gt;</span></tt> tags.</dd>
79 <dt><tt class="rst-docutils literal"><span class="pre">javascript</span></tt>:</dt>
80 <dd>Removes any Javascript, like an <tt class="rst-docutils literal"><span class="pre">onclick</span></tt> attribute.</dd>
81 <dt><tt class="rst-docutils literal"><span class="pre">comments</span></tt>:</dt>
82 <dd>Removes any comments.</dd>
83 <dt><tt class="rst-docutils literal"><span class="pre">style</span></tt>:</dt>
84 <dd>Removes any style tags or attributes.</dd>
85 <dt><tt class="rst-docutils literal"><span class="pre">links</span></tt>:</dt>
86 <dd>Removes any <tt class="rst-docutils literal"><span class="pre">&lt;link&gt;</span></tt> tags</dd>
87 <dt><tt class="rst-docutils literal"><span class="pre">meta</span></tt>:</dt>
88 <dd>Removes any <tt class="rst-docutils literal"><span class="pre">&lt;meta&gt;</span></tt> tags</dd>
89 <dt><tt class="rst-docutils literal"><span class="pre">page_structure</span></tt>:</dt>
90 <dd>Structural parts of a page: <tt class="rst-docutils literal"><span class="pre">&lt;head&gt;</span></tt>, <tt class="rst-docutils literal"><span class="pre">&lt;html&gt;</span></tt>, <tt class="rst-docutils literal"><span class="pre">&lt;title&gt;</span></tt>.</dd>
91 <dt><tt class="rst-docutils literal"><span class="pre">processing_instructions</span></tt>:</dt>
92 <dd>Removes any processing instructions.</dd>
93 <dt><tt class="rst-docutils literal"><span class="pre">embedded</span></tt>:</dt>
94 <dd>Removes any embedded objects (flash, iframes)</dd>
95 <dt><tt class="rst-docutils literal"><span class="pre">frames</span></tt>:</dt>
96 <dd>Removes any frame-related tags</dd>
97 <dt><tt class="rst-docutils literal"><span class="pre">forms</span></tt>:</dt>
98 <dd>Removes any form tags</dd>
99 <dt><tt class="rst-docutils literal"><span class="pre">annoying_tags</span></tt>:</dt>
100 <dd>Tags that aren't <em>wrong</em>, but are annoying.  <tt class="rst-docutils literal"><span class="pre">&lt;blink&gt;</span></tt> and <tt class="rst-docutils literal"><span class="pre">&lt;marquee&gt;</span></tt></dd>
101 <dt><tt class="rst-docutils literal"><span class="pre">remove_tags</span></tt>:</dt>
102 <dd>A list of tags to remove.</dd>
103 <dt><tt class="rst-docutils literal"><span class="pre">allow_tags</span></tt>:</dt>
104 <dd>A list of tags to include (default include all).</dd>
105 <dt><tt class="rst-docutils literal"><span class="pre">remove_unknown_tags</span></tt>:</dt>
106 <dd>Remove any tags that aren't standard parts of HTML.</dd>
107 <dt><tt class="rst-docutils literal"><span class="pre">safe_attrs_only</span></tt>:</dt>
108 <dd>If true, only include 'safe' attributes (specifically the list
109 from <a class="rst-reference external" href="http://feedparser.org/docs/html-sanitization.html" target="_top">feedparser</a>).</dd>
110 <dt><tt class="rst-docutils literal"><span class="pre">add_nofollow</span></tt>:</dt>
111 <dd>If true, then any &lt;a&gt; tags will have <tt class="rst-docutils literal"><span class="pre">rel=&quot;nofollow&quot;</span></tt> added to them.</dd>
112 <dt><tt class="rst-docutils literal"><span class="pre">host_whitelist</span></tt>:</dt>
113 <dd><p class="rst-first">A list or set of hosts that you can use for embedded content
114 (for content like <tt class="rst-docutils literal"><span class="pre">&lt;object&gt;</span></tt>, <tt class="rst-docutils literal"><span class="pre">&lt;link</span> <span class="pre">rel=&quot;stylesheet&quot;&gt;</span></tt>, etc).
115 You can also implement/override the method
116 <tt class="rst-docutils literal"><span class="pre">allow_embedded_url(el,</span> <span class="pre">url)</span></tt> or <tt class="rst-docutils literal"><span class="pre">allow_element(el)</span></tt> to
117 implement more complex rules for what can be embedded.
118 Anything that passes this test will be shown, regardless of
119 the value of (for instance) <tt class="rst-docutils literal"><span class="pre">embedded</span></tt>.</p>
120 <p class="rst-last">Note that this parameter might not work as intended if you do not
121 make the links absolute before doing the cleaning.</p>
122 </dd>
123 <dt><tt class="rst-docutils literal"><span class="pre">whitelist_tags</span></tt>:</dt>
124 <dd>A set of tags that can be included with <tt class="rst-docutils literal"><span class="pre">host_whitelist</span></tt>.
125 The default is <tt class="rst-docutils literal"><span class="pre">iframe</span></tt> and <tt class="rst-docutils literal"><span class="pre">embed</span></tt>; you may wish to
126 include other tags like <tt class="rst-docutils literal"><span class="pre">script</span></tt>, or you may want to
127 implement <tt class="rst-docutils literal"><span class="pre">allow_embedded_url</span></tt> for more control.  Set to None to
128 include all tags.</dd>
129 </dl>
130 <p>This modifies the document <em>in place</em>.</p>
131
132 <!-- ==================== INSTANCE METHODS ==================== -->
133 <a name="section-InstanceMethods"></a>
134 <table class="summary" border="1" cellpadding="3"
135        cellspacing="0" width="100%" bgcolor="white">
136 <tr bgcolor="#70b0f0" class="table-header">
137   <td colspan="2" class="table-header">
138     <table border="0" cellpadding="0" cellspacing="0" width="100%">
139       <tr valign="top">
140         <td align="left"><span class="table-header">Instance Methods</span></td>
141         <td align="right" valign="top"
142          ><span class="options">[<a href="#section-InstanceMethods"
143          class="privatelink" onclick="toggle_private();"
144          >hide private</a>]</span></td>
145       </tr>
146     </table>
147   </td>
148 </tr>
149 <tr>
150     <td width="15%" align="right" valign="top" class="summary">
151       <span class="summary-type">&nbsp;</span>
152     </td><td class="summary">
153       <table width="100%" cellpadding="0" cellspacing="0" border="0">
154         <tr>
155           <td><span class="summary-sig"><a href="lxml.html.clean.Cleaner-class.html#__init__" class="summary-sig-name">__init__</a>(<span class="summary-sig-arg">self</span>,
156         <span class="summary-sig-arg">**kw</span>)</span><br />
157       x.__init__(...) initializes x; see x.__class__.__doc__ for signature</td>
158           <td align="right" valign="top">
159             <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.__init__">source&nbsp;code</a></span>
160             
161           </td>
162         </tr>
163       </table>
164       
165     </td>
166   </tr>
167 <tr>
168     <td width="15%" align="right" valign="top" class="summary">
169       <span class="summary-type">&nbsp;</span>
170     </td><td class="summary">
171       <table width="100%" cellpadding="0" cellspacing="0" border="0">
172         <tr>
173           <td><span class="summary-sig"><a name="__call__"></a><span class="summary-sig-name">__call__</span>(<span class="summary-sig-arg">self</span>,
174         <span class="summary-sig-arg">doc</span>)</span><br />
175       Cleans the document.</td>
176           <td align="right" valign="top">
177             <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.__call__">source&nbsp;code</a></span>
178             
179           </td>
180         </tr>
181       </table>
182       
183     </td>
184   </tr>
185 <tr>
186     <td width="15%" align="right" valign="top" class="summary">
187       <span class="summary-type">&nbsp;</span>
188     </td><td class="summary">
189       <table width="100%" cellpadding="0" cellspacing="0" border="0">
190         <tr>
191           <td><span class="summary-sig"><a name="allow_follow"></a><span class="summary-sig-name">allow_follow</span>(<span class="summary-sig-arg">self</span>,
192         <span class="summary-sig-arg">anchor</span>)</span><br />
193       Override to suppress rel=&quot;nofollow&quot; on some anchors.</td>
194           <td align="right" valign="top">
195             <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.allow_follow">source&nbsp;code</a></span>
196             
197           </td>
198         </tr>
199       </table>
200       
201     </td>
202   </tr>
203 <tr>
204     <td width="15%" align="right" valign="top" class="summary">
205       <span class="summary-type">&nbsp;</span>
206     </td><td class="summary">
207       <table width="100%" cellpadding="0" cellspacing="0" border="0">
208         <tr>
209           <td><span class="summary-sig"><a name="allow_element"></a><span class="summary-sig-name">allow_element</span>(<span class="summary-sig-arg">self</span>,
210         <span class="summary-sig-arg">el</span>)</span></td>
211           <td align="right" valign="top">
212             <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.allow_element">source&nbsp;code</a></span>
213             
214           </td>
215         </tr>
216       </table>
217       
218     </td>
219   </tr>
220 <tr>
221     <td width="15%" align="right" valign="top" class="summary">
222       <span class="summary-type">&nbsp;</span>
223     </td><td class="summary">
224       <table width="100%" cellpadding="0" cellspacing="0" border="0">
225         <tr>
226           <td><span class="summary-sig"><a name="allow_embedded_url"></a><span class="summary-sig-name">allow_embedded_url</span>(<span class="summary-sig-arg">self</span>,
227         <span class="summary-sig-arg">el</span>,
228         <span class="summary-sig-arg">url</span>)</span></td>
229           <td align="right" valign="top">
230             <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.allow_embedded_url">source&nbsp;code</a></span>
231             
232           </td>
233         </tr>
234       </table>
235       
236     </td>
237   </tr>
238 <tr>
239     <td width="15%" align="right" valign="top" class="summary">
240       <span class="summary-type">&nbsp;</span>
241     </td><td class="summary">
242       <table width="100%" cellpadding="0" cellspacing="0" border="0">
243         <tr>
244           <td><span class="summary-sig"><a href="lxml.html.clean.Cleaner-class.html#kill_conditional_comments" class="summary-sig-name">kill_conditional_comments</a>(<span class="summary-sig-arg">self</span>,
245         <span class="summary-sig-arg">doc</span>)</span><br />
246       IE conditional comments basically embed HTML that the parser
247 doesn't normally see.</td>
248           <td align="right" valign="top">
249             <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.kill_conditional_comments">source&nbsp;code</a></span>
250             
251           </td>
252         </tr>
253       </table>
254       
255     </td>
256   </tr>
257 <tr class="private">
258     <td width="15%" align="right" valign="top" class="summary">
259       <span class="summary-type">&nbsp;</span>
260     </td><td class="summary">
261       <table width="100%" cellpadding="0" cellspacing="0" border="0">
262         <tr>
263           <td><span class="summary-sig"><a name="_kill_elements"></a><span class="summary-sig-name">_kill_elements</span>(<span class="summary-sig-arg">self</span>,
264         <span class="summary-sig-arg">doc</span>,
265         <span class="summary-sig-arg">condition</span>,
266         <span class="summary-sig-arg">iterate</span>=<span class="summary-sig-default">None</span>)</span></td>
267           <td align="right" valign="top">
268             <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner._kill_elements">source&nbsp;code</a></span>
269             
270           </td>
271         </tr>
272       </table>
273       
274     </td>
275   </tr>
276 <tr class="private">
277     <td width="15%" align="right" valign="top" class="summary">
278       <span class="summary-type">&nbsp;</span>
279     </td><td class="summary">
280       <table width="100%" cellpadding="0" cellspacing="0" border="0">
281         <tr>
282           <td><span class="summary-sig"><a name="_remove_javascript_link"></a><span class="summary-sig-name">_remove_javascript_link</span>(<span class="summary-sig-arg">self</span>,
283         <span class="summary-sig-arg">link</span>)</span></td>
284           <td align="right" valign="top">
285             <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner._remove_javascript_link">source&nbsp;code</a></span>
286             
287           </td>
288         </tr>
289       </table>
290       
291     </td>
292   </tr>
293 <tr class="private">
294     <td width="15%" align="right" valign="top" class="summary">
295       <span class="summary-type">&nbsp;</span>
296     </td><td class="summary">
297       <table width="100%" cellpadding="0" cellspacing="0" border="0">
298         <tr>
299           <td><span class="summary-sig"><a name="_substitute_comments"></a><span class="summary-sig-name">_substitute_comments</span>(<span class="summary-sig-arg">...</span>)</span><br />
300       sub(repl, string[, count = 0]) --&gt; newstring
301 Return the string obtained by replacing the leftmost non-overlapping
302 occurrences of pattern in string by the replacement repl.</td>
303           <td align="right" valign="top">
304             <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner._substitute_comments">source&nbsp;code</a></span>
305             
306           </td>
307         </tr>
308       </table>
309       
310     </td>
311   </tr>
312 <tr class="private">
313     <td width="15%" align="right" valign="top" class="summary">
314       <span class="summary-type">&nbsp;</span>
315     </td><td class="summary">
316       <table width="100%" cellpadding="0" cellspacing="0" border="0">
317         <tr>
318           <td><span class="summary-sig"><a href="lxml.html.clean.Cleaner-class.html#_has_sneaky_javascript" class="summary-sig-name" onclick="show_private();">_has_sneaky_javascript</a>(<span class="summary-sig-arg">self</span>,
319         <span class="summary-sig-arg">style</span>)</span><br />
320       Depending on the browser, stuff like <tt class="rst-docutils literal"><span class="pre">e</span> <span class="pre">x</span> <span class="pre">p</span> <span class="pre">r</span> <span class="pre">e</span> <span class="pre">s</span> <span class="pre">s</span> <span class="pre">i</span> <span class="pre">o</span> <span class="pre">n(...)</span></tt>
321 can get interpreted, or <tt class="rst-docutils literal"><span class="pre">expre/*</span> <span class="pre">stuff</span> <span class="pre">*/ssion(...)</span></tt>.</td>
322           <td align="right" valign="top">
323             <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner._has_sneaky_javascript">source&nbsp;code</a></span>
324             
325           </td>
326         </tr>
327       </table>
328       
329     </td>
330   </tr>
331 <tr>
332     <td width="15%" align="right" valign="top" class="summary">
333       <span class="summary-type">&nbsp;</span>
334     </td><td class="summary">
335       <table width="100%" cellpadding="0" cellspacing="0" border="0">
336         <tr>
337           <td><span class="summary-sig"><a name="clean_html"></a><span class="summary-sig-name">clean_html</span>(<span class="summary-sig-arg">self</span>,
338         <span class="summary-sig-arg">html</span>)</span></td>
339           <td align="right" valign="top">
340             <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.clean_html">source&nbsp;code</a></span>
341             
342           </td>
343         </tr>
344       </table>
345       
346     </td>
347   </tr>
348   <tr>
349     <td colspan="2" class="summary">
350     <p class="indent-wrapped-lines"><b>Inherited from <code>object</code></b>:
351       <code>__delattr__</code>,
352       <code>__getattribute__</code>,
353       <code>__hash__</code>,
354       <code>__new__</code>,
355       <code>__reduce__</code>,
356       <code>__reduce_ex__</code>,
357       <code>__repr__</code>,
358       <code>__setattr__</code>,
359       <code>__str__</code>
360       </p>
361     </td>
362   </tr>
363 </table>
364 <!-- ==================== CLASS VARIABLES ==================== -->
365 <a name="section-ClassVariables"></a>
366 <table class="summary" border="1" cellpadding="3"
367        cellspacing="0" width="100%" bgcolor="white">
368 <tr bgcolor="#70b0f0" class="table-header">
369   <td colspan="2" class="table-header">
370     <table border="0" cellpadding="0" cellspacing="0" width="100%">
371       <tr valign="top">
372         <td align="left"><span class="table-header">Class Variables</span></td>
373         <td align="right" valign="top"
374          ><span class="options">[<a href="#section-ClassVariables"
375          class="privatelink" onclick="toggle_private();"
376          >hide private</a>]</span></td>
377       </tr>
378     </table>
379   </td>
380 </tr>
381 <tr>
382     <td width="15%" align="right" valign="top" class="summary">
383       <span class="summary-type">&nbsp;</span>
384     </td><td class="summary">
385         <a name="scripts"></a><span class="summary-name">scripts</span> = <code title="True">True</code>
386     </td>
387   </tr>
388 <tr>
389     <td width="15%" align="right" valign="top" class="summary">
390       <span class="summary-type">&nbsp;</span>
391     </td><td class="summary">
392         <a name="javascript"></a><span class="summary-name">javascript</span> = <code title="True">True</code>
393     </td>
394   </tr>
395 <tr>
396     <td width="15%" align="right" valign="top" class="summary">
397       <span class="summary-type">&nbsp;</span>
398     </td><td class="summary">
399         <a name="comments"></a><span class="summary-name">comments</span> = <code title="True">True</code>
400     </td>
401   </tr>
402 <tr>
403     <td width="15%" align="right" valign="top" class="summary">
404       <span class="summary-type">&nbsp;</span>
405     </td><td class="summary">
406         <a name="style"></a><span class="summary-name">style</span> = <code title="False">False</code>
407     </td>
408   </tr>
409 <tr>
410     <td width="15%" align="right" valign="top" class="summary">
411       <span class="summary-type">&nbsp;</span>
412     </td><td class="summary">
413         <a name="links"></a><span class="summary-name">links</span> = <code title="True">True</code>
414     </td>
415   </tr>
416 <tr>
417     <td width="15%" align="right" valign="top" class="summary">
418       <span class="summary-type">&nbsp;</span>
419     </td><td class="summary">
420         <a name="meta"></a><span class="summary-name">meta</span> = <code title="True">True</code>
421     </td>
422   </tr>
423 <tr>
424     <td width="15%" align="right" valign="top" class="summary">
425       <span class="summary-type">&nbsp;</span>
426     </td><td class="summary">
427         <a name="page_structure"></a><span class="summary-name">page_structure</span> = <code title="True">True</code>
428     </td>
429   </tr>
430 <tr>
431     <td width="15%" align="right" valign="top" class="summary">
432       <span class="summary-type">&nbsp;</span>
433     </td><td class="summary">
434         <a name="processing_instructions"></a><span class="summary-name">processing_instructions</span> = <code title="True">True</code>
435     </td>
436   </tr>
437 <tr>
438     <td width="15%" align="right" valign="top" class="summary">
439       <span class="summary-type">&nbsp;</span>
440     </td><td class="summary">
441         <a name="embedded"></a><span class="summary-name">embedded</span> = <code title="True">True</code>
442     </td>
443   </tr>
444 <tr>
445     <td width="15%" align="right" valign="top" class="summary">
446       <span class="summary-type">&nbsp;</span>
447     </td><td class="summary">
448         <a name="frames"></a><span class="summary-name">frames</span> = <code title="True">True</code>
449     </td>
450   </tr>
451 <tr>
452     <td width="15%" align="right" valign="top" class="summary">
453       <span class="summary-type">&nbsp;</span>
454     </td><td class="summary">
455         <a name="forms"></a><span class="summary-name">forms</span> = <code title="True">True</code>
456     </td>
457   </tr>
458 <tr>
459     <td width="15%" align="right" valign="top" class="summary">
460       <span class="summary-type">&nbsp;</span>
461     </td><td class="summary">
462         <a name="annoying_tags"></a><span class="summary-name">annoying_tags</span> = <code title="True">True</code>
463     </td>
464   </tr>
465 <tr>
466     <td width="15%" align="right" valign="top" class="summary">
467       <span class="summary-type">&nbsp;</span>
468     </td><td class="summary">
469         <a name="remove_tags"></a><span class="summary-name">remove_tags</span> = <code title="None">None</code>
470     </td>
471   </tr>
472 <tr>
473     <td width="15%" align="right" valign="top" class="summary">
474       <span class="summary-type">&nbsp;</span>
475     </td><td class="summary">
476         <a name="allow_tags"></a><span class="summary-name">allow_tags</span> = <code title="None">None</code>
477     </td>
478   </tr>
479 <tr>
480     <td width="15%" align="right" valign="top" class="summary">
481       <span class="summary-type">&nbsp;</span>
482     </td><td class="summary">
483         <a name="remove_unknown_tags"></a><span class="summary-name">remove_unknown_tags</span> = <code title="True">True</code>
484     </td>
485   </tr>
486 <tr>
487     <td width="15%" align="right" valign="top" class="summary">
488       <span class="summary-type">&nbsp;</span>
489     </td><td class="summary">
490         <a name="safe_attrs_only"></a><span class="summary-name">safe_attrs_only</span> = <code title="True">True</code>
491     </td>
492   </tr>
493 <tr>
494     <td width="15%" align="right" valign="top" class="summary">
495       <span class="summary-type">&nbsp;</span>
496     </td><td class="summary">
497         <a name="add_nofollow"></a><span class="summary-name">add_nofollow</span> = <code title="False">False</code>
498     </td>
499   </tr>
500 <tr>
501     <td width="15%" align="right" valign="top" class="summary">
502       <span class="summary-type">&nbsp;</span>
503     </td><td class="summary">
504         <a name="host_whitelist"></a><span class="summary-name">host_whitelist</span> = <code title="()"><code class="variable-group">(</code><code class="variable-group">)</code></code>
505     </td>
506   </tr>
507 <tr>
508     <td width="15%" align="right" valign="top" class="summary">
509       <span class="summary-type">&nbsp;</span>
510     </td><td class="summary">
511         <a name="whitelist_tags"></a><span class="summary-name">whitelist_tags</span> = <code title="set(['embed', 'iframe'])"><code class="variable-group">set([</code><code class="variable-quote">'</code><code class="variable-string">embed</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">iframe</code><code class="variable-quote">'</code><code class="variable-group">])</code></code>
512     </td>
513   </tr>
514 <tr class="private">
515     <td width="15%" align="right" valign="top" class="summary">
516       <span class="summary-type">&nbsp;</span>
517     </td><td class="summary">
518         <a href="lxml.html.clean.Cleaner-class.html#_tag_link_attrs" class="summary-name" onclick="show_private();">_tag_link_attrs</a> = <code title="{'a': 'href',
519  'applet': ['code', 'object'],
520  'embed': 'src',
521  'iframe': 'src',
522  'layer': 'src',
523  'link': 'href',
524  'script': 'src'}"><code class="variable-group">{</code><code class="variable-quote">'</code><code class="variable-string">a</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-quote">'</code><code class="variable-string">href</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">applet</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-group">[</code><code class="variable-quote">'</code><code class="variable-string">code</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">object</code><code class="variable-quote">'</code><code class="variable-group">]</code><code class="variable-op">, </code><code class="variable-ellipsis">...</code></code>
525     </td>
526   </tr>
527 </table>
528 <!-- ==================== PROPERTIES ==================== -->
529 <a name="section-Properties"></a>
530 <table class="summary" border="1" cellpadding="3"
531        cellspacing="0" width="100%" bgcolor="white">
532 <tr bgcolor="#70b0f0" class="table-header">
533   <td colspan="2" class="table-header">
534     <table border="0" cellpadding="0" cellspacing="0" width="100%">
535       <tr valign="top">
536         <td align="left"><span class="table-header">Properties</span></td>
537         <td align="right" valign="top"
538          ><span class="options">[<a href="#section-Properties"
539          class="privatelink" onclick="toggle_private();"
540          >hide private</a>]</span></td>
541       </tr>
542     </table>
543   </td>
544 </tr>
545   <tr>
546     <td colspan="2" class="summary">
547     <p class="indent-wrapped-lines"><b>Inherited from <code>object</code></b>:
548       <code>__class__</code>
549       </p>
550     </td>
551   </tr>
552 </table>
553 <!-- ==================== METHOD DETAILS ==================== -->
554 <a name="section-MethodDetails"></a>
555 <table class="details" border="1" cellpadding="3"
556        cellspacing="0" width="100%" bgcolor="white">
557 <tr bgcolor="#70b0f0" class="table-header">
558   <td colspan="2" class="table-header">
559     <table border="0" cellpadding="0" cellspacing="0" width="100%">
560       <tr valign="top">
561         <td align="left"><span class="table-header">Method Details</span></td>
562         <td align="right" valign="top"
563          ><span class="options">[<a href="#section-MethodDetails"
564          class="privatelink" onclick="toggle_private();"
565          >hide private</a>]</span></td>
566       </tr>
567     </table>
568   </td>
569 </tr>
570 </table>
571 <a name="__init__"></a>
572 <div>
573 <table class="details" border="1" cellpadding="3"
574        cellspacing="0" width="100%" bgcolor="white">
575 <tr><td>
576   <table width="100%" cellpadding="0" cellspacing="0" border="0">
577   <tr valign="top"><td>
578   <h3 class="epydoc"><span class="sig"><span class="sig-name">__init__</span>(<span class="sig-arg">self</span>,
579         <span class="sig-arg">**kw</span>)</span>
580     <br /><em class="fname">(Constructor)</em>
581   </h3>
582   </td><td align="right" valign="top"
583     ><span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.__init__">source&nbsp;code</a></span>&nbsp;
584     </td>
585   </tr></table>
586   
587   x.__init__(...) initializes x; see x.__class__.__doc__ for signature
588   <dl class="fields">
589     <dt>Overrides:
590         object.__init__
591     </dt>
592         <dd><em class="note">(inherited documentation)</em></dd>
593   </dl>
594 </td></tr></table>
595 </div>
596 <a name="kill_conditional_comments"></a>
597 <div>
598 <table class="details" border="1" cellpadding="3"
599        cellspacing="0" width="100%" bgcolor="white">
600 <tr><td>
601   <table width="100%" cellpadding="0" cellspacing="0" border="0">
602   <tr valign="top"><td>
603   <h3 class="epydoc"><span class="sig"><span class="sig-name">kill_conditional_comments</span>(<span class="sig-arg">self</span>,
604         <span class="sig-arg">doc</span>)</span>
605   </h3>
606   </td><td align="right" valign="top"
607     ><span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.kill_conditional_comments">source&nbsp;code</a></span>&nbsp;
608     </td>
609   </tr></table>
610   
611   IE conditional comments basically embed HTML that the parser
612 doesn't normally see.  We can't allow anything like that, so
613 we'll kill any comments that could be conditional.
614   <dl class="fields">
615   </dl>
616 </td></tr></table>
617 </div>
618 <a name="_has_sneaky_javascript"></a>
619 <div class="private">
620 <table class="details" border="1" cellpadding="3"
621        cellspacing="0" width="100%" bgcolor="white">
622 <tr><td>
623   <table width="100%" cellpadding="0" cellspacing="0" border="0">
624   <tr valign="top"><td>
625   <h3 class="epydoc"><span class="sig"><span class="sig-name">_has_sneaky_javascript</span>(<span class="sig-arg">self</span>,
626         <span class="sig-arg">style</span>)</span>
627   </h3>
628   </td><td align="right" valign="top"
629     ><span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner._has_sneaky_javascript">source&nbsp;code</a></span>&nbsp;
630     </td>
631   </tr></table>
632   
633   <p>Depending on the browser, stuff like <tt class="rst-rst-docutils literal rst-docutils literal"><span class="pre">e</span> <span class="pre">x</span> <span class="pre">p</span> <span class="pre">r</span> <span class="pre">e</span> <span class="pre">s</span> <span class="pre">s</span> <span class="pre">i</span> <span class="pre">o</span> <span class="pre">n(...)</span></tt>
634 can get interpreted, or <tt class="rst-rst-docutils literal rst-docutils literal"><span class="pre">expre/*</span> <span class="pre">stuff</span> <span class="pre">*/ssion(...)</span></tt>.  This
635 checks for attempts to do stuff like this.</p>
636 <p>Typically the response will be to kill the entire style; if you
637 have just a bit of Javascript in the style another rule will catch
638 that and remove only the Javascript from the style; this catches
639 more sneaky attempts.</p>
640   <dl class="fields">
641   </dl>
642 </td></tr></table>
643 </div>
644 <br />
645 <!-- ==================== CLASS VARIABLE DETAILS ==================== -->
646 <a name="section-ClassVariableDetails"></a>
647 <table class="details" border="1" cellpadding="3"
648        cellspacing="0" width="100%" bgcolor="white">
649 <tr bgcolor="#70b0f0" class="table-header">
650   <td colspan="2" class="table-header">
651     <table border="0" cellpadding="0" cellspacing="0" width="100%">
652       <tr valign="top">
653         <td align="left"><span class="table-header">Class Variable Details</span></td>
654         <td align="right" valign="top"
655          ><span class="options">[<a href="#section-ClassVariableDetails"
656          class="privatelink" onclick="toggle_private();"
657          >hide private</a>]</span></td>
658       </tr>
659     </table>
660   </td>
661 </tr>
662 </table>
663 <a name="_tag_link_attrs"></a>
664 <div class="private">
665 <table class="details" border="1" cellpadding="3"
666        cellspacing="0" width="100%" bgcolor="white">
667 <tr><td>
668   <h3 class="epydoc">_tag_link_attrs</h3>
669   
670   <dl class="fields">
671   </dl>
672   <dl class="fields">
673     <dt>Value:</dt>
674       <dd><table><tr><td><pre class="variable">
675 <code class="variable-group">{</code><code class="variable-quote">'</code><code class="variable-string">a</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-quote">'</code><code class="variable-string">href</code><code class="variable-quote">'</code><code class="variable-op">,</code>
676  <code class="variable-quote">'</code><code class="variable-string">applet</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-group">[</code><code class="variable-quote">'</code><code class="variable-string">code</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">object</code><code class="variable-quote">'</code><code class="variable-group">]</code><code class="variable-op">,</code>
677  <code class="variable-quote">'</code><code class="variable-string">embed</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-quote">'</code><code class="variable-string">src</code><code class="variable-quote">'</code><code class="variable-op">,</code>
678  <code class="variable-quote">'</code><code class="variable-string">iframe</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-quote">'</code><code class="variable-string">src</code><code class="variable-quote">'</code><code class="variable-op">,</code>
679  <code class="variable-quote">'</code><code class="variable-string">layer</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-quote">'</code><code class="variable-string">src</code><code class="variable-quote">'</code><code class="variable-op">,</code>
680  <code class="variable-quote">'</code><code class="variable-string">link</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-quote">'</code><code class="variable-string">href</code><code class="variable-quote">'</code><code class="variable-op">,</code>
681  <code class="variable-quote">'</code><code class="variable-string">script</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-quote">'</code><code class="variable-string">src</code><code class="variable-quote">'</code><code class="variable-group">}</code>
682 </pre></td></tr></table>
683 </dd>
684   </dl>
685 </td></tr></table>
686 </div>
687 <br />
688 <!-- ==================== NAVIGATION BAR ==================== -->
689 <table class="navbar" border="0" width="100%" cellpadding="0"
690        bgcolor="#a0c0ff" cellspacing="0">
691   <tr valign="middle">
692   <!-- Home link -->
693       <th>&nbsp;&nbsp;&nbsp;<a
694         href="lxml-module.html">Home</a>&nbsp;&nbsp;&nbsp;</th>
695
696   <!-- Tree link -->
697       <th>&nbsp;&nbsp;&nbsp;<a
698         href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>
699
700   <!-- Index link -->
701       <th>&nbsp;&nbsp;&nbsp;<a
702         href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>
703
704   <!-- Help link -->
705       <th>&nbsp;&nbsp;&nbsp;<a
706         href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>
707
708   <!-- Project homepage -->
709       <th class="navbar" align="right" width="100%">
710         <table border="0" cellpadding="0" cellspacing="0">
711           <tr><th class="navbar" align="center"
712             ><a class="navbar" target="_top" href="http://codespeak.net/lxml/">lxml API</a></th>
713           </tr></table></th>
714   </tr>
715 </table>
716 <table border="0" cellpadding="0" cellspacing="0" width="100%">
717   <tr>
718     <td align="left" class="footer">
719     Generated by Epydoc 3.0 on Fri Oct 30 14:51:47 2009
720     </td>
721     <td align="right" class="footer">
722       <a target="mainFrame" href="http://epydoc.sourceforge.net"
723         >http://epydoc.sourceforge.net</a>
724     </td>
725   </tr>
726 </table>
727
728 <script type="text/javascript">
729   <!--
730   // Private objects are initially displayed (because if
731   // javascript is turned off then we want them to be
732   // visible); but by default, we want to hide them.  So hide
733   // them unless we have a cookie that says to show them.
734   checkCookie();
735   // -->
736 </script>
737 </body>
738 </html>