1 <?xml version="1.0" encoding="ascii"?>
2 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
3 "DTD/xhtml1-transitional.dtd">
4 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
6 <title>lxml.html.clean.Cleaner</title>
7 <link rel="stylesheet" href="epydoc.css" type="text/css" />
8 <script type="text/javascript" src="epydoc.js"></script>
11 <body bgcolor="white" text="black" link="blue" vlink="#204080"
13 <!-- ==================== NAVIGATION BAR ==================== -->
14 <table class="navbar" border="0" width="100%" cellpadding="0"
15 bgcolor="#a0c0ff" cellspacing="0">
18 <th> <a
19 href="lxml-module.html">Home</a> </th>
22 <th> <a
23 href="module-tree.html">Trees</a> </th>
26 <th> <a
27 href="identifier-index.html">Indices</a> </th>
30 <th> <a
31 href="help.html">Help</a> </th>
33 <!-- Project homepage -->
34 <th class="navbar" align="right" width="100%">
35 <table border="0" cellpadding="0" cellspacing="0">
36 <tr><th class="navbar" align="center"
37 ><a class="navbar" target="_top" href="/">lxml API</a></th>
41 <table width="100%" cellpadding="0" cellspacing="0">
44 <span class="breadcrumbs">
45 <a href="lxml-module.html">Package lxml</a> ::
46 <a href="lxml.html-module.html">Package html</a> ::
47 <a href="lxml.html.clean-module.html">Module clean</a> ::
52 <table cellpadding="0" cellspacing="0">
53 <!-- hide/show private -->
54 <tr><td align="right"><span class="options">[<a href="javascript:void(0);" class="privatelink"
55 onclick="toggle_private();">hide private</a>]</span></td></tr>
56 <tr><td align="right"><span class="options"
57 >[<a href="frames.html" target="_top">frames</a
58 >] | <a href="lxml.html.clean.Cleaner-class.html"
59 target="_top">no frames</a>]</span></td></tr>
64 <!-- ==================== CLASS DESCRIPTION ==================== -->
65 <h1 class="epydoc">Class Cleaner</h1><p class="nomargin-top"><span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner">source code</a></span></p>
66 <pre class="base-tree">
69 <strong class="uidshort">Cleaner</strong>
73 <p>Instances clean the document of each of the possible offending
74 elements. The cleaning is controlled by attributes; you can
75 override attributes in a subclass, or set them in the constructor.</p>
76 <dl class="rst-docutils">
77 <dt><tt class="rst-docutils literal">scripts</tt>:</dt>
78 <dd>Removes any <tt class="rst-docutils literal">&lt;script&gt;</tt> tags.</dd>
79 <dt><tt class="rst-docutils literal">javascript</tt>:</dt>
80 <dd>Removes any Javascript, like an <tt class="rst-docutils literal">onclick</tt> attribute. Also removes stylesheets
81 as they could contain Javascript.</dd>
82 <dt><tt class="rst-docutils literal">comments</tt>:</dt>
83 <dd>Removes any comments.</dd>
84 <dt><tt class="rst-docutils literal">style</tt>:</dt>
85 <dd>Removes any style tags.</dd>
86 <dt><tt class="rst-docutils literal">inline_style</tt>:</dt>
87 <dd>Removes any style attributes. Defaults to the value of the <tt class="rst-docutils literal">style</tt> option.</dd>
88 <dt><tt class="rst-docutils literal">links</tt>:</dt>
89 <dd>Removes any <tt class="rst-docutils literal">&lt;link&gt;</tt> tags.</dd>
90 <dt><tt class="rst-docutils literal">meta</tt>:</dt>
91 <dd>Removes any <tt class="rst-docutils literal">&lt;meta&gt;</tt> tags.</dd>
92 <dt><tt class="rst-docutils literal">page_structure</tt>:</dt>
93 <dd>Structural parts of a page: <tt class="rst-docutils literal">&lt;head&gt;</tt>, <tt class="rst-docutils literal">&lt;html&gt;</tt>, <tt class="rst-docutils literal">&lt;title&gt;</tt>.</dd>
94 <dt><tt class="rst-docutils literal">processing_instructions</tt>:</dt>
95 <dd>Removes any processing instructions.</dd>
96 <dt><tt class="rst-docutils literal">embedded</tt>:</dt>
97 <dd>Removes any embedded objects (flash, iframes)</dd>
98 <dt><tt class="rst-docutils literal">frames</tt>:</dt>
99 <dd>Removes any frame-related tags</dd>
100 <dt><tt class="rst-docutils literal">forms</tt>:</dt>
101 <dd>Removes any form tags</dd>
102 <dt><tt class="rst-docutils literal">annoying_tags</tt>:</dt>
103 <dd>Tags that aren't <em>wrong</em>, but are annoying. <tt class="rst-docutils literal">&lt;blink&gt;</tt> and <tt class="rst-docutils literal">&lt;marquee&gt;</tt></dd>
104 <dt><tt class="rst-docutils literal">remove_tags</tt>:</dt>
105 <dd>A list of tags to remove. Only the tags will be removed,
106 their content will get pulled up into the parent tag.</dd>
107 <dt><tt class="rst-docutils literal">kill_tags</tt>:</dt>
108 <dd>A list of tags to kill. Killing also removes the tag's content,
109 i.e. the whole subtree, not just the tag itself.</dd>
110 <dt><tt class="rst-docutils literal">allow_tags</tt>:</dt>
111 <dd>A list of tags to include (default include all).</dd>
112 <dt><tt class="rst-docutils literal">remove_unknown_tags</tt>:</dt>
113 <dd>Remove any tags that aren't standard parts of HTML.</dd>
114 <dt><tt class="rst-docutils literal">safe_attrs_only</tt>:</dt>
115 <dd>If true, only include 'safe' attributes (specifically the list
116 from the feedparser HTML sanitisation web site).</dd>
117 <dt><tt class="rst-docutils literal">safe_attrs</tt>:</dt>
118 <dd>A set of attribute names to override the default list of attributes
119 considered 'safe' (when safe_attrs_only=True).</dd>
120 <dt><tt class="rst-docutils literal">add_nofollow</tt>:</dt>
121 <dd>If true, then any &lt;a&gt; tags will have <tt class="rst-docutils literal"><span class="pre">rel="nofollow"</span></tt> added to them.</dd>
122 <dt><tt class="rst-docutils literal">host_whitelist</tt>:</dt>
123 <dd><p class="rst-first">A list or set of hosts that you can use for embedded content
124 (for content like <tt class="rst-docutils literal">&lt;object&gt;</tt>, <tt class="rst-docutils literal">&lt;link <span class="pre">rel="stylesheet"&gt;</span></tt>, etc).
125 You can also implement/override the method
126 <tt class="rst-docutils literal">allow_embedded_url(el, url)</tt> or <tt class="rst-docutils literal">allow_element(el)</tt> to
127 implement more complex rules for what can be embedded.
128 Anything that passes this test will be shown, regardless of
129 the value of (for instance) <tt class="rst-docutils literal">embedded</tt>.</p>
130 <p>Note that this parameter might not work as intended if you do not
131 make the links absolute before doing the cleaning.</p>
132 <p class="rst-last">Note that you may also need to set <tt class="rst-docutils literal">whitelist_tags</tt>.</p>
134 <dt><tt class="rst-docutils literal">whitelist_tags</tt>:</dt>
135 <dd>A set of tags that can be included with <tt class="rst-docutils literal">host_whitelist</tt>.
136 The default is <tt class="rst-docutils literal">iframe</tt> and <tt class="rst-docutils literal">embed</tt>; you may wish to
137 include other tags like <tt class="rst-docutils literal">script</tt>, or you may want to
138 implement <tt class="rst-docutils literal">allow_embedded_url</tt> for more control. Set to None to
139 include all tags.</dd>
141 <p>This modifies the document <em>in place</em>.</p>
143 <!-- ==================== INSTANCE METHODS ==================== -->
144 <a name="section-InstanceMethods"></a>
145 <table class="summary" border="1" cellpadding="3"
146 cellspacing="0" width="100%" bgcolor="white">
147 <tr bgcolor="#70b0f0" class="table-header">
148 <td colspan="2" class="table-header">
149 <table border="0" cellpadding="0" cellspacing="0" width="100%">
151 <td align="left"><span class="table-header">Instance Methods</span></td>
152 <td align="right" valign="top"
153 ><span class="options">[<a href="#section-InstanceMethods"
154 class="privatelink" onclick="toggle_private();"
155 >hide private</a>]</span></td>
161 <td width="15%" align="right" valign="top" class="summary">
162 <span class="summary-type"> </span>
163 </td><td class="summary">
164 <table width="100%" cellpadding="0" cellspacing="0" border="0">
166 <td><span class="summary-sig"><a href="lxml.html.clean.Cleaner-class.html#__init__" class="summary-sig-name">__init__</a>(<span class="summary-sig-arg">self</span>)</span><br />
167 x.__init__(...) initializes x; see help(type(x)) for signature</td>
168 <td align="right" valign="top">
169 <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.__init__">source code</a></span>
178 <td width="15%" align="right" valign="top" class="summary">
179 <span class="summary-type"> </span>
180 </td><td class="summary">
181 <table width="100%" cellpadding="0" cellspacing="0" border="0">
183 <td><span class="summary-sig"><a name="__call__"></a><span class="summary-sig-name">__call__</span>(<span class="summary-sig-arg">self</span>,
184 <span class="summary-sig-arg">doc</span>)</span><br />
185 Cleans the document.</td>
186 <td align="right" valign="top">
187 <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.__call__">source code</a></span>
196 <td width="15%" align="right" valign="top" class="summary">
197 <span class="summary-type"> </span>
198 </td><td class="summary">
199 <table width="100%" cellpadding="0" cellspacing="0" border="0">
201 <td><span class="summary-sig"><a name="allow_follow"></a><span class="summary-sig-name">allow_follow</span>(<span class="summary-sig-arg">self</span>,
202 <span class="summary-sig-arg">anchor</span>)</span><br />
203 Override to suppress rel="nofollow" on some anchors.</td>
204 <td align="right" valign="top">
205 <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.allow_follow">source code</a></span>
214 <td width="15%" align="right" valign="top" class="summary">
215 <span class="summary-type"> </span>
216 </td><td class="summary">
217 <table width="100%" cellpadding="0" cellspacing="0" border="0">
219 <td><span class="summary-sig"><a href="lxml.html.clean.Cleaner-class.html#allow_element" class="summary-sig-name">allow_element</a>(<span class="summary-sig-arg">self</span>,
220 <span class="summary-sig-arg">el</span>)</span><br />
221 Decide whether an element is configured to be accepted or rejected.</td>
222 <td align="right" valign="top">
223 <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.allow_element">source code</a></span>
232 <td width="15%" align="right" valign="top" class="summary">
233 <span class="summary-type"> </span>
234 </td><td class="summary">
235 <table width="100%" cellpadding="0" cellspacing="0" border="0">
237 <td><span class="summary-sig"><a href="lxml.html.clean.Cleaner-class.html#allow_embedded_url" class="summary-sig-name">allow_embedded_url</a>(<span class="summary-sig-arg">self</span>,
238 <span class="summary-sig-arg">el</span>,
239 <span class="summary-sig-arg">url</span>)</span><br />
240 Decide whether a URL that was found in an element's attributes or text
241 is configured to be accepted or rejected.</td>
242 <td align="right" valign="top">
243 <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.allow_embedded_url">source code</a></span>
252 <td width="15%" align="right" valign="top" class="summary">
253 <span class="summary-type"> </span>
254 </td><td class="summary">
255 <table width="100%" cellpadding="0" cellspacing="0" border="0">
257 <td><span class="summary-sig"><a name="kill_conditional_comments"></a><span class="summary-sig-name">kill_conditional_comments</span>(<span class="summary-sig-arg">self</span>,
258 <span class="summary-sig-arg">doc</span>)</span><br />
259 IE conditional comments basically embed HTML that the parser
260 doesn't normally see. We can't allow anything like that, so
261 we'll kill any comments that could be conditional.</td>
262 <td align="right" valign="top">
263 <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.kill_conditional_comments">source code</a></span>
272 <td width="15%" align="right" valign="top" class="summary">
273 <span class="summary-type"> </span>
274 </td><td class="summary">
275 <table width="100%" cellpadding="0" cellspacing="0" border="0">
277 <td><span class="summary-sig"><a name="_kill_elements"></a><span class="summary-sig-name">_kill_elements</span>(<span class="summary-sig-arg">self</span>,
278 <span class="summary-sig-arg">doc</span>,
279 <span class="summary-sig-arg">condition</span>,
280 <span class="summary-sig-arg">iterate</span>=<span class="summary-sig-default">None</span>)</span></td>
281 <td align="right" valign="top">
282 <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner._kill_elements">source code</a></span>
291 <td width="15%" align="right" valign="top" class="summary">
292 <span class="summary-type"> </span>
293 </td><td class="summary">
294 <table width="100%" cellpadding="0" cellspacing="0" border="0">
296 <td><span class="summary-sig"><a name="_remove_javascript_link"></a><span class="summary-sig-name">_remove_javascript_link</span>(<span class="summary-sig-arg">self</span>,
297 <span class="summary-sig-arg">link</span>)</span></td>
298 <td align="right" valign="top">
299 <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner._remove_javascript_link">source code</a></span>
308 <td width="15%" align="right" valign="top" class="summary">
309 <span class="summary-type"> </span>
310 </td><td class="summary">
311 <table width="100%" cellpadding="0" cellspacing="0" border="0">
313 <td><span class="summary-sig"><a name="_substitute_comments"></a><span class="summary-sig-name">_substitute_comments</span>(<span class="summary-sig-arg">...</span>)</span><br />
314 sub(repl, string[, count = 0]) --> newstring
315 Return the string obtained by replacing the leftmost non-overlapping
316 occurrences of pattern in string by the replacement repl.</td>
317 <td align="right" valign="top">
318 <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner._substitute_comments">source code</a></span>
327 <td width="15%" align="right" valign="top" class="summary">
328 <span class="summary-type"> </span>
329 </td><td class="summary">
330 <table width="100%" cellpadding="0" cellspacing="0" border="0">
332 <td><span class="summary-sig"><a href="lxml.html.clean.Cleaner-class.html#_has_sneaky_javascript" class="summary-sig-name" onclick="show_private();">_has_sneaky_javascript</a>(<span class="summary-sig-arg">self</span>,
333 <span class="summary-sig-arg">style</span>)</span><br />
334 Depending on the browser, stuff like <tt class="rst-docutils literal">e x p r e s s i o <span class="pre">n(...)</span></tt>
335 can get interpreted, or <tt class="rst-docutils literal">expre/* stuff <span class="pre">*/ssion(...)</span></tt>. This
336 checks for attempts to do stuff like this.</td>
337 <td align="right" valign="top">
338 <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner._has_sneaky_javascript">source code</a></span>
347 <td width="15%" align="right" valign="top" class="summary">
348 <span class="summary-type"> </span>
349 </td><td class="summary">
350 <table width="100%" cellpadding="0" cellspacing="0" border="0">
352 <td><span class="summary-sig"><a name="clean_html"></a><span class="summary-sig-name">clean_html</span>(<span class="summary-sig-arg">self</span>,
353 <span class="summary-sig-arg">html</span>)</span></td>
354 <td align="right" valign="top">
355 <span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.clean_html">source code</a></span>
364 <td colspan="2" class="summary">
365 <p class="indent-wrapped-lines"><b>Inherited from <code>object</code></b>:
366 <code>__delattr__</code>,
367 <code>__format__</code>,
368 <code>__getattribute__</code>,
369 <code>__hash__</code>,
370 <code>__new__</code>,
371 <code>__reduce__</code>,
372 <code>__reduce_ex__</code>,
373 <code>__repr__</code>,
374 <code>__setattr__</code>,
375 <code>__sizeof__</code>,
376 <code>__str__</code>,
377 <code>__subclasshook__</code>
382 <!-- ==================== CLASS VARIABLES ==================== -->
383 <a name="section-ClassVariables"></a>
384 <table class="summary" border="1" cellpadding="3"
385 cellspacing="0" width="100%" bgcolor="white">
386 <tr bgcolor="#70b0f0" class="table-header">
387 <td colspan="2" class="table-header">
388 <table border="0" cellpadding="0" cellspacing="0" width="100%">
390 <td align="left"><span class="table-header">Class Variables</span></td>
391 <td align="right" valign="top"
392 ><span class="options">[<a href="#section-ClassVariables"
393 class="privatelink" onclick="toggle_private();"
394 >hide private</a>]</span></td>
400 <td width="15%" align="right" valign="top" class="summary">
401 <span class="summary-type"> </span>
402 </td><td class="summary">
403 <a name="scripts"></a><span class="summary-name">scripts</span> = <code title="True">True</code>
407 <td width="15%" align="right" valign="top" class="summary">
408 <span class="summary-type"> </span>
409 </td><td class="summary">
410 <a name="javascript"></a><span class="summary-name">javascript</span> = <code title="True">True</code>
414 <td width="15%" align="right" valign="top" class="summary">
415 <span class="summary-type"> </span>
416 </td><td class="summary">
417 <a name="comments"></a><span class="summary-name">comments</span> = <code title="True">True</code>
421 <td width="15%" align="right" valign="top" class="summary">
422 <span class="summary-type"> </span>
423 </td><td class="summary">
424 <a name="style"></a><span class="summary-name">style</span> = <code title="False">False</code>
428 <td width="15%" align="right" valign="top" class="summary">
429 <span class="summary-type"> </span>
430 </td><td class="summary">
431 <a name="inline_style"></a><span class="summary-name">inline_style</span> = <code title="None">None</code><br />
436 <td width="15%" align="right" valign="top" class="summary">
437 <span class="summary-type"> </span>
438 </td><td class="summary">
439 <a name="links"></a><span class="summary-name">links</span> = <code title="True">True</code>
443 <td width="15%" align="right" valign="top" class="summary">
444 <span class="summary-type"> </span>
445 </td><td class="summary">
446 <a name="meta"></a><span class="summary-name">meta</span> = <code title="True">True</code>
450 <td width="15%" align="right" valign="top" class="summary">
451 <span class="summary-type"> </span>
452 </td><td class="summary">
453 <a name="page_structure"></a><span class="summary-name">page_structure</span> = <code title="True">True</code>
457 <td width="15%" align="right" valign="top" class="summary">
458 <span class="summary-type"> </span>
459 </td><td class="summary">
460 <a name="processing_instructions"></a><span class="summary-name">processing_instructions</span> = <code title="True">True</code>
464 <td width="15%" align="right" valign="top" class="summary">
465 <span class="summary-type"> </span>
466 </td><td class="summary">
467 <a name="embedded"></a><span class="summary-name">embedded</span> = <code title="True">True</code>
471 <td width="15%" align="right" valign="top" class="summary">
472 <span class="summary-type"> </span>
473 </td><td class="summary">
474 <a name="frames"></a><span class="summary-name">frames</span> = <code title="True">True</code>
478 <td width="15%" align="right" valign="top" class="summary">
479 <span class="summary-type"> </span>
480 </td><td class="summary">
481 <a name="forms"></a><span class="summary-name">forms</span> = <code title="True">True</code>
485 <td width="15%" align="right" valign="top" class="summary">
486 <span class="summary-type"> </span>
487 </td><td class="summary">
488 <a name="annoying_tags"></a><span class="summary-name">annoying_tags</span> = <code title="True">True</code>
492 <td width="15%" align="right" valign="top" class="summary">
493 <span class="summary-type"> </span>
494 </td><td class="summary">
495 <a name="remove_tags"></a><span class="summary-name">remove_tags</span> = <code title="None">None</code><br />
500 <td width="15%" align="right" valign="top" class="summary">
501 <span class="summary-type"> </span>
502 </td><td class="summary">
503 <a name="allow_tags"></a><span class="summary-name">allow_tags</span> = <code title="None">None</code><br />
508 <td width="15%" align="right" valign="top" class="summary">
509 <span class="summary-type"> </span>
510 </td><td class="summary">
511 <a name="kill_tags"></a><span class="summary-name">kill_tags</span> = <code title="None">None</code><br />
516 <td width="15%" align="right" valign="top" class="summary">
517 <span class="summary-type"> </span>
518 </td><td class="summary">
519 <a name="remove_unknown_tags"></a><span class="summary-name">remove_unknown_tags</span> = <code title="True">True</code>
523 <td width="15%" align="right" valign="top" class="summary">
524 <span class="summary-type"> </span>
525 </td><td class="summary">
526 <a name="safe_attrs_only"></a><span class="summary-name">safe_attrs_only</span> = <code title="True">True</code>
530 <td width="15%" align="right" valign="top" class="summary">
531 <span class="summary-type"> </span>
532 </td><td class="summary">
533 <a href="lxml.html.clean.Cleaner-class.html#safe_attrs" class="summary-name">safe_attrs</a> = <code title="frozenset(['abbr',
541 ..."><code class="variable-group">frozenset([</code><code class="variable-quote">'</code><code class="variable-string">abbr</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">accept</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">accept-charset</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">a</code><code class="variable-ellipsis">...</code></code>
545 <td width="15%" align="right" valign="top" class="summary">
546 <span class="summary-type"> </span>
547 </td><td class="summary">
548 <a name="add_nofollow"></a><span class="summary-name">add_nofollow</span> = <code title="False">False</code>
552 <td width="15%" align="right" valign="top" class="summary">
553 <span class="summary-type"> </span>
554 </td><td class="summary">
555 <a name="host_whitelist"></a><span class="summary-name">host_whitelist</span> = <code title="()"><code class="variable-group">(</code><code class="variable-group">)</code></code>
559 <td width="15%" align="right" valign="top" class="summary">
560 <span class="summary-type"> </span>
561 </td><td class="summary">
562 <a name="whitelist_tags"></a><span class="summary-name">whitelist_tags</span> = <code title="set(['embed', 'iframe'])"><code class="variable-group">set([</code><code class="variable-quote">'</code><code class="variable-string">embed</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">iframe</code><code class="variable-quote">'</code><code class="variable-group">])</code></code>
566 <td width="15%" align="right" valign="top" class="summary">
567 <span class="summary-type"> </span>
568 </td><td class="summary">
569 <a href="lxml.html.clean.Cleaner-class.html#_tag_link_attrs" class="summary-name" onclick="show_private();">_tag_link_attrs</a> = <code title="{'a': 'href',
570 'applet': ['code', 'object'],
575 'script': 'src'}"><code class="variable-group">{</code><code class="variable-quote">'</code><code class="variable-string">a</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-quote">'</code><code class="variable-string">href</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">applet</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-group">[</code><code class="variable-quote">'</code><code class="variable-string">code</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">object</code><code class="variable-quote">'</code><code class="variable-group">]</code><code class="variable-op">, </code><code class="variable-ellipsis">...</code></code>
579 <td width="15%" align="right" valign="top" class="summary">
580 <span class="summary-type"> </span>
581 </td><td class="summary">
582 <a name="__qualname__"></a><span class="summary-name">__qualname__</span> = <code title="'Cleaner'"><code class="variable-quote">'</code><code class="variable-string">Cleaner</code><code class="variable-quote">'</code></code>
586 <!-- ==================== PROPERTIES ==================== -->
587 <a name="section-Properties"></a>
588 <table class="summary" border="1" cellpadding="3"
589 cellspacing="0" width="100%" bgcolor="white">
590 <tr bgcolor="#70b0f0" class="table-header">
591 <td colspan="2" class="table-header">
592 <table border="0" cellpadding="0" cellspacing="0" width="100%">
594 <td align="left"><span class="table-header">Properties</span></td>
595 <td align="right" valign="top"
596 ><span class="options">[<a href="#section-Properties"
597 class="privatelink" onclick="toggle_private();"
598 >hide private</a>]</span></td>
604 <td colspan="2" class="summary">
605 <p class="indent-wrapped-lines"><b>Inherited from <code>object</code></b>:
606 <code>__class__</code>
611 <!-- ==================== METHOD DETAILS ==================== -->
612 <a name="section-MethodDetails"></a>
613 <table class="details" border="1" cellpadding="3"
614 cellspacing="0" width="100%" bgcolor="white">
615 <tr bgcolor="#70b0f0" class="table-header">
616 <td colspan="2" class="table-header">
617 <table border="0" cellpadding="0" cellspacing="0" width="100%">
619 <td align="left"><span class="table-header">Method Details</span></td>
620 <td align="right" valign="top"
621 ><span class="options">[<a href="#section-MethodDetails"
622 class="privatelink" onclick="toggle_private();"
623 >hide private</a>]</span></td>
629 <a name="__init__"></a>
631 <table class="details" border="1" cellpadding="3"
632 cellspacing="0" width="100%" bgcolor="white">
634 <table width="100%" cellpadding="0" cellspacing="0" border="0">
635 <tr valign="top"><td>
636 <h3 class="epydoc"><span class="sig"><span class="sig-name">__init__</span>(<span class="sig-arg">self</span>)</span>
637 <br /><em class="fname">(Constructor)</em>
639 </td><td align="right" valign="top"
640 ><span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.__init__">source code</a></span>
644 x.__init__(...) initializes x; see help(type(x)) for signature
648 <dd><em class="note">(inherited documentation)</em></dd>
653 <a name="allow_element"></a>
655 <table class="details" border="1" cellpadding="3"
656 cellspacing="0" width="100%" bgcolor="white">
658 <table width="100%" cellpadding="0" cellspacing="0" border="0">
659 <tr valign="top"><td>
660 <h3 class="epydoc"><span class="sig"><span class="sig-name">allow_element</span>(<span class="sig-arg">self</span>,
661 <span class="sig-arg">el</span>)</span>
663 </td><td align="right" valign="top"
664 ><span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.allow_element">source code</a></span>
668 Decide whether an element is configured to be accepted or rejected.
671 <dd><ul class="nomargin-top">
672 <li><strong class="pname"><code>el</code></strong> - an element.</li>
675 <dd>true to accept the element or false to reject/discard it.</dd>
679 <a name="allow_embedded_url"></a>
681 <table class="details" border="1" cellpadding="3"
682 cellspacing="0" width="100%" bgcolor="white">
684 <table width="100%" cellpadding="0" cellspacing="0" border="0">
685 <tr valign="top"><td>
686 <h3 class="epydoc"><span class="sig"><span class="sig-name">allow_embedded_url</span>(<span class="sig-arg">self</span>,
687 <span class="sig-arg">el</span>,
688 <span class="sig-arg">url</span>)</span>
690 </td><td align="right" valign="top"
691 ><span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner.allow_embedded_url">source code</a></span>
695 Decide whether a URL that was found in an element's attributes or text
696 is configured to be accepted or rejected.
699 <dd><ul class="nomargin-top">
700 <li><strong class="pname"><code>el</code></strong> - an element.</li>
701 <li><strong class="pname"><code>url</code></strong> - a URL found on the element.</li>
704 <dd>true to accept the URL and false to reject it.</dd>
708 <a name="_has_sneaky_javascript"></a>
709 <div class="private">
710 <table class="details" border="1" cellpadding="3"
711 cellspacing="0" width="100%" bgcolor="white">
713 <table width="100%" cellpadding="0" cellspacing="0" border="0">
714 <tr valign="top"><td>
715 <h3 class="epydoc"><span class="sig"><span class="sig-name">_has_sneaky_javascript</span>(<span class="sig-arg">self</span>,
716 <span class="sig-arg">style</span>)</span>
718 </td><td align="right" valign="top"
719 ><span class="codelink"><a href="lxml.html.clean-pysrc.html#Cleaner._has_sneaky_javascript">source code</a></span>
723 <p>Depending on the browser, stuff like <tt class="rst-docutils literal">e x p r e s s i o <span class="pre">n(...)</span></tt>
724 can get interpreted, or <tt class="rst-docutils literal">expre/* stuff <span class="pre">*/ssion(...)</span></tt>. This
725 checks for attempts to do stuff like this.</p>
726 <p>Typically the response will be to kill the entire style; if you
727 have just a bit of Javascript in the style another rule will catch
728 that and remove only the Javascript from the style; this catches
729 more sneaky attempts.</p>
735 <!-- ==================== CLASS VARIABLE DETAILS ==================== -->
736 <a name="section-ClassVariableDetails"></a>
737 <table class="details" border="1" cellpadding="3"
738 cellspacing="0" width="100%" bgcolor="white">
739 <tr bgcolor="#70b0f0" class="table-header">
740 <td colspan="2" class="table-header">
741 <table border="0" cellpadding="0" cellspacing="0" width="100%">
743 <td align="left"><span class="table-header">Class Variable Details</span></td>
744 <td align="right" valign="top"
745 ><span class="options">[<a href="#section-ClassVariableDetails"
746 class="privatelink" onclick="toggle_private();"
747 >hide private</a>]</span></td>
753 <a name="safe_attrs"></a>
755 <table class="details" border="1" cellpadding="3"
756 cellspacing="0" width="100%" bgcolor="white">
758 <h3 class="epydoc">safe_attrs</h3>
764 <dd><table><tr><td><pre class="variable">
765 <code class="variable-group">frozenset([</code><code class="variable-quote">'</code><code class="variable-string">abbr</code><code class="variable-quote">'</code><code class="variable-op">,</code>
766 <code class="variable-quote">'</code><code class="variable-string">accept</code><code class="variable-quote">'</code><code class="variable-op">,</code>
767 <code class="variable-quote">'</code><code class="variable-string">accept-charset</code><code class="variable-quote">'</code><code class="variable-op">,</code>
768 <code class="variable-quote">'</code><code class="variable-string">accesskey</code><code class="variable-quote">'</code><code class="variable-op">,</code>
769 <code class="variable-quote">'</code><code class="variable-string">action</code><code class="variable-quote">'</code><code class="variable-op">,</code>
770 <code class="variable-quote">'</code><code class="variable-string">align</code><code class="variable-quote">'</code><code class="variable-op">,</code>
771 <code class="variable-quote">'</code><code class="variable-string">alt</code><code class="variable-quote">'</code><code class="variable-op">,</code>
772 <code class="variable-quote">'</code><code class="variable-string">axis</code><code class="variable-quote">'</code><code class="variable-op">,</code>
773 <code class="variable-ellipsis">...</code>
774 </pre></td></tr></table>
779 <a name="_tag_link_attrs"></a>
780 <div class="private">
781 <table class="details" border="1" cellpadding="3"
782 cellspacing="0" width="100%" bgcolor="white">
784 <h3 class="epydoc">_tag_link_attrs</h3>
790 <dd><table><tr><td><pre class="variable">
791 <code class="variable-group">{</code><code class="variable-quote">'</code><code class="variable-string">a</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-quote">'</code><code class="variable-string">href</code><code class="variable-quote">'</code><code class="variable-op">,</code>
792 <code class="variable-quote">'</code><code class="variable-string">applet</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-group">[</code><code class="variable-quote">'</code><code class="variable-string">code</code><code class="variable-quote">'</code><code class="variable-op">, </code><code class="variable-quote">'</code><code class="variable-string">object</code><code class="variable-quote">'</code><code class="variable-group">]</code><code class="variable-op">,</code>
793 <code class="variable-quote">'</code><code class="variable-string">embed</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-quote">'</code><code class="variable-string">src</code><code class="variable-quote">'</code><code class="variable-op">,</code>
794 <code class="variable-quote">'</code><code class="variable-string">iframe</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-quote">'</code><code class="variable-string">src</code><code class="variable-quote">'</code><code class="variable-op">,</code>
795 <code class="variable-quote">'</code><code class="variable-string">layer</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-quote">'</code><code class="variable-string">src</code><code class="variable-quote">'</code><code class="variable-op">,</code>
796 <code class="variable-quote">'</code><code class="variable-string">link</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-quote">'</code><code class="variable-string">href</code><code class="variable-quote">'</code><code class="variable-op">,</code>
797 <code class="variable-quote">'</code><code class="variable-string">script</code><code class="variable-quote">'</code><code class="variable-op">: </code><code class="variable-quote">'</code><code class="variable-string">src</code><code class="variable-quote">'</code><code class="variable-group">}</code>
798 </pre></td></tr></table>
804 <!-- ==================== NAVIGATION BAR ==================== -->
805 <table class="navbar" border="0" width="100%" cellpadding="0"
806 bgcolor="#a0c0ff" cellspacing="0">
809 <th> <a
810 href="lxml-module.html">Home</a> </th>
813 <th> <a
814 href="module-tree.html">Trees</a> </th>
817 <th> <a
818 href="identifier-index.html">Indices</a> </th>
821 <th> <a
822 href="help.html">Help</a> </th>
824 <!-- Project homepage -->
825 <th class="navbar" align="right" width="100%">
826 <table border="0" cellpadding="0" cellspacing="0">
827 <tr><th class="navbar" align="center"
828 ><a class="navbar" target="_top" href="/">lxml API</a></th>
832 <table border="0" cellpadding="0" cellspacing="0" width="100%">
834 <td align="left" class="footer">
835 Generated by Epydoc 3.0.1
836 on Thu Jul 9 18:29:53 2020
838 <td align="right" class="footer">
839 <a target="mainFrame" href="http://epydoc.sourceforge.net"
840 >http://epydoc.sourceforge.net</a>
845 <script type="text/javascript">
847 // Private objects are initially displayed (because if
848 // javascript is turned off then we want them to be
849 // visible); but by default, we want to hide them. So hide
850 // them unless we have a cookie that says to show them.