From 02e9f1a7bae9bafc47e1914d89db736ad6987c24 Mon Sep 17 00:00:00 2001 From: JinWang An Date: Wed, 30 Dec 2020 10:21:20 +0900 Subject: [PATCH] Imported Upstream version 4.6.2 --- CHANGES.txt | 11 + PKG-INFO | 10 +- doc/FAQ.txt | 36 + doc/html/FAQ.html | 33 +- doc/html/api.html | 4 +- doc/html/apidoc/_modules/collections/abc.html | 2 +- doc/html/apidoc/_modules/index.html | 2 +- doc/html/apidoc/_modules/lxml.html | 4 +- doc/html/apidoc/_modules/lxml/ElementInclude.html | 2 +- doc/html/apidoc/_modules/lxml/doctestcompare.html | 2 +- doc/html/apidoc/_modules/lxml/html.html | 2 +- .../apidoc/_modules/lxml/html/_diffcommand.html | 2 +- doc/html/apidoc/_modules/lxml/html/_setmixin.html | 2 +- doc/html/apidoc/_modules/lxml/html/builder.html | 2 +- doc/html/apidoc/_modules/lxml/html/formfill.html | 2 +- doc/html/apidoc/_modules/lxml/isoschematron.html | 2 +- doc/html/apidoc/genindex.html | 12 +- doc/html/apidoc/index.html | 2 +- doc/html/apidoc/lxml.ElementInclude.html | 2 +- doc/html/apidoc/lxml._elementpath.html | 2 +- doc/html/apidoc/lxml.builder.html | 2 +- doc/html/apidoc/lxml.cssselect.html | 2 +- doc/html/apidoc/lxml.doctestcompare.html | 2 +- doc/html/apidoc/lxml.etree.html | 2 +- doc/html/apidoc/lxml.html | 2 +- doc/html/apidoc/lxml.html.ElementSoup.html | 2 +- doc/html/apidoc/lxml.html._diffcommand.html | 2 +- doc/html/apidoc/lxml.html._setmixin.html | 2 +- doc/html/apidoc/lxml.html.builder.html | 2 +- doc/html/apidoc/lxml.html.clean.html | 21 +- doc/html/apidoc/lxml.html.defs.html | 2 +- doc/html/apidoc/lxml.html.diff.html | 2 +- doc/html/apidoc/lxml.html.formfill.html | 2 +- doc/html/apidoc/lxml.html.html | 2 +- doc/html/apidoc/lxml.html.html5parser.html | 2 +- doc/html/apidoc/lxml.html.soupparser.html | 2 +- doc/html/apidoc/lxml.isoschematron.html | 2 +- doc/html/apidoc/lxml.objectify.html | 2 +- doc/html/apidoc/lxml.sax.html | 2 +- doc/html/apidoc/py-modindex.html | 2 +- doc/html/apidoc/search.html | 2 +- doc/html/build.html | 4 +- doc/html/capi.html | 4 +- 
.../{changes-4.6.1.html => changes-4.6.2.html} | 845 +-- doc/html/compatibility.html | 4 +- doc/html/credits.html | 4 +- doc/html/cssselect.html | 4 +- doc/html/element_classes.html | 4 +- doc/html/elementsoup.html | 4 +- doc/html/extensions.html | 4 +- doc/html/html5parser.html | 4 +- doc/html/index.html | 15 +- doc/html/installation.html | 4 +- doc/html/intro.html | 4 +- doc/html/lxml-source-howto.html | 4 +- doc/html/lxmlhtml.html | 4 +- doc/html/objectify.html | 4 +- doc/html/parsing.html | 4 +- doc/html/performance.html | 4 +- doc/html/resolvers.html | 4 +- doc/html/sax.html | 4 +- doc/html/sitemap.html | 2 +- doc/html/tutorial.html | 4 +- doc/html/validation.html | 4 +- doc/html/xpathxslt.html | 7 +- doc/main.txt | 12 +- doc/xpathxslt.txt | 4 + setup.py | 1 + src/lxml.egg-info/PKG-INFO | 10 +- src/lxml.egg-info/SOURCES.txt | 2 +- src/lxml/__init__.py | 2 +- src/lxml/html/clean.c | 5718 ++++++++++---------- src/lxml/html/clean.py | 26 +- src/lxml/html/tests/test_clean.py | 10 + src/lxml/html/tests/test_clean.txt | 18 +- src/lxml/includes/lxml-version.h | 2 +- 76 files changed, 3647 insertions(+), 3304 deletions(-) rename doc/html/{changes-4.6.1.html => changes-4.6.2.html} (99%) diff --git a/CHANGES.txt b/CHANGES.txt index 7afec7e..e3b7714 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -2,6 +2,17 @@ lxml changelog ============== +4.6.2 (2020-11-26) +================== + +Bugs fixed +---------- + +* A vulnerability (CVE-2020-27783) was discovered in the HTML Cleaner by Yaniv Nizry, + which allowed JavaScript to pass through. The cleaner now removes more sneaky + "style" content. + + 4.6.1 (2020-10-18) ================== diff --git a/PKG-INFO b/PKG-INFO index d2d538c..479c563 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: lxml -Version: 4.6.1 +Version: 4.6.2 Summary: Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API. 
Home-page: https://lxml.de/ Author: lxml dev team @@ -38,14 +38,15 @@ Description: lxml is a Pythonic, mature binding for the libxml2 and libxslt libr as soon as a maintenance branch has been established. Note that this requires Cython to be installed at an appropriate version for the build. - 4.6.1 (2020-10-18) + 4.6.2 (2020-11-26) ================== Bugs fixed ---------- - * A vulnerability was discovered in the HTML Cleaner by Yaniv Nizry, which allowed - JavaScript to pass through. The cleaner now removes more sneaky "style" content. + * A vulnerability (CVE-2020-27783) was discovered in the HTML Cleaner by Yaniv Nizry, + which allowed JavaScript to pass through. The cleaner now removes more sneaky + "style" content. @@ -62,6 +63,7 @@ Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: 3.6 Classifier: Programming Language :: Python :: 3.7 Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 Classifier: Programming Language :: C Classifier: Operating System :: OS Independent Classifier: Topic :: Text Processing :: Markup :: HTML diff --git a/doc/FAQ.txt b/doc/FAQ.txt index 02df686..24ec8c4 100644 --- a/doc/FAQ.txt +++ b/doc/FAQ.txt @@ -63,6 +63,7 @@ ElementTree_. 7.2 Why doesn't ``findall()`` support full XPath expressions? 7.3 How can I find out which namespace prefixes are used in a document? 7.4 How can I specify a default namespace for XPath expressions? + 7.5 How can I modify the tree during iteration? The code examples below use the `'lxml.etree`` module: @@ -1241,3 +1242,38 @@ How can I specify a default namespace for XPath expressions? You can't. In XPath, there is no such thing as a default namespace. Just use an arbitrary prefix and let the namespace dictionary of the XPath evaluators map it to your namespace. See also the question above. + + +How can I modify the tree during iteration? 
+------------------------------------------- + +lxml's iterators need to hold on to an element in the tree in order to remember +their current position. Therefore, tree modifications between two calls into the +iterator can lead to surprising results if such an element is deleted or moved +around, for example. + +If your code risks modifying elements that the iterator might still need, and +you know that the number of elements returned by the iterator is small, then just +read them all into a list (or use ``.findall()``), and iterate over that list. + +If the number of elements can be larger and you really want to process the tree +incrementally, you can often use a read-ahead generator to make the iterator +advance beyond the critical point before touching the tree structure. + +For example: + +.. sourcecode:: python + + from itertools import islice + from collections import deque + + def readahead(iterator, count=1): + iterator = iter(iterator) # allow iterables as well + elements = deque(islice(iterator, 0, count)) + for element in iterator: + elements.append(element) + yield elements.popleft() + yield from elements + + for element in readahead(root.iterfind("path/to/children")): + element.getparent().remove(element) diff --git a/doc/html/FAQ.html b/doc/html/FAQ.html index 6efc1d9..0f9867b 100644 --- a/doc/html/FAQ.html +++ b/doc/html/FAQ.html @@ -26,7 +26,7 @@ function hide_menu() {
-

lxml FAQ - Frequently Asked Questions

+

lxml FAQ - Frequently Asked Questions

Frequently asked questions on lxml. See also the notes on compatibility with ElementTree.

@@ -92,6 +92,7 @@ function hide_menu() {
  • Why doesn't findall() support full XPath expressions?
  • How can I find out which namespace prefixes are used in a document?
  • How can I specify a default namespace for XPath expressions?
  • +
  • How can I modify the tree during iteration?
  • @@ -1017,11 +1018,39 @@ Element. Its children will then inherit this prefix for serialization.

    an arbitrary prefix and let the namespace dictionary of the XPath evaluators map it to your namespace. See also the question above.

    +
    +

    How can I modify the tree during iteration?

    +

    lxml's iterators need to hold on to an element in the tree in order to remember +their current position. Therefore, tree modifications between two calls into the +iterator can lead to surprising results if such an element is deleted or moved +around, for example.

    +

    If your code risks modifying elements that the iterator might still need, and +you know that the number of elements returned by the iterator is small, then just +read them all into a list (or use .findall()), and iterate over that list.

    +

    If the number of elements can be larger and you really want to process the tree +incrementally, you can often use a read-ahead generator to make the iterator +advance beyond the critical point before touching the tree structure.

    +

    For example:

    +
    from itertools import islice
    +from collections import deque
    +
    +def readahead(iterator, count=1):
    +    iterator = iter(iterator)  # allow iterables as well
    +    elements = deque(islice(iterator, 0, count))
    +    for element in iterator:
    +        elements.append(element)
    +        yield elements.popleft()
    +    yield from elements
    +
    +for element in readahead(root.iterfind("path/to/children")):
    +    element.getparent().remove(element)
    +
    +
    diff --git a/doc/html/api.html b/doc/html/api.html index 531f6c0..d63b7f4 100644 --- a/doc/html/api.html +++ b/doc/html/api.html @@ -24,7 +24,7 @@ function hide_menu() {
    -

    APIs specific to lxml.etree

    +

    APIs specific to lxml.etree

    lxml.etree tries to follow established APIs wherever possible. Sometimes, however, the need to expose a feature in an easy way led to the invention of a @@ -534,7 +534,7 @@ resolvers, you have to stick to the external Python module.

    diff --git a/doc/html/apidoc/_modules/collections/abc.html b/doc/html/apidoc/_modules/collections/abc.html index fbc532a..7784193 100644 --- a/doc/html/apidoc/_modules/collections/abc.html +++ b/doc/html/apidoc/_modules/collections/abc.html @@ -62,7 +62,7 @@
    - 4.6.1 + 4.6.2
    diff --git a/doc/html/apidoc/_modules/index.html b/doc/html/apidoc/_modules/index.html index b16b0b3..8b7ffcc 100644 --- a/doc/html/apidoc/_modules/index.html +++ b/doc/html/apidoc/_modules/index.html @@ -62,7 +62,7 @@
    - 4.6.1 + 4.6.2
    diff --git a/doc/html/apidoc/_modules/lxml.html b/doc/html/apidoc/_modules/lxml.html index fcf30ca..0640590 100644 --- a/doc/html/apidoc/_modules/lxml.html +++ b/doc/html/apidoc/_modules/lxml.html @@ -62,7 +62,7 @@
    - 4.6.1 + 4.6.2
    @@ -179,7 +179,7 @@

    Source code for lxml

     # this is a package
     
    -__version__ = "4.6.1"
    +__version__ = "4.6.2"
     
     
     
    [docs]def get_include(): diff --git a/doc/html/apidoc/_modules/lxml/ElementInclude.html b/doc/html/apidoc/_modules/lxml/ElementInclude.html index 32ae197..1d76824 100644 --- a/doc/html/apidoc/_modules/lxml/ElementInclude.html +++ b/doc/html/apidoc/_modules/lxml/ElementInclude.html @@ -62,7 +62,7 @@
    - 4.6.1 + 4.6.2
    diff --git a/doc/html/apidoc/_modules/lxml/doctestcompare.html b/doc/html/apidoc/_modules/lxml/doctestcompare.html index 49ee57f..6db6349 100644 --- a/doc/html/apidoc/_modules/lxml/doctestcompare.html +++ b/doc/html/apidoc/_modules/lxml/doctestcompare.html @@ -62,7 +62,7 @@
    - 4.6.1 + 4.6.2
    diff --git a/doc/html/apidoc/_modules/lxml/html.html b/doc/html/apidoc/_modules/lxml/html.html index f84d684..966c713 100644 --- a/doc/html/apidoc/_modules/lxml/html.html +++ b/doc/html/apidoc/_modules/lxml/html.html @@ -62,7 +62,7 @@
    - 4.6.1 + 4.6.2
    diff --git a/doc/html/apidoc/_modules/lxml/html/_diffcommand.html b/doc/html/apidoc/_modules/lxml/html/_diffcommand.html index e6609d2..5a9d6b0 100644 --- a/doc/html/apidoc/_modules/lxml/html/_diffcommand.html +++ b/doc/html/apidoc/_modules/lxml/html/_diffcommand.html @@ -62,7 +62,7 @@
    - 4.6.1 + 4.6.2
    diff --git a/doc/html/apidoc/_modules/lxml/html/_setmixin.html b/doc/html/apidoc/_modules/lxml/html/_setmixin.html index c184515..9ddca7c 100644 --- a/doc/html/apidoc/_modules/lxml/html/_setmixin.html +++ b/doc/html/apidoc/_modules/lxml/html/_setmixin.html @@ -62,7 +62,7 @@
    - 4.6.1 + 4.6.2
    diff --git a/doc/html/apidoc/_modules/lxml/html/builder.html b/doc/html/apidoc/_modules/lxml/html/builder.html index bbdad6f..a04ddce 100644 --- a/doc/html/apidoc/_modules/lxml/html/builder.html +++ b/doc/html/apidoc/_modules/lxml/html/builder.html @@ -62,7 +62,7 @@
    - 4.6.1 + 4.6.2
    diff --git a/doc/html/apidoc/_modules/lxml/html/formfill.html b/doc/html/apidoc/_modules/lxml/html/formfill.html index bcec9d3..a7928ac 100644 --- a/doc/html/apidoc/_modules/lxml/html/formfill.html +++ b/doc/html/apidoc/_modules/lxml/html/formfill.html @@ -62,7 +62,7 @@
    - 4.6.1 + 4.6.2
    diff --git a/doc/html/apidoc/_modules/lxml/isoschematron.html b/doc/html/apidoc/_modules/lxml/isoschematron.html index 949ae18..8ebc1b9 100644 --- a/doc/html/apidoc/_modules/lxml/isoschematron.html +++ b/doc/html/apidoc/_modules/lxml/isoschematron.html @@ -62,7 +62,7 @@
    - 4.6.1 + 4.6.2
    diff --git a/doc/html/apidoc/genindex.html b/doc/html/apidoc/genindex.html index 66f2229..3109827 100644 --- a/doc/html/apidoc/genindex.html +++ b/doc/html/apidoc/genindex.html @@ -62,7 +62,7 @@
    - 4.6.1 + 4.6.2
    @@ -340,10 +340,10 @@
  • _has_sneaky_javascript() (lxml.html.clean.Cleaner method)
  • - - +
    -
    +

    Bugs fixed

    • LP#1869455: C14N 2.0 serialisation failed for unprefixed attributes @@ -46,9 +57,9 @@ interface.
    -
    +

    4.5.2 (2020-07-09)

    -
    +

    Bugs fixed

    • Cleaner() now validates that only known configuration options can be set.
    • @@ -63,9 +74,9 @@ to "--xml2-config" and "--xslt-config" in 4.5.1 and are now
    -
    +

    4.5.1 (2020-05-19)

    -
    +

    Bugs fixed

    • LP#1570388: Fix failures when serialising documents larger than 2GB in some cases.
    • @@ -77,16 +88,16 @@ Patch by Hugh McMaster.
    -
    +

    4.5.0 (2020-01-29)

    -
    +

    Features added

    • A new function indent() was added to insert tail whitespace for pretty-printing an XML tree.
    -
    +

    Bugs fixed

    • LP#1857794: Tail text of nodes that get removed from a document using item @@ -103,18 +114,18 @@ Set CFLAGS and LDFLAGS explicitly to override it.
    -
    +

    4.4.3 (2020-01-28)

    -
    +

    Bugs fixed

    • LP#1844674: itertext() was missing tail text of comments and PIs since 4.4.0.
    -
    +

    4.4.2 (2019-11-25)

    -
    +

    Bugs fixed

    • LP#1835708: ElementInclude incorrectly rejected repeated non-recursive @@ -123,9 +134,9 @@ Patch by Rainer Hausdorf.
    -
    +

    4.4.1 (2019-08-11)

    -
    +

    Bugs fixed

    • LP#1838252: The order of an OrderedDict was lost in 4.4.0 when passing it as @@ -134,9 +145,9 @@ attrib mapping during element creation.
    -
    +

    4.4.0 (2019-07-27)

    -
    +

    Features added

    • Element.clear() accepts a new keyword argument keep_tail=True to clear @@ -162,7 +173,7 @@ tree when set to false.
    • a c14n2 serialisation method.
    -
    +

    Bugs fixed

    • When writing to file paths that contain the URL escape character '%', the file @@ -191,7 +202,7 @@ of empty tags in lxml.html.defs.
    • It now raises IOError.
    -
    +

    Other changes

    • Support for Python 3.4 was removed.
    • @@ -205,61 +216,61 @@ in a future release.
    -
    +

    4.3.5 (2019-07-27)

    • Rebuilt with Cython 0.29.13 to support Python 3.8.
    -
    +

    4.3.4 (2019-06-10)

    • Rebuilt with Cython 0.29.10 to support Python 3.8.
    -
    +

    4.3.3 (2019-03-26)

    -
    +

    Bugs fixed

    • Fix leak of output buffer and unclosed files in _XSLTResultTree.write_output().
    -
    +

    4.3.2 (2019-02-29)

    -
    +

    Bugs fixed

    • Crash in 4.3.1 when appending a child subtree with certain text nodes.
    -
    +

    Other changes

    • Built with Cython 0.29.6.
    -
    +

    4.3.1 (2019-02-08)

    -
    +

    Bugs fixed

    • LP#1814522: Crash when appending a child subtree that contains unsubstituted entity references.
    -
    +

    Other changes

    • Built with Cython 0.29.5.
    -
    +

    4.3.0 (2019-01-04)

    -
    +

    Features added

    • The module lxml.sax is compiled using Cython in order to speed it up.
    • @@ -270,7 +281,7 @@ is used. Patch by Lennart Regebro. and the corresponding schema to the 2016 version (with optional "properties").
    -
    +

    Other changes

    • GH#270, GH#271: Support for Python 2.6 and 3.3 was removed. @@ -281,9 +292,9 @@ which were released in 2014 and 2012 respectively.
    -
    +

    4.2.6 (2019-01-02)

    -
    +

    Bugs fixed

    • LP#1799755: Fix a DeprecationWarning in Py3.7+.
    • @@ -291,9 +302,9 @@ which were released in 2014 and 2012 respectively.
    -
    +

    4.2.5 (2018-09-09)

    -
    +

    Bugs fixed

    • Javascript URLs that used URL escaping were not removed by the HTML cleaner. @@ -301,16 +312,16 @@ Security problem found by Omar Eissa. (CVE-2018-19787)
    -
    +

    4.2.4 (2018-08-03)

    -
    +

    Features added

    • GH#259: Allow using pkg-config for build configuration. Patch by Patrick Griffis.
    -
    +

    Bugs fixed

    • LP#1773749, GH#268: Crash when moving an element to another document with @@ -319,18 +330,18 @@ Patch by Alexander Weggerle.
    -
    +

    4.2.3 (2018-06-27)

    -
    +

    Bugs fixed

    • Reverted GH#265: lxml links against zlib as a shared library again.
    -
    +

    4.2.2 (2018-06-22)

    -
    +

    Bugs fixed

    • GH#266: Fix sporadic crash during GC when parse-time schema validation is used @@ -341,9 +352,9 @@ Patch by Nehal J Wani.
    -
    +

    4.2.1 (2018-03-21)

    -
    +

    Bugs fixed

    • LP#1755825: iterwalk() failed to return the 'start' event for the initial @@ -353,9 +364,9 @@ element if a tag selector is used.
    -
    +

    4.2.0 (2018-03-13)

    -
    +

    Features added

    • GH#255: SelectElement.value returns more standard-compliant and @@ -368,7 +379,7 @@ first one). If no options are present (not standard-compliant) Patch by stranac.
    -
    +

    Bugs fixed

    • LP#1551797: Some XSLT messages were not captured by the transform error log.
    • @@ -376,19 +387,19 @@ Patch by stranac. validation.
    -
    +

    Other changes

    -
    +

    4.1.1 (2017-11-04)

    • Rebuilt with Cython 0.27.3 to improve support for Py3.7.
    -
    +

    4.1.0 (2017-10-13)

    -
    +

    Features added

    • ElementPath supports text predicates for current node, like "[.='text']".
    • @@ -400,7 +411,7 @@ This should have a beneficial impact on the overall performance by providing a tighter compiler integration between lxml and libxml2/libxslt.
    -
    +

    Bugs fixed

    • LP#1722776: Requesting non-Element objects like comments from a document with @@ -408,9 +419,9 @@ a tighter compiler integration between lxml and libxml2/libxslt.
    -
    +

    4.0.0 (2017-09-17)

    -
    +

    Features added

    • The ElementPath implementation is now compiled using Cython, @@ -426,7 +437,7 @@ allow relative resource lookups.
    • output data into a file according to the <xsl:output> configuration.
    -
    +

    Bugs fixed

    • GH#251: HTML comments were handled incorrectly by the soupparser. @@ -459,7 +470,7 @@ to output end tags even after writing failed with an exception.

    -
    +

    Other changes

    • The main module source files were renamed from lxml.*.pyx to plain @@ -470,9 +481,9 @@ be worth validating that third-party code does not notice this change.
    -
    +

    3.8.0 (2017-06-03)

    -
    +

    Features added

    • ElementTree.write() has a new option doctype that writes out a @@ -488,7 +499,7 @@ responsible for the error. Patch by Bob Kline.
    • a default namespace when passing a None prefix.
    -
    +

    Bugs fixed

    • GH#238: Character escapes were not hex-encoded in the xmlfile serialiser. @@ -502,7 +513,7 @@ multi-threaded XSLT processing.
    • LP#1673355, GH#233: fromstring() html5parser failed to parse byte strings.
    -
    +

    Other changes

    • The previously undocumented docstring option in ElementTree.write() @@ -510,9 +521,9 @@ produces a deprecation warning and will eventually be removed.
    -
    +

    3.7.4 (2017-??-??)

    -
    +

    Bugs fixed

    • LP#1551797: revert previous fix for XSLT error logging as it breaks @@ -521,9 +532,9 @@ multi-threaded XSLT processing.
    -
    +

    3.7.3 (2017-02-18)

    -
    +

    Bugs fixed

    • GH#218 was ineffective in Python 3.
    • @@ -532,13 +543,13 @@ Patch by Jakub Wilk.
    -
    +

    3.7.2 (2017-01-08)

    • GH#220: xmlfile allows switching output methods at an element level. Patch by Burak Arslan.
    -
    +

    Bugs fixed

    • Work around installation problems in recent Python 2.7 versions @@ -550,16 +561,16 @@ script/style tags. Patch by Burak Arslan.
    -
    +

    3.7.1 (2016-12-23)

    • No source changes, issued only to solve problems with the binary packages released for 3.7.0.
    -
    +

    3.7.0 (2016-12-10)

    -
    +

    Features added

    • GH#217: XMLSyntaxError now behaves more like its SyntaxError @@ -577,14 +588,14 @@ serialises like <div attrname& marked as disabled in HTML. Patch by Kristian Klemon.
    -
    +

    Bugs fixed

    • GH#206: File name and line number were missing from XSLT error messages. Patch by Marcus Brinkmann.
    -
    +

    Other changes

    • Log entries no longer allow anything but plain string objects as message text @@ -593,27 +604,27 @@ and file name.
    -
    +

    3.6.4 (2016-08-20)

    -
    +

    3.6.3 (2016-08-18)

    • LP#1614603: change linker flags to build multi-linux wheels
    -
    +

    3.6.2 (2016-08-18)

    • LP#1614603: release without source changes to provide cleanly built Linux wheels
    -
    +

    3.6.1 (2016-07-24)

    -
    +

    Features added

    • GH#180: Separate option inline_style for Cleaner that only removes style @@ -621,7 +632,7 @@ attributes instead of all styles. Patch by Christian Pedersen.
    • GH#196: Windows build support for Python 3.5. Contribution by Maximilian Hils.
    -
    +

    Bugs fixed

    • GH#199: Exclude file fields from FormElement.form_values (as browsers do). @@ -634,9 +645,9 @@ Patch by Holger Joukl.
    -
    +

    3.6.0 (2016-03-17)

    -
    +

    Features added

    • GH#187: Now supports (only) version 5.x and later of PyPy. @@ -645,7 +656,7 @@ Patch by Armin Rigo.
    • is installed. Patch by Dirkjan Ochtman.
    -
    +

    Bugs fixed

    • GH#189: Static builds honour FTP proxy configurations when downloading @@ -657,9 +668,9 @@ Patch by Petr Demin.
    -
    +

    3.5.0 (2015-11-13)

    -
    +

    Bugs fixed

    • Unicode string results failed XPath queries in PyPy.
    • @@ -671,7 +682,7 @@ and continued parsing instead.

    3.5.0b1 (2015-09-18)

    -
    +

    Features added

    • cleanup_namespaces() accepts a new argument keep_ns_prefixes @@ -703,7 +714,7 @@ Patch by Olli Pottonen.
    • of version 3 if available.
    -
    +

    Bugs fixed

    • Memory errors that occur during tree adaptations (e.g. moving subtrees @@ -730,9 +741,9 @@ with ElementPath to avoid hiding bugs in user code.
    -
    +

    3.4.4 (2015-04-25)

    -
    +

    Bugs fixed

    • An ElementTree compatibility test added in lxml 3.4.3 that failed in @@ -740,9 +751,9 @@ Python 3.4+ was removed again.
    -
    +

    3.4.3 (2015-04-15)

    -
    +

    Bugs fixed

    • Expression cache in ElementPath was ignored. Fix by Changaco.
    • @@ -755,9 +766,9 @@ double quotes. Patch by Olli Pottonen.
    -
    +

    3.4.2 (2015-02-07)

    -
    +

    Bugs fixed

    • LP#1415907: Crash when creating an XMLSchema from a non-root element @@ -769,25 +780,25 @@ with pseudo-attributes.
    -
    +

    3.4.1 (2014-11-20)

    -
    +

    Features added

    • New htmlfile HTML generator to accompany the incremental xmlfile serialisation API. Patch by Burak Arslan.
    -
    +

    Bugs fixed

    • lxml.sax.ElementTreeContentHandler did not initialise its superclass.
    -
    +

    3.4.0 (2014-09-10)

    -
    +

    Features added

    • xmlfile(buffered=False) disables output buffering and flushes the @@ -812,10 +823,10 @@ if it had opened it internally.
    • Allow "bytearray" type for ASCII text input.
    -
    +

    Bugs fixed

    -
    +

    Other changes

    • LP#400588: decoding errors have become hard errors even in recovery mode. @@ -829,9 +840,9 @@ use lxml 3.3.x with older versions.
    -
    +

    3.3.6 (2014-08-28)

    -
    +

    Bugs fixed

    • Prevent tree cycle creation when adding Elements as siblings.
    • @@ -841,9 +852,9 @@ extension functions.
    -
    +

    3.3.5 (2014-04-18)

    -
    +

    Bugs fixed

    • HTML cleaning could fail to strip javascript links that mix control @@ -851,34 +862,34 @@ characters into the link scheme.
    -
    +

    3.3.4 (2014-04-03)

    -
    +

    Features added

    • Source line numbers above 65535 are available on Elements when using libxml2 2.9 or later.
    -
    +

    Bugs fixed

    • lxml.html.fragment_fromstring() failed for bytes input in Py3.
    -
    +

    Other changes

    -
    +

    3.3.3 (2014-03-04)

    -
    +

    Bugs fixed

    • LP#1287118: Crash when using Element subtypes with __slots__.
    -
    +

    Other changes

    • The internal classes _LogEntry and _Attrib can no longer be @@ -886,9 +897,9 @@ subclassed from Python code.
    -
    +

    3.3.2 (2014-02-26)

    -
    +

    Bugs fixed

    • The properties resolvers and version, as well as the methods @@ -909,12 +920,12 @@ provide the same interface as that returned for Elements.
    -
    +

    3.3.1 (2014-02-12)

    -
    +

    Features added

    -
    +

    Bugs fixed

    • LP#1014290: HTML documents parsed with parser.feed() failed to find @@ -926,36 +937,36 @@ header file.
    • LP#1274118: iterparse() failed to parse BOM prefixed files.
    -
    +

    Other changes

    -
    +

    3.3.0 (2014-01-26)

    -
    +

    Features added

    -
    +

    Bugs fixed

    • The heuristic that distinguishes file paths from URLs was tightened to produce fewer false negatives.
    -
    +

    Other changes

    3.3.0beta5 (2014-01-18)

    -
    +

    Features added

    • The PEP 393 unicode parsing support gained a fallback for wchar strings which might still be somewhat common on Windows systems.
    -
    +

    Bugs fixed

    • Several error handling problems were fixed throughout the code base that @@ -968,7 +979,7 @@ propagate exceptions (its return type is void) parsing the string character by character.
    -
    +

    Other changes

    • Document cleanup code was simplified using the new GC features in @@ -978,10 +989,10 @@ Cython 0.20.

    3.3.0beta4 (2014-01-12)

    -
    +

    Features added

    -
    +

    Bugs fixed

    • The (empty) value returned by the attrib property of Entity and Comment @@ -994,7 +1005,7 @@ and keyword arguments could modify the mapping passed as +

      Other changes

      • Built with Cython 0.20pre (gitrev 012ae82eb) to prepare support for @@ -1004,39 +1015,39 @@ Python 3.4.

      3.3.0beta3 (2014-01-02)

      -
      +

      Features added

      • Unicode string parsing was optimised for Python 3.3 (PEP 393).
      -
      +

      Bugs fixed

      • HTML parsing of Unicode strings could misdecode the input on some platforms.
      • Crash in xmlfile() when closing open elements out of order in an error case.
      -
      +

      Other changes

      3.3.0beta2 (2013-12-20)

      -
      +

      Features added

      • iterparse() supports the recover option.
      -
      +

      Bugs fixed

      • Crash in iterparse() for HTML parsing.
      • Crash in target parsing with attributes.
      -
      +

      Other changes

      • The safety check in the read-only tree implementation (e.g. used by @@ -1049,7 +1060,7 @@ behaviour.

      3.3.0beta1 (2013-12-12)

      -
      +

      Features added

      • New option handle_failures in make_links_absolute() and @@ -1061,7 +1072,7 @@ incremental parsing, as implemented for ElementTree in Python 3.4.
      • (html=True).
      -
      +

      Bugs fixed

      • LP#1255132: crash when trying to run validation over non-Element (e.g. @@ -1083,7 +1094,7 @@ The textarea used to be cleared before the new content was set, which removed the name attribute.
      -
      +

      Other changes

      • Some basic API classes use freelists internally for faster @@ -1094,12 +1105,12 @@ classes internally instead of being a parser itself.
      -
      +

      3.2.5 (2014-01-02)

      -
      +

      Features added

      -
      +

      Bugs fixed

      • Crash in xmlfile() when closing open elements out of order in an error case.
      • @@ -1108,16 +1119,16 @@ classes internally instead of being a parser itself. comment or PI).
      -
      +

      Other changes

      -
      +

      3.2.4 (2013-11-07)

      -
      +

      Features added

      -
      +

      Bugs fixed

      • Memory leak when creating an XPath evaluator in a thread.
      • @@ -1128,32 +1139,32 @@ to one of its own descendants.
      • Compressed plain-text serialisation to file-like objects was broken.
      -
      +

      Other changes

      -
      +

      3.2.3 (2013-07-28)

      -
      +

      Bugs fixed

      • Fix support for Python 2.4 which was lost in 3.2.2.
      -
      +

      3.2.2 (2013-07-28)

      -
      +

      Features added

      -
      +

      Bugs fixed

      • LP#1185701: spurious XMLSyntaxError after finishing iterparse().
      • Crash in lxml.objectify during xsi annotation.
      -
      +

      Other changes

      • Return values of user provided element class lookup methods are now @@ -1162,9 +1173,9 @@ API class mismatches.
      -
      +

      3.2.1 (2013-05-11)

      -
      +

      Features added

      • The methods apply_templates() and process_children() of XSLT @@ -1173,7 +1184,7 @@ and remove_blank_text that discard either all strings from the result list.
      -
      +

      Bugs fixed

      • When moving Elements to another tree, the namespace cleanup mechanism @@ -1186,16 +1197,16 @@ to a crash.
      • crashed.
      -
      +

      Other changes

      -
      +

      3.2.0 (2013-04-28)

      -
      +

      Features added

      -
      +

      Bugs fixed

      • LP#690319: Leading whitespace could change the behaviour of the string @@ -1214,16 +1225,16 @@ of overwriting the current value.
      • allowed by the user provided whitelist. Patch by Christine Koppelt.
      -
      +

      Other changes

      -
      +

      3.1.2 (2013-04-12)

      -
      +

      Features added

      -
      +

      Bugs fixed

      • LP#1136509: Passing attributes through the namespace-unaware API of @@ -1235,16 +1246,16 @@ the result tree to a Unicode string.
      • by properly exported API function xmlBufUse().
      -
      +

      Other changes

      -
      +

      3.1.1 (2013-03-29)

      -
      +

      Features added

      -
      +

      Bugs fixed

      • LP#1160386: Write access to lxml.html.FormElement.fields raised @@ -1252,7 +1263,7 @@ an AttributeError in Py3.
      • Illegal memory access during cleanup in incremental xmlfile writer.
      -
      +

      Other changes

      • The externally useless class lxml.etree._BaseParser was removed @@ -1260,16 +1271,16 @@ from the module dict.
      -
      +

      3.1.0 (2013-02-10)

      -
      +

      Features added

      • GH#89: lxml.html.clean allows overriding the set of attributes that it considers 'safe'. Patch by Francis Devereux.
      -
      +

      Bugs fixed

      • LP#1104370: copy.copy(el.attrib) raised an exception. It now returns @@ -1282,13 +1293,13 @@ in for the current run.
      • without threading support. Patch by Ulrich Seidl.
      -
      +

      Other changes

      3.1beta1 (2012-12-21)

      -
      +

      Features added

      • New build-time option --with-unicode-strings for Python 2 that @@ -1299,7 +1310,7 @@ instead of byte strings for plain ASCII content.
      • tags with non-identifier names without having to resort to getattr().
      -
      +

      Bugs fixed

      • When starting from a non-namespaced element in lxml.objectify, searching @@ -1311,7 +1322,7 @@ the search to non-namespaced children.
      • LP#1080792: Static build of libxml2 2.9.0 failed due to missing file.
      -
      +

      Other changes

      • The externally useless class _ObjectifyElementMakerCaller was @@ -1321,27 +1332,27 @@ many children. Patch by Anders Hammarquist.
      -
      +

      3.0.2 (2012-12-14)

      -
      +

      Features added

      -
      +

      Bugs fixed

      • Fix crash during interpreter shutdown by switching to Cython 0.17.3 for building.
      -
      +

      Other changes

      -
      +

      3.0.1 (2012-10-14)

      -
      +

      Features added

      -
      +

      Bugs fixed

      • LP#1065924: Element proxies could disappear during garbage collection @@ -1350,16 +1361,16 @@ in PyPy without proper cleanup.
      • LP#1065139: static MacOS-X build failed in Py3.
      -
      +

      Other changes

      -
      +

      3.0 (2012-10-08)

      -
      +

      Features added

      -
      +

      Bugs fixed

      • End-of-file handling was incorrect in iterparse() when reading from @@ -1367,7 +1378,7 @@ a low-level C file stream and failed in libxml2 2.9.0 due to its improved consistency checks.
      -
      +

      Other changes

      • The build no longer uses Cython by default unless the generated C files @@ -1380,21 +1391,21 @@ run special setup.py commands that do not actually run a build), pass

      3.0beta1 (2012-09-26)

      -
      +

      Features added

      • Python level access to (optional) libxml2 memory debugging features to simplify debugging of memory leaks etc.
      -
      +

      Bugs fixed

      • Fix a memory leak in XPath by switching to Cython 0.17.1.
      • Some tests were adapted to work with PyPy.
      -
      +

      Other changes

      • The code was adapted to work with the upcoming libxml2 2.9.0 release.
      • @@ -1403,7 +1414,7 @@ to simplify debugging of memory leaks etc.

      3.0alpha2 (2012-08-23)

      -
      +

      Features added

      diff --git a/doc/html/cssselect.html b/doc/html/cssselect.html index 161d465..c0edee9 100644 --- a/doc/html/cssselect.html +++ b/doc/html/cssselect.html @@ -24,7 +24,7 @@ function hide_menu() {
      -

      lxml.cssselect

      +

      lxml.cssselect

      lxml supports a number of interesting languages for tree traversal and element selection. The most important is obviously XPath, but there is also @@ -113,7 +113,7 @@ namespace URIs: the CSSSelector class accepts

      diff --git a/doc/html/element_classes.html b/doc/html/element_classes.html index 2aba74c..f6ddc0a 100644 --- a/doc/html/element_classes.html +++ b/doc/html/element_classes.html @@ -24,7 +24,7 @@ function hide_menu() {
      -

      Using custom Element classes in lxml

      +

      Using custom Element classes in lxml

      lxml has very sophisticated support for custom Element classes. You can provide your own classes for Elements and have lxml use them by @@ -525,7 +525,7 @@ and use the blank decorator instead:

      diff --git a/doc/html/elementsoup.html b/doc/html/elementsoup.html index 979244f..47fb52a 100644 --- a/doc/html/elementsoup.html +++ b/doc/html/elementsoup.html @@ -24,7 +24,7 @@ function hide_menu() {
      -

      BeautifulSoup Parser

      +

      BeautifulSoup Parser

      BeautifulSoup is a Python package for working with real-world and broken HTML, just like lxml.html. As of version 4.x, it can use @@ -203,7 +203,7 @@ you can simply pass the resulting Unicode string into lxml's parser.

      diff --git a/doc/html/extensions.html b/doc/html/extensions.html index 83c0236..b58c92c 100644 --- a/doc/html/extensions.html +++ b/doc/html/extensions.html @@ -24,7 +24,7 @@ function hide_menu() {
      -

      Python extensions for XPath and XSLT

      +

      Python extensions for XPath and XSLT

      This document describes how to use Python extension functions in XPath and XSLT like this:

      @@ -530,7 +530,7 @@ will work:

      diff --git a/doc/html/html5parser.html b/doc/html/html5parser.html index 5404511..7af227a 100644 --- a/doc/html/html5parser.html +++ b/doc/html/html5parser.html @@ -24,7 +24,7 @@ function hide_menu() {
      -

      html5lib Parser

      +

      html5lib Parser

      html5lib is a Python package that implements the HTML5 parsing algorithm which is heavily influenced by current browsers and based on the WHATWG @@ -90,7 +90,7 @@ module. Note that these are the parser classes provided by html5lib.

      diff --git a/doc/html/index.html b/doc/html/index.html index cd93d94..d78b1c4 100644 --- a/doc/html/index.html +++ b/doc/html/index.html @@ -26,7 +26,7 @@ function hide_menu() {
      -

      lxml - XML and HTML with Python

      +

      lxml - XML and HTML with Python

      @@ -43,7 +43,7 @@ in the Python language.

      XML feature completeness of these libraries with the simplicity of a native Python API, mostly compatible but superior to the well-known ElementTree API. The latest release works with all CPython versions -from 2.7 to 3.8. See the introduction for more information about +from 2.7 to 3.9. See the introduction for more information about background and goals of the lxml project. Some common questions are answered in the FAQ.

      @@ -81,7 +81,7 @@ Another supporter of the lxml project is

      Documentation

      -

      The complete lxml documentation is available for download as PDF +

      The complete lxml documentation is available for download as PDF documentation. The HTML documentation from this web site is part of the normal source download.

      diff --git a/doc/html/installation.html b/doc/html/installation.html index 721361d..ad25853 100644 --- a/doc/html/installation.html +++ b/doc/html/installation.html @@ -24,7 +24,7 @@ function hide_menu() {
      -

      Installing lxml

      +

      Installing lxml

      Contents

      @@ -208,7 +208,7 @@ read the
      diff --git a/doc/html/intro.html b/doc/html/intro.html index 816d777..7992bfc 100644 --- a/doc/html/intro.html +++ b/doc/html/intro.html @@ -24,7 +24,7 @@ function hide_menu() {
      -

      Why lxml?

      +

      Why lxml?

      Contents

      @@ -82,7 +82,7 @@ having to learn new things -- XML is complicated enough.

      diff --git a/doc/html/lxml-source-howto.html b/doc/html/lxml-source-howto.html index 72308b1..7d6a5f5 100644 --- a/doc/html/lxml-source-howto.html +++ b/doc/html/lxml-source-howto.html @@ -27,7 +27,7 @@ function hide_menu() {
      -

      How to read the source of lxml

      +

      How to read the source of lxml

      @@ -286,7 +286,7 @@ implemented in pure Python.

      diff --git a/doc/html/lxmlhtml.html b/doc/html/lxmlhtml.html index 23b33c4..81f44f5 100644 --- a/doc/html/lxmlhtml.html +++ b/doc/html/lxmlhtml.html @@ -25,7 +25,7 @@ function hide_menu() {
      -

      lxml.html

      +

      lxml.html

      @@ -696,7 +696,7 @@ microformat.

      diff --git a/doc/html/objectify.html b/doc/html/objectify.html index c9827f8..114092a 100644 --- a/doc/html/objectify.html +++ b/doc/html/objectify.html @@ -25,7 +25,7 @@ function hide_menu() {
      -

      lxml.objectify

      +

      lxml.objectify

      @@ -1165,7 +1165,7 @@ support any XPath expression. diff --git a/doc/html/parsing.html b/doc/html/parsing.html index f9d9fcb..accb931 100644 --- a/doc/html/parsing.html +++ b/doc/html/parsing.html @@ -24,7 +24,7 @@ function hide_menu() {
      -

      Parsing XML and HTML with lxml

      +

      Parsing XML and HTML with lxml

      lxml provides a very simple and powerful API for parsing XML and HTML. It supports one-step parsing as well as step-by-step parsing using an @@ -893,7 +893,7 @@ with UTF-8 is also considerably faster in most cases.

      diff --git a/doc/html/performance.html b/doc/html/performance.html index b017c9e..1f10f97 100644 --- a/doc/html/performance.html +++ b/doc/html/performance.html @@ -27,7 +27,7 @@ function hide_menu() {
      -

      Benchmarks and Speed

      +

      Benchmarks and Speed

      @@ -798,7 +798,7 @@ random.

      diff --git a/doc/html/resolvers.html b/doc/html/resolvers.html index f005007..c0d0679 100644 --- a/doc/html/resolvers.html +++ b/doc/html/resolvers.html @@ -24,7 +24,7 @@ function hide_menu() {
      -

      Document loading and URL resolving

      +

      Document loading and URL resolving

      Contents

      @@ -250,7 +250,7 @@ documents or raises exceptions if access is denied.
      diff --git a/doc/html/sax.html b/doc/html/sax.html index dcdf773..9f2909f 100644 --- a/doc/html/sax.html +++ b/doc/html/sax.html @@ -24,7 +24,7 @@ function hide_menu() {
      -

      Sax support

      +

      Sax support

      In this document we'll describe lxml's SAX support. lxml has support for producing SAX events for an ElementTree or Element. lxml can also turn SAX @@ -124,7 +124,7 @@ DOM implementation cannot generate SAX events from a DOM tree.

      diff --git a/doc/html/sitemap.html b/doc/html/sitemap.html index 9fd936f..255edf0 100644 --- a/doc/html/sitemap.html +++ b/doc/html/sitemap.html @@ -8,5 +8,5 @@

      Sitemap of lxml.de - Processing XML and HTML with Python

      -
      +
      \ No newline at end of file diff --git a/doc/html/tutorial.html b/doc/html/tutorial.html index 18ccc4c..b57f9cc 100644 --- a/doc/html/tutorial.html +++ b/doc/html/tutorial.html @@ -27,7 +27,7 @@ function hide_menu() {
      -

      The lxml.etree Tutorial

      +

      The lxml.etree Tutorial

      @@ -1220,7 +1220,7 @@ whereas the other two examples would raise a StopIt diff --git a/doc/html/validation.html b/doc/html/validation.html index 32bb4d8..ef31e06 100644 --- a/doc/html/validation.html +++ b/doc/html/validation.html @@ -24,7 +24,7 @@ function hide_menu() {
      -

      Validation with lxml

      +

      Validation with lxml

      Apart from the built-in DTD support in parsers, lxml currently supports three schema languages: DTD, Relax NG and XML Schema. All three provide @@ -556,7 +556,7 @@ reporting during validation.

      diff --git a/doc/html/xpathxslt.html b/doc/html/xpathxslt.html index 5edd59c..e53d013 100644 --- a/doc/html/xpathxslt.html +++ b/doc/html/xpathxslt.html @@ -24,7 +24,7 @@ function hide_menu() {
      -

      XPath and XSLT with lxml

      +

      XPath and XSLT with lxml

      lxml supports XPath 1.0, XSLT 1.0 and the EXSLT extensions through libxml2 and libxslt in a standards compliant way.

      @@ -69,6 +69,9 @@ frequent evaluation: XPath and xpath() method described here.

      +

      Note that the .find*() methods are usually faster than the full-blown XPath +support. They also support incremental tree processing through the .iterfind() +method, whereas XPath always collects all results before returning them.

      The xpath() method

      For ElementTree, the xpath method performs a global XPath query against the @@ -632,7 +635,7 @@ If you want to free it from memory, just do:

      diff --git a/doc/main.txt b/doc/main.txt index fa1dfba..d42c66a 100644 --- a/doc/main.txt +++ b/doc/main.txt @@ -35,7 +35,7 @@ libxml2_ and libxslt_. It is unique in that it combines the speed and XML feature completeness of these libraries with the simplicity of a native Python API, mostly compatible but superior to the well-known ElementTree_ API. The latest release works with all CPython versions -from 2.7 to 3.8. See the introduction_ for more information about +from 2.7 to 3.9. See the introduction_ for more information about background and goals of the lxml project. Some common questions are answered in the FAQ_. @@ -159,8 +159,8 @@ Index `_ (PyPI). It has the source that compiles on various platforms. The source distribution is signed with `this key `_. -The latest version is `lxml 4.6.0`_, released 2020-10-17 -(`changes for 4.6.0`_). `Older versions <#old-versions>`_ +The latest version is `lxml 4.6.2`_, released 2020-11-26 +(`changes for 4.6.2`_). `Older versions <#old-versions>`_ are listed below. Please take a look at the @@ -256,7 +256,9 @@ See the websites of lxml .. and the `latest in-development version `_. -.. _`PDF documentation`: lxmldoc-4.6.1.pdf +.. _`PDF documentation`: lxmldoc-4.6.2.pdf + +* `lxml 4.6.2`_, released 2020-11-26 (`changes for 4.6.2`_) * `lxml 4.6.1`_, released 2020-10-18 (`changes for 4.6.1`_) @@ -278,6 +280,7 @@ See the websites of lxml * `older releases `_ +.. _`lxml 4.6.2`: /files/lxml-4.6.2.tgz .. _`lxml 4.6.1`: /files/lxml-4.6.1.tgz .. _`lxml 4.6.0`: /files/lxml-4.6.0.tgz .. _`lxml 4.5.2`: /files/lxml-4.5.2.tgz @@ -288,6 +291,7 @@ See the websites of lxml .. _`lxml 4.4.1`: /files/lxml-4.4.1.tgz .. _`lxml 4.4.0`: /files/lxml-4.4.0.tgz +.. _`changes for 4.6.2`: /changes-4.6.2.html .. _`changes for 4.6.1`: /changes-4.6.1.html .. _`changes for 4.6.0`: /changes-4.6.0.html .. 
_`changes for 4.5.2`: /changes-4.5.2.html diff --git a/doc/xpathxslt.txt b/doc/xpathxslt.txt index 98adc9e..8b2870e 100644 --- a/doc/xpathxslt.txt +++ b/doc/xpathxslt.txt @@ -63,6 +63,10 @@ comparison`_ to learn when to use which. Their semantics when used on Elements and ElementTrees are the same as for the ``xpath()`` method described here. +Note that the ``.find*()`` methods are usually faster than the full-blown XPath +support. They also support incremental tree processing through the ``.iterfind()`` +method, whereas XPath always collects all results before returning them. + .. _`performance comparison`: performance.html#xpath diff --git a/setup.py b/setup.py index 35e4d0c..845c0d9 100644 --- a/setup.py +++ b/setup.py @@ -235,6 +235,7 @@ an appropriate version of Cython installed. 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', 'Programming Language :: C', 'Operating System :: OS Independent', 'Topic :: Text Processing :: Markup :: HTML', diff --git a/src/lxml.egg-info/PKG-INFO b/src/lxml.egg-info/PKG-INFO index d2d538c..479c563 100644 --- a/src/lxml.egg-info/PKG-INFO +++ b/src/lxml.egg-info/PKG-INFO @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: lxml -Version: 4.6.1 +Version: 4.6.2 Summary: Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API. Home-page: https://lxml.de/ Author: lxml dev team @@ -38,14 +38,15 @@ Description: lxml is a Pythonic, mature binding for the libxml2 and libxslt libr as soon as a maintenance branch has been established. Note that this requires Cython to be installed at an appropriate version for the build. - 4.6.1 (2020-10-18) + 4.6.2 (2020-11-26) ================== Bugs fixed ---------- - * A vulnerability was discovered in the HTML Cleaner by Yaniv Nizry, which allowed - JavaScript to pass through. The cleaner now removes more sneaky "style" content. 
+ * A vulnerability (CVE-2020-27783) was discovered in the HTML Cleaner by Yaniv Nizry, + which allowed JavaScript to pass through. The cleaner now removes more sneaky + "style" content. @@ -62,6 +63,7 @@ Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: 3.6 Classifier: Programming Language :: Python :: 3.7 Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 Classifier: Programming Language :: C Classifier: Operating System :: OS Independent Classifier: Topic :: Text Processing :: Markup :: HTML diff --git a/src/lxml.egg-info/SOURCES.txt b/src/lxml.egg-info/SOURCES.txt index 0eafa85..9aaa8f8 100644 --- a/src/lxml.egg-info/SOURCES.txt +++ b/src/lxml.egg-info/SOURCES.txt @@ -57,7 +57,7 @@ doc/html/FAQ.html doc/html/api.html doc/html/build.html doc/html/capi.html -doc/html/changes-4.6.1.html +doc/html/changes-4.6.2.html doc/html/compatibility.html doc/html/credits.html doc/html/cssselect.html diff --git a/src/lxml/__init__.py b/src/lxml/__init__.py index 5950601..ed50c4b 100644 --- a/src/lxml/__init__.py +++ b/src/lxml/__init__.py @@ -1,6 +1,6 @@ # this is a package -__version__ = "4.6.1" +__version__ = "4.6.2" def get_include(): diff --git a/src/lxml/html/clean.c b/src/lxml/html/clean.c index c638921..2c27827 100644 --- a/src/lxml/html/clean.c +++ b/src/lxml/html/clean.c @@ -860,7 +860,7 @@ struct __pyx_defaults1 { PyObject *__pyx_arg_break_character; }; -/* "lxml/html/clean.py":491 +/* "lxml/html/clean.py":496 * return False * * def kill_conditional_comments(self, doc): # <<<<<<<<<<<<<< @@ -1546,7 +1546,7 @@ static const char __pyx_k__10[] = ":"; static const char __pyx_k__12[] = "\\"; static const char __pyx_k__15[] = "."; static const char __pyx_k__16[] = ","; -static const char __pyx_k__41[] = "/\\*.*?\\*/"; +static const char __pyx_k__42[] = "/\\*.*?\\*/"; static const char __pyx_k_a_z[] = "[^a-z]"; static const char __pyx_k_all[] = "__all__"; static const char 
__pyx_k_bad[] = "bad"; @@ -1566,6 +1566,7 @@ static const char __pyx_k_s_2[] = "s"; static const char __pyx_k_set[] = "set"; static const char __pyx_k_src[] = "src"; static const char __pyx_k_sub[] = "sub"; +static const char __pyx_k_sys[] = "sys"; static const char __pyx_k_tag[] = "tag"; static const char __pyx_k_url[] = "url"; static const char __pyx_k_args[] = "args"; @@ -1598,6 +1599,7 @@ static const char __pyx_k_test[] = "__test__"; static const char __pyx_k_text[] = "text"; static const char __pyx_k_type[] = "type"; static const char __pyx_k_word[] = "word"; +static const char __pyx_k_ASCII[] = "ASCII"; static const char __pyx_k_XPath[] = "XPath"; static const char __pyx_k_aname[] = "aname"; static const char __pyx_k_avoid[] = "avoid"; @@ -1741,12 +1743,12 @@ static const char __pyx_k_link_regexes[] = "link_regexes"; static const char __pyx_k_pre_children[] = "pre_children"; static const char __pyx_k_unquote_plus[] = "unquote_plus"; static const char __pyx_k_urllib_parse[] = "urllib.parse"; +static const char __pyx_k_version_info[] = "version_info"; static const char __pyx_k_allow_element[] = "allow_element"; static const char __pyx_k_annoying_tags[] = "annoying_tags"; static const char __pyx_k_autolink_html[] = "autolink_html"; static const char __pyx_k_avoid_classes[] = "avoid_classes"; static const char __pyx_k_avoid_hosts_2[] = "_avoid_hosts"; -static const char __pyx_k_css_import_re[] = "_css_import_re"; static const char __pyx_k_kill_elements[] = "_kill_elements"; static const char __pyx_k_rewrite_links[] = "rewrite_links"; static const char __pyx_k_tail_children[] = "tail_children"; @@ -1774,12 +1776,13 @@ static const char __pyx_k_is_image_dataurl[] = "_is_image_dataurl"; static const char __pyx_k_not_an_attribute[] = "not_an_attribute"; static const char __pyx_k_strip_attributes[] = "strip_attributes"; static const char __pyx_k_transform_result[] = "_transform_result"; -static const char __pyx_k_css_javascript_re[] = "_css_javascript_re"; static 
const char __pyx_k_data_image_base64[] = "^data:image/.+;base64"; static const char __pyx_k_resolve_base_href[] = "resolve_base_href"; static const char __pyx_k_Cleaner_clean_html[] = "Cleaner.clean_html"; static const char __pyx_k_allow_embedded_url[] = "allow_embedded_url"; static const char __pyx_k_cline_in_traceback[] = "cline_in_traceback"; +static const char __pyx_k_replace_css_import[] = "_replace_css_import"; +static const char __pyx_k_a_zA_Z_son_a_zA_Z_s[] = " 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, __pyx_v_kw, values, pos_args, "__init__") < 0)) __PYX_ERR(0, 217, __pyx_L3_error) + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, __pyx_v_kw, values, pos_args, "__init__") < 0)) __PYX_ERR(0, 222, __pyx_L3_error) } } else if (PyTuple_GET_SIZE(__pyx_args) != 1) { goto __pyx_L5_argtuple_error; @@ -2370,7 +2382,7 @@ static PyObject *__pyx_pw_4lxml_4html_5clean_7Cleaner_1__init__(PyObject *__pyx_ } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("__init__", 1, 1, 1, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 217, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("__init__", 1, 1, 1, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 222, __pyx_L3_error) __pyx_L3_error:; __Pyx_DECREF(__pyx_v_kw); __pyx_v_kw = 0; __Pyx_AddTraceback("lxml.html.clean.Cleaner.__init__", __pyx_clineno, __pyx_lineno, __pyx_filename); @@ -2409,19 +2421,19 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner___init__(CYTHON_UNUSED PyO int __pyx_clineno = 0; __Pyx_RefNannySetupContext("__init__", 0); - /* "lxml/html/clean.py":218 + /* "lxml/html/clean.py":223 * * def __init__(self, **kw): * not_an_attribute = object() # <<<<<<<<<<<<<< * for name, value in kw.items(): * default = getattr(self, name, not_an_attribute) */ - __pyx_t_1 = __Pyx_PyObject_CallNoArg(__pyx_builtin_object); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 218, __pyx_L1_error) + __pyx_t_1 = 
__Pyx_PyObject_CallNoArg(__pyx_builtin_object); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 223, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_v_not_an_attribute = __pyx_t_1; __pyx_t_1 = 0; - /* "lxml/html/clean.py":219 + /* "lxml/html/clean.py":224 * def __init__(self, **kw): * not_an_attribute = object() * for name, value in kw.items(): # <<<<<<<<<<<<<< @@ -2429,7 +2441,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner___init__(CYTHON_UNUSED PyO * if (default is not None and default is not True and default is not False */ __pyx_t_2 = 0; - __pyx_t_5 = __Pyx_dict_iterator(__pyx_v_kw, 1, __pyx_n_s_items, (&__pyx_t_3), (&__pyx_t_4)); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 219, __pyx_L1_error) + __pyx_t_5 = __Pyx_dict_iterator(__pyx_v_kw, 1, __pyx_n_s_items, (&__pyx_t_3), (&__pyx_t_4)); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 224, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_XDECREF(__pyx_t_1); __pyx_t_1 = __pyx_t_5; @@ -2437,7 +2449,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner___init__(CYTHON_UNUSED PyO while (1) { __pyx_t_7 = __Pyx_dict_iter_next(__pyx_t_1, __pyx_t_3, &__pyx_t_2, &__pyx_t_5, &__pyx_t_6, NULL, __pyx_t_4); if (unlikely(__pyx_t_7 == 0)) break; - if (unlikely(__pyx_t_7 == -1)) __PYX_ERR(0, 219, __pyx_L1_error) + if (unlikely(__pyx_t_7 == -1)) __PYX_ERR(0, 224, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_GOTREF(__pyx_t_6); __Pyx_XDECREF_SET(__pyx_v_name, __pyx_t_5); @@ -2445,19 +2457,19 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner___init__(CYTHON_UNUSED PyO __Pyx_XDECREF_SET(__pyx_v_value, __pyx_t_6); __pyx_t_6 = 0; - /* "lxml/html/clean.py":220 + /* "lxml/html/clean.py":225 * not_an_attribute = object() * for name, value in kw.items(): * default = getattr(self, name, not_an_attribute) # <<<<<<<<<<<<<< * if (default is not None and default is not True and default is not False * and not isinstance(default, (frozenset, set, tuple, list))): */ - __pyx_t_6 = __Pyx_GetAttr3(__pyx_v_self, __pyx_v_name, 
__pyx_v_not_an_attribute); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 220, __pyx_L1_error) + __pyx_t_6 = __Pyx_GetAttr3(__pyx_v_self, __pyx_v_name, __pyx_v_not_an_attribute); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 225, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __Pyx_XDECREF_SET(__pyx_v_default, __pyx_t_6); __pyx_t_6 = 0; - /* "lxml/html/clean.py":221 + /* "lxml/html/clean.py":226 * for name, value in kw.items(): * default = getattr(self, name, not_an_attribute) * if (default is not None and default is not True and default is not False # <<<<<<<<<<<<<< @@ -2479,7 +2491,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner___init__(CYTHON_UNUSED PyO goto __pyx_L6_bool_binop_done; } - /* "lxml/html/clean.py":222 + /* "lxml/html/clean.py":227 * default = getattr(self, name, not_an_attribute) * if (default is not None and default is not True and default is not False * and not isinstance(default, (frozenset, set, tuple, list))): # <<<<<<<<<<<<<< @@ -2488,7 +2500,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner___init__(CYTHON_UNUSED PyO */ __pyx_t_9 = (__pyx_v_default != Py_False); - /* "lxml/html/clean.py":221 + /* "lxml/html/clean.py":226 * for name, value in kw.items(): * default = getattr(self, name, not_an_attribute) * if (default is not None and default is not True and default is not False # <<<<<<<<<<<<<< @@ -2502,7 +2514,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner___init__(CYTHON_UNUSED PyO goto __pyx_L6_bool_binop_done; } - /* "lxml/html/clean.py":222 + /* "lxml/html/clean.py":227 * default = getattr(self, name, not_an_attribute) * if (default is not None and default is not True and default is not False * and not isinstance(default, (frozenset, set, tuple, list))): # <<<<<<<<<<<<<< @@ -2538,7 +2550,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner___init__(CYTHON_UNUSED PyO __pyx_t_8 = __pyx_t_9; __pyx_L6_bool_binop_done:; - /* "lxml/html/clean.py":221 + /* "lxml/html/clean.py":226 * for name, value in kw.items(): * default = 
getattr(self, name, not_an_attribute) * if (default is not None and default is not True and default is not False # <<<<<<<<<<<<<< @@ -2547,14 +2559,14 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner___init__(CYTHON_UNUSED PyO */ if (unlikely(__pyx_t_8)) { - /* "lxml/html/clean.py":224 + /* "lxml/html/clean.py":229 * and not isinstance(default, (frozenset, set, tuple, list))): * raise TypeError( * "Unknown parameter: %s=%r" % (name, value)) # <<<<<<<<<<<<<< * setattr(self, name, value) * if self.inline_style is None and 'inline_style' not in kw: */ - __pyx_t_6 = PyTuple_New(2); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 224, __pyx_L1_error) + __pyx_t_6 = PyTuple_New(2); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 229, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __Pyx_INCREF(__pyx_v_name); __Pyx_GIVEREF(__pyx_v_name); @@ -2562,25 +2574,25 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner___init__(CYTHON_UNUSED PyO __Pyx_INCREF(__pyx_v_value); __Pyx_GIVEREF(__pyx_v_value); PyTuple_SET_ITEM(__pyx_t_6, 1, __pyx_v_value); - __pyx_t_5 = __Pyx_PyString_Format(__pyx_kp_s_Unknown_parameter_s_r, __pyx_t_6); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 224, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyString_Format(__pyx_kp_s_Unknown_parameter_s_r, __pyx_t_6); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 229, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - /* "lxml/html/clean.py":223 + /* "lxml/html/clean.py":228 * if (default is not None and default is not True and default is not False * and not isinstance(default, (frozenset, set, tuple, list))): * raise TypeError( # <<<<<<<<<<<<<< * "Unknown parameter: %s=%r" % (name, value)) * setattr(self, name, value) */ - __pyx_t_6 = __Pyx_PyObject_CallOneArg(__pyx_builtin_TypeError, __pyx_t_5); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 223, __pyx_L1_error) + __pyx_t_6 = __Pyx_PyObject_CallOneArg(__pyx_builtin_TypeError, __pyx_t_5); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 228, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); 
__Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_Raise(__pyx_t_6, 0, 0, 0); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __PYX_ERR(0, 223, __pyx_L1_error) + __PYX_ERR(0, 228, __pyx_L1_error) - /* "lxml/html/clean.py":221 + /* "lxml/html/clean.py":226 * for name, value in kw.items(): * default = getattr(self, name, not_an_attribute) * if (default is not None and default is not True and default is not False # <<<<<<<<<<<<<< @@ -2589,25 +2601,25 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner___init__(CYTHON_UNUSED PyO */ } - /* "lxml/html/clean.py":225 + /* "lxml/html/clean.py":230 * raise TypeError( * "Unknown parameter: %s=%r" % (name, value)) * setattr(self, name, value) # <<<<<<<<<<<<<< * if self.inline_style is None and 'inline_style' not in kw: * self.inline_style = self.style */ - __pyx_t_12 = PyObject_SetAttr(__pyx_v_self, __pyx_v_name, __pyx_v_value); if (unlikely(__pyx_t_12 == ((int)-1))) __PYX_ERR(0, 225, __pyx_L1_error) + __pyx_t_12 = PyObject_SetAttr(__pyx_v_self, __pyx_v_name, __pyx_v_value); if (unlikely(__pyx_t_12 == ((int)-1))) __PYX_ERR(0, 230, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "lxml/html/clean.py":226 + /* "lxml/html/clean.py":231 * "Unknown parameter: %s=%r" % (name, value)) * setattr(self, name, value) * if self.inline_style is None and 'inline_style' not in kw: # <<<<<<<<<<<<<< * self.inline_style = self.style * */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_inline_style); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 226, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_inline_style); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 231, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_9 = (__pyx_t_1 == Py_None); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; @@ -2617,25 +2629,25 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner___init__(CYTHON_UNUSED PyO __pyx_t_8 = __pyx_t_10; goto __pyx_L15_bool_binop_done; } - __pyx_t_10 = (__Pyx_PyDict_ContainsTF(__pyx_n_s_inline_style, 
__pyx_v_kw, Py_NE)); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 226, __pyx_L1_error) + __pyx_t_10 = (__Pyx_PyDict_ContainsTF(__pyx_n_s_inline_style, __pyx_v_kw, Py_NE)); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 231, __pyx_L1_error) __pyx_t_9 = (__pyx_t_10 != 0); __pyx_t_8 = __pyx_t_9; __pyx_L15_bool_binop_done:; if (__pyx_t_8) { - /* "lxml/html/clean.py":227 + /* "lxml/html/clean.py":232 * setattr(self, name, value) * if self.inline_style is None and 'inline_style' not in kw: * self.inline_style = self.style # <<<<<<<<<<<<<< * * if kw.get("allow_tags"): */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_style); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 227, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_style); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 232, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_inline_style, __pyx_t_1) < 0) __PYX_ERR(0, 227, __pyx_L1_error) + if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_inline_style, __pyx_t_1) < 0) __PYX_ERR(0, 232, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "lxml/html/clean.py":226 + /* "lxml/html/clean.py":231 * "Unknown parameter: %s=%r" % (name, value)) * setattr(self, name, value) * if self.inline_style is None and 'inline_style' not in kw: # <<<<<<<<<<<<<< @@ -2644,46 +2656,46 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner___init__(CYTHON_UNUSED PyO */ } - /* "lxml/html/clean.py":229 + /* "lxml/html/clean.py":234 * self.inline_style = self.style * * if kw.get("allow_tags"): # <<<<<<<<<<<<<< * if kw.get("remove_unknown_tags"): * raise ValueError("It does not make sense to pass in both " */ - __pyx_t_1 = __Pyx_PyDict_GetItemDefault(__pyx_v_kw, __pyx_n_s_allow_tags, Py_None); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 229, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyDict_GetItemDefault(__pyx_v_kw, __pyx_n_s_allow_tags, Py_None); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 234, __pyx_L1_error) 
__Pyx_GOTREF(__pyx_t_1); - __pyx_t_8 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_8 < 0)) __PYX_ERR(0, 229, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_8 < 0)) __PYX_ERR(0, 234, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; if (__pyx_t_8) { - /* "lxml/html/clean.py":230 + /* "lxml/html/clean.py":235 * * if kw.get("allow_tags"): * if kw.get("remove_unknown_tags"): # <<<<<<<<<<<<<< * raise ValueError("It does not make sense to pass in both " * "allow_tags and remove_unknown_tags") */ - __pyx_t_1 = __Pyx_PyDict_GetItemDefault(__pyx_v_kw, __pyx_n_s_remove_unknown_tags, Py_None); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 230, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyDict_GetItemDefault(__pyx_v_kw, __pyx_n_s_remove_unknown_tags, Py_None); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 235, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_8 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_8 < 0)) __PYX_ERR(0, 230, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_8 < 0)) __PYX_ERR(0, 235, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; if (unlikely(__pyx_t_8)) { - /* "lxml/html/clean.py":231 + /* "lxml/html/clean.py":236 * if kw.get("allow_tags"): * if kw.get("remove_unknown_tags"): * raise ValueError("It does not make sense to pass in both " # <<<<<<<<<<<<<< * "allow_tags and remove_unknown_tags") * self.remove_unknown_tags = False */ - __pyx_t_1 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 231, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 236, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_Raise(__pyx_t_1, 0, 0, 0); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - __PYX_ERR(0, 231, __pyx_L1_error) + __PYX_ERR(0, 236, __pyx_L1_error) - /* "lxml/html/clean.py":230 + /* "lxml/html/clean.py":235 * * if kw.get("allow_tags"): * if 
kw.get("remove_unknown_tags"): # <<<<<<<<<<<<<< @@ -2692,16 +2704,16 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner___init__(CYTHON_UNUSED PyO */ } - /* "lxml/html/clean.py":233 + /* "lxml/html/clean.py":238 * raise ValueError("It does not make sense to pass in both " * "allow_tags and remove_unknown_tags") * self.remove_unknown_tags = False # <<<<<<<<<<<<<< * * # Used to lookup the primary URL for a given tag that is up for */ - if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_remove_unknown_tags, Py_False) < 0) __PYX_ERR(0, 233, __pyx_L1_error) + if (__Pyx_PyObject_SetAttrStr(__pyx_v_self, __pyx_n_s_remove_unknown_tags, Py_False) < 0) __PYX_ERR(0, 238, __pyx_L1_error) - /* "lxml/html/clean.py":229 + /* "lxml/html/clean.py":234 * self.inline_style = self.style * * if kw.get("allow_tags"): # <<<<<<<<<<<<<< @@ -2710,7 +2722,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner___init__(CYTHON_UNUSED PyO */ } - /* "lxml/html/clean.py":217 + /* "lxml/html/clean.py":222 * whitelist_tags = {'iframe', 'embed'} * * def __init__(self, **kw): # <<<<<<<<<<<<<< @@ -2737,7 +2749,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner___init__(CYTHON_UNUSED PyO return __pyx_r; } -/* "lxml/html/clean.py":257 +/* "lxml/html/clean.py":262 * ) * * def __call__(self, doc): # <<<<<<<<<<<<<< @@ -2781,11 +2793,11 @@ static PyObject *__pyx_pw_4lxml_4html_5clean_7Cleaner_3__call__(PyObject *__pyx_ case 1: if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_doc)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("__call__", 1, 2, 2, 1); __PYX_ERR(0, 257, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("__call__", 1, 2, 2, 1); __PYX_ERR(0, 262, __pyx_L3_error) } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__call__") < 0)) __PYX_ERR(0, 257, __pyx_L3_error) + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__call__") < 0)) 
__PYX_ERR(0, 262, __pyx_L3_error) } } else if (PyTuple_GET_SIZE(__pyx_args) != 2) { goto __pyx_L5_argtuple_error; @@ -2798,7 +2810,7 @@ static PyObject *__pyx_pw_4lxml_4html_5clean_7Cleaner_3__call__(PyObject *__pyx_ } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("__call__", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 257, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("__call__", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 262, __pyx_L3_error) __pyx_L3_error:; __Pyx_AddTraceback("lxml.html.clean.Cleaner.__call__", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -2853,7 +2865,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py __Pyx_RefNannySetupContext("__call__", 0); __Pyx_INCREF(__pyx_v_doc); - /* "lxml/html/clean.py":261 + /* "lxml/html/clean.py":266 * Cleans the document. * """ * try: # <<<<<<<<<<<<<< @@ -2869,19 +2881,19 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py __Pyx_XGOTREF(__pyx_t_3); /*try:*/ { - /* "lxml/html/clean.py":262 + /* "lxml/html/clean.py":267 * """ * try: * getroot = doc.getroot # <<<<<<<<<<<<<< * except AttributeError: * pass # Element instance */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_doc, __pyx_n_s_getroot); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 262, __pyx_L3_error) + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_doc, __pyx_n_s_getroot); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 267, __pyx_L3_error) __Pyx_GOTREF(__pyx_t_4); __pyx_v_getroot = __pyx_t_4; __pyx_t_4 = 0; - /* "lxml/html/clean.py":261 + /* "lxml/html/clean.py":266 * Cleans the document. 
* """ * try: # <<<<<<<<<<<<<< @@ -2890,7 +2902,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":266 + /* "lxml/html/clean.py":271 * pass # Element instance * else: * doc = getroot() # ElementTree instance, instead of an element # <<<<<<<<<<<<<< @@ -2911,7 +2923,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py } __pyx_t_4 = (__pyx_t_6) ? __Pyx_PyObject_CallOneArg(__pyx_t_5, __pyx_t_6) : __Pyx_PyObject_CallNoArg(__pyx_t_5); __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; - if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 266, __pyx_L5_except_error) + if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 271, __pyx_L5_except_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_DECREF_SET(__pyx_v_doc, __pyx_t_4); @@ -2924,7 +2936,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py __pyx_L3_error:; __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "lxml/html/clean.py":263 + /* "lxml/html/clean.py":268 * try: * getroot = doc.getroot * except AttributeError: # <<<<<<<<<<<<<< @@ -2939,7 +2951,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py goto __pyx_L5_except_error; __pyx_L5_except_error:; - /* "lxml/html/clean.py":261 + /* "lxml/html/clean.py":266 * Cleans the document. * """ * try: # <<<<<<<<<<<<<< @@ -2959,14 +2971,14 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py __pyx_L8_try_end:; } - /* "lxml/html/clean.py":268 + /* "lxml/html/clean.py":273 * doc = getroot() # ElementTree instance, instead of an element * # convert XHTML to HTML * xhtml_to_html(doc) # <<<<<<<<<<<<<< * # Normalize a case that IE treats like , and that * # can confuse either this step or later steps. 
*/ - __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_xhtml_to_html); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 268, __pyx_L1_error) + __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_xhtml_to_html); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 273, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __pyx_t_6 = NULL; if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_5))) { @@ -2980,19 +2992,19 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py } __pyx_t_4 = (__pyx_t_6) ? __Pyx_PyObject_Call2Args(__pyx_t_5, __pyx_t_6, __pyx_v_doc) : __Pyx_PyObject_CallOneArg(__pyx_t_5, __pyx_v_doc); __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; - if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 268, __pyx_L1_error) + if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 273, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "lxml/html/clean.py":271 + /* "lxml/html/clean.py":276 * # Normalize a case that IE treats like , and that * # can confuse either this step or later steps. * for el in doc.iter('image'): # <<<<<<<<<<<<<< * el.tag = 'img' * if not self.comments: */ - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_doc, __pyx_n_s_iter); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 271, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_doc, __pyx_n_s_iter); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 276, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __pyx_t_6 = NULL; if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_5))) { @@ -3006,16 +3018,16 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py } __pyx_t_4 = (__pyx_t_6) ? 
__Pyx_PyObject_Call2Args(__pyx_t_5, __pyx_t_6, __pyx_n_s_image) : __Pyx_PyObject_CallOneArg(__pyx_t_5, __pyx_n_s_image); __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; - if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 271, __pyx_L1_error) + if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 276, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; if (likely(PyList_CheckExact(__pyx_t_4)) || PyTuple_CheckExact(__pyx_t_4)) { __pyx_t_5 = __pyx_t_4; __Pyx_INCREF(__pyx_t_5); __pyx_t_8 = 0; __pyx_t_9 = NULL; } else { - __pyx_t_8 = -1; __pyx_t_5 = PyObject_GetIter(__pyx_t_4); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 271, __pyx_L1_error) + __pyx_t_8 = -1; __pyx_t_5 = PyObject_GetIter(__pyx_t_4); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 276, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_9 = Py_TYPE(__pyx_t_5)->tp_iternext; if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 271, __pyx_L1_error) + __pyx_t_9 = Py_TYPE(__pyx_t_5)->tp_iternext; if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 276, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; for (;;) { @@ -3023,17 +3035,17 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py if (likely(PyList_CheckExact(__pyx_t_5))) { if (__pyx_t_8 >= PyList_GET_SIZE(__pyx_t_5)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_4 = PyList_GET_ITEM(__pyx_t_5, __pyx_t_8); __Pyx_INCREF(__pyx_t_4); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 271, __pyx_L1_error) + __pyx_t_4 = PyList_GET_ITEM(__pyx_t_5, __pyx_t_8); __Pyx_INCREF(__pyx_t_4); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 276, __pyx_L1_error) #else - __pyx_t_4 = PySequence_ITEM(__pyx_t_5, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 271, __pyx_L1_error) + __pyx_t_4 = PySequence_ITEM(__pyx_t_5, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 276, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); #endif } else { if (__pyx_t_8 >= PyTuple_GET_SIZE(__pyx_t_5)) break; #if CYTHON_ASSUME_SAFE_MACROS && 
!CYTHON_AVOID_BORROWED_REFS - __pyx_t_4 = PyTuple_GET_ITEM(__pyx_t_5, __pyx_t_8); __Pyx_INCREF(__pyx_t_4); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 271, __pyx_L1_error) + __pyx_t_4 = PyTuple_GET_ITEM(__pyx_t_5, __pyx_t_8); __Pyx_INCREF(__pyx_t_4); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 276, __pyx_L1_error) #else - __pyx_t_4 = PySequence_ITEM(__pyx_t_5, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 271, __pyx_L1_error) + __pyx_t_4 = PySequence_ITEM(__pyx_t_5, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 276, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); #endif } @@ -3043,7 +3055,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 271, __pyx_L1_error) + else __PYX_ERR(0, 276, __pyx_L1_error) } break; } @@ -3052,16 +3064,16 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py __Pyx_XDECREF_SET(__pyx_v_el, __pyx_t_4); __pyx_t_4 = 0; - /* "lxml/html/clean.py":272 + /* "lxml/html/clean.py":277 * # can confuse either this step or later steps. * for el in doc.iter('image'): * el.tag = 'img' # <<<<<<<<<<<<<< * if not self.comments: * # Of course, if we were going to kill comments anyway, we don't */ - if (__Pyx_PyObject_SetAttrStr(__pyx_v_el, __pyx_n_s_tag, __pyx_n_s_img) < 0) __PYX_ERR(0, 272, __pyx_L1_error) + if (__Pyx_PyObject_SetAttrStr(__pyx_v_el, __pyx_n_s_tag, __pyx_n_s_img) < 0) __PYX_ERR(0, 277, __pyx_L1_error) - /* "lxml/html/clean.py":271 + /* "lxml/html/clean.py":276 * # Normalize a case that IE treats like , and that * # can confuse either this step or later steps. 
* for el in doc.iter('image'): # <<<<<<<<<<<<<< @@ -3071,28 +3083,28 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py } __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":273 + /* "lxml/html/clean.py":278 * for el in doc.iter('image'): * el.tag = 'img' * if not self.comments: # <<<<<<<<<<<<<< * # Of course, if we were going to kill comments anyway, we don't * # need to worry about this */ - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_comments); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 273, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_comments); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 278, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 273, __pyx_L1_error) + __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 278, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_11 = ((!__pyx_t_10) != 0); if (__pyx_t_11) { - /* "lxml/html/clean.py":276 + /* "lxml/html/clean.py":281 * # Of course, if we were going to kill comments anyway, we don't * # need to worry about this * self.kill_conditional_comments(doc) # <<<<<<<<<<<<<< * * kill_tags = set(self.kill_tags or ()) */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_kill_conditional_comments); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 276, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_kill_conditional_comments); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 281, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __pyx_t_6 = NULL; if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_4))) { @@ -3106,12 +3118,12 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py } __pyx_t_5 = (__pyx_t_6) ? 
__Pyx_PyObject_Call2Args(__pyx_t_4, __pyx_t_6, __pyx_v_doc) : __Pyx_PyObject_CallOneArg(__pyx_t_4, __pyx_v_doc); __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; - if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 276, __pyx_L1_error) + if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 281, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":273 + /* "lxml/html/clean.py":278 * for el in doc.iter('image'): * el.tag = 'img' * if not self.comments: # <<<<<<<<<<<<<< @@ -3120,16 +3132,16 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":278 + /* "lxml/html/clean.py":283 * self.kill_conditional_comments(doc) * * kill_tags = set(self.kill_tags or ()) # <<<<<<<<<<<<<< * remove_tags = set(self.remove_tags or ()) * allow_tags = set(self.allow_tags or ()) */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_kill_tags); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 278, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_kill_tags); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 283, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 278, __pyx_L1_error) + __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 283, __pyx_L1_error) if (!__pyx_t_11) { __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; } else { @@ -3141,22 +3153,22 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py __Pyx_INCREF(__pyx_empty_tuple); __pyx_t_5 = __pyx_empty_tuple; __pyx_L12_bool_binop_done:; - __pyx_t_4 = PySet_New(__pyx_t_5); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 278, __pyx_L1_error) + __pyx_t_4 = PySet_New(__pyx_t_5); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 283, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_v_kill_tags = ((PyObject*)__pyx_t_4); __pyx_t_4 = 0; - /* 
"lxml/html/clean.py":279 + /* "lxml/html/clean.py":284 * * kill_tags = set(self.kill_tags or ()) * remove_tags = set(self.remove_tags or ()) # <<<<<<<<<<<<<< * allow_tags = set(self.allow_tags or ()) * */ - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_remove_tags); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 279, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_remove_tags); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 284, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 279, __pyx_L1_error) + __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 284, __pyx_L1_error) if (!__pyx_t_11) { __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; } else { @@ -3168,22 +3180,22 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py __Pyx_INCREF(__pyx_empty_tuple); __pyx_t_4 = __pyx_empty_tuple; __pyx_L14_bool_binop_done:; - __pyx_t_5 = PySet_New(__pyx_t_4); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 279, __pyx_L1_error) + __pyx_t_5 = PySet_New(__pyx_t_4); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 284, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __pyx_v_remove_tags = ((PyObject*)__pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":280 + /* "lxml/html/clean.py":285 * kill_tags = set(self.kill_tags or ()) * remove_tags = set(self.remove_tags or ()) * allow_tags = set(self.allow_tags or ()) # <<<<<<<<<<<<<< * * if self.scripts: */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_allow_tags); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 280, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_allow_tags); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 285, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 280, __pyx_L1_error) + __pyx_t_11 = 
__Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 285, __pyx_L1_error) if (!__pyx_t_11) { __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; } else { @@ -3195,35 +3207,35 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py __Pyx_INCREF(__pyx_empty_tuple); __pyx_t_5 = __pyx_empty_tuple; __pyx_L16_bool_binop_done:; - __pyx_t_4 = PySet_New(__pyx_t_5); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 280, __pyx_L1_error) + __pyx_t_4 = PySet_New(__pyx_t_5); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 285, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_v_allow_tags = ((PyObject*)__pyx_t_4); __pyx_t_4 = 0; - /* "lxml/html/clean.py":282 + /* "lxml/html/clean.py":287 * allow_tags = set(self.allow_tags or ()) * * if self.scripts: # <<<<<<<<<<<<<< * kill_tags.add('script') * if self.safe_attrs_only: */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_scripts); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 282, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_scripts); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 287, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 282, __pyx_L1_error) + __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 287, __pyx_L1_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (__pyx_t_11) { - /* "lxml/html/clean.py":283 + /* "lxml/html/clean.py":288 * * if self.scripts: * kill_tags.add('script') # <<<<<<<<<<<<<< * if self.safe_attrs_only: * safe_attrs = set(self.safe_attrs) */ - __pyx_t_12 = PySet_Add(__pyx_v_kill_tags, __pyx_n_s_script); if (unlikely(__pyx_t_12 == ((int)-1))) __PYX_ERR(0, 283, __pyx_L1_error) + __pyx_t_12 = PySet_Add(__pyx_v_kill_tags, __pyx_n_s_script); if (unlikely(__pyx_t_12 == ((int)-1))) __PYX_ERR(0, 288, __pyx_L1_error) - /* "lxml/html/clean.py":282 + /* "lxml/html/clean.py":287 * allow_tags = 
set(self.allow_tags or ()) * * if self.scripts: # <<<<<<<<<<<<<< @@ -3232,46 +3244,46 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":284 + /* "lxml/html/clean.py":289 * if self.scripts: * kill_tags.add('script') * if self.safe_attrs_only: # <<<<<<<<<<<<<< * safe_attrs = set(self.safe_attrs) * for el in doc.iter(etree.Element): */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_safe_attrs_only); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 284, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_safe_attrs_only); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 289, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 284, __pyx_L1_error) + __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 289, __pyx_L1_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (__pyx_t_11) { - /* "lxml/html/clean.py":285 + /* "lxml/html/clean.py":290 * kill_tags.add('script') * if self.safe_attrs_only: * safe_attrs = set(self.safe_attrs) # <<<<<<<<<<<<<< * for el in doc.iter(etree.Element): * attrib = el.attrib */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_safe_attrs); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 285, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_safe_attrs); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 290, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __pyx_t_5 = PySet_New(__pyx_t_4); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 285, __pyx_L1_error) + __pyx_t_5 = PySet_New(__pyx_t_4); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 290, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __pyx_v_safe_attrs = ((PyObject*)__pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":286 + /* "lxml/html/clean.py":291 * if self.safe_attrs_only: * safe_attrs = set(self.safe_attrs) * for el in doc.iter(etree.Element): # 
<<<<<<<<<<<<<< * attrib = el.attrib * for aname in attrib.keys(): */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_doc, __pyx_n_s_iter); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 286, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_doc, __pyx_n_s_iter); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 291, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __Pyx_GetModuleGlobalName(__pyx_t_6, __pyx_n_s_etree); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 286, __pyx_L1_error) + __Pyx_GetModuleGlobalName(__pyx_t_6, __pyx_n_s_etree); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 291, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_Element); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 286, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_Element); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 291, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __pyx_t_6 = NULL; @@ -3287,16 +3299,16 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py __pyx_t_5 = (__pyx_t_6) ? 
__Pyx_PyObject_Call2Args(__pyx_t_4, __pyx_t_6, __pyx_t_13) : __Pyx_PyObject_CallOneArg(__pyx_t_4, __pyx_t_13); __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 286, __pyx_L1_error) + if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 291, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (likely(PyList_CheckExact(__pyx_t_5)) || PyTuple_CheckExact(__pyx_t_5)) { __pyx_t_4 = __pyx_t_5; __Pyx_INCREF(__pyx_t_4); __pyx_t_8 = 0; __pyx_t_9 = NULL; } else { - __pyx_t_8 = -1; __pyx_t_4 = PyObject_GetIter(__pyx_t_5); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 286, __pyx_L1_error) + __pyx_t_8 = -1; __pyx_t_4 = PyObject_GetIter(__pyx_t_5); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 291, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __pyx_t_9 = Py_TYPE(__pyx_t_4)->tp_iternext; if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 286, __pyx_L1_error) + __pyx_t_9 = Py_TYPE(__pyx_t_4)->tp_iternext; if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 291, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; for (;;) { @@ -3304,17 +3316,17 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py if (likely(PyList_CheckExact(__pyx_t_4))) { if (__pyx_t_8 >= PyList_GET_SIZE(__pyx_t_4)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_5 = PyList_GET_ITEM(__pyx_t_4, __pyx_t_8); __Pyx_INCREF(__pyx_t_5); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 286, __pyx_L1_error) + __pyx_t_5 = PyList_GET_ITEM(__pyx_t_4, __pyx_t_8); __Pyx_INCREF(__pyx_t_5); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 291, __pyx_L1_error) #else - __pyx_t_5 = PySequence_ITEM(__pyx_t_4, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 286, __pyx_L1_error) + __pyx_t_5 = PySequence_ITEM(__pyx_t_4, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 291, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); #endif } else { if (__pyx_t_8 >= PyTuple_GET_SIZE(__pyx_t_4)) break; #if 
CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_5 = PyTuple_GET_ITEM(__pyx_t_4, __pyx_t_8); __Pyx_INCREF(__pyx_t_5); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 286, __pyx_L1_error) + __pyx_t_5 = PyTuple_GET_ITEM(__pyx_t_4, __pyx_t_8); __Pyx_INCREF(__pyx_t_5); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 291, __pyx_L1_error) #else - __pyx_t_5 = PySequence_ITEM(__pyx_t_4, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 286, __pyx_L1_error) + __pyx_t_5 = PySequence_ITEM(__pyx_t_4, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 291, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); #endif } @@ -3324,7 +3336,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 286, __pyx_L1_error) + else __PYX_ERR(0, 291, __pyx_L1_error) } break; } @@ -3333,19 +3345,19 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py __Pyx_XDECREF_SET(__pyx_v_el, __pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":287 + /* "lxml/html/clean.py":292 * safe_attrs = set(self.safe_attrs) * for el in doc.iter(etree.Element): * attrib = el.attrib # <<<<<<<<<<<<<< * for aname in attrib.keys(): * if aname not in safe_attrs: */ - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_attrib); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 287, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_attrib); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 292, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_XDECREF_SET(__pyx_v_attrib, __pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":288 + /* "lxml/html/clean.py":293 * for el in doc.iter(etree.Element): * attrib = el.attrib * for aname in attrib.keys(): # <<<<<<<<<<<<<< @@ -3355,9 +3367,9 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py 
__pyx_t_14 = 0; if (unlikely(__pyx_v_attrib == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%.30s'", "keys"); - __PYX_ERR(0, 288, __pyx_L1_error) + __PYX_ERR(0, 293, __pyx_L1_error) } - __pyx_t_13 = __Pyx_dict_iterator(__pyx_v_attrib, 0, __pyx_n_s_keys, (&__pyx_t_15), (&__pyx_t_7)); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 288, __pyx_L1_error) + __pyx_t_13 = __Pyx_dict_iterator(__pyx_v_attrib, 0, __pyx_n_s_keys, (&__pyx_t_15), (&__pyx_t_7)); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 293, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = __pyx_t_13; @@ -3365,32 +3377,32 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py while (1) { __pyx_t_16 = __Pyx_dict_iter_next(__pyx_t_5, __pyx_t_15, &__pyx_t_14, &__pyx_t_13, NULL, NULL, __pyx_t_7); if (unlikely(__pyx_t_16 == 0)) break; - if (unlikely(__pyx_t_16 == -1)) __PYX_ERR(0, 288, __pyx_L1_error) + if (unlikely(__pyx_t_16 == -1)) __PYX_ERR(0, 293, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __Pyx_XDECREF_SET(__pyx_v_aname, __pyx_t_13); __pyx_t_13 = 0; - /* "lxml/html/clean.py":289 + /* "lxml/html/clean.py":294 * attrib = el.attrib * for aname in attrib.keys(): * if aname not in safe_attrs: # <<<<<<<<<<<<<< * del attrib[aname] * if self.javascript: */ - __pyx_t_11 = (__Pyx_PySet_ContainsTF(__pyx_v_aname, __pyx_v_safe_attrs, Py_NE)); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 289, __pyx_L1_error) + __pyx_t_11 = (__Pyx_PySet_ContainsTF(__pyx_v_aname, __pyx_v_safe_attrs, Py_NE)); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 294, __pyx_L1_error) __pyx_t_10 = (__pyx_t_11 != 0); if (__pyx_t_10) { - /* "lxml/html/clean.py":290 + /* "lxml/html/clean.py":295 * for aname in attrib.keys(): * if aname not in safe_attrs: * del attrib[aname] # <<<<<<<<<<<<<< * if self.javascript: * if not (self.safe_attrs_only and */ - if (unlikely(PyObject_DelItem(__pyx_v_attrib, __pyx_v_aname) < 0)) __PYX_ERR(0, 290, __pyx_L1_error) + if 
(unlikely(PyObject_DelItem(__pyx_v_attrib, __pyx_v_aname) < 0)) __PYX_ERR(0, 295, __pyx_L1_error) - /* "lxml/html/clean.py":289 + /* "lxml/html/clean.py":294 * attrib = el.attrib * for aname in attrib.keys(): * if aname not in safe_attrs: # <<<<<<<<<<<<<< @@ -3401,7 +3413,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py } __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":286 + /* "lxml/html/clean.py":291 * if self.safe_attrs_only: * safe_attrs = set(self.safe_attrs) * for el in doc.iter(etree.Element): # <<<<<<<<<<<<<< @@ -3411,7 +3423,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py } __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "lxml/html/clean.py":284 + /* "lxml/html/clean.py":289 * if self.scripts: * kill_tags.add('script') * if self.safe_attrs_only: # <<<<<<<<<<<<<< @@ -3420,29 +3432,29 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":291 + /* "lxml/html/clean.py":296 * if aname not in safe_attrs: * del attrib[aname] * if self.javascript: # <<<<<<<<<<<<<< * if not (self.safe_attrs_only and * self.safe_attrs == defs.safe_attrs): */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_javascript); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 291, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_javascript); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 296, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 291, __pyx_L1_error) + __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 296, __pyx_L1_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (__pyx_t_10) { - /* "lxml/html/clean.py":292 + /* "lxml/html/clean.py":297 * del attrib[aname] * if self.javascript: * if not (self.safe_attrs_only and # <<<<<<<<<<<<<< * self.safe_attrs == defs.safe_attrs): * # safe_attrs 
handles events attributes itself */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_safe_attrs_only); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 292, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_safe_attrs_only); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 297, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 292, __pyx_L1_error) + __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 297, __pyx_L1_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (__pyx_t_11) { } else { @@ -3450,29 +3462,29 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py goto __pyx_L27_bool_binop_done; } - /* "lxml/html/clean.py":293 + /* "lxml/html/clean.py":298 * if self.javascript: * if not (self.safe_attrs_only and * self.safe_attrs == defs.safe_attrs): # <<<<<<<<<<<<<< * # safe_attrs handles events attributes itself * for el in doc.iter(etree.Element): */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_safe_attrs); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 293, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_safe_attrs); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 298, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_defs); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 293, __pyx_L1_error) + __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_defs); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 298, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_safe_attrs); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 293, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_safe_attrs); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 298, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_5 = PyObject_RichCompare(__pyx_t_4, __pyx_t_13, Py_EQ); 
__Pyx_XGOTREF(__pyx_t_5); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 293, __pyx_L1_error) + __pyx_t_5 = PyObject_RichCompare(__pyx_t_4, __pyx_t_13, Py_EQ); __Pyx_XGOTREF(__pyx_t_5); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 298, __pyx_L1_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 293, __pyx_L1_error) + __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 298, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_10 = __pyx_t_11; __pyx_L27_bool_binop_done:; - /* "lxml/html/clean.py":292 + /* "lxml/html/clean.py":297 * del attrib[aname] * if self.javascript: * if not (self.safe_attrs_only and # <<<<<<<<<<<<<< @@ -3482,18 +3494,18 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py __pyx_t_11 = ((!__pyx_t_10) != 0); if (__pyx_t_11) { - /* "lxml/html/clean.py":295 + /* "lxml/html/clean.py":300 * self.safe_attrs == defs.safe_attrs): * # safe_attrs handles events attributes itself * for el in doc.iter(etree.Element): # <<<<<<<<<<<<<< * attrib = el.attrib * for aname in attrib.keys(): */ - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_doc, __pyx_n_s_iter); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 295, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_doc, __pyx_n_s_iter); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 300, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_n_s_etree); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 295, __pyx_L1_error) + __Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_n_s_etree); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 300, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_Element); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 295, __pyx_L1_error) + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_Element); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 
300, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __pyx_t_4 = NULL; @@ -3509,16 +3521,16 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py __pyx_t_5 = (__pyx_t_4) ? __Pyx_PyObject_Call2Args(__pyx_t_13, __pyx_t_4, __pyx_t_6) : __Pyx_PyObject_CallOneArg(__pyx_t_13, __pyx_t_6); __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 295, __pyx_L1_error) + if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 300, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; if (likely(PyList_CheckExact(__pyx_t_5)) || PyTuple_CheckExact(__pyx_t_5)) { __pyx_t_13 = __pyx_t_5; __Pyx_INCREF(__pyx_t_13); __pyx_t_8 = 0; __pyx_t_9 = NULL; } else { - __pyx_t_8 = -1; __pyx_t_13 = PyObject_GetIter(__pyx_t_5); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 295, __pyx_L1_error) + __pyx_t_8 = -1; __pyx_t_13 = PyObject_GetIter(__pyx_t_5); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 300, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_9 = Py_TYPE(__pyx_t_13)->tp_iternext; if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 295, __pyx_L1_error) + __pyx_t_9 = Py_TYPE(__pyx_t_13)->tp_iternext; if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 300, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; for (;;) { @@ -3526,17 +3538,17 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py if (likely(PyList_CheckExact(__pyx_t_13))) { if (__pyx_t_8 >= PyList_GET_SIZE(__pyx_t_13)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_5 = PyList_GET_ITEM(__pyx_t_13, __pyx_t_8); __Pyx_INCREF(__pyx_t_5); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 295, __pyx_L1_error) + __pyx_t_5 = PyList_GET_ITEM(__pyx_t_13, __pyx_t_8); __Pyx_INCREF(__pyx_t_5); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 300, __pyx_L1_error) #else - __pyx_t_5 = PySequence_ITEM(__pyx_t_13, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 295, 
__pyx_L1_error) + __pyx_t_5 = PySequence_ITEM(__pyx_t_13, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 300, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); #endif } else { if (__pyx_t_8 >= PyTuple_GET_SIZE(__pyx_t_13)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_5 = PyTuple_GET_ITEM(__pyx_t_13, __pyx_t_8); __Pyx_INCREF(__pyx_t_5); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 295, __pyx_L1_error) + __pyx_t_5 = PyTuple_GET_ITEM(__pyx_t_13, __pyx_t_8); __Pyx_INCREF(__pyx_t_5); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 300, __pyx_L1_error) #else - __pyx_t_5 = PySequence_ITEM(__pyx_t_13, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 295, __pyx_L1_error) + __pyx_t_5 = PySequence_ITEM(__pyx_t_13, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 300, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); #endif } @@ -3546,7 +3558,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 295, __pyx_L1_error) + else __PYX_ERR(0, 300, __pyx_L1_error) } break; } @@ -3555,19 +3567,19 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py __Pyx_XDECREF_SET(__pyx_v_el, __pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":296 + /* "lxml/html/clean.py":301 * # safe_attrs handles events attributes itself * for el in doc.iter(etree.Element): * attrib = el.attrib # <<<<<<<<<<<<<< * for aname in attrib.keys(): * if aname.startswith('on'): */ - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_attrib); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 296, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_attrib); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 301, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_XDECREF_SET(__pyx_v_attrib, __pyx_t_5); __pyx_t_5 = 0; - /* 
"lxml/html/clean.py":297 + /* "lxml/html/clean.py":302 * for el in doc.iter(etree.Element): * attrib = el.attrib * for aname in attrib.keys(): # <<<<<<<<<<<<<< @@ -3577,9 +3589,9 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py __pyx_t_15 = 0; if (unlikely(__pyx_v_attrib == Py_None)) { PyErr_Format(PyExc_AttributeError, "'NoneType' object has no attribute '%.30s'", "keys"); - __PYX_ERR(0, 297, __pyx_L1_error) + __PYX_ERR(0, 302, __pyx_L1_error) } - __pyx_t_6 = __Pyx_dict_iterator(__pyx_v_attrib, 0, __pyx_n_s_keys, (&__pyx_t_14), (&__pyx_t_7)); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 297, __pyx_L1_error) + __pyx_t_6 = __Pyx_dict_iterator(__pyx_v_attrib, 0, __pyx_n_s_keys, (&__pyx_t_14), (&__pyx_t_7)); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 302, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = __pyx_t_6; @@ -3587,19 +3599,19 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py while (1) { __pyx_t_16 = __Pyx_dict_iter_next(__pyx_t_5, __pyx_t_14, &__pyx_t_15, &__pyx_t_6, NULL, NULL, __pyx_t_7); if (unlikely(__pyx_t_16 == 0)) break; - if (unlikely(__pyx_t_16 == -1)) __PYX_ERR(0, 297, __pyx_L1_error) + if (unlikely(__pyx_t_16 == -1)) __PYX_ERR(0, 302, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __Pyx_XDECREF_SET(__pyx_v_aname, __pyx_t_6); __pyx_t_6 = 0; - /* "lxml/html/clean.py":298 + /* "lxml/html/clean.py":303 * attrib = el.attrib * for aname in attrib.keys(): * if aname.startswith('on'): # <<<<<<<<<<<<<< * del attrib[aname] * doc.rewrite_links(self._remove_javascript_link, */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_aname, __pyx_n_s_startswith); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 298, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_aname, __pyx_n_s_startswith); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 303, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __pyx_t_17 = NULL; if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_4))) { @@ -3613,23 +3625,23 @@ 
static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py } __pyx_t_6 = (__pyx_t_17) ? __Pyx_PyObject_Call2Args(__pyx_t_4, __pyx_t_17, __pyx_n_s_on) : __Pyx_PyObject_CallOneArg(__pyx_t_4, __pyx_n_s_on); __Pyx_XDECREF(__pyx_t_17); __pyx_t_17 = 0; - if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 298, __pyx_L1_error) + if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 303, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_6); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 298, __pyx_L1_error) + __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_6); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 303, __pyx_L1_error) __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; if (__pyx_t_11) { - /* "lxml/html/clean.py":299 + /* "lxml/html/clean.py":304 * for aname in attrib.keys(): * if aname.startswith('on'): * del attrib[aname] # <<<<<<<<<<<<<< * doc.rewrite_links(self._remove_javascript_link, * resolve_base_href=False) */ - if (unlikely(PyObject_DelItem(__pyx_v_attrib, __pyx_v_aname) < 0)) __PYX_ERR(0, 299, __pyx_L1_error) + if (unlikely(PyObject_DelItem(__pyx_v_attrib, __pyx_v_aname) < 0)) __PYX_ERR(0, 304, __pyx_L1_error) - /* "lxml/html/clean.py":298 + /* "lxml/html/clean.py":303 * attrib = el.attrib * for aname in attrib.keys(): * if aname.startswith('on'): # <<<<<<<<<<<<<< @@ -3640,7 +3652,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py } __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":295 + /* "lxml/html/clean.py":300 * self.safe_attrs == defs.safe_attrs): * # safe_attrs handles events attributes itself * for el in doc.iter(etree.Element): # <<<<<<<<<<<<<< @@ -3650,7 +3662,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py } __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - /* "lxml/html/clean.py":292 + /* "lxml/html/clean.py":297 * del attrib[aname] * if self.javascript: * if not (self.safe_attrs_only and # <<<<<<<<<<<<<< @@ -3659,70 
+3671,70 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":300 + /* "lxml/html/clean.py":305 * if aname.startswith('on'): * del attrib[aname] * doc.rewrite_links(self._remove_javascript_link, # <<<<<<<<<<<<<< * resolve_base_href=False) * # If we're deleting style then we don't have to remove JS links */ - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_doc, __pyx_n_s_rewrite_links); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 300, __pyx_L1_error) + __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_doc, __pyx_n_s_rewrite_links); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 305, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_remove_javascript_link); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 300, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_remove_javascript_link); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 305, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 300, __pyx_L1_error) + __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 305, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __Pyx_GIVEREF(__pyx_t_5); PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":301 + /* "lxml/html/clean.py":306 * del attrib[aname] * doc.rewrite_links(self._remove_javascript_link, * resolve_base_href=False) # <<<<<<<<<<<<<< * # If we're deleting style then we don't have to remove JS links * # from styles, otherwise... 
*/ - __pyx_t_5 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 301, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyDict_NewPresized(1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 306, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - if (PyDict_SetItem(__pyx_t_5, __pyx_n_s_resolve_base_href, Py_False) < 0) __PYX_ERR(0, 301, __pyx_L1_error) + if (PyDict_SetItem(__pyx_t_5, __pyx_n_s_resolve_base_href, Py_False) < 0) __PYX_ERR(0, 306, __pyx_L1_error) - /* "lxml/html/clean.py":300 + /* "lxml/html/clean.py":305 * if aname.startswith('on'): * del attrib[aname] * doc.rewrite_links(self._remove_javascript_link, # <<<<<<<<<<<<<< * resolve_base_href=False) * # If we're deleting style then we don't have to remove JS links */ - __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_13, __pyx_t_6, __pyx_t_5); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 300, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_13, __pyx_t_6, __pyx_t_5); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 305, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "lxml/html/clean.py":304 + /* "lxml/html/clean.py":309 * # If we're deleting style then we don't have to remove JS links * # from styles, otherwise... 
* if not self.inline_style: # <<<<<<<<<<<<<< * for el in _find_styled_elements(doc): * old = el.get('style') */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_inline_style); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 304, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_inline_style); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 309, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 304, __pyx_L1_error) + __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 309, __pyx_L1_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __pyx_t_10 = ((!__pyx_t_11) != 0); if (__pyx_t_10) { - /* "lxml/html/clean.py":305 + /* "lxml/html/clean.py":310 * # from styles, otherwise... * if not self.inline_style: * for el in _find_styled_elements(doc): # <<<<<<<<<<<<<< * old = el.get('style') - * new = _css_javascript_re.sub('', old) + * new = _replace_css_javascript('', old) */ - __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_find_styled_elements); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 305, __pyx_L1_error) + __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_find_styled_elements); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 310, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __pyx_t_6 = NULL; if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_5))) { @@ -3736,16 +3748,16 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py } __pyx_t_4 = (__pyx_t_6) ? 
__Pyx_PyObject_Call2Args(__pyx_t_5, __pyx_t_6, __pyx_v_doc) : __Pyx_PyObject_CallOneArg(__pyx_t_5, __pyx_v_doc); __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; - if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 305, __pyx_L1_error) + if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 310, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; if (likely(PyList_CheckExact(__pyx_t_4)) || PyTuple_CheckExact(__pyx_t_4)) { __pyx_t_5 = __pyx_t_4; __Pyx_INCREF(__pyx_t_5); __pyx_t_8 = 0; __pyx_t_9 = NULL; } else { - __pyx_t_8 = -1; __pyx_t_5 = PyObject_GetIter(__pyx_t_4); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 305, __pyx_L1_error) + __pyx_t_8 = -1; __pyx_t_5 = PyObject_GetIter(__pyx_t_4); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 310, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_9 = Py_TYPE(__pyx_t_5)->tp_iternext; if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 305, __pyx_L1_error) + __pyx_t_9 = Py_TYPE(__pyx_t_5)->tp_iternext; if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 310, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; for (;;) { @@ -3753,17 +3765,17 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py if (likely(PyList_CheckExact(__pyx_t_5))) { if (__pyx_t_8 >= PyList_GET_SIZE(__pyx_t_5)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_4 = PyList_GET_ITEM(__pyx_t_5, __pyx_t_8); __Pyx_INCREF(__pyx_t_4); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 305, __pyx_L1_error) + __pyx_t_4 = PyList_GET_ITEM(__pyx_t_5, __pyx_t_8); __Pyx_INCREF(__pyx_t_4); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 310, __pyx_L1_error) #else - __pyx_t_4 = PySequence_ITEM(__pyx_t_5, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 305, __pyx_L1_error) + __pyx_t_4 = PySequence_ITEM(__pyx_t_5, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 310, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); #endif } else { if (__pyx_t_8 >= PyTuple_GET_SIZE(__pyx_t_5)) break; #if CYTHON_ASSUME_SAFE_MACROS && 
!CYTHON_AVOID_BORROWED_REFS - __pyx_t_4 = PyTuple_GET_ITEM(__pyx_t_5, __pyx_t_8); __Pyx_INCREF(__pyx_t_4); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 305, __pyx_L1_error) + __pyx_t_4 = PyTuple_GET_ITEM(__pyx_t_5, __pyx_t_8); __Pyx_INCREF(__pyx_t_4); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 310, __pyx_L1_error) #else - __pyx_t_4 = PySequence_ITEM(__pyx_t_5, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 305, __pyx_L1_error) + __pyx_t_4 = PySequence_ITEM(__pyx_t_5, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 310, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); #endif } @@ -3773,7 +3785,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 305, __pyx_L1_error) + else __PYX_ERR(0, 310, __pyx_L1_error) } break; } @@ -3782,14 +3794,14 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py __Pyx_XDECREF_SET(__pyx_v_el, __pyx_t_4); __pyx_t_4 = 0; - /* "lxml/html/clean.py":306 + /* "lxml/html/clean.py":311 * if not self.inline_style: * for el in _find_styled_elements(doc): * old = el.get('style') # <<<<<<<<<<<<<< - * new = _css_javascript_re.sub('', old) - * new = _css_import_re.sub('', new) + * new = _replace_css_javascript('', old) + * new = _replace_css_import('', new) */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_get); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 306, __pyx_L1_error) + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_get); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 311, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); __pyx_t_13 = NULL; if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_6))) { @@ -3803,57 +3815,54 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py } __pyx_t_4 = (__pyx_t_13) ? 
__Pyx_PyObject_Call2Args(__pyx_t_6, __pyx_t_13, __pyx_n_s_style) : __Pyx_PyObject_CallOneArg(__pyx_t_6, __pyx_n_s_style); __Pyx_XDECREF(__pyx_t_13); __pyx_t_13 = 0; - if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 306, __pyx_L1_error) + if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 311, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_XDECREF_SET(__pyx_v_old, __pyx_t_4); __pyx_t_4 = 0; - /* "lxml/html/clean.py":307 + /* "lxml/html/clean.py":312 * for el in _find_styled_elements(doc): * old = el.get('style') - * new = _css_javascript_re.sub('', old) # <<<<<<<<<<<<<< - * new = _css_import_re.sub('', new) + * new = _replace_css_javascript('', old) # <<<<<<<<<<<<<< + * new = _replace_css_import('', new) * if self._has_sneaky_javascript(new): */ - __Pyx_GetModuleGlobalName(__pyx_t_6, __pyx_n_s_css_javascript_re); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 307, __pyx_L1_error) + __Pyx_GetModuleGlobalName(__pyx_t_6, __pyx_n_s_replace_css_javascript); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 312, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_sub); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 307, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = NULL; + __pyx_t_13 = NULL; __pyx_t_7 = 0; - if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_13))) { - __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_13); - if (likely(__pyx_t_6)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_13); - __Pyx_INCREF(__pyx_t_6); + if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_6))) { + __pyx_t_13 = PyMethod_GET_SELF(__pyx_t_6); + if (likely(__pyx_t_13)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); + __Pyx_INCREF(__pyx_t_13); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_13, function); + __Pyx_DECREF_SET(__pyx_t_6, function); __pyx_t_7 = 1; } } #if CYTHON_FAST_PYCALL - if (PyFunction_Check(__pyx_t_13)) { - PyObject *__pyx_temp[3] = {__pyx_t_6, 
__pyx_kp_s__2, __pyx_v_old}; - __pyx_t_4 = __Pyx_PyFunction_FastCall(__pyx_t_13, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 307, __pyx_L1_error) - __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; + if (PyFunction_Check(__pyx_t_6)) { + PyObject *__pyx_temp[3] = {__pyx_t_13, __pyx_kp_s__2, __pyx_v_old}; + __pyx_t_4 = __Pyx_PyFunction_FastCall(__pyx_t_6, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 312, __pyx_L1_error) + __Pyx_XDECREF(__pyx_t_13); __pyx_t_13 = 0; __Pyx_GOTREF(__pyx_t_4); } else #endif #if CYTHON_FAST_PYCCALL - if (__Pyx_PyFastCFunction_Check(__pyx_t_13)) { - PyObject *__pyx_temp[3] = {__pyx_t_6, __pyx_kp_s__2, __pyx_v_old}; - __pyx_t_4 = __Pyx_PyCFunction_FastCall(__pyx_t_13, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 307, __pyx_L1_error) - __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; + if (__Pyx_PyFastCFunction_Check(__pyx_t_6)) { + PyObject *__pyx_temp[3] = {__pyx_t_13, __pyx_kp_s__2, __pyx_v_old}; + __pyx_t_4 = __Pyx_PyCFunction_FastCall(__pyx_t_6, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 312, __pyx_L1_error) + __Pyx_XDECREF(__pyx_t_13); __pyx_t_13 = 0; __Pyx_GOTREF(__pyx_t_4); } else #endif { - __pyx_t_17 = PyTuple_New(2+__pyx_t_7); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 307, __pyx_L1_error) + __pyx_t_17 = PyTuple_New(2+__pyx_t_7); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 312, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_17); - if (__pyx_t_6) { - __Pyx_GIVEREF(__pyx_t_6); PyTuple_SET_ITEM(__pyx_t_17, 0, __pyx_t_6); __pyx_t_6 = NULL; + if (__pyx_t_13) { + __Pyx_GIVEREF(__pyx_t_13); PyTuple_SET_ITEM(__pyx_t_17, 0, __pyx_t_13); __pyx_t_13 = NULL; } __Pyx_INCREF(__pyx_kp_s__2); __Pyx_GIVEREF(__pyx_kp_s__2); @@ -3861,117 +3870,114 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py __Pyx_INCREF(__pyx_v_old); __Pyx_GIVEREF(__pyx_v_old); PyTuple_SET_ITEM(__pyx_t_17, 1+__pyx_t_7, __pyx_v_old); - __pyx_t_4 = 
__Pyx_PyObject_Call(__pyx_t_13, __pyx_t_17, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 307, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_t_17, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 312, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; } - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_XDECREF_SET(__pyx_v_new, __pyx_t_4); __pyx_t_4 = 0; - /* "lxml/html/clean.py":308 + /* "lxml/html/clean.py":313 * old = el.get('style') - * new = _css_javascript_re.sub('', old) - * new = _css_import_re.sub('', new) # <<<<<<<<<<<<<< + * new = _replace_css_javascript('', old) + * new = _replace_css_import('', new) # <<<<<<<<<<<<<< * if self._has_sneaky_javascript(new): * # Something tricky is going on... */ - __Pyx_GetModuleGlobalName(__pyx_t_13, __pyx_n_s_css_import_re); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 308, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_sub); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 308, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_17); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = NULL; + __Pyx_GetModuleGlobalName(__pyx_t_6, __pyx_n_s_replace_css_import); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 313, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_17 = NULL; __pyx_t_7 = 0; - if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_17))) { - __pyx_t_13 = PyMethod_GET_SELF(__pyx_t_17); - if (likely(__pyx_t_13)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_17); - __Pyx_INCREF(__pyx_t_13); + if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_6))) { + __pyx_t_17 = PyMethod_GET_SELF(__pyx_t_6); + if (likely(__pyx_t_17)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); + __Pyx_INCREF(__pyx_t_17); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_17, function); + __Pyx_DECREF_SET(__pyx_t_6, function); __pyx_t_7 = 1; } } #if CYTHON_FAST_PYCALL - if 
(PyFunction_Check(__pyx_t_17)) { - PyObject *__pyx_temp[3] = {__pyx_t_13, __pyx_kp_s__2, __pyx_v_new}; - __pyx_t_4 = __Pyx_PyFunction_FastCall(__pyx_t_17, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 308, __pyx_L1_error) - __Pyx_XDECREF(__pyx_t_13); __pyx_t_13 = 0; + if (PyFunction_Check(__pyx_t_6)) { + PyObject *__pyx_temp[3] = {__pyx_t_17, __pyx_kp_s__2, __pyx_v_new}; + __pyx_t_4 = __Pyx_PyFunction_FastCall(__pyx_t_6, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 313, __pyx_L1_error) + __Pyx_XDECREF(__pyx_t_17); __pyx_t_17 = 0; __Pyx_GOTREF(__pyx_t_4); } else #endif #if CYTHON_FAST_PYCCALL - if (__Pyx_PyFastCFunction_Check(__pyx_t_17)) { - PyObject *__pyx_temp[3] = {__pyx_t_13, __pyx_kp_s__2, __pyx_v_new}; - __pyx_t_4 = __Pyx_PyCFunction_FastCall(__pyx_t_17, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 308, __pyx_L1_error) - __Pyx_XDECREF(__pyx_t_13); __pyx_t_13 = 0; + if (__Pyx_PyFastCFunction_Check(__pyx_t_6)) { + PyObject *__pyx_temp[3] = {__pyx_t_17, __pyx_kp_s__2, __pyx_v_new}; + __pyx_t_4 = __Pyx_PyCFunction_FastCall(__pyx_t_6, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 313, __pyx_L1_error) + __Pyx_XDECREF(__pyx_t_17); __pyx_t_17 = 0; __Pyx_GOTREF(__pyx_t_4); } else #endif { - __pyx_t_6 = PyTuple_New(2+__pyx_t_7); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 308, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - if (__pyx_t_13) { - __Pyx_GIVEREF(__pyx_t_13); PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_t_13); __pyx_t_13 = NULL; + __pyx_t_13 = PyTuple_New(2+__pyx_t_7); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 313, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + if (__pyx_t_17) { + __Pyx_GIVEREF(__pyx_t_17); PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_t_17); __pyx_t_17 = NULL; } __Pyx_INCREF(__pyx_kp_s__2); __Pyx_GIVEREF(__pyx_kp_s__2); - PyTuple_SET_ITEM(__pyx_t_6, 0+__pyx_t_7, __pyx_kp_s__2); + PyTuple_SET_ITEM(__pyx_t_13, 0+__pyx_t_7, __pyx_kp_s__2); 
__Pyx_INCREF(__pyx_v_new); __Pyx_GIVEREF(__pyx_v_new); - PyTuple_SET_ITEM(__pyx_t_6, 1+__pyx_t_7, __pyx_v_new); - __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_17, __pyx_t_6, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 308, __pyx_L1_error) + PyTuple_SET_ITEM(__pyx_t_13, 1+__pyx_t_7, __pyx_v_new); + __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_t_13, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 313, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; } - __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_DECREF_SET(__pyx_v_new, __pyx_t_4); __pyx_t_4 = 0; - /* "lxml/html/clean.py":309 - * new = _css_javascript_re.sub('', old) - * new = _css_import_re.sub('', new) + /* "lxml/html/clean.py":314 + * new = _replace_css_javascript('', old) + * new = _replace_css_import('', new) * if self._has_sneaky_javascript(new): # <<<<<<<<<<<<<< * # Something tricky is going on... * del el.attrib['style'] */ - __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_has_sneaky_javascript); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 309, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_17); - __pyx_t_6 = NULL; - if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_17))) { - __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_17); - if (likely(__pyx_t_6)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_17); - __Pyx_INCREF(__pyx_t_6); + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_has_sneaky_javascript); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 314, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_13 = NULL; + if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_6))) { + __pyx_t_13 = PyMethod_GET_SELF(__pyx_t_6); + if (likely(__pyx_t_13)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); + __Pyx_INCREF(__pyx_t_13); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_17, function); + __Pyx_DECREF_SET(__pyx_t_6, function); } } - __pyx_t_4 = (__pyx_t_6) 
? __Pyx_PyObject_Call2Args(__pyx_t_17, __pyx_t_6, __pyx_v_new) : __Pyx_PyObject_CallOneArg(__pyx_t_17, __pyx_v_new); - __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; - if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 309, __pyx_L1_error) + __pyx_t_4 = (__pyx_t_13) ? __Pyx_PyObject_Call2Args(__pyx_t_6, __pyx_t_13, __pyx_v_new) : __Pyx_PyObject_CallOneArg(__pyx_t_6, __pyx_v_new); + __Pyx_XDECREF(__pyx_t_13); __pyx_t_13 = 0; + if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 314, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; - __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 309, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 314, __pyx_L1_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (__pyx_t_10) { - /* "lxml/html/clean.py":311 + /* "lxml/html/clean.py":316 * if self._has_sneaky_javascript(new): * # Something tricky is going on... * del el.attrib['style'] # <<<<<<<<<<<<<< * elif new != old: * el.set('style', new) */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_attrib); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 311, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_attrib); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 316, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - if (unlikely(PyObject_DelItem(__pyx_t_4, __pyx_n_s_style) < 0)) __PYX_ERR(0, 311, __pyx_L1_error) + if (unlikely(PyObject_DelItem(__pyx_t_4, __pyx_n_s_style) < 0)) __PYX_ERR(0, 316, __pyx_L1_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "lxml/html/clean.py":309 - * new = _css_javascript_re.sub('', old) - * new = _css_import_re.sub('', new) + /* "lxml/html/clean.py":314 + * new = _replace_css_javascript('', old) + * new = _replace_css_import('', new) * if self._has_sneaky_javascript(new): # <<<<<<<<<<<<<< * # Something tricky is going on... 
* del el.attrib['style'] @@ -3979,75 +3985,75 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py goto __pyx_L37; } - /* "lxml/html/clean.py":312 + /* "lxml/html/clean.py":317 * # Something tricky is going on... * del el.attrib['style'] * elif new != old: # <<<<<<<<<<<<<< * el.set('style', new) * if not self.style: */ - __pyx_t_4 = PyObject_RichCompare(__pyx_v_new, __pyx_v_old, Py_NE); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 312, __pyx_L1_error) - __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 312, __pyx_L1_error) + __pyx_t_4 = PyObject_RichCompare(__pyx_v_new, __pyx_v_old, Py_NE); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 317, __pyx_L1_error) + __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 317, __pyx_L1_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (__pyx_t_10) { - /* "lxml/html/clean.py":313 + /* "lxml/html/clean.py":318 * del el.attrib['style'] * elif new != old: * el.set('style', new) # <<<<<<<<<<<<<< * if not self.style: * for el in list(doc.iter('style')): */ - __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_set); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 313, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_17); - __pyx_t_6 = NULL; + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_set); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 318, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_13 = NULL; __pyx_t_7 = 0; - if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_17))) { - __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_17); - if (likely(__pyx_t_6)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_17); - __Pyx_INCREF(__pyx_t_6); + if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_6))) { + __pyx_t_13 = PyMethod_GET_SELF(__pyx_t_6); + if (likely(__pyx_t_13)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); + __Pyx_INCREF(__pyx_t_13); __Pyx_INCREF(function); - 
__Pyx_DECREF_SET(__pyx_t_17, function); + __Pyx_DECREF_SET(__pyx_t_6, function); __pyx_t_7 = 1; } } #if CYTHON_FAST_PYCALL - if (PyFunction_Check(__pyx_t_17)) { - PyObject *__pyx_temp[3] = {__pyx_t_6, __pyx_n_s_style, __pyx_v_new}; - __pyx_t_4 = __Pyx_PyFunction_FastCall(__pyx_t_17, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 313, __pyx_L1_error) - __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; + if (PyFunction_Check(__pyx_t_6)) { + PyObject *__pyx_temp[3] = {__pyx_t_13, __pyx_n_s_style, __pyx_v_new}; + __pyx_t_4 = __Pyx_PyFunction_FastCall(__pyx_t_6, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 318, __pyx_L1_error) + __Pyx_XDECREF(__pyx_t_13); __pyx_t_13 = 0; __Pyx_GOTREF(__pyx_t_4); } else #endif #if CYTHON_FAST_PYCCALL - if (__Pyx_PyFastCFunction_Check(__pyx_t_17)) { - PyObject *__pyx_temp[3] = {__pyx_t_6, __pyx_n_s_style, __pyx_v_new}; - __pyx_t_4 = __Pyx_PyCFunction_FastCall(__pyx_t_17, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 313, __pyx_L1_error) - __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; + if (__Pyx_PyFastCFunction_Check(__pyx_t_6)) { + PyObject *__pyx_temp[3] = {__pyx_t_13, __pyx_n_s_style, __pyx_v_new}; + __pyx_t_4 = __Pyx_PyCFunction_FastCall(__pyx_t_6, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 318, __pyx_L1_error) + __Pyx_XDECREF(__pyx_t_13); __pyx_t_13 = 0; __Pyx_GOTREF(__pyx_t_4); } else #endif { - __pyx_t_13 = PyTuple_New(2+__pyx_t_7); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 313, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - if (__pyx_t_6) { - __Pyx_GIVEREF(__pyx_t_6); PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_t_6); __pyx_t_6 = NULL; + __pyx_t_17 = PyTuple_New(2+__pyx_t_7); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 318, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + if (__pyx_t_13) { + __Pyx_GIVEREF(__pyx_t_13); PyTuple_SET_ITEM(__pyx_t_17, 0, __pyx_t_13); __pyx_t_13 = NULL; } __Pyx_INCREF(__pyx_n_s_style); 
__Pyx_GIVEREF(__pyx_n_s_style); - PyTuple_SET_ITEM(__pyx_t_13, 0+__pyx_t_7, __pyx_n_s_style); + PyTuple_SET_ITEM(__pyx_t_17, 0+__pyx_t_7, __pyx_n_s_style); __Pyx_INCREF(__pyx_v_new); __Pyx_GIVEREF(__pyx_v_new); - PyTuple_SET_ITEM(__pyx_t_13, 1+__pyx_t_7, __pyx_v_new); - __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_17, __pyx_t_13, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 313, __pyx_L1_error) + PyTuple_SET_ITEM(__pyx_t_17, 1+__pyx_t_7, __pyx_v_new); + __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_t_17, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 318, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; } - __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "lxml/html/clean.py":312 + /* "lxml/html/clean.py":317 * # Something tricky is going on... * del el.attrib['style'] * elif new != old: # <<<<<<<<<<<<<< @@ -4057,17 +4063,17 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py } __pyx_L37:; - /* "lxml/html/clean.py":305 + /* "lxml/html/clean.py":310 * # from styles, otherwise... * if not self.inline_style: * for el in _find_styled_elements(doc): # <<<<<<<<<<<<<< * old = el.get('style') - * new = _css_javascript_re.sub('', old) + * new = _replace_css_javascript('', old) */ } __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":304 + /* "lxml/html/clean.py":309 * # If we're deleting style then we don't have to remove JS links * # from styles, otherwise... 
* if not self.inline_style: # <<<<<<<<<<<<<< @@ -4076,45 +4082,45 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":314 + /* "lxml/html/clean.py":319 * elif new != old: * el.set('style', new) * if not self.style: # <<<<<<<<<<<<<< * for el in list(doc.iter('style')): * if el.get('type', '').lower().strip() == 'text/javascript': */ - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_style); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 314, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_style); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 319, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 314, __pyx_L1_error) + __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 319, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_11 = ((!__pyx_t_10) != 0); if (__pyx_t_11) { - /* "lxml/html/clean.py":315 + /* "lxml/html/clean.py":320 * el.set('style', new) * if not self.style: * for el in list(doc.iter('style')): # <<<<<<<<<<<<<< * if el.get('type', '').lower().strip() == 'text/javascript': * el.drop_tree() */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_doc, __pyx_n_s_iter); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 315, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_doc, __pyx_n_s_iter); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 320, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __pyx_t_17 = NULL; + __pyx_t_6 = NULL; if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_4))) { - __pyx_t_17 = PyMethod_GET_SELF(__pyx_t_4); - if (likely(__pyx_t_17)) { + __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_4); + if (likely(__pyx_t_6)) { PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_4); - __Pyx_INCREF(__pyx_t_17); + __Pyx_INCREF(__pyx_t_6); __Pyx_INCREF(function); __Pyx_DECREF_SET(__pyx_t_4, function); } } - __pyx_t_5 = (__pyx_t_17) ? 
__Pyx_PyObject_Call2Args(__pyx_t_4, __pyx_t_17, __pyx_n_s_style) : __Pyx_PyObject_CallOneArg(__pyx_t_4, __pyx_n_s_style); - __Pyx_XDECREF(__pyx_t_17); __pyx_t_17 = 0; - if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 315, __pyx_L1_error) + __pyx_t_5 = (__pyx_t_6) ? __Pyx_PyObject_Call2Args(__pyx_t_4, __pyx_t_6, __pyx_n_s_style) : __Pyx_PyObject_CallOneArg(__pyx_t_4, __pyx_n_s_style); + __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; + if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 320, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_4 = PySequence_List(__pyx_t_5); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 315, __pyx_L1_error) + __pyx_t_4 = PySequence_List(__pyx_t_5); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 320, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_5 = __pyx_t_4; __Pyx_INCREF(__pyx_t_5); __pyx_t_8 = 0; @@ -4122,102 +4128,102 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py for (;;) { if (__pyx_t_8 >= PyList_GET_SIZE(__pyx_t_5)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_4 = PyList_GET_ITEM(__pyx_t_5, __pyx_t_8); __Pyx_INCREF(__pyx_t_4); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 315, __pyx_L1_error) + __pyx_t_4 = PyList_GET_ITEM(__pyx_t_5, __pyx_t_8); __Pyx_INCREF(__pyx_t_4); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 320, __pyx_L1_error) #else - __pyx_t_4 = PySequence_ITEM(__pyx_t_5, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 315, __pyx_L1_error) + __pyx_t_4 = PySequence_ITEM(__pyx_t_5, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 320, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); #endif __Pyx_XDECREF_SET(__pyx_v_el, __pyx_t_4); __pyx_t_4 = 0; - /* "lxml/html/clean.py":316 + /* "lxml/html/clean.py":321 * if not self.style: * for el in list(doc.iter('style')): * if el.get('type', '').lower().strip() == 'text/javascript': # <<<<<<<<<<<<<< * el.drop_tree() * continue */ - __pyx_t_13 = 
__Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_get); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 316, __pyx_L1_error) + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_get); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 321, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + __pyx_t_13 = __Pyx_PyObject_Call(__pyx_t_17, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 321, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_13); - __pyx_t_6 = __Pyx_PyObject_Call(__pyx_t_13, __pyx_tuple__3, NULL); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 316, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_lower); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 321, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_lower); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 316, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); + __pyx_t_13 = NULL; + if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_17))) { + __pyx_t_13 = PyMethod_GET_SELF(__pyx_t_17); + if (likely(__pyx_t_13)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_17); + __Pyx_INCREF(__pyx_t_13); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_17, function); + } + } + __pyx_t_6 = (__pyx_t_13) ? 
__Pyx_PyObject_CallOneArg(__pyx_t_17, __pyx_t_13) : __Pyx_PyObject_CallNoArg(__pyx_t_17); + __Pyx_XDECREF(__pyx_t_13); __pyx_t_13 = 0; + if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 321, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_strip); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 321, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __pyx_t_6 = NULL; - if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_13))) { - __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_13); + if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_17))) { + __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_17); if (likely(__pyx_t_6)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_13); + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_17); __Pyx_INCREF(__pyx_t_6); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_13, function); + __Pyx_DECREF_SET(__pyx_t_17, function); } } - __pyx_t_17 = (__pyx_t_6) ? __Pyx_PyObject_CallOneArg(__pyx_t_13, __pyx_t_6) : __Pyx_PyObject_CallNoArg(__pyx_t_13); + __pyx_t_4 = (__pyx_t_6) ? __Pyx_PyObject_CallOneArg(__pyx_t_17, __pyx_t_6) : __Pyx_PyObject_CallNoArg(__pyx_t_17); __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; - if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 316, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_17); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_t_17, __pyx_n_s_strip); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 316, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; - __pyx_t_17 = NULL; - if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_13))) { - __pyx_t_17 = PyMethod_GET_SELF(__pyx_t_13); - if (likely(__pyx_t_17)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_13); - __Pyx_INCREF(__pyx_t_17); - __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_13, function); - } - } - __pyx_t_4 = (__pyx_t_17) ? 
__Pyx_PyObject_CallOneArg(__pyx_t_13, __pyx_t_17) : __Pyx_PyObject_CallNoArg(__pyx_t_13); - __Pyx_XDECREF(__pyx_t_17); __pyx_t_17 = 0; - if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 316, __pyx_L1_error) + if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 321, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_11 = (__Pyx_PyString_Equals(__pyx_t_4, __pyx_kp_s_text_javascript, Py_EQ)); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 316, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; + __pyx_t_11 = (__Pyx_PyString_Equals(__pyx_t_4, __pyx_kp_s_text_javascript, Py_EQ)); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 321, __pyx_L1_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (__pyx_t_11) { - /* "lxml/html/clean.py":317 + /* "lxml/html/clean.py":322 * for el in list(doc.iter('style')): * if el.get('type', '').lower().strip() == 'text/javascript': * el.drop_tree() # <<<<<<<<<<<<<< * continue * old = el.text or '' */ - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_drop_tree); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 317, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __pyx_t_17 = NULL; - if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_13))) { - __pyx_t_17 = PyMethod_GET_SELF(__pyx_t_13); - if (likely(__pyx_t_17)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_13); - __Pyx_INCREF(__pyx_t_17); + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_drop_tree); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 322, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + __pyx_t_6 = NULL; + if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_17))) { + __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_17); + if (likely(__pyx_t_6)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_17); + __Pyx_INCREF(__pyx_t_6); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_13, function); + __Pyx_DECREF_SET(__pyx_t_17, function); } } - __pyx_t_4 = (__pyx_t_17) ? 
__Pyx_PyObject_CallOneArg(__pyx_t_13, __pyx_t_17) : __Pyx_PyObject_CallNoArg(__pyx_t_13); - __Pyx_XDECREF(__pyx_t_17); __pyx_t_17 = 0; - if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 317, __pyx_L1_error) + __pyx_t_4 = (__pyx_t_6) ? __Pyx_PyObject_CallOneArg(__pyx_t_17, __pyx_t_6) : __Pyx_PyObject_CallNoArg(__pyx_t_17); + __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; + if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 322, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "lxml/html/clean.py":318 + /* "lxml/html/clean.py":323 * if el.get('type', '').lower().strip() == 'text/javascript': * el.drop_tree() * continue # <<<<<<<<<<<<<< * old = el.text or '' - * new = _css_javascript_re.sub('', old) + * new = _replace_css_javascript('', old) */ goto __pyx_L39_continue; - /* "lxml/html/clean.py":316 + /* "lxml/html/clean.py":321 * if not self.style: * for el in list(doc.iter('style')): * if el.get('type', '').lower().strip() == 'text/javascript': # <<<<<<<<<<<<<< @@ -4226,22 +4232,22 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":319 + /* "lxml/html/clean.py":324 * el.drop_tree() * continue * old = el.text or '' # <<<<<<<<<<<<<< - * new = _css_javascript_re.sub('', old) + * new = _replace_css_javascript('', old) * # The imported CSS can do anything; we just can't allow: */ - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_text); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 319, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_13); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 319, __pyx_L1_error) + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_text); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 324, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_17); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 324, 
__pyx_L1_error) if (!__pyx_t_11) { - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; } else { - __Pyx_INCREF(__pyx_t_13); - __pyx_t_4 = __pyx_t_13; - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __Pyx_INCREF(__pyx_t_17); + __pyx_t_4 = __pyx_t_17; + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; goto __pyx_L42_bool_binop_done; } __Pyx_INCREF(__pyx_kp_s__2); @@ -4250,25 +4256,22 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py __Pyx_XDECREF_SET(__pyx_v_old, __pyx_t_4); __pyx_t_4 = 0; - /* "lxml/html/clean.py":320 + /* "lxml/html/clean.py":325 * continue * old = el.text or '' - * new = _css_javascript_re.sub('', old) # <<<<<<<<<<<<<< + * new = _replace_css_javascript('', old) # <<<<<<<<<<<<<< * # The imported CSS can do anything; we just can't allow: - * new = _css_import_re.sub('', old) + * new = _replace_css_import('', new) */ - __Pyx_GetModuleGlobalName(__pyx_t_13, __pyx_n_s_css_javascript_re); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 320, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_sub); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 320, __pyx_L1_error) + __Pyx_GetModuleGlobalName(__pyx_t_17, __pyx_n_s_replace_css_javascript); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 325, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_17); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = NULL; + __pyx_t_6 = NULL; __pyx_t_7 = 0; if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_17))) { - __pyx_t_13 = PyMethod_GET_SELF(__pyx_t_17); - if (likely(__pyx_t_13)) { + __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_17); + if (likely(__pyx_t_6)) { PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_17); - __Pyx_INCREF(__pyx_t_13); + __Pyx_INCREF(__pyx_t_6); __Pyx_INCREF(function); __Pyx_DECREF_SET(__pyx_t_17, function); __pyx_t_7 = 1; @@ -4276,140 +4279,137 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py } #if CYTHON_FAST_PYCALL if 
(PyFunction_Check(__pyx_t_17)) { - PyObject *__pyx_temp[3] = {__pyx_t_13, __pyx_kp_s__2, __pyx_v_old}; - __pyx_t_4 = __Pyx_PyFunction_FastCall(__pyx_t_17, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 320, __pyx_L1_error) - __Pyx_XDECREF(__pyx_t_13); __pyx_t_13 = 0; + PyObject *__pyx_temp[3] = {__pyx_t_6, __pyx_kp_s__2, __pyx_v_old}; + __pyx_t_4 = __Pyx_PyFunction_FastCall(__pyx_t_17, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 325, __pyx_L1_error) + __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_GOTREF(__pyx_t_4); } else #endif #if CYTHON_FAST_PYCCALL if (__Pyx_PyFastCFunction_Check(__pyx_t_17)) { - PyObject *__pyx_temp[3] = {__pyx_t_13, __pyx_kp_s__2, __pyx_v_old}; - __pyx_t_4 = __Pyx_PyCFunction_FastCall(__pyx_t_17, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 320, __pyx_L1_error) - __Pyx_XDECREF(__pyx_t_13); __pyx_t_13 = 0; + PyObject *__pyx_temp[3] = {__pyx_t_6, __pyx_kp_s__2, __pyx_v_old}; + __pyx_t_4 = __Pyx_PyCFunction_FastCall(__pyx_t_17, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 325, __pyx_L1_error) + __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_GOTREF(__pyx_t_4); } else #endif { - __pyx_t_6 = PyTuple_New(2+__pyx_t_7); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 320, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - if (__pyx_t_13) { - __Pyx_GIVEREF(__pyx_t_13); PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_t_13); __pyx_t_13 = NULL; + __pyx_t_13 = PyTuple_New(2+__pyx_t_7); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 325, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); + if (__pyx_t_6) { + __Pyx_GIVEREF(__pyx_t_6); PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_t_6); __pyx_t_6 = NULL; } __Pyx_INCREF(__pyx_kp_s__2); __Pyx_GIVEREF(__pyx_kp_s__2); - PyTuple_SET_ITEM(__pyx_t_6, 0+__pyx_t_7, __pyx_kp_s__2); + PyTuple_SET_ITEM(__pyx_t_13, 0+__pyx_t_7, __pyx_kp_s__2); __Pyx_INCREF(__pyx_v_old); __Pyx_GIVEREF(__pyx_v_old); - PyTuple_SET_ITEM(__pyx_t_6, 1+__pyx_t_7, 
__pyx_v_old); - __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_17, __pyx_t_6, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 320, __pyx_L1_error) + PyTuple_SET_ITEM(__pyx_t_13, 1+__pyx_t_7, __pyx_v_old); + __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_17, __pyx_t_13, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 325, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; } __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; __Pyx_XDECREF_SET(__pyx_v_new, __pyx_t_4); __pyx_t_4 = 0; - /* "lxml/html/clean.py":322 - * new = _css_javascript_re.sub('', old) + /* "lxml/html/clean.py":327 + * new = _replace_css_javascript('', old) * # The imported CSS can do anything; we just can't allow: - * new = _css_import_re.sub('', old) # <<<<<<<<<<<<<< + * new = _replace_css_import('', new) # <<<<<<<<<<<<<< * if self._has_sneaky_javascript(new): * # Something tricky is going on... */ - __Pyx_GetModuleGlobalName(__pyx_t_17, __pyx_n_s_css_import_re); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 322, __pyx_L1_error) + __Pyx_GetModuleGlobalName(__pyx_t_17, __pyx_n_s_replace_css_import); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 327, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_17); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_17, __pyx_n_s_sub); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 322, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; - __pyx_t_17 = NULL; + __pyx_t_13 = NULL; __pyx_t_7 = 0; - if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_6))) { - __pyx_t_17 = PyMethod_GET_SELF(__pyx_t_6); - if (likely(__pyx_t_17)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); - __Pyx_INCREF(__pyx_t_17); + if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_17))) { + __pyx_t_13 = PyMethod_GET_SELF(__pyx_t_17); + if (likely(__pyx_t_13)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_17); + __Pyx_INCREF(__pyx_t_13); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_6, function); + 
__Pyx_DECREF_SET(__pyx_t_17, function); __pyx_t_7 = 1; } } #if CYTHON_FAST_PYCALL - if (PyFunction_Check(__pyx_t_6)) { - PyObject *__pyx_temp[3] = {__pyx_t_17, __pyx_kp_s__2, __pyx_v_old}; - __pyx_t_4 = __Pyx_PyFunction_FastCall(__pyx_t_6, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 322, __pyx_L1_error) - __Pyx_XDECREF(__pyx_t_17); __pyx_t_17 = 0; + if (PyFunction_Check(__pyx_t_17)) { + PyObject *__pyx_temp[3] = {__pyx_t_13, __pyx_kp_s__2, __pyx_v_new}; + __pyx_t_4 = __Pyx_PyFunction_FastCall(__pyx_t_17, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 327, __pyx_L1_error) + __Pyx_XDECREF(__pyx_t_13); __pyx_t_13 = 0; __Pyx_GOTREF(__pyx_t_4); } else #endif #if CYTHON_FAST_PYCCALL - if (__Pyx_PyFastCFunction_Check(__pyx_t_6)) { - PyObject *__pyx_temp[3] = {__pyx_t_17, __pyx_kp_s__2, __pyx_v_old}; - __pyx_t_4 = __Pyx_PyCFunction_FastCall(__pyx_t_6, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 322, __pyx_L1_error) - __Pyx_XDECREF(__pyx_t_17); __pyx_t_17 = 0; + if (__Pyx_PyFastCFunction_Check(__pyx_t_17)) { + PyObject *__pyx_temp[3] = {__pyx_t_13, __pyx_kp_s__2, __pyx_v_new}; + __pyx_t_4 = __Pyx_PyCFunction_FastCall(__pyx_t_17, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 327, __pyx_L1_error) + __Pyx_XDECREF(__pyx_t_13); __pyx_t_13 = 0; __Pyx_GOTREF(__pyx_t_4); } else #endif { - __pyx_t_13 = PyTuple_New(2+__pyx_t_7); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 322, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - if (__pyx_t_17) { - __Pyx_GIVEREF(__pyx_t_17); PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_t_17); __pyx_t_17 = NULL; + __pyx_t_6 = PyTuple_New(2+__pyx_t_7); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 327, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + if (__pyx_t_13) { + __Pyx_GIVEREF(__pyx_t_13); PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_t_13); __pyx_t_13 = NULL; } __Pyx_INCREF(__pyx_kp_s__2); __Pyx_GIVEREF(__pyx_kp_s__2); - PyTuple_SET_ITEM(__pyx_t_13, 
0+__pyx_t_7, __pyx_kp_s__2); - __Pyx_INCREF(__pyx_v_old); - __Pyx_GIVEREF(__pyx_v_old); - PyTuple_SET_ITEM(__pyx_t_13, 1+__pyx_t_7, __pyx_v_old); - __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_t_13, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 322, __pyx_L1_error) + PyTuple_SET_ITEM(__pyx_t_6, 0+__pyx_t_7, __pyx_kp_s__2); + __Pyx_INCREF(__pyx_v_new); + __Pyx_GIVEREF(__pyx_v_new); + PyTuple_SET_ITEM(__pyx_t_6, 1+__pyx_t_7, __pyx_v_new); + __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_17, __pyx_t_6, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 327, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; } - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; __Pyx_DECREF_SET(__pyx_v_new, __pyx_t_4); __pyx_t_4 = 0; - /* "lxml/html/clean.py":323 + /* "lxml/html/clean.py":328 * # The imported CSS can do anything; we just can't allow: - * new = _css_import_re.sub('', old) + * new = _replace_css_import('', new) * if self._has_sneaky_javascript(new): # <<<<<<<<<<<<<< * # Something tricky is going on... 
* el.text = '/[inserted by cython to avoid comment start]* deleted *[inserted by cython to avoid comment closer]/' */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_has_sneaky_javascript); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 323, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_13 = NULL; - if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_6))) { - __pyx_t_13 = PyMethod_GET_SELF(__pyx_t_6); - if (likely(__pyx_t_13)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); - __Pyx_INCREF(__pyx_t_13); + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_has_sneaky_javascript); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 328, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + __pyx_t_6 = NULL; + if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_17))) { + __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_17); + if (likely(__pyx_t_6)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_17); + __Pyx_INCREF(__pyx_t_6); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_6, function); + __Pyx_DECREF_SET(__pyx_t_17, function); } } - __pyx_t_4 = (__pyx_t_13) ? __Pyx_PyObject_Call2Args(__pyx_t_6, __pyx_t_13, __pyx_v_new) : __Pyx_PyObject_CallOneArg(__pyx_t_6, __pyx_v_new); - __Pyx_XDECREF(__pyx_t_13); __pyx_t_13 = 0; - if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 323, __pyx_L1_error) + __pyx_t_4 = (__pyx_t_6) ? 
__Pyx_PyObject_Call2Args(__pyx_t_17, __pyx_t_6, __pyx_v_new) : __Pyx_PyObject_CallOneArg(__pyx_t_17, __pyx_v_new); + __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; + if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 328, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 323, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; + __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 328, __pyx_L1_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (__pyx_t_11) { - /* "lxml/html/clean.py":325 + /* "lxml/html/clean.py":330 * if self._has_sneaky_javascript(new): * # Something tricky is going on... * el.text = '/[inserted by cython to avoid comment start]* deleted *[inserted by cython to avoid comment closer]/' # <<<<<<<<<<<<<< * elif new != old: * el.text = new */ - if (__Pyx_PyObject_SetAttrStr(__pyx_v_el, __pyx_n_s_text, __pyx_kp_s_deleted) < 0) __PYX_ERR(0, 325, __pyx_L1_error) + if (__Pyx_PyObject_SetAttrStr(__pyx_v_el, __pyx_n_s_text, __pyx_kp_s_deleted) < 0) __PYX_ERR(0, 330, __pyx_L1_error) - /* "lxml/html/clean.py":323 + /* "lxml/html/clean.py":328 * # The imported CSS can do anything; we just can't allow: - * new = _css_import_re.sub('', old) + * new = _replace_css_import('', new) * if self._has_sneaky_javascript(new): # <<<<<<<<<<<<<< * # Something tricky is going on... * el.text = '/[inserted by cython to avoid comment start]* deleted *[inserted by cython to avoid comment closer]/' @@ -4417,28 +4417,28 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py goto __pyx_L44; } - /* "lxml/html/clean.py":326 + /* "lxml/html/clean.py":331 * # Something tricky is going on... 
* el.text = '/[inserted by cython to avoid comment start]* deleted *[inserted by cython to avoid comment closer]/' * elif new != old: # <<<<<<<<<<<<<< * el.text = new * if self.comments: */ - __pyx_t_4 = PyObject_RichCompare(__pyx_v_new, __pyx_v_old, Py_NE); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 326, __pyx_L1_error) - __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 326, __pyx_L1_error) + __pyx_t_4 = PyObject_RichCompare(__pyx_v_new, __pyx_v_old, Py_NE); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 331, __pyx_L1_error) + __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 331, __pyx_L1_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (__pyx_t_11) { - /* "lxml/html/clean.py":327 + /* "lxml/html/clean.py":332 * el.text = '/[inserted by cython to avoid comment start]* deleted *[inserted by cython to avoid comment closer]/' * elif new != old: * el.text = new # <<<<<<<<<<<<<< * if self.comments: * kill_tags.add(etree.Comment) */ - if (__Pyx_PyObject_SetAttrStr(__pyx_v_el, __pyx_n_s_text, __pyx_v_new) < 0) __PYX_ERR(0, 327, __pyx_L1_error) + if (__Pyx_PyObject_SetAttrStr(__pyx_v_el, __pyx_n_s_text, __pyx_v_new) < 0) __PYX_ERR(0, 332, __pyx_L1_error) - /* "lxml/html/clean.py":326 + /* "lxml/html/clean.py":331 * # Something tricky is going on... 
* el.text = '/[inserted by cython to avoid comment start]* deleted *[inserted by cython to avoid comment closer]/' * elif new != old: # <<<<<<<<<<<<<< @@ -4448,7 +4448,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py } __pyx_L44:; - /* "lxml/html/clean.py":315 + /* "lxml/html/clean.py":320 * el.set('style', new) * if not self.style: * for el in list(doc.iter('style')): # <<<<<<<<<<<<<< @@ -4459,7 +4459,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py } __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":314 + /* "lxml/html/clean.py":319 * elif new != old: * el.set('style', new) * if not self.style: # <<<<<<<<<<<<<< @@ -4468,7 +4468,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":291 + /* "lxml/html/clean.py":296 * if aname not in safe_attrs: * del attrib[aname] * if self.javascript: # <<<<<<<<<<<<<< @@ -4477,35 +4477,35 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":328 + /* "lxml/html/clean.py":333 * elif new != old: * el.text = new * if self.comments: # <<<<<<<<<<<<<< * kill_tags.add(etree.Comment) * if self.processing_instructions: */ - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_comments); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 328, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_comments); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 333, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 328, __pyx_L1_error) + __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 333, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; if (__pyx_t_11) { - /* "lxml/html/clean.py":329 + /* "lxml/html/clean.py":334 * el.text = new * if self.comments: * kill_tags.add(etree.Comment) # 
<<<<<<<<<<<<<< * if self.processing_instructions: * kill_tags.add(etree.ProcessingInstruction) */ - __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_etree); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 329, __pyx_L1_error) + __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_etree); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 334, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_Comment); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 329, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_Comment); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 334, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_12 = PySet_Add(__pyx_v_kill_tags, __pyx_t_4); if (unlikely(__pyx_t_12 == ((int)-1))) __PYX_ERR(0, 329, __pyx_L1_error) + __pyx_t_12 = PySet_Add(__pyx_v_kill_tags, __pyx_t_4); if (unlikely(__pyx_t_12 == ((int)-1))) __PYX_ERR(0, 334, __pyx_L1_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - /* "lxml/html/clean.py":328 + /* "lxml/html/clean.py":333 * elif new != old: * el.text = new * if self.comments: # <<<<<<<<<<<<<< @@ -4514,35 +4514,35 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":330 + /* "lxml/html/clean.py":335 * if self.comments: * kill_tags.add(etree.Comment) * if self.processing_instructions: # <<<<<<<<<<<<<< * kill_tags.add(etree.ProcessingInstruction) * if self.style: */ - __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_processing_instructions); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 330, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_processing_instructions); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 335, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 330, __pyx_L1_error) + __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 335, 
__pyx_L1_error) __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; if (__pyx_t_11) { - /* "lxml/html/clean.py":331 + /* "lxml/html/clean.py":336 * kill_tags.add(etree.Comment) * if self.processing_instructions: * kill_tags.add(etree.ProcessingInstruction) # <<<<<<<<<<<<<< * if self.style: * kill_tags.add('style') */ - __Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_n_s_etree); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 331, __pyx_L1_error) + __Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_n_s_etree); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 336, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_ProcessingInstruction); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 331, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_ProcessingInstruction); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 336, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; - __pyx_t_12 = PySet_Add(__pyx_v_kill_tags, __pyx_t_5); if (unlikely(__pyx_t_12 == ((int)-1))) __PYX_ERR(0, 331, __pyx_L1_error) + __pyx_t_12 = PySet_Add(__pyx_v_kill_tags, __pyx_t_5); if (unlikely(__pyx_t_12 == ((int)-1))) __PYX_ERR(0, 336, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":330 + /* "lxml/html/clean.py":335 * if self.comments: * kill_tags.add(etree.Comment) * if self.processing_instructions: # <<<<<<<<<<<<<< @@ -4551,29 +4551,29 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":332 + /* "lxml/html/clean.py":337 * if self.processing_instructions: * kill_tags.add(etree.ProcessingInstruction) * if self.style: # <<<<<<<<<<<<<< * kill_tags.add('style') * if self.inline_style: */ - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_style); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 332, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_style); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 337, __pyx_L1_error) 
__Pyx_GOTREF(__pyx_t_5); - __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 332, __pyx_L1_error) + __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 337, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; if (__pyx_t_11) { - /* "lxml/html/clean.py":333 + /* "lxml/html/clean.py":338 * kill_tags.add(etree.ProcessingInstruction) * if self.style: * kill_tags.add('style') # <<<<<<<<<<<<<< * if self.inline_style: * etree.strip_attributes(doc, 'style') */ - __pyx_t_12 = PySet_Add(__pyx_v_kill_tags, __pyx_n_s_style); if (unlikely(__pyx_t_12 == ((int)-1))) __PYX_ERR(0, 333, __pyx_L1_error) + __pyx_t_12 = PySet_Add(__pyx_v_kill_tags, __pyx_n_s_style); if (unlikely(__pyx_t_12 == ((int)-1))) __PYX_ERR(0, 338, __pyx_L1_error) - /* "lxml/html/clean.py":332 + /* "lxml/html/clean.py":337 * if self.processing_instructions: * kill_tags.add(etree.ProcessingInstruction) * if self.style: # <<<<<<<<<<<<<< @@ -4582,79 +4582,79 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":334 + /* "lxml/html/clean.py":339 * if self.style: * kill_tags.add('style') * if self.inline_style: # <<<<<<<<<<<<<< * etree.strip_attributes(doc, 'style') * if self.links: */ - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_inline_style); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 334, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_inline_style); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 339, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 334, __pyx_L1_error) + __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 339, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; if (__pyx_t_11) { - /* "lxml/html/clean.py":335 + /* "lxml/html/clean.py":340 * kill_tags.add('style') * if self.inline_style: * 
etree.strip_attributes(doc, 'style') # <<<<<<<<<<<<<< * if self.links: * kill_tags.add('link') */ - __Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_n_s_etree); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 335, __pyx_L1_error) + __Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_n_s_etree); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 340, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_strip_attributes); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 335, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_strip_attributes); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 340, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __pyx_t_4 = NULL; __pyx_t_7 = 0; - if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_6))) { - __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_6); + if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_17))) { + __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_17); if (likely(__pyx_t_4)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_17); __Pyx_INCREF(__pyx_t_4); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_6, function); + __Pyx_DECREF_SET(__pyx_t_17, function); __pyx_t_7 = 1; } } #if CYTHON_FAST_PYCALL - if (PyFunction_Check(__pyx_t_6)) { + if (PyFunction_Check(__pyx_t_17)) { PyObject *__pyx_temp[3] = {__pyx_t_4, __pyx_v_doc, __pyx_n_s_style}; - __pyx_t_5 = __Pyx_PyFunction_FastCall(__pyx_t_6, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 335, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyFunction_FastCall(__pyx_t_17, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 340, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_GOTREF(__pyx_t_5); } else #endif #if CYTHON_FAST_PYCCALL - if (__Pyx_PyFastCFunction_Check(__pyx_t_6)) { + if (__Pyx_PyFastCFunction_Check(__pyx_t_17)) { PyObject *__pyx_temp[3] = {__pyx_t_4, 
__pyx_v_doc, __pyx_n_s_style}; - __pyx_t_5 = __Pyx_PyCFunction_FastCall(__pyx_t_6, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 335, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyCFunction_FastCall(__pyx_t_17, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 340, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_GOTREF(__pyx_t_5); } else #endif { - __pyx_t_13 = PyTuple_New(2+__pyx_t_7); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 335, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); + __pyx_t_6 = PyTuple_New(2+__pyx_t_7); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 340, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); if (__pyx_t_4) { - __Pyx_GIVEREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_t_4); __pyx_t_4 = NULL; + __Pyx_GIVEREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_t_4); __pyx_t_4 = NULL; } __Pyx_INCREF(__pyx_v_doc); __Pyx_GIVEREF(__pyx_v_doc); - PyTuple_SET_ITEM(__pyx_t_13, 0+__pyx_t_7, __pyx_v_doc); + PyTuple_SET_ITEM(__pyx_t_6, 0+__pyx_t_7, __pyx_v_doc); __Pyx_INCREF(__pyx_n_s_style); __Pyx_GIVEREF(__pyx_n_s_style); - PyTuple_SET_ITEM(__pyx_t_13, 1+__pyx_t_7, __pyx_n_s_style); - __pyx_t_5 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_t_13, NULL); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 335, __pyx_L1_error) + PyTuple_SET_ITEM(__pyx_t_6, 1+__pyx_t_7, __pyx_n_s_style); + __pyx_t_5 = __Pyx_PyObject_Call(__pyx_t_17, __pyx_t_6, NULL); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 340, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; } - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":334 + /* "lxml/html/clean.py":339 * if self.style: * kill_tags.add('style') * if self.inline_style: # <<<<<<<<<<<<<< @@ -4663,29 +4663,29 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":336 + /* 
"lxml/html/clean.py":341 * if self.inline_style: * etree.strip_attributes(doc, 'style') * if self.links: # <<<<<<<<<<<<<< * kill_tags.add('link') * elif self.style or self.javascript: */ - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_links); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 336, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_links); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 341, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 336, __pyx_L1_error) + __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 341, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; if (__pyx_t_11) { - /* "lxml/html/clean.py":337 + /* "lxml/html/clean.py":342 * etree.strip_attributes(doc, 'style') * if self.links: * kill_tags.add('link') # <<<<<<<<<<<<<< * elif self.style or self.javascript: * # We must get rid of included stylesheets if Javascript is not */ - __pyx_t_12 = PySet_Add(__pyx_v_kill_tags, __pyx_n_s_link); if (unlikely(__pyx_t_12 == ((int)-1))) __PYX_ERR(0, 337, __pyx_L1_error) + __pyx_t_12 = PySet_Add(__pyx_v_kill_tags, __pyx_n_s_link); if (unlikely(__pyx_t_12 == ((int)-1))) __PYX_ERR(0, 342, __pyx_L1_error) - /* "lxml/html/clean.py":336 + /* "lxml/html/clean.py":341 * if self.inline_style: * etree.strip_attributes(doc, 'style') * if self.links: # <<<<<<<<<<<<<< @@ -4695,161 +4695,161 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py goto __pyx_L49; } - /* "lxml/html/clean.py":338 + /* "lxml/html/clean.py":343 * if self.links: * kill_tags.add('link') * elif self.style or self.javascript: # <<<<<<<<<<<<<< * # We must get rid of included stylesheets if Javascript is not * # allowed, as you can put Javascript in them */ - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_style); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 338, __pyx_L1_error) + __pyx_t_5 = 
__Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_style); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 343, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 338, __pyx_L1_error) + __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 343, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; if (!__pyx_t_10) { } else { __pyx_t_11 = __pyx_t_10; goto __pyx_L50_bool_binop_done; } - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_javascript); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 338, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_javascript); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 343, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 338, __pyx_L1_error) + __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 343, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_11 = __pyx_t_10; __pyx_L50_bool_binop_done:; if (__pyx_t_11) { - /* "lxml/html/clean.py":341 + /* "lxml/html/clean.py":346 * # We must get rid of included stylesheets if Javascript is not * # allowed, as you can put Javascript in them * for el in list(doc.iter('link')): # <<<<<<<<<<<<<< * if 'stylesheet' in el.get('rel', '').lower(): * # Note this kills alternate stylesheets as well */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_doc, __pyx_n_s_iter); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 341, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_13 = NULL; - if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_6))) { - __pyx_t_13 = PyMethod_GET_SELF(__pyx_t_6); - if (likely(__pyx_t_13)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); - __Pyx_INCREF(__pyx_t_13); + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_v_doc, __pyx_n_s_iter); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 346, __pyx_L1_error) + 
__Pyx_GOTREF(__pyx_t_17); + __pyx_t_6 = NULL; + if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_17))) { + __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_17); + if (likely(__pyx_t_6)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_17); + __Pyx_INCREF(__pyx_t_6); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_6, function); + __Pyx_DECREF_SET(__pyx_t_17, function); } } - __pyx_t_5 = (__pyx_t_13) ? __Pyx_PyObject_Call2Args(__pyx_t_6, __pyx_t_13, __pyx_n_s_link) : __Pyx_PyObject_CallOneArg(__pyx_t_6, __pyx_n_s_link); - __Pyx_XDECREF(__pyx_t_13); __pyx_t_13 = 0; - if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 341, __pyx_L1_error) + __pyx_t_5 = (__pyx_t_6) ? __Pyx_PyObject_Call2Args(__pyx_t_17, __pyx_t_6, __pyx_n_s_link) : __Pyx_PyObject_CallOneArg(__pyx_t_17, __pyx_n_s_link); + __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; + if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 346, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = PySequence_List(__pyx_t_5); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 341, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; + __pyx_t_17 = PySequence_List(__pyx_t_5); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 346, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_5 = __pyx_t_6; __Pyx_INCREF(__pyx_t_5); __pyx_t_8 = 0; - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_5 = __pyx_t_17; __Pyx_INCREF(__pyx_t_5); __pyx_t_8 = 0; + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; for (;;) { if (__pyx_t_8 >= PyList_GET_SIZE(__pyx_t_5)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_6 = PyList_GET_ITEM(__pyx_t_5, __pyx_t_8); __Pyx_INCREF(__pyx_t_6); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 341, __pyx_L1_error) + __pyx_t_17 = PyList_GET_ITEM(__pyx_t_5, __pyx_t_8); __Pyx_INCREF(__pyx_t_17); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 346, __pyx_L1_error) #else - __pyx_t_6 = PySequence_ITEM(__pyx_t_5, __pyx_t_8); 
__pyx_t_8++; if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 341, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); + __pyx_t_17 = PySequence_ITEM(__pyx_t_5, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 346, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); #endif - __Pyx_XDECREF_SET(__pyx_v_el, __pyx_t_6); - __pyx_t_6 = 0; + __Pyx_XDECREF_SET(__pyx_v_el, __pyx_t_17); + __pyx_t_17 = 0; - /* "lxml/html/clean.py":342 + /* "lxml/html/clean.py":347 * # allowed, as you can put Javascript in them * for el in list(doc.iter('link')): * if 'stylesheet' in el.get('rel', '').lower(): # <<<<<<<<<<<<<< * # Note this kills alternate stylesheets as well * if not self.allow_element(el): */ - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_get); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 342, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_13, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 342, __pyx_L1_error) + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_get); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 347, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_4 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_tuple__4, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 347, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_lower); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 342, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_lower); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 347, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __pyx_t_4 = NULL; - if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_13))) { - __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_13); + if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_6))) { + __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_6); if (likely(__pyx_t_4)) { - PyObject* 
function = PyMethod_GET_FUNCTION(__pyx_t_13); + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); __Pyx_INCREF(__pyx_t_4); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_13, function); + __Pyx_DECREF_SET(__pyx_t_6, function); } } - __pyx_t_6 = (__pyx_t_4) ? __Pyx_PyObject_CallOneArg(__pyx_t_13, __pyx_t_4) : __Pyx_PyObject_CallNoArg(__pyx_t_13); + __pyx_t_17 = (__pyx_t_4) ? __Pyx_PyObject_CallOneArg(__pyx_t_6, __pyx_t_4) : __Pyx_PyObject_CallNoArg(__pyx_t_6); __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; - if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 342, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_11 = (__Pyx_PySequence_ContainsTF(__pyx_n_s_stylesheet, __pyx_t_6, Py_EQ)); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 342, __pyx_L1_error) + if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 347, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_11 = (__Pyx_PySequence_ContainsTF(__pyx_n_s_stylesheet, __pyx_t_17, Py_EQ)); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 347, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; __pyx_t_10 = (__pyx_t_11 != 0); if (__pyx_t_10) { - /* "lxml/html/clean.py":344 + /* "lxml/html/clean.py":349 * if 'stylesheet' in el.get('rel', '').lower(): * # Note this kills alternate stylesheets as well * if not self.allow_element(el): # <<<<<<<<<<<<<< * el.drop_tree() * if self.meta: */ - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_allow_element); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 344, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_allow_element); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 349, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); __pyx_t_4 = NULL; - if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_13))) { - __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_13); + if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_6))) { + __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_6); if 
(likely(__pyx_t_4)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_13); + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); __Pyx_INCREF(__pyx_t_4); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_13, function); + __Pyx_DECREF_SET(__pyx_t_6, function); } } - __pyx_t_6 = (__pyx_t_4) ? __Pyx_PyObject_Call2Args(__pyx_t_13, __pyx_t_4, __pyx_v_el) : __Pyx_PyObject_CallOneArg(__pyx_t_13, __pyx_v_el); + __pyx_t_17 = (__pyx_t_4) ? __Pyx_PyObject_Call2Args(__pyx_t_6, __pyx_t_4, __pyx_v_el) : __Pyx_PyObject_CallOneArg(__pyx_t_6, __pyx_v_el); __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; - if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 344, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_6); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 344, __pyx_L1_error) + if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 349, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_17); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 349, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; __pyx_t_11 = ((!__pyx_t_10) != 0); if (__pyx_t_11) { - /* "lxml/html/clean.py":345 + /* "lxml/html/clean.py":350 * # Note this kills alternate stylesheets as well * if not self.allow_element(el): * el.drop_tree() # <<<<<<<<<<<<<< * if self.meta: * kill_tags.add('meta') */ - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_drop_tree); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 345, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_drop_tree); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 350, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); __pyx_t_4 = NULL; - if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_13))) { - __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_13); + if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_6))) { + __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_6); if (likely(__pyx_t_4)) { - 
PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_13); + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); __Pyx_INCREF(__pyx_t_4); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_13, function); + __Pyx_DECREF_SET(__pyx_t_6, function); } } - __pyx_t_6 = (__pyx_t_4) ? __Pyx_PyObject_CallOneArg(__pyx_t_13, __pyx_t_4) : __Pyx_PyObject_CallNoArg(__pyx_t_13); + __pyx_t_17 = (__pyx_t_4) ? __Pyx_PyObject_CallOneArg(__pyx_t_6, __pyx_t_4) : __Pyx_PyObject_CallNoArg(__pyx_t_6); __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; - if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 345, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 350, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; - /* "lxml/html/clean.py":344 + /* "lxml/html/clean.py":349 * if 'stylesheet' in el.get('rel', '').lower(): * # Note this kills alternate stylesheets as well * if not self.allow_element(el): # <<<<<<<<<<<<<< @@ -4858,7 +4858,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":342 + /* "lxml/html/clean.py":347 * # allowed, as you can put Javascript in them * for el in list(doc.iter('link')): * if 'stylesheet' in el.get('rel', '').lower(): # <<<<<<<<<<<<<< @@ -4867,7 +4867,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":341 + /* "lxml/html/clean.py":346 * # We must get rid of included stylesheets if Javascript is not * # allowed, as you can put Javascript in them * for el in list(doc.iter('link')): # <<<<<<<<<<<<<< @@ -4877,7 +4877,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py } __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":338 + /* "lxml/html/clean.py":343 * if self.links: * kill_tags.add('link') * elif self.style or self.javascript: # <<<<<<<<<<<<<< @@ 
-4887,29 +4887,29 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py } __pyx_L49:; - /* "lxml/html/clean.py":346 + /* "lxml/html/clean.py":351 * if not self.allow_element(el): * el.drop_tree() * if self.meta: # <<<<<<<<<<<<<< * kill_tags.add('meta') * if self.page_structure: */ - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_meta); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 346, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_meta); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 351, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 346, __pyx_L1_error) + __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 351, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; if (__pyx_t_11) { - /* "lxml/html/clean.py":347 + /* "lxml/html/clean.py":352 * el.drop_tree() * if self.meta: * kill_tags.add('meta') # <<<<<<<<<<<<<< * if self.page_structure: * remove_tags.update(('head', 'html', 'title')) */ - __pyx_t_12 = PySet_Add(__pyx_v_kill_tags, __pyx_n_s_meta); if (unlikely(__pyx_t_12 == ((int)-1))) __PYX_ERR(0, 347, __pyx_L1_error) + __pyx_t_12 = PySet_Add(__pyx_v_kill_tags, __pyx_n_s_meta); if (unlikely(__pyx_t_12 == ((int)-1))) __PYX_ERR(0, 352, __pyx_L1_error) - /* "lxml/html/clean.py":346 + /* "lxml/html/clean.py":351 * if not self.allow_element(el): * el.drop_tree() * if self.meta: # <<<<<<<<<<<<<< @@ -4918,31 +4918,31 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":348 + /* "lxml/html/clean.py":353 * if self.meta: * kill_tags.add('meta') * if self.page_structure: # <<<<<<<<<<<<<< * remove_tags.update(('head', 'html', 'title')) * if self.embedded: */ - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_page_structure); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 348, __pyx_L1_error) + __pyx_t_5 = 
__Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_page_structure); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 353, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 348, __pyx_L1_error) + __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 353, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; if (__pyx_t_11) { - /* "lxml/html/clean.py":349 + /* "lxml/html/clean.py":354 * kill_tags.add('meta') * if self.page_structure: * remove_tags.update(('head', 'html', 'title')) # <<<<<<<<<<<<<< * if self.embedded: * # FIXME: is really embedded? */ - __pyx_t_5 = __Pyx_CallUnboundCMethod1(&__pyx_umethod_PySet_Type_update, __pyx_v_remove_tags, __pyx_tuple__5); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 349, __pyx_L1_error) + __pyx_t_5 = __Pyx_CallUnboundCMethod1(&__pyx_umethod_PySet_Type_update, __pyx_v_remove_tags, __pyx_tuple__5); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 354, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":348 + /* "lxml/html/clean.py":353 * if self.meta: * kill_tags.add('meta') * if self.page_structure: # <<<<<<<<<<<<<< @@ -4951,87 +4951,87 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":350 + /* "lxml/html/clean.py":355 * if self.page_structure: * remove_tags.update(('head', 'html', 'title')) * if self.embedded: # <<<<<<<<<<<<<< * # FIXME: is really embedded? 
* # We should get rid of any tags not inside ; */ - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_embedded); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 350, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_embedded); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 355, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 350, __pyx_L1_error) + __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 355, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; if (__pyx_t_11) { - /* "lxml/html/clean.py":354 + /* "lxml/html/clean.py":359 * # We should get rid of any tags not inside ; * # These are not really valid anyway. * for el in list(doc.iter('param')): # <<<<<<<<<<<<<< * parent = el.getparent() * while parent is not None and parent.tag not in ('applet', 'object'): */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_doc, __pyx_n_s_iter); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 354, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_13 = NULL; - if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_6))) { - __pyx_t_13 = PyMethod_GET_SELF(__pyx_t_6); - if (likely(__pyx_t_13)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); - __Pyx_INCREF(__pyx_t_13); + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_v_doc, __pyx_n_s_iter); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 359, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + __pyx_t_6 = NULL; + if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_17))) { + __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_17); + if (likely(__pyx_t_6)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_17); + __Pyx_INCREF(__pyx_t_6); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_6, function); + __Pyx_DECREF_SET(__pyx_t_17, function); } } - __pyx_t_5 = (__pyx_t_13) ? 
__Pyx_PyObject_Call2Args(__pyx_t_6, __pyx_t_13, __pyx_n_s_param) : __Pyx_PyObject_CallOneArg(__pyx_t_6, __pyx_n_s_param); - __Pyx_XDECREF(__pyx_t_13); __pyx_t_13 = 0; - if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 354, __pyx_L1_error) + __pyx_t_5 = (__pyx_t_6) ? __Pyx_PyObject_Call2Args(__pyx_t_17, __pyx_t_6, __pyx_n_s_param) : __Pyx_PyObject_CallOneArg(__pyx_t_17, __pyx_n_s_param); + __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; + if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 359, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = PySequence_List(__pyx_t_5); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 354, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; + __pyx_t_17 = PySequence_List(__pyx_t_5); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 359, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_5 = __pyx_t_6; __Pyx_INCREF(__pyx_t_5); __pyx_t_8 = 0; - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_5 = __pyx_t_17; __Pyx_INCREF(__pyx_t_5); __pyx_t_8 = 0; + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; for (;;) { if (__pyx_t_8 >= PyList_GET_SIZE(__pyx_t_5)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_6 = PyList_GET_ITEM(__pyx_t_5, __pyx_t_8); __Pyx_INCREF(__pyx_t_6); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 354, __pyx_L1_error) + __pyx_t_17 = PyList_GET_ITEM(__pyx_t_5, __pyx_t_8); __Pyx_INCREF(__pyx_t_17); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 359, __pyx_L1_error) #else - __pyx_t_6 = PySequence_ITEM(__pyx_t_5, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 354, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); + __pyx_t_17 = PySequence_ITEM(__pyx_t_5, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 359, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); #endif - __Pyx_XDECREF_SET(__pyx_v_el, __pyx_t_6); - __pyx_t_6 = 0; + __Pyx_XDECREF_SET(__pyx_v_el, __pyx_t_17); + __pyx_t_17 = 0; - /* 
"lxml/html/clean.py":355 + /* "lxml/html/clean.py":360 * # These are not really valid anyway. * for el in list(doc.iter('param')): * parent = el.getparent() # <<<<<<<<<<<<<< * while parent is not None and parent.tag not in ('applet', 'object'): * parent = parent.getparent() */ - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_getparent); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 355, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_getparent); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 360, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); __pyx_t_4 = NULL; - if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_13))) { - __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_13); + if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_6))) { + __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_6); if (likely(__pyx_t_4)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_13); + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); __Pyx_INCREF(__pyx_t_4); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_13, function); + __Pyx_DECREF_SET(__pyx_t_6, function); } } - __pyx_t_6 = (__pyx_t_4) ? __Pyx_PyObject_CallOneArg(__pyx_t_13, __pyx_t_4) : __Pyx_PyObject_CallNoArg(__pyx_t_13); + __pyx_t_17 = (__pyx_t_4) ? 
__Pyx_PyObject_CallOneArg(__pyx_t_6, __pyx_t_4) : __Pyx_PyObject_CallNoArg(__pyx_t_6); __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; - if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 355, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __Pyx_XDECREF_SET(__pyx_v_parent, __pyx_t_6); - __pyx_t_6 = 0; + if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 360, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_XDECREF_SET(__pyx_v_parent, __pyx_t_17); + __pyx_t_17 = 0; - /* "lxml/html/clean.py":356 + /* "lxml/html/clean.py":361 * for el in list(doc.iter('param')): * parent = el.getparent() * while parent is not None and parent.tag not in ('applet', 'object'): # <<<<<<<<<<<<<< @@ -5046,52 +5046,52 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py __pyx_t_11 = __pyx_t_18; goto __pyx_L63_bool_binop_done; } - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_parent, __pyx_n_s_tag); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 356, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_10 = (__Pyx_PyString_Equals(__pyx_t_6, __pyx_n_s_applet, Py_NE)); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 356, __pyx_L1_error) + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_v_parent, __pyx_n_s_tag); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 361, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + __pyx_t_10 = (__Pyx_PyString_Equals(__pyx_t_17, __pyx_n_s_applet, Py_NE)); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 361, __pyx_L1_error) if (__pyx_t_10) { } else { __pyx_t_18 = __pyx_t_10; goto __pyx_L65_bool_binop_done; } - __pyx_t_10 = (__Pyx_PyString_Equals(__pyx_t_6, __pyx_n_s_object, Py_NE)); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 356, __pyx_L1_error) + __pyx_t_10 = (__Pyx_PyString_Equals(__pyx_t_17, __pyx_n_s_object, Py_NE)); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 361, __pyx_L1_error) __pyx_t_18 = __pyx_t_10; __pyx_L65_bool_binop_done:; - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_17); 
__pyx_t_17 = 0; __pyx_t_10 = (__pyx_t_18 != 0); __pyx_t_11 = __pyx_t_10; __pyx_L63_bool_binop_done:; if (!__pyx_t_11) break; - /* "lxml/html/clean.py":357 + /* "lxml/html/clean.py":362 * parent = el.getparent() * while parent is not None and parent.tag not in ('applet', 'object'): * parent = parent.getparent() # <<<<<<<<<<<<<< * if parent is None: * el.drop_tree() */ - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_parent, __pyx_n_s_getparent); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 357, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_parent, __pyx_n_s_getparent); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 362, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); __pyx_t_4 = NULL; - if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_13))) { - __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_13); + if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_6))) { + __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_6); if (likely(__pyx_t_4)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_13); + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); __Pyx_INCREF(__pyx_t_4); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_13, function); + __Pyx_DECREF_SET(__pyx_t_6, function); } } - __pyx_t_6 = (__pyx_t_4) ? __Pyx_PyObject_CallOneArg(__pyx_t_13, __pyx_t_4) : __Pyx_PyObject_CallNoArg(__pyx_t_13); + __pyx_t_17 = (__pyx_t_4) ? 
__Pyx_PyObject_CallOneArg(__pyx_t_6, __pyx_t_4) : __Pyx_PyObject_CallNoArg(__pyx_t_6); __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; - if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 357, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __Pyx_DECREF_SET(__pyx_v_parent, __pyx_t_6); - __pyx_t_6 = 0; + if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 362, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_DECREF_SET(__pyx_v_parent, __pyx_t_17); + __pyx_t_17 = 0; } - /* "lxml/html/clean.py":358 + /* "lxml/html/clean.py":363 * while parent is not None and parent.tag not in ('applet', 'object'): * parent = parent.getparent() * if parent is None: # <<<<<<<<<<<<<< @@ -5102,33 +5102,33 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py __pyx_t_10 = (__pyx_t_11 != 0); if (__pyx_t_10) { - /* "lxml/html/clean.py":359 + /* "lxml/html/clean.py":364 * parent = parent.getparent() * if parent is None: * el.drop_tree() # <<<<<<<<<<<<<< * kill_tags.update(('applet',)) * # The alternate contents that are in an iframe are a good fallback: */ - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_drop_tree); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 359, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_drop_tree); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 364, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); __pyx_t_4 = NULL; - if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_13))) { - __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_13); + if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_6))) { + __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_6); if (likely(__pyx_t_4)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_13); + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); __Pyx_INCREF(__pyx_t_4); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_13, function); + __Pyx_DECREF_SET(__pyx_t_6, function); } } - __pyx_t_6 = 
(__pyx_t_4) ? __Pyx_PyObject_CallOneArg(__pyx_t_13, __pyx_t_4) : __Pyx_PyObject_CallNoArg(__pyx_t_13); + __pyx_t_17 = (__pyx_t_4) ? __Pyx_PyObject_CallOneArg(__pyx_t_6, __pyx_t_4) : __Pyx_PyObject_CallNoArg(__pyx_t_6); __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; - if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 359, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 364, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; - /* "lxml/html/clean.py":358 + /* "lxml/html/clean.py":363 * while parent is not None and parent.tag not in ('applet', 'object'): * parent = parent.getparent() * if parent is None: # <<<<<<<<<<<<<< @@ -5137,7 +5137,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":354 + /* "lxml/html/clean.py":359 * # We should get rid of any tags not inside ; * # These are not really valid anyway. 
* for el in list(doc.iter('param')): # <<<<<<<<<<<<<< @@ -5147,29 +5147,29 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py } __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":360 + /* "lxml/html/clean.py":365 * if parent is None: * el.drop_tree() * kill_tags.update(('applet',)) # <<<<<<<<<<<<<< * # The alternate contents that are in an iframe are a good fallback: * remove_tags.update(('iframe', 'embed', 'layer', 'object', 'param')) */ - __pyx_t_5 = __Pyx_CallUnboundCMethod1(&__pyx_umethod_PySet_Type_update, __pyx_v_kill_tags, __pyx_tuple__6); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 360, __pyx_L1_error) + __pyx_t_5 = __Pyx_CallUnboundCMethod1(&__pyx_umethod_PySet_Type_update, __pyx_v_kill_tags, __pyx_tuple__6); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 365, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":362 + /* "lxml/html/clean.py":367 * kill_tags.update(('applet',)) * # The alternate contents that are in an iframe are a good fallback: * remove_tags.update(('iframe', 'embed', 'layer', 'object', 'param')) # <<<<<<<<<<<<<< * if self.frames: * # FIXME: ideally we should look at the frame links, but */ - __pyx_t_5 = __Pyx_CallUnboundCMethod1(&__pyx_umethod_PySet_Type_update, __pyx_v_remove_tags, __pyx_tuple__7); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 362, __pyx_L1_error) + __pyx_t_5 = __Pyx_CallUnboundCMethod1(&__pyx_umethod_PySet_Type_update, __pyx_v_remove_tags, __pyx_tuple__7); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 367, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":350 + /* "lxml/html/clean.py":355 * if self.page_structure: * remove_tags.update(('head', 'html', 'title')) * if self.embedded: # <<<<<<<<<<<<<< @@ -5178,37 +5178,37 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":363 + /* "lxml/html/clean.py":368 * # The alternate 
contents that are in an iframe are a good fallback: * remove_tags.update(('iframe', 'embed', 'layer', 'object', 'param')) * if self.frames: # <<<<<<<<<<<<<< * # FIXME: ideally we should look at the frame links, but * # generally frames don't mix properly with an HTML */ - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_frames); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 363, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_frames); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 368, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 363, __pyx_L1_error) + __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 368, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; if (__pyx_t_10) { - /* "lxml/html/clean.py":367 + /* "lxml/html/clean.py":372 * # generally frames don't mix properly with an HTML * # fragment anyway. * kill_tags.update(defs.frame_tags) # <<<<<<<<<<<<<< * if self.forms: * remove_tags.add('form') */ - __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_defs); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 367, __pyx_L1_error) + __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_defs); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 372, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_frame_tags); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 367, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_frame_tags); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 372, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_5 = __Pyx_CallUnboundCMethod1(&__pyx_umethod_PySet_Type_update, __pyx_v_kill_tags, __pyx_t_6); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 367, __pyx_L1_error) + __pyx_t_5 = __Pyx_CallUnboundCMethod1(&__pyx_umethod_PySet_Type_update, __pyx_v_kill_tags, __pyx_t_17); if (unlikely(!__pyx_t_5)) 
__PYX_ERR(0, 372, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":363 + /* "lxml/html/clean.py":368 * # The alternate contents that are in an iframe are a good fallback: * remove_tags.update(('iframe', 'embed', 'layer', 'object', 'param')) * if self.frames: # <<<<<<<<<<<<<< @@ -5217,40 +5217,40 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":368 + /* "lxml/html/clean.py":373 * # fragment anyway. * kill_tags.update(defs.frame_tags) * if self.forms: # <<<<<<<<<<<<<< * remove_tags.add('form') * kill_tags.update(('button', 'input', 'select', 'textarea')) */ - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_forms); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 368, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_forms); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 373, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 368, __pyx_L1_error) + __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 373, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; if (__pyx_t_10) { - /* "lxml/html/clean.py":369 + /* "lxml/html/clean.py":374 * kill_tags.update(defs.frame_tags) * if self.forms: * remove_tags.add('form') # <<<<<<<<<<<<<< * kill_tags.update(('button', 'input', 'select', 'textarea')) * if self.annoying_tags: */ - __pyx_t_12 = PySet_Add(__pyx_v_remove_tags, __pyx_n_s_form); if (unlikely(__pyx_t_12 == ((int)-1))) __PYX_ERR(0, 369, __pyx_L1_error) + __pyx_t_12 = PySet_Add(__pyx_v_remove_tags, __pyx_n_s_form); if (unlikely(__pyx_t_12 == ((int)-1))) __PYX_ERR(0, 374, __pyx_L1_error) - /* "lxml/html/clean.py":370 + /* "lxml/html/clean.py":375 * if self.forms: * remove_tags.add('form') * kill_tags.update(('button', 
'input', 'select', 'textarea')) # <<<<<<<<<<<<<< * if self.annoying_tags: * remove_tags.update(('blink', 'marquee')) */ - __pyx_t_5 = __Pyx_CallUnboundCMethod1(&__pyx_umethod_PySet_Type_update, __pyx_v_kill_tags, __pyx_tuple__8); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 370, __pyx_L1_error) + __pyx_t_5 = __Pyx_CallUnboundCMethod1(&__pyx_umethod_PySet_Type_update, __pyx_v_kill_tags, __pyx_tuple__8); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 375, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":368 + /* "lxml/html/clean.py":373 * # fragment anyway. * kill_tags.update(defs.frame_tags) * if self.forms: # <<<<<<<<<<<<<< @@ -5259,31 +5259,31 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":371 + /* "lxml/html/clean.py":376 * remove_tags.add('form') * kill_tags.update(('button', 'input', 'select', 'textarea')) * if self.annoying_tags: # <<<<<<<<<<<<<< * remove_tags.update(('blink', 'marquee')) * */ - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_annoying_tags); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 371, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_annoying_tags); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 376, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 371, __pyx_L1_error) + __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 376, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; if (__pyx_t_10) { - /* "lxml/html/clean.py":372 + /* "lxml/html/clean.py":377 * kill_tags.update(('button', 'input', 'select', 'textarea')) * if self.annoying_tags: * remove_tags.update(('blink', 'marquee')) # <<<<<<<<<<<<<< * * _remove = [] */ - __pyx_t_5 = __Pyx_CallUnboundCMethod1(&__pyx_umethod_PySet_Type_update, __pyx_v_remove_tags, __pyx_tuple__9); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 372, 
__pyx_L1_error) + __pyx_t_5 = __Pyx_CallUnboundCMethod1(&__pyx_umethod_PySet_Type_update, __pyx_v_remove_tags, __pyx_tuple__9); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 377, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":371 + /* "lxml/html/clean.py":376 * remove_tags.add('form') * kill_tags.update(('button', 'input', 'select', 'textarea')) * if self.annoying_tags: # <<<<<<<<<<<<<< @@ -5292,89 +5292,89 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":374 + /* "lxml/html/clean.py":379 * remove_tags.update(('blink', 'marquee')) * * _remove = [] # <<<<<<<<<<<<<< * _kill = [] * for el in doc.iter(): */ - __pyx_t_5 = PyList_New(0); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 374, __pyx_L1_error) + __pyx_t_5 = PyList_New(0); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 379, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __pyx_v__remove = ((PyObject*)__pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":375 + /* "lxml/html/clean.py":380 * * _remove = [] * _kill = [] # <<<<<<<<<<<<<< * for el in doc.iter(): * if el.tag in kill_tags: */ - __pyx_t_5 = PyList_New(0); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 375, __pyx_L1_error) + __pyx_t_5 = PyList_New(0); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 380, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __pyx_v__kill = ((PyObject*)__pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":376 + /* "lxml/html/clean.py":381 * _remove = [] * _kill = [] * for el in doc.iter(): # <<<<<<<<<<<<<< * if el.tag in kill_tags: * if self.allow_element(el): */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_doc, __pyx_n_s_iter); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 376, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_13 = NULL; - if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_6))) { - __pyx_t_13 = PyMethod_GET_SELF(__pyx_t_6); - if (likely(__pyx_t_13)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); - __Pyx_INCREF(__pyx_t_13); 
+ __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_v_doc, __pyx_n_s_iter); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 381, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + __pyx_t_6 = NULL; + if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_17))) { + __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_17); + if (likely(__pyx_t_6)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_17); + __Pyx_INCREF(__pyx_t_6); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_6, function); + __Pyx_DECREF_SET(__pyx_t_17, function); } } - __pyx_t_5 = (__pyx_t_13) ? __Pyx_PyObject_CallOneArg(__pyx_t_6, __pyx_t_13) : __Pyx_PyObject_CallNoArg(__pyx_t_6); - __Pyx_XDECREF(__pyx_t_13); __pyx_t_13 = 0; - if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 376, __pyx_L1_error) + __pyx_t_5 = (__pyx_t_6) ? __Pyx_PyObject_CallOneArg(__pyx_t_17, __pyx_t_6) : __Pyx_PyObject_CallNoArg(__pyx_t_17); + __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; + if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 381, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; if (likely(PyList_CheckExact(__pyx_t_5)) || PyTuple_CheckExact(__pyx_t_5)) { - __pyx_t_6 = __pyx_t_5; __Pyx_INCREF(__pyx_t_6); __pyx_t_8 = 0; + __pyx_t_17 = __pyx_t_5; __Pyx_INCREF(__pyx_t_17); __pyx_t_8 = 0; __pyx_t_9 = NULL; } else { - __pyx_t_8 = -1; __pyx_t_6 = PyObject_GetIter(__pyx_t_5); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 376, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_9 = Py_TYPE(__pyx_t_6)->tp_iternext; if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 376, __pyx_L1_error) + __pyx_t_8 = -1; __pyx_t_17 = PyObject_GetIter(__pyx_t_5); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 381, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + __pyx_t_9 = Py_TYPE(__pyx_t_17)->tp_iternext; if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 381, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; for (;;) { if (likely(!__pyx_t_9)) { - if (likely(PyList_CheckExact(__pyx_t_6))) { - if (__pyx_t_8 >= PyList_GET_SIZE(__pyx_t_6)) break; 
+ if (likely(PyList_CheckExact(__pyx_t_17))) { + if (__pyx_t_8 >= PyList_GET_SIZE(__pyx_t_17)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_5 = PyList_GET_ITEM(__pyx_t_6, __pyx_t_8); __Pyx_INCREF(__pyx_t_5); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 376, __pyx_L1_error) + __pyx_t_5 = PyList_GET_ITEM(__pyx_t_17, __pyx_t_8); __Pyx_INCREF(__pyx_t_5); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 381, __pyx_L1_error) #else - __pyx_t_5 = PySequence_ITEM(__pyx_t_6, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 376, __pyx_L1_error) + __pyx_t_5 = PySequence_ITEM(__pyx_t_17, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 381, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); #endif } else { - if (__pyx_t_8 >= PyTuple_GET_SIZE(__pyx_t_6)) break; + if (__pyx_t_8 >= PyTuple_GET_SIZE(__pyx_t_17)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_5 = PyTuple_GET_ITEM(__pyx_t_6, __pyx_t_8); __Pyx_INCREF(__pyx_t_5); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 376, __pyx_L1_error) + __pyx_t_5 = PyTuple_GET_ITEM(__pyx_t_17, __pyx_t_8); __Pyx_INCREF(__pyx_t_5); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 381, __pyx_L1_error) #else - __pyx_t_5 = PySequence_ITEM(__pyx_t_6, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 376, __pyx_L1_error) + __pyx_t_5 = PySequence_ITEM(__pyx_t_17, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 381, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); #endif } } else { - __pyx_t_5 = __pyx_t_9(__pyx_t_6); + __pyx_t_5 = __pyx_t_9(__pyx_t_17); if (unlikely(!__pyx_t_5)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 376, __pyx_L1_error) + else __PYX_ERR(0, 381, __pyx_L1_error) } break; } @@ -5383,49 +5383,49 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py 
__Pyx_XDECREF_SET(__pyx_v_el, __pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":377 + /* "lxml/html/clean.py":382 * _kill = [] * for el in doc.iter(): * if el.tag in kill_tags: # <<<<<<<<<<<<<< * if self.allow_element(el): * continue */ - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_tag); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 377, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_tag); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 382, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_10 = (__Pyx_PySet_ContainsTF(__pyx_t_5, __pyx_v_kill_tags, Py_EQ)); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 377, __pyx_L1_error) + __pyx_t_10 = (__Pyx_PySet_ContainsTF(__pyx_t_5, __pyx_v_kill_tags, Py_EQ)); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 382, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_11 = (__pyx_t_10 != 0); if (__pyx_t_11) { - /* "lxml/html/clean.py":378 + /* "lxml/html/clean.py":383 * for el in doc.iter(): * if el.tag in kill_tags: * if self.allow_element(el): # <<<<<<<<<<<<<< * continue * _kill.append(el) */ - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_allow_element); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 378, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_allow_element); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 383, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); __pyx_t_4 = NULL; - if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_13))) { - __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_13); + if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_6))) { + __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_6); if (likely(__pyx_t_4)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_13); + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); __Pyx_INCREF(__pyx_t_4); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_13, function); + __Pyx_DECREF_SET(__pyx_t_6, function); } } - __pyx_t_5 = (__pyx_t_4) ? 
__Pyx_PyObject_Call2Args(__pyx_t_13, __pyx_t_4, __pyx_v_el) : __Pyx_PyObject_CallOneArg(__pyx_t_13, __pyx_v_el); + __pyx_t_5 = (__pyx_t_4) ? __Pyx_PyObject_Call2Args(__pyx_t_6, __pyx_t_4, __pyx_v_el) : __Pyx_PyObject_CallOneArg(__pyx_t_6, __pyx_v_el); __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; - if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 378, __pyx_L1_error) + if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 383, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 378, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 383, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; if (__pyx_t_11) { - /* "lxml/html/clean.py":379 + /* "lxml/html/clean.py":384 * if el.tag in kill_tags: * if self.allow_element(el): * continue # <<<<<<<<<<<<<< @@ -5434,7 +5434,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ goto __pyx_L71_continue; - /* "lxml/html/clean.py":378 + /* "lxml/html/clean.py":383 * for el in doc.iter(): * if el.tag in kill_tags: * if self.allow_element(el): # <<<<<<<<<<<<<< @@ -5443,16 +5443,16 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":380 + /* "lxml/html/clean.py":385 * if self.allow_element(el): * continue * _kill.append(el) # <<<<<<<<<<<<<< * elif el.tag in remove_tags: * if self.allow_element(el): */ - __pyx_t_12 = __Pyx_PyList_Append(__pyx_v__kill, __pyx_v_el); if (unlikely(__pyx_t_12 == ((int)-1))) __PYX_ERR(0, 380, __pyx_L1_error) + __pyx_t_12 = __Pyx_PyList_Append(__pyx_v__kill, __pyx_v_el); if (unlikely(__pyx_t_12 == ((int)-1))) __PYX_ERR(0, 385, __pyx_L1_error) - /* "lxml/html/clean.py":377 + /* "lxml/html/clean.py":382 * _kill = [] * for el in doc.iter(): * if el.tag in kill_tags: # <<<<<<<<<<<<<< @@ -5462,49 +5462,49 @@ static 
PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py goto __pyx_L73; } - /* "lxml/html/clean.py":381 + /* "lxml/html/clean.py":386 * continue * _kill.append(el) * elif el.tag in remove_tags: # <<<<<<<<<<<<<< * if self.allow_element(el): * continue */ - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_tag); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 381, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_tag); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 386, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_11 = (__Pyx_PySet_ContainsTF(__pyx_t_5, __pyx_v_remove_tags, Py_EQ)); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 381, __pyx_L1_error) + __pyx_t_11 = (__Pyx_PySet_ContainsTF(__pyx_t_5, __pyx_v_remove_tags, Py_EQ)); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 386, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_10 = (__pyx_t_11 != 0); if (__pyx_t_10) { - /* "lxml/html/clean.py":382 + /* "lxml/html/clean.py":387 * _kill.append(el) * elif el.tag in remove_tags: * if self.allow_element(el): # <<<<<<<<<<<<<< * continue * _remove.append(el) */ - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_allow_element); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 382, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_allow_element); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 387, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); __pyx_t_4 = NULL; - if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_13))) { - __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_13); + if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_6))) { + __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_6); if (likely(__pyx_t_4)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_13); + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); __Pyx_INCREF(__pyx_t_4); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_13, function); + __Pyx_DECREF_SET(__pyx_t_6, function); } } - __pyx_t_5 = 
(__pyx_t_4) ? __Pyx_PyObject_Call2Args(__pyx_t_13, __pyx_t_4, __pyx_v_el) : __Pyx_PyObject_CallOneArg(__pyx_t_13, __pyx_v_el); + __pyx_t_5 = (__pyx_t_4) ? __Pyx_PyObject_Call2Args(__pyx_t_6, __pyx_t_4, __pyx_v_el) : __Pyx_PyObject_CallOneArg(__pyx_t_6, __pyx_v_el); __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; - if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 382, __pyx_L1_error) + if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 387, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 382, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 387, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; if (__pyx_t_10) { - /* "lxml/html/clean.py":383 + /* "lxml/html/clean.py":388 * elif el.tag in remove_tags: * if self.allow_element(el): * continue # <<<<<<<<<<<<<< @@ -5513,7 +5513,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ goto __pyx_L71_continue; - /* "lxml/html/clean.py":382 + /* "lxml/html/clean.py":387 * _kill.append(el) * elif el.tag in remove_tags: * if self.allow_element(el): # <<<<<<<<<<<<<< @@ -5522,16 +5522,16 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":384 + /* "lxml/html/clean.py":389 * if self.allow_element(el): * continue * _remove.append(el) # <<<<<<<<<<<<<< * * if _remove and _remove[0] == doc: */ - __pyx_t_12 = __Pyx_PyList_Append(__pyx_v__remove, __pyx_v_el); if (unlikely(__pyx_t_12 == ((int)-1))) __PYX_ERR(0, 384, __pyx_L1_error) + __pyx_t_12 = __Pyx_PyList_Append(__pyx_v__remove, __pyx_v_el); if (unlikely(__pyx_t_12 == ((int)-1))) __PYX_ERR(0, 389, __pyx_L1_error) - /* "lxml/html/clean.py":381 + /* "lxml/html/clean.py":386 * continue * _kill.append(el) * elif el.tag in remove_tags: # <<<<<<<<<<<<<< @@ -5541,7 +5541,7 @@ static PyObject 
*__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py } __pyx_L73:; - /* "lxml/html/clean.py":376 + /* "lxml/html/clean.py":381 * _remove = [] * _kill = [] * for el in doc.iter(): # <<<<<<<<<<<<<< @@ -5550,9 +5550,9 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ __pyx_L71_continue:; } - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; - /* "lxml/html/clean.py":386 + /* "lxml/html/clean.py":391 * _remove.append(el) * * if _remove and _remove[0] == doc: # <<<<<<<<<<<<<< @@ -5565,67 +5565,67 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py __pyx_t_10 = __pyx_t_11; goto __pyx_L77_bool_binop_done; } - __pyx_t_6 = __Pyx_GetItemInt_List(__pyx_v__remove, 0, long, 1, __Pyx_PyInt_From_long, 1, 0, 1); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 386, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_5 = PyObject_RichCompare(__pyx_t_6, __pyx_v_doc, Py_EQ); __Pyx_XGOTREF(__pyx_t_5); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 386, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 386, __pyx_L1_error) + __pyx_t_17 = __Pyx_GetItemInt_List(__pyx_v__remove, 0, long, 1, __Pyx_PyInt_From_long, 1, 0, 1); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 391, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + __pyx_t_5 = PyObject_RichCompare(__pyx_t_17, __pyx_v_doc, Py_EQ); __Pyx_XGOTREF(__pyx_t_5); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 391, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; + __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 391, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_10 = __pyx_t_11; __pyx_L77_bool_binop_done:; if (__pyx_t_10) { - /* "lxml/html/clean.py":389 + /* "lxml/html/clean.py":394 * # We have to drop the parent-most tag, which we can't * # do. 
Instead we'll rewrite it: * el = _remove.pop(0) # <<<<<<<<<<<<<< * el.tag = 'div' * el.attrib.clear() */ - __pyx_t_5 = __Pyx_PyList_PopIndex(__pyx_v__remove, __pyx_int_0, 0, 1, Py_ssize_t, PyInt_FromSsize_t); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 389, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyList_PopIndex(__pyx_v__remove, __pyx_int_0, 0, 1, Py_ssize_t, PyInt_FromSsize_t); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 394, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_XDECREF_SET(__pyx_v_el, __pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":390 + /* "lxml/html/clean.py":395 * # do. Instead we'll rewrite it: * el = _remove.pop(0) * el.tag = 'div' # <<<<<<<<<<<<<< * el.attrib.clear() * elif _kill and _kill[0] == doc: */ - if (__Pyx_PyObject_SetAttrStr(__pyx_v_el, __pyx_n_s_tag, __pyx_n_s_div) < 0) __PYX_ERR(0, 390, __pyx_L1_error) + if (__Pyx_PyObject_SetAttrStr(__pyx_v_el, __pyx_n_s_tag, __pyx_n_s_div) < 0) __PYX_ERR(0, 395, __pyx_L1_error) - /* "lxml/html/clean.py":391 + /* "lxml/html/clean.py":396 * el = _remove.pop(0) * el.tag = 'div' * el.attrib.clear() # <<<<<<<<<<<<<< * elif _kill and _kill[0] == doc: * # We have to drop the parent-most element, which we can't */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_attrib); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 391, __pyx_L1_error) + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_attrib); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 396, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_17, __pyx_n_s_clear); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 396, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_clear); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 391, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - __pyx_t_6 = NULL; - if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_13))) { - __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_13); - if (likely(__pyx_t_6)) { - 
PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_13); - __Pyx_INCREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; + __pyx_t_17 = NULL; + if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_6))) { + __pyx_t_17 = PyMethod_GET_SELF(__pyx_t_6); + if (likely(__pyx_t_17)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); + __Pyx_INCREF(__pyx_t_17); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_13, function); + __Pyx_DECREF_SET(__pyx_t_6, function); } } - __pyx_t_5 = (__pyx_t_6) ? __Pyx_PyObject_CallOneArg(__pyx_t_13, __pyx_t_6) : __Pyx_PyObject_CallNoArg(__pyx_t_13); - __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; - if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 391, __pyx_L1_error) + __pyx_t_5 = (__pyx_t_17) ? __Pyx_PyObject_CallOneArg(__pyx_t_6, __pyx_t_17) : __Pyx_PyObject_CallNoArg(__pyx_t_6); + __Pyx_XDECREF(__pyx_t_17); __pyx_t_17 = 0; + if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 396, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":386 + /* "lxml/html/clean.py":391 * _remove.append(el) * * if _remove and _remove[0] == doc: # <<<<<<<<<<<<<< @@ -5635,7 +5635,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py goto __pyx_L76; } - /* "lxml/html/clean.py":392 + /* "lxml/html/clean.py":397 * el.tag = 'div' * el.attrib.clear() * elif _kill and _kill[0] == doc: # <<<<<<<<<<<<<< @@ -5648,51 +5648,51 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py __pyx_t_10 = __pyx_t_11; goto __pyx_L79_bool_binop_done; } - __pyx_t_5 = __Pyx_GetItemInt_List(__pyx_v__kill, 0, long, 1, __Pyx_PyInt_From_long, 1, 0, 1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 392, __pyx_L1_error) + __pyx_t_5 = __Pyx_GetItemInt_List(__pyx_v__kill, 0, long, 1, __Pyx_PyInt_From_long, 1, 0, 1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 397, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - 
__pyx_t_13 = PyObject_RichCompare(__pyx_t_5, __pyx_v_doc, Py_EQ); __Pyx_XGOTREF(__pyx_t_13); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 392, __pyx_L1_error) + __pyx_t_6 = PyObject_RichCompare(__pyx_t_5, __pyx_v_doc, Py_EQ); __Pyx_XGOTREF(__pyx_t_6); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 397, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_13); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 392, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_6); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 397, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __pyx_t_10 = __pyx_t_11; __pyx_L79_bool_binop_done:; if (__pyx_t_10) { - /* "lxml/html/clean.py":395 + /* "lxml/html/clean.py":400 * # We have to drop the parent-most element, which we can't * # do. Instead we'll clear it: * el = _kill.pop(0) # <<<<<<<<<<<<<< * if el.tag != 'html': * el.tag = 'div' */ - __pyx_t_13 = __Pyx_PyList_PopIndex(__pyx_v__kill, __pyx_int_0, 0, 1, Py_ssize_t, PyInt_FromSsize_t); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 395, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __Pyx_XDECREF_SET(__pyx_v_el, __pyx_t_13); - __pyx_t_13 = 0; + __pyx_t_6 = __Pyx_PyList_PopIndex(__pyx_v__kill, __pyx_int_0, 0, 1, Py_ssize_t, PyInt_FromSsize_t); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 400, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_XDECREF_SET(__pyx_v_el, __pyx_t_6); + __pyx_t_6 = 0; - /* "lxml/html/clean.py":396 + /* "lxml/html/clean.py":401 * # do. 
Instead we'll clear it: * el = _kill.pop(0) * if el.tag != 'html': # <<<<<<<<<<<<<< * el.tag = 'div' * el.clear() */ - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_tag); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 396, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __pyx_t_10 = (__Pyx_PyString_Equals(__pyx_t_13, __pyx_n_s_html, Py_NE)); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 396, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_tag); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 401, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_10 = (__Pyx_PyString_Equals(__pyx_t_6, __pyx_n_s_html, Py_NE)); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 401, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; if (__pyx_t_10) { - /* "lxml/html/clean.py":397 + /* "lxml/html/clean.py":402 * el = _kill.pop(0) * if el.tag != 'html': * el.tag = 'div' # <<<<<<<<<<<<<< * el.clear() * */ - if (__Pyx_PyObject_SetAttrStr(__pyx_v_el, __pyx_n_s_tag, __pyx_n_s_div) < 0) __PYX_ERR(0, 397, __pyx_L1_error) + if (__Pyx_PyObject_SetAttrStr(__pyx_v_el, __pyx_n_s_tag, __pyx_n_s_div) < 0) __PYX_ERR(0, 402, __pyx_L1_error) - /* "lxml/html/clean.py":396 + /* "lxml/html/clean.py":401 * # do. 
Instead we'll clear it: * el = _kill.pop(0) * if el.tag != 'html': # <<<<<<<<<<<<<< @@ -5701,33 +5701,33 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":398 + /* "lxml/html/clean.py":403 * if el.tag != 'html': * el.tag = 'div' * el.clear() # <<<<<<<<<<<<<< * * _kill.reverse() # start with innermost tags */ - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_clear); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 398, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_clear); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 403, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_6 = NULL; + __pyx_t_17 = NULL; if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_5))) { - __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_5); - if (likely(__pyx_t_6)) { + __pyx_t_17 = PyMethod_GET_SELF(__pyx_t_5); + if (likely(__pyx_t_17)) { PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5); - __Pyx_INCREF(__pyx_t_6); + __Pyx_INCREF(__pyx_t_17); __Pyx_INCREF(function); __Pyx_DECREF_SET(__pyx_t_5, function); } } - __pyx_t_13 = (__pyx_t_6) ? __Pyx_PyObject_CallOneArg(__pyx_t_5, __pyx_t_6) : __Pyx_PyObject_CallNoArg(__pyx_t_5); - __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; - if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 398, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); + __pyx_t_6 = (__pyx_t_17) ? 
__Pyx_PyObject_CallOneArg(__pyx_t_5, __pyx_t_17) : __Pyx_PyObject_CallNoArg(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_17); __pyx_t_17 = 0; + if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 403, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - /* "lxml/html/clean.py":392 + /* "lxml/html/clean.py":397 * el.tag = 'div' * el.attrib.clear() * elif _kill and _kill[0] == doc: # <<<<<<<<<<<<<< @@ -5737,61 +5737,61 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py } __pyx_L76:; - /* "lxml/html/clean.py":400 + /* "lxml/html/clean.py":405 * el.clear() * * _kill.reverse() # start with innermost tags # <<<<<<<<<<<<<< * for el in _kill: * el.drop_tree() */ - __pyx_t_12 = PyList_Reverse(__pyx_v__kill); if (unlikely(__pyx_t_12 == ((int)-1))) __PYX_ERR(0, 400, __pyx_L1_error) + __pyx_t_12 = PyList_Reverse(__pyx_v__kill); if (unlikely(__pyx_t_12 == ((int)-1))) __PYX_ERR(0, 405, __pyx_L1_error) - /* "lxml/html/clean.py":401 + /* "lxml/html/clean.py":406 * * _kill.reverse() # start with innermost tags * for el in _kill: # <<<<<<<<<<<<<< * el.drop_tree() * for el in _remove: */ - __pyx_t_13 = __pyx_v__kill; __Pyx_INCREF(__pyx_t_13); __pyx_t_8 = 0; + __pyx_t_6 = __pyx_v__kill; __Pyx_INCREF(__pyx_t_6); __pyx_t_8 = 0; for (;;) { - if (__pyx_t_8 >= PyList_GET_SIZE(__pyx_t_13)) break; + if (__pyx_t_8 >= PyList_GET_SIZE(__pyx_t_6)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_5 = PyList_GET_ITEM(__pyx_t_13, __pyx_t_8); __Pyx_INCREF(__pyx_t_5); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 401, __pyx_L1_error) + __pyx_t_5 = PyList_GET_ITEM(__pyx_t_6, __pyx_t_8); __Pyx_INCREF(__pyx_t_5); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 406, __pyx_L1_error) #else - __pyx_t_5 = PySequence_ITEM(__pyx_t_13, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 401, __pyx_L1_error) + __pyx_t_5 = 
PySequence_ITEM(__pyx_t_6, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 406, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); #endif __Pyx_XDECREF_SET(__pyx_v_el, __pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":402 + /* "lxml/html/clean.py":407 * _kill.reverse() # start with innermost tags * for el in _kill: * el.drop_tree() # <<<<<<<<<<<<<< * for el in _remove: * el.drop_tag() */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_drop_tree); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 402, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_drop_tree); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 407, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); __pyx_t_4 = NULL; - if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_6))) { - __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_6); + if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_17))) { + __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_17); if (likely(__pyx_t_4)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_17); __Pyx_INCREF(__pyx_t_4); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_6, function); + __Pyx_DECREF_SET(__pyx_t_17, function); } } - __pyx_t_5 = (__pyx_t_4) ? __Pyx_PyObject_CallOneArg(__pyx_t_6, __pyx_t_4) : __Pyx_PyObject_CallNoArg(__pyx_t_6); + __pyx_t_5 = (__pyx_t_4) ? 
__Pyx_PyObject_CallOneArg(__pyx_t_17, __pyx_t_4) : __Pyx_PyObject_CallNoArg(__pyx_t_17); __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; - if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 402, __pyx_L1_error) + if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 407, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":401 + /* "lxml/html/clean.py":406 * * _kill.reverse() # start with innermost tags * for el in _kill: # <<<<<<<<<<<<<< @@ -5799,54 +5799,54 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py * for el in _remove: */ } - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - /* "lxml/html/clean.py":403 + /* "lxml/html/clean.py":408 * for el in _kill: * el.drop_tree() * for el in _remove: # <<<<<<<<<<<<<< * el.drop_tag() * */ - __pyx_t_13 = __pyx_v__remove; __Pyx_INCREF(__pyx_t_13); __pyx_t_8 = 0; + __pyx_t_6 = __pyx_v__remove; __Pyx_INCREF(__pyx_t_6); __pyx_t_8 = 0; for (;;) { - if (__pyx_t_8 >= PyList_GET_SIZE(__pyx_t_13)) break; + if (__pyx_t_8 >= PyList_GET_SIZE(__pyx_t_6)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_5 = PyList_GET_ITEM(__pyx_t_13, __pyx_t_8); __Pyx_INCREF(__pyx_t_5); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 403, __pyx_L1_error) + __pyx_t_5 = PyList_GET_ITEM(__pyx_t_6, __pyx_t_8); __Pyx_INCREF(__pyx_t_5); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 408, __pyx_L1_error) #else - __pyx_t_5 = PySequence_ITEM(__pyx_t_13, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 403, __pyx_L1_error) + __pyx_t_5 = PySequence_ITEM(__pyx_t_6, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 408, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); #endif __Pyx_XDECREF_SET(__pyx_v_el, __pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":404 + /* "lxml/html/clean.py":409 * el.drop_tree() * for el in _remove: * 
el.drop_tag() # <<<<<<<<<<<<<< * * if self.remove_unknown_tags: */ - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_drop_tag); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 404, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); + __pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_drop_tag); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 409, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); __pyx_t_4 = NULL; - if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_6))) { - __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_6); + if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_17))) { + __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_17); if (likely(__pyx_t_4)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_17); __Pyx_INCREF(__pyx_t_4); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_6, function); + __Pyx_DECREF_SET(__pyx_t_17, function); } } - __pyx_t_5 = (__pyx_t_4) ? __Pyx_PyObject_CallOneArg(__pyx_t_6, __pyx_t_4) : __Pyx_PyObject_CallNoArg(__pyx_t_6); + __pyx_t_5 = (__pyx_t_4) ? 
__Pyx_PyObject_CallOneArg(__pyx_t_17, __pyx_t_4) : __Pyx_PyObject_CallNoArg(__pyx_t_17); __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; - if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 404, __pyx_L1_error) + if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 409, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":403 + /* "lxml/html/clean.py":408 * for el in _kill: * el.drop_tree() * for el in _remove: # <<<<<<<<<<<<<< @@ -5854,22 +5854,22 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py * */ } - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - /* "lxml/html/clean.py":406 + /* "lxml/html/clean.py":411 * el.drop_tag() * * if self.remove_unknown_tags: # <<<<<<<<<<<<<< * if allow_tags: * raise ValueError( */ - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_remove_unknown_tags); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 406, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_13); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 406, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_remove_unknown_tags); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 411, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_6); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 411, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; if (__pyx_t_10) { - /* "lxml/html/clean.py":407 + /* "lxml/html/clean.py":412 * * if self.remove_unknown_tags: * if allow_tags: # <<<<<<<<<<<<<< @@ -5879,20 +5879,20 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py __pyx_t_10 = (PySet_GET_SIZE(__pyx_v_allow_tags) != 0); if (unlikely(__pyx_t_10)) { - /* "lxml/html/clean.py":408 + /* "lxml/html/clean.py":413 * if self.remove_unknown_tags: * if 
allow_tags: * raise ValueError( # <<<<<<<<<<<<<< * "It does not make sense to pass in both allow_tags and remove_unknown_tags") * allow_tags = set(defs.tags) */ - __pyx_t_13 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 408, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __Pyx_Raise(__pyx_t_13, 0, 0, 0); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __PYX_ERR(0, 408, __pyx_L1_error) + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple_, NULL); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 413, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_Raise(__pyx_t_6, 0, 0, 0); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __PYX_ERR(0, 413, __pyx_L1_error) - /* "lxml/html/clean.py":407 + /* "lxml/html/clean.py":412 * * if self.remove_unknown_tags: * if allow_tags: # <<<<<<<<<<<<<< @@ -5901,25 +5901,25 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":410 + /* "lxml/html/clean.py":415 * raise ValueError( * "It does not make sense to pass in both allow_tags and remove_unknown_tags") * allow_tags = set(defs.tags) # <<<<<<<<<<<<<< * if allow_tags: * # make sure we do not remove comments/PIs if users want them (which is rare enough) */ - __Pyx_GetModuleGlobalName(__pyx_t_13, __pyx_n_s_defs); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 410, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_tags); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 410, __pyx_L1_error) + __Pyx_GetModuleGlobalName(__pyx_t_6, __pyx_n_s_defs); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 415, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_tags); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 415, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = PySet_New(__pyx_t_5); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 410, __pyx_L1_error) - 
__Pyx_GOTREF(__pyx_t_13); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_6 = PySet_New(__pyx_t_5); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 415, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __Pyx_DECREF_SET(__pyx_v_allow_tags, ((PyObject*)__pyx_t_13)); - __pyx_t_13 = 0; + __Pyx_DECREF_SET(__pyx_v_allow_tags, ((PyObject*)__pyx_t_6)); + __pyx_t_6 = 0; - /* "lxml/html/clean.py":406 + /* "lxml/html/clean.py":411 * el.drop_tag() * * if self.remove_unknown_tags: # <<<<<<<<<<<<<< @@ -5928,7 +5928,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":411 + /* "lxml/html/clean.py":416 * "It does not make sense to pass in both allow_tags and remove_unknown_tags") * allow_tags = set(defs.tags) * if allow_tags: # <<<<<<<<<<<<<< @@ -5938,36 +5938,36 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py __pyx_t_10 = (PySet_GET_SIZE(__pyx_v_allow_tags) != 0); if (__pyx_t_10) { - /* "lxml/html/clean.py":413 + /* "lxml/html/clean.py":418 * if allow_tags: * # make sure we do not remove comments/PIs if users want them (which is rare enough) * if not self.comments: # <<<<<<<<<<<<<< * allow_tags.add(etree.Comment) * if not self.processing_instructions: */ - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_comments); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 413, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_13); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 413, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_comments); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 418, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_6); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 418, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __pyx_t_11 = ((!__pyx_t_10) != 0); if (__pyx_t_11) { - /* 
"lxml/html/clean.py":414 + /* "lxml/html/clean.py":419 * # make sure we do not remove comments/PIs if users want them (which is rare enough) * if not self.comments: * allow_tags.add(etree.Comment) # <<<<<<<<<<<<<< * if not self.processing_instructions: * allow_tags.add(etree.ProcessingInstruction) */ - __Pyx_GetModuleGlobalName(__pyx_t_13, __pyx_n_s_etree); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 414, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_Comment); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 414, __pyx_L1_error) + __Pyx_GetModuleGlobalName(__pyx_t_6, __pyx_n_s_etree); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 419, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_Comment); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 419, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_12 = PySet_Add(__pyx_v_allow_tags, __pyx_t_5); if (unlikely(__pyx_t_12 == ((int)-1))) __PYX_ERR(0, 414, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_12 = PySet_Add(__pyx_v_allow_tags, __pyx_t_5); if (unlikely(__pyx_t_12 == ((int)-1))) __PYX_ERR(0, 419, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":413 + /* "lxml/html/clean.py":418 * if allow_tags: * # make sure we do not remove comments/PIs if users want them (which is rare enough) * if not self.comments: # <<<<<<<<<<<<<< @@ -5976,36 +5976,36 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":415 + /* "lxml/html/clean.py":420 * if not self.comments: * allow_tags.add(etree.Comment) * if not self.processing_instructions: # <<<<<<<<<<<<<< * allow_tags.add(etree.ProcessingInstruction) * */ - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_processing_instructions); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 415, __pyx_L1_error) + __pyx_t_5 = 
__Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_processing_instructions); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 420, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 415, __pyx_L1_error) + __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 420, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_10 = ((!__pyx_t_11) != 0); if (__pyx_t_10) { - /* "lxml/html/clean.py":416 + /* "lxml/html/clean.py":421 * allow_tags.add(etree.Comment) * if not self.processing_instructions: * allow_tags.add(etree.ProcessingInstruction) # <<<<<<<<<<<<<< * * bad = [] */ - __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_etree); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 416, __pyx_L1_error) + __Pyx_GetModuleGlobalName(__pyx_t_5, __pyx_n_s_etree); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 421, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_ProcessingInstruction); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 416, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_ProcessingInstruction); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 421, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_12 = PySet_Add(__pyx_v_allow_tags, __pyx_t_13); if (unlikely(__pyx_t_12 == ((int)-1))) __PYX_ERR(0, 416, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __pyx_t_12 = PySet_Add(__pyx_v_allow_tags, __pyx_t_6); if (unlikely(__pyx_t_12 == ((int)-1))) __PYX_ERR(0, 421, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; - /* "lxml/html/clean.py":415 + /* "lxml/html/clean.py":420 * if not self.comments: * allow_tags.add(etree.Comment) * if not self.processing_instructions: # <<<<<<<<<<<<<< @@ -6014,109 +6014,109 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":418 + 
/* "lxml/html/clean.py":423 * allow_tags.add(etree.ProcessingInstruction) * * bad = [] # <<<<<<<<<<<<<< * for el in doc.iter(): * if el.tag not in allow_tags: */ - __pyx_t_13 = PyList_New(0); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 418, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __pyx_v_bad = ((PyObject*)__pyx_t_13); - __pyx_t_13 = 0; + __pyx_t_6 = PyList_New(0); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 423, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_v_bad = ((PyObject*)__pyx_t_6); + __pyx_t_6 = 0; - /* "lxml/html/clean.py":419 + /* "lxml/html/clean.py":424 * * bad = [] * for el in doc.iter(): # <<<<<<<<<<<<<< * if el.tag not in allow_tags: * bad.append(el) */ - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_doc, __pyx_n_s_iter); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 419, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_doc, __pyx_n_s_iter); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 424, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_6 = NULL; + __pyx_t_17 = NULL; if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_5))) { - __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_5); - if (likely(__pyx_t_6)) { + __pyx_t_17 = PyMethod_GET_SELF(__pyx_t_5); + if (likely(__pyx_t_17)) { PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_5); - __Pyx_INCREF(__pyx_t_6); + __Pyx_INCREF(__pyx_t_17); __Pyx_INCREF(function); __Pyx_DECREF_SET(__pyx_t_5, function); } } - __pyx_t_13 = (__pyx_t_6) ? __Pyx_PyObject_CallOneArg(__pyx_t_5, __pyx_t_6) : __Pyx_PyObject_CallNoArg(__pyx_t_5); - __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; - if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 419, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); + __pyx_t_6 = (__pyx_t_17) ? 
__Pyx_PyObject_CallOneArg(__pyx_t_5, __pyx_t_17) : __Pyx_PyObject_CallNoArg(__pyx_t_5); + __Pyx_XDECREF(__pyx_t_17); __pyx_t_17 = 0; + if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 424, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - if (likely(PyList_CheckExact(__pyx_t_13)) || PyTuple_CheckExact(__pyx_t_13)) { - __pyx_t_5 = __pyx_t_13; __Pyx_INCREF(__pyx_t_5); __pyx_t_8 = 0; + if (likely(PyList_CheckExact(__pyx_t_6)) || PyTuple_CheckExact(__pyx_t_6)) { + __pyx_t_5 = __pyx_t_6; __Pyx_INCREF(__pyx_t_5); __pyx_t_8 = 0; __pyx_t_9 = NULL; } else { - __pyx_t_8 = -1; __pyx_t_5 = PyObject_GetIter(__pyx_t_13); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 419, __pyx_L1_error) + __pyx_t_8 = -1; __pyx_t_5 = PyObject_GetIter(__pyx_t_6); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 424, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_9 = Py_TYPE(__pyx_t_5)->tp_iternext; if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 419, __pyx_L1_error) + __pyx_t_9 = Py_TYPE(__pyx_t_5)->tp_iternext; if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 424, __pyx_L1_error) } - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; for (;;) { if (likely(!__pyx_t_9)) { if (likely(PyList_CheckExact(__pyx_t_5))) { if (__pyx_t_8 >= PyList_GET_SIZE(__pyx_t_5)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_13 = PyList_GET_ITEM(__pyx_t_5, __pyx_t_8); __Pyx_INCREF(__pyx_t_13); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 419, __pyx_L1_error) + __pyx_t_6 = PyList_GET_ITEM(__pyx_t_5, __pyx_t_8); __Pyx_INCREF(__pyx_t_6); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 424, __pyx_L1_error) #else - __pyx_t_13 = PySequence_ITEM(__pyx_t_5, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 419, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); + __pyx_t_6 = PySequence_ITEM(__pyx_t_5, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 424, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); #endif } else { if (__pyx_t_8 >= 
PyTuple_GET_SIZE(__pyx_t_5)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_13 = PyTuple_GET_ITEM(__pyx_t_5, __pyx_t_8); __Pyx_INCREF(__pyx_t_13); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 419, __pyx_L1_error) + __pyx_t_6 = PyTuple_GET_ITEM(__pyx_t_5, __pyx_t_8); __Pyx_INCREF(__pyx_t_6); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 424, __pyx_L1_error) #else - __pyx_t_13 = PySequence_ITEM(__pyx_t_5, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 419, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); + __pyx_t_6 = PySequence_ITEM(__pyx_t_5, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 424, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); #endif } } else { - __pyx_t_13 = __pyx_t_9(__pyx_t_5); - if (unlikely(!__pyx_t_13)) { + __pyx_t_6 = __pyx_t_9(__pyx_t_5); + if (unlikely(!__pyx_t_6)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 419, __pyx_L1_error) + else __PYX_ERR(0, 424, __pyx_L1_error) } break; } - __Pyx_GOTREF(__pyx_t_13); + __Pyx_GOTREF(__pyx_t_6); } - __Pyx_XDECREF_SET(__pyx_v_el, __pyx_t_13); - __pyx_t_13 = 0; + __Pyx_XDECREF_SET(__pyx_v_el, __pyx_t_6); + __pyx_t_6 = 0; - /* "lxml/html/clean.py":420 + /* "lxml/html/clean.py":425 * bad = [] * for el in doc.iter(): * if el.tag not in allow_tags: # <<<<<<<<<<<<<< * bad.append(el) * if bad: */ - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_tag); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 420, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __pyx_t_10 = (__Pyx_PySet_ContainsTF(__pyx_t_13, __pyx_v_allow_tags, Py_NE)); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 420, __pyx_L1_error) - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_tag); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 425, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __pyx_t_10 = 
(__Pyx_PySet_ContainsTF(__pyx_t_6, __pyx_v_allow_tags, Py_NE)); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 425, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __pyx_t_11 = (__pyx_t_10 != 0); if (__pyx_t_11) { - /* "lxml/html/clean.py":421 + /* "lxml/html/clean.py":426 * for el in doc.iter(): * if el.tag not in allow_tags: * bad.append(el) # <<<<<<<<<<<<<< * if bad: * if bad[0] is doc: */ - __pyx_t_12 = __Pyx_PyList_Append(__pyx_v_bad, __pyx_v_el); if (unlikely(__pyx_t_12 == ((int)-1))) __PYX_ERR(0, 421, __pyx_L1_error) + __pyx_t_12 = __Pyx_PyList_Append(__pyx_v_bad, __pyx_v_el); if (unlikely(__pyx_t_12 == ((int)-1))) __PYX_ERR(0, 426, __pyx_L1_error) - /* "lxml/html/clean.py":420 + /* "lxml/html/clean.py":425 * bad = [] * for el in doc.iter(): * if el.tag not in allow_tags: # <<<<<<<<<<<<<< @@ -6125,7 +6125,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":419 + /* "lxml/html/clean.py":424 * * bad = [] * for el in doc.iter(): # <<<<<<<<<<<<<< @@ -6135,7 +6135,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py } __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":422 + /* "lxml/html/clean.py":427 * if el.tag not in allow_tags: * bad.append(el) * if bad: # <<<<<<<<<<<<<< @@ -6145,71 +6145,71 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py __pyx_t_11 = (PyList_GET_SIZE(__pyx_v_bad) != 0); if (__pyx_t_11) { - /* "lxml/html/clean.py":423 + /* "lxml/html/clean.py":428 * bad.append(el) * if bad: * if bad[0] is doc: # <<<<<<<<<<<<<< * el = bad.pop(0) * el.tag = 'div' */ - __pyx_t_5 = __Pyx_GetItemInt_List(__pyx_v_bad, 0, long, 1, __Pyx_PyInt_From_long, 1, 0, 1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 423, __pyx_L1_error) + __pyx_t_5 = __Pyx_GetItemInt_List(__pyx_v_bad, 0, long, 1, __Pyx_PyInt_From_long, 1, 0, 1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 428, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __pyx_t_11 = 
(__pyx_t_5 == __pyx_v_doc); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_10 = (__pyx_t_11 != 0); if (__pyx_t_10) { - /* "lxml/html/clean.py":424 + /* "lxml/html/clean.py":429 * if bad: * if bad[0] is doc: * el = bad.pop(0) # <<<<<<<<<<<<<< * el.tag = 'div' * el.attrib.clear() */ - __pyx_t_5 = __Pyx_PyList_PopIndex(__pyx_v_bad, __pyx_int_0, 0, 1, Py_ssize_t, PyInt_FromSsize_t); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 424, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyList_PopIndex(__pyx_v_bad, __pyx_int_0, 0, 1, Py_ssize_t, PyInt_FromSsize_t); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 429, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_XDECREF_SET(__pyx_v_el, __pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":425 + /* "lxml/html/clean.py":430 * if bad[0] is doc: * el = bad.pop(0) * el.tag = 'div' # <<<<<<<<<<<<<< * el.attrib.clear() * for el in bad: */ - if (__Pyx_PyObject_SetAttrStr(__pyx_v_el, __pyx_n_s_tag, __pyx_n_s_div) < 0) __PYX_ERR(0, 425, __pyx_L1_error) + if (__Pyx_PyObject_SetAttrStr(__pyx_v_el, __pyx_n_s_tag, __pyx_n_s_div) < 0) __PYX_ERR(0, 430, __pyx_L1_error) - /* "lxml/html/clean.py":426 + /* "lxml/html/clean.py":431 * el = bad.pop(0) * el.tag = 'div' * el.attrib.clear() # <<<<<<<<<<<<<< * for el in bad: * el.drop_tag() */ - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_attrib); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 426, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); - __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_t_13, __pyx_n_s_clear); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 426, __pyx_L1_error) + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_attrib); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 431, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_13 = NULL; - if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_6))) { - __pyx_t_13 = PyMethod_GET_SELF(__pyx_t_6); - if (likely(__pyx_t_13)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); - __Pyx_INCREF(__pyx_t_13); + 
__pyx_t_17 = __Pyx_PyObject_GetAttrStr(__pyx_t_6, __pyx_n_s_clear); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 431, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_6 = NULL; + if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_17))) { + __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_17); + if (likely(__pyx_t_6)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_17); + __Pyx_INCREF(__pyx_t_6); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_6, function); + __Pyx_DECREF_SET(__pyx_t_17, function); } } - __pyx_t_5 = (__pyx_t_13) ? __Pyx_PyObject_CallOneArg(__pyx_t_6, __pyx_t_13) : __Pyx_PyObject_CallNoArg(__pyx_t_6); - __Pyx_XDECREF(__pyx_t_13); __pyx_t_13 = 0; - if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 426, __pyx_L1_error) + __pyx_t_5 = (__pyx_t_6) ? __Pyx_PyObject_CallOneArg(__pyx_t_17, __pyx_t_6) : __Pyx_PyObject_CallNoArg(__pyx_t_17); + __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; + if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 431, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":423 + /* "lxml/html/clean.py":428 * bad.append(el) * if bad: * if bad[0] is doc: # <<<<<<<<<<<<<< @@ -6218,7 +6218,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":427 + /* "lxml/html/clean.py":432 * el.tag = 'div' * el.attrib.clear() * for el in bad: # <<<<<<<<<<<<<< @@ -6229,41 +6229,41 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py for (;;) { if (__pyx_t_8 >= PyList_GET_SIZE(__pyx_t_5)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_6 = PyList_GET_ITEM(__pyx_t_5, __pyx_t_8); __Pyx_INCREF(__pyx_t_6); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 427, __pyx_L1_error) + __pyx_t_17 = PyList_GET_ITEM(__pyx_t_5, __pyx_t_8); __Pyx_INCREF(__pyx_t_17); __pyx_t_8++; if (unlikely(0 
< 0)) __PYX_ERR(0, 432, __pyx_L1_error) #else - __pyx_t_6 = PySequence_ITEM(__pyx_t_5, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 427, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); + __pyx_t_17 = PySequence_ITEM(__pyx_t_5, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 432, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); #endif - __Pyx_XDECREF_SET(__pyx_v_el, __pyx_t_6); - __pyx_t_6 = 0; + __Pyx_XDECREF_SET(__pyx_v_el, __pyx_t_17); + __pyx_t_17 = 0; - /* "lxml/html/clean.py":428 + /* "lxml/html/clean.py":433 * el.attrib.clear() * for el in bad: * el.drop_tag() # <<<<<<<<<<<<<< * if self.add_nofollow: * for el in _find_external_links(doc): */ - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_drop_tag); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 428, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_drop_tag); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 433, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); __pyx_t_4 = NULL; - if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_13))) { - __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_13); + if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_6))) { + __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_6); if (likely(__pyx_t_4)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_13); + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); __Pyx_INCREF(__pyx_t_4); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_13, function); + __Pyx_DECREF_SET(__pyx_t_6, function); } } - __pyx_t_6 = (__pyx_t_4) ? __Pyx_PyObject_CallOneArg(__pyx_t_13, __pyx_t_4) : __Pyx_PyObject_CallNoArg(__pyx_t_13); + __pyx_t_17 = (__pyx_t_4) ? 
__Pyx_PyObject_CallOneArg(__pyx_t_6, __pyx_t_4) : __Pyx_PyObject_CallNoArg(__pyx_t_6); __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; - if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 428, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 433, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; - /* "lxml/html/clean.py":427 + /* "lxml/html/clean.py":432 * el.tag = 'div' * el.attrib.clear() * for el in bad: # <<<<<<<<<<<<<< @@ -6273,7 +6273,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py } __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":422 + /* "lxml/html/clean.py":427 * if el.tag not in allow_tags: * bad.append(el) * if bad: # <<<<<<<<<<<<<< @@ -6282,7 +6282,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":411 + /* "lxml/html/clean.py":416 * "It does not make sense to pass in both allow_tags and remove_unknown_tags") * allow_tags = set(defs.tags) * if allow_tags: # <<<<<<<<<<<<<< @@ -6291,78 +6291,78 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":429 + /* "lxml/html/clean.py":434 * for el in bad: * el.drop_tag() * if self.add_nofollow: # <<<<<<<<<<<<<< * for el in _find_external_links(doc): * if not self.allow_follow(el): */ - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_add_nofollow); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 429, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_add_nofollow); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 434, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 429, __pyx_L1_error) + __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 434, 
__pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; if (__pyx_t_10) { - /* "lxml/html/clean.py":430 + /* "lxml/html/clean.py":435 * el.drop_tag() * if self.add_nofollow: * for el in _find_external_links(doc): # <<<<<<<<<<<<<< * if not self.allow_follow(el): * rel = el.get('rel') */ - __Pyx_GetModuleGlobalName(__pyx_t_6, __pyx_n_s_find_external_links); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 430, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_13 = NULL; - if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_6))) { - __pyx_t_13 = PyMethod_GET_SELF(__pyx_t_6); - if (likely(__pyx_t_13)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); - __Pyx_INCREF(__pyx_t_13); + __Pyx_GetModuleGlobalName(__pyx_t_17, __pyx_n_s_find_external_links); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 435, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + __pyx_t_6 = NULL; + if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_17))) { + __pyx_t_6 = PyMethod_GET_SELF(__pyx_t_17); + if (likely(__pyx_t_6)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_17); + __Pyx_INCREF(__pyx_t_6); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_6, function); + __Pyx_DECREF_SET(__pyx_t_17, function); } } - __pyx_t_5 = (__pyx_t_13) ? __Pyx_PyObject_Call2Args(__pyx_t_6, __pyx_t_13, __pyx_v_doc) : __Pyx_PyObject_CallOneArg(__pyx_t_6, __pyx_v_doc); - __Pyx_XDECREF(__pyx_t_13); __pyx_t_13 = 0; - if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 430, __pyx_L1_error) + __pyx_t_5 = (__pyx_t_6) ? 
__Pyx_PyObject_Call2Args(__pyx_t_17, __pyx_t_6, __pyx_v_doc) : __Pyx_PyObject_CallOneArg(__pyx_t_17, __pyx_v_doc); + __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; + if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 435, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; if (likely(PyList_CheckExact(__pyx_t_5)) || PyTuple_CheckExact(__pyx_t_5)) { - __pyx_t_6 = __pyx_t_5; __Pyx_INCREF(__pyx_t_6); __pyx_t_8 = 0; + __pyx_t_17 = __pyx_t_5; __Pyx_INCREF(__pyx_t_17); __pyx_t_8 = 0; __pyx_t_9 = NULL; } else { - __pyx_t_8 = -1; __pyx_t_6 = PyObject_GetIter(__pyx_t_5); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 430, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_6); - __pyx_t_9 = Py_TYPE(__pyx_t_6)->tp_iternext; if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 430, __pyx_L1_error) + __pyx_t_8 = -1; __pyx_t_17 = PyObject_GetIter(__pyx_t_5); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 435, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_17); + __pyx_t_9 = Py_TYPE(__pyx_t_17)->tp_iternext; if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 435, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; for (;;) { if (likely(!__pyx_t_9)) { - if (likely(PyList_CheckExact(__pyx_t_6))) { - if (__pyx_t_8 >= PyList_GET_SIZE(__pyx_t_6)) break; + if (likely(PyList_CheckExact(__pyx_t_17))) { + if (__pyx_t_8 >= PyList_GET_SIZE(__pyx_t_17)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_5 = PyList_GET_ITEM(__pyx_t_6, __pyx_t_8); __Pyx_INCREF(__pyx_t_5); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 430, __pyx_L1_error) + __pyx_t_5 = PyList_GET_ITEM(__pyx_t_17, __pyx_t_8); __Pyx_INCREF(__pyx_t_5); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 435, __pyx_L1_error) #else - __pyx_t_5 = PySequence_ITEM(__pyx_t_6, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 430, __pyx_L1_error) + __pyx_t_5 = PySequence_ITEM(__pyx_t_17, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 435, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); 
#endif } else { - if (__pyx_t_8 >= PyTuple_GET_SIZE(__pyx_t_6)) break; + if (__pyx_t_8 >= PyTuple_GET_SIZE(__pyx_t_17)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_5 = PyTuple_GET_ITEM(__pyx_t_6, __pyx_t_8); __Pyx_INCREF(__pyx_t_5); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 430, __pyx_L1_error) + __pyx_t_5 = PyTuple_GET_ITEM(__pyx_t_17, __pyx_t_8); __Pyx_INCREF(__pyx_t_5); __pyx_t_8++; if (unlikely(0 < 0)) __PYX_ERR(0, 435, __pyx_L1_error) #else - __pyx_t_5 = PySequence_ITEM(__pyx_t_6, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 430, __pyx_L1_error) + __pyx_t_5 = PySequence_ITEM(__pyx_t_17, __pyx_t_8); __pyx_t_8++; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 435, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); #endif } } else { - __pyx_t_5 = __pyx_t_9(__pyx_t_6); + __pyx_t_5 = __pyx_t_9(__pyx_t_17); if (unlikely(!__pyx_t_5)) { PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 430, __pyx_L1_error) + else __PYX_ERR(0, 435, __pyx_L1_error) } break; } @@ -6371,80 +6371,80 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py __Pyx_XDECREF_SET(__pyx_v_el, __pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":431 + /* "lxml/html/clean.py":436 * if self.add_nofollow: * for el in _find_external_links(doc): * if not self.allow_follow(el): # <<<<<<<<<<<<<< * rel = el.get('rel') * if rel: */ - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_allow_follow); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 431, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_allow_follow); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 436, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); __pyx_t_4 = NULL; - if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_13))) { - __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_13); + if (CYTHON_UNPACK_METHODS && 
likely(PyMethod_Check(__pyx_t_6))) { + __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_6); if (likely(__pyx_t_4)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_13); + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); __Pyx_INCREF(__pyx_t_4); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_13, function); + __Pyx_DECREF_SET(__pyx_t_6, function); } } - __pyx_t_5 = (__pyx_t_4) ? __Pyx_PyObject_Call2Args(__pyx_t_13, __pyx_t_4, __pyx_v_el) : __Pyx_PyObject_CallOneArg(__pyx_t_13, __pyx_v_el); + __pyx_t_5 = (__pyx_t_4) ? __Pyx_PyObject_Call2Args(__pyx_t_6, __pyx_t_4, __pyx_v_el) : __Pyx_PyObject_CallOneArg(__pyx_t_6, __pyx_v_el); __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; - if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 431, __pyx_L1_error) + if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 436, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; - __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 431, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __pyx_t_10 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 436, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_11 = ((!__pyx_t_10) != 0); if (__pyx_t_11) { - /* "lxml/html/clean.py":432 + /* "lxml/html/clean.py":437 * for el in _find_external_links(doc): * if not self.allow_follow(el): * rel = el.get('rel') # <<<<<<<<<<<<<< * if rel: * if ('nofollow' in rel */ - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_get); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 432, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_get); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 437, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); __pyx_t_4 = NULL; - if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_13))) { - __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_13); + if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_6))) { + __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_6); if 
(likely(__pyx_t_4)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_13); + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); __Pyx_INCREF(__pyx_t_4); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_13, function); + __Pyx_DECREF_SET(__pyx_t_6, function); } } - __pyx_t_5 = (__pyx_t_4) ? __Pyx_PyObject_Call2Args(__pyx_t_13, __pyx_t_4, __pyx_n_s_rel) : __Pyx_PyObject_CallOneArg(__pyx_t_13, __pyx_n_s_rel); + __pyx_t_5 = (__pyx_t_4) ? __Pyx_PyObject_Call2Args(__pyx_t_6, __pyx_t_4, __pyx_n_s_rel) : __Pyx_PyObject_CallOneArg(__pyx_t_6, __pyx_n_s_rel); __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; - if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 432, __pyx_L1_error) + if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 437, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_XDECREF_SET(__pyx_v_rel, __pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":433 + /* "lxml/html/clean.py":438 * if not self.allow_follow(el): * rel = el.get('rel') * if rel: # <<<<<<<<<<<<<< * if ('nofollow' in rel * and ' nofollow ' in (' %s ' % rel)): */ - __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_v_rel); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 433, __pyx_L1_error) + __pyx_t_11 = __Pyx_PyObject_IsTrue(__pyx_v_rel); if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 438, __pyx_L1_error) if (__pyx_t_11) { - /* "lxml/html/clean.py":434 + /* "lxml/html/clean.py":439 * rel = el.get('rel') * if rel: * if ('nofollow' in rel # <<<<<<<<<<<<<< * and ' nofollow ' in (' %s ' % rel)): * continue */ - __pyx_t_10 = (__Pyx_PySequence_ContainsTF(__pyx_n_s_nofollow, __pyx_v_rel, Py_EQ)); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 434, __pyx_L1_error) + __pyx_t_10 = (__Pyx_PySequence_ContainsTF(__pyx_n_s_nofollow, __pyx_v_rel, Py_EQ)); if (unlikely(__pyx_t_10 < 0)) __PYX_ERR(0, 439, __pyx_L1_error) __pyx_t_18 = (__pyx_t_10 != 0); if (__pyx_t_18) { } else { @@ -6452,22 +6452,22 @@ static PyObject 
*__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py goto __pyx_L104_bool_binop_done; } - /* "lxml/html/clean.py":435 + /* "lxml/html/clean.py":440 * if rel: * if ('nofollow' in rel * and ' nofollow ' in (' %s ' % rel)): # <<<<<<<<<<<<<< * continue * rel = '%s nofollow' % rel */ - __pyx_t_5 = __Pyx_PyString_FormatSafe(__pyx_kp_s_s, __pyx_v_rel); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 435, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyString_FormatSafe(__pyx_kp_s_s, __pyx_v_rel); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 440, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_18 = (__Pyx_PySequence_ContainsTF(__pyx_kp_s_nofollow_2, __pyx_t_5, Py_EQ)); if (unlikely(__pyx_t_18 < 0)) __PYX_ERR(0, 435, __pyx_L1_error) + __pyx_t_18 = (__Pyx_PySequence_ContainsTF(__pyx_kp_s_nofollow_2, __pyx_t_5, Py_EQ)); if (unlikely(__pyx_t_18 < 0)) __PYX_ERR(0, 440, __pyx_L1_error) __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_10 = (__pyx_t_18 != 0); __pyx_t_11 = __pyx_t_10; __pyx_L104_bool_binop_done:; - /* "lxml/html/clean.py":434 + /* "lxml/html/clean.py":439 * rel = el.get('rel') * if rel: * if ('nofollow' in rel # <<<<<<<<<<<<<< @@ -6476,7 +6476,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ if (__pyx_t_11) { - /* "lxml/html/clean.py":436 + /* "lxml/html/clean.py":441 * if ('nofollow' in rel * and ' nofollow ' in (' %s ' % rel)): * continue # <<<<<<<<<<<<<< @@ -6485,7 +6485,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ goto __pyx_L99_continue; - /* "lxml/html/clean.py":434 + /* "lxml/html/clean.py":439 * rel = el.get('rel') * if rel: * if ('nofollow' in rel # <<<<<<<<<<<<<< @@ -6494,19 +6494,19 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":437 + /* "lxml/html/clean.py":442 * and ' nofollow ' in (' %s ' % rel)): * continue * rel = '%s nofollow' % rel # <<<<<<<<<<<<<< * else: * rel = 'nofollow' */ - __pyx_t_5 = 
__Pyx_PyString_FormatSafe(__pyx_kp_s_s_nofollow, __pyx_v_rel); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 437, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyString_FormatSafe(__pyx_kp_s_s_nofollow, __pyx_v_rel); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 442, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF_SET(__pyx_v_rel, __pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":433 + /* "lxml/html/clean.py":438 * if not self.allow_follow(el): * rel = el.get('rel') * if rel: # <<<<<<<<<<<<<< @@ -6516,7 +6516,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py goto __pyx_L102; } - /* "lxml/html/clean.py":439 + /* "lxml/html/clean.py":444 * rel = '%s nofollow' % rel * else: * rel = 'nofollow' # <<<<<<<<<<<<<< @@ -6529,63 +6529,63 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py } __pyx_L102:; - /* "lxml/html/clean.py":440 + /* "lxml/html/clean.py":445 * else: * rel = 'nofollow' * el.set('rel', rel) # <<<<<<<<<<<<<< * * def allow_follow(self, anchor): */ - __pyx_t_13 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_set); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 440, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_13); + __pyx_t_6 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_set); if (unlikely(!__pyx_t_6)) __PYX_ERR(0, 445, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); __pyx_t_4 = NULL; __pyx_t_7 = 0; - if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_13))) { - __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_13); + if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_6))) { + __pyx_t_4 = PyMethod_GET_SELF(__pyx_t_6); if (likely(__pyx_t_4)) { - PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_13); + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_6); __Pyx_INCREF(__pyx_t_4); __Pyx_INCREF(function); - __Pyx_DECREF_SET(__pyx_t_13, function); + __Pyx_DECREF_SET(__pyx_t_6, function); __pyx_t_7 = 1; } } #if CYTHON_FAST_PYCALL - if (PyFunction_Check(__pyx_t_13)) { + if (PyFunction_Check(__pyx_t_6)) { PyObject 
*__pyx_temp[3] = {__pyx_t_4, __pyx_n_s_rel, __pyx_v_rel}; - __pyx_t_5 = __Pyx_PyFunction_FastCall(__pyx_t_13, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 440, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyFunction_FastCall(__pyx_t_6, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 445, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_GOTREF(__pyx_t_5); } else #endif #if CYTHON_FAST_PYCCALL - if (__Pyx_PyFastCFunction_Check(__pyx_t_13)) { + if (__Pyx_PyFastCFunction_Check(__pyx_t_6)) { PyObject *__pyx_temp[3] = {__pyx_t_4, __pyx_n_s_rel, __pyx_v_rel}; - __pyx_t_5 = __Pyx_PyCFunction_FastCall(__pyx_t_13, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 440, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyCFunction_FastCall(__pyx_t_6, __pyx_temp+1-__pyx_t_7, 2+__pyx_t_7); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 445, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_GOTREF(__pyx_t_5); } else #endif { - __pyx_t_17 = PyTuple_New(2+__pyx_t_7); if (unlikely(!__pyx_t_17)) __PYX_ERR(0, 440, __pyx_L1_error) - __Pyx_GOTREF(__pyx_t_17); + __pyx_t_13 = PyTuple_New(2+__pyx_t_7); if (unlikely(!__pyx_t_13)) __PYX_ERR(0, 445, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_13); if (__pyx_t_4) { - __Pyx_GIVEREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_17, 0, __pyx_t_4); __pyx_t_4 = NULL; + __Pyx_GIVEREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_13, 0, __pyx_t_4); __pyx_t_4 = NULL; } __Pyx_INCREF(__pyx_n_s_rel); __Pyx_GIVEREF(__pyx_n_s_rel); - PyTuple_SET_ITEM(__pyx_t_17, 0+__pyx_t_7, __pyx_n_s_rel); + PyTuple_SET_ITEM(__pyx_t_13, 0+__pyx_t_7, __pyx_n_s_rel); __Pyx_INCREF(__pyx_v_rel); __Pyx_GIVEREF(__pyx_v_rel); - PyTuple_SET_ITEM(__pyx_t_17, 1+__pyx_t_7, __pyx_v_rel); - __pyx_t_5 = __Pyx_PyObject_Call(__pyx_t_13, __pyx_t_17, NULL); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 440, __pyx_L1_error) + PyTuple_SET_ITEM(__pyx_t_13, 1+__pyx_t_7, __pyx_v_rel); + __pyx_t_5 = __Pyx_PyObject_Call(__pyx_t_6, __pyx_t_13, 
NULL); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 445, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; + __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; } - __Pyx_DECREF(__pyx_t_13); __pyx_t_13 = 0; + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":431 + /* "lxml/html/clean.py":436 * if self.add_nofollow: * for el in _find_external_links(doc): * if not self.allow_follow(el): # <<<<<<<<<<<<<< @@ -6594,7 +6594,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":430 + /* "lxml/html/clean.py":435 * el.drop_tag() * if self.add_nofollow: * for el in _find_external_links(doc): # <<<<<<<<<<<<<< @@ -6603,9 +6603,9 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ __pyx_L99_continue:; } - __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __Pyx_DECREF(__pyx_t_17); __pyx_t_17 = 0; - /* "lxml/html/clean.py":429 + /* "lxml/html/clean.py":434 * for el in bad: * el.drop_tag() * if self.add_nofollow: # <<<<<<<<<<<<<< @@ -6614,7 +6614,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py */ } - /* "lxml/html/clean.py":257 + /* "lxml/html/clean.py":262 * ) * * def __call__(self, doc): # <<<<<<<<<<<<<< @@ -6655,7 +6655,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_2__call__(CYTHON_UNUSED Py return __pyx_r; } -/* "lxml/html/clean.py":442 +/* "lxml/html/clean.py":447 * el.set('rel', rel) * * def allow_follow(self, anchor): # <<<<<<<<<<<<<< @@ -6699,11 +6699,11 @@ static PyObject *__pyx_pw_4lxml_4html_5clean_7Cleaner_5allow_follow(PyObject *__ case 1: if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_anchor)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("allow_follow", 1, 2, 2, 1); __PYX_ERR(0, 442, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("allow_follow", 1, 2, 2, 1); __PYX_ERR(0, 447, __pyx_L3_error) } } if (unlikely(kw_args > 0)) { - 
if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "allow_follow") < 0)) __PYX_ERR(0, 442, __pyx_L3_error) + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "allow_follow") < 0)) __PYX_ERR(0, 447, __pyx_L3_error) } } else if (PyTuple_GET_SIZE(__pyx_args) != 2) { goto __pyx_L5_argtuple_error; @@ -6716,7 +6716,7 @@ static PyObject *__pyx_pw_4lxml_4html_5clean_7Cleaner_5allow_follow(PyObject *__ } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("allow_follow", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 442, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("allow_follow", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 447, __pyx_L3_error) __pyx_L3_error:; __Pyx_AddTraceback("lxml.html.clean.Cleaner.allow_follow", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -6734,7 +6734,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_4allow_follow(CYTHON_UNUSE __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("allow_follow", 0); - /* "lxml/html/clean.py":446 + /* "lxml/html/clean.py":451 * Override to suppress rel="nofollow" on some anchors. 
* """ * return False # <<<<<<<<<<<<<< @@ -6746,7 +6746,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_4allow_follow(CYTHON_UNUSE __pyx_r = Py_False; goto __pyx_L0; - /* "lxml/html/clean.py":442 + /* "lxml/html/clean.py":447 * el.set('rel', rel) * * def allow_follow(self, anchor): # <<<<<<<<<<<<<< @@ -6761,7 +6761,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_4allow_follow(CYTHON_UNUSE return __pyx_r; } -/* "lxml/html/clean.py":448 +/* "lxml/html/clean.py":453 * return False * * def allow_element(self, el): # <<<<<<<<<<<<<< @@ -6805,11 +6805,11 @@ static PyObject *__pyx_pw_4lxml_4html_5clean_7Cleaner_7allow_element(PyObject *_ case 1: if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_el)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("allow_element", 1, 2, 2, 1); __PYX_ERR(0, 448, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("allow_element", 1, 2, 2, 1); __PYX_ERR(0, 453, __pyx_L3_error) } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "allow_element") < 0)) __PYX_ERR(0, 448, __pyx_L3_error) + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "allow_element") < 0)) __PYX_ERR(0, 453, __pyx_L3_error) } } else if (PyTuple_GET_SIZE(__pyx_args) != 2) { goto __pyx_L5_argtuple_error; @@ -6822,7 +6822,7 @@ static PyObject *__pyx_pw_4lxml_4html_5clean_7Cleaner_7allow_element(PyObject *_ } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("allow_element", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 448, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("allow_element", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 453, __pyx_L3_error) __pyx_L3_error:; __Pyx_AddTraceback("lxml.html.clean.Cleaner.allow_element", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -6857,24 +6857,24 @@ static PyObject 
*__pyx_pf_4lxml_4html_5clean_7Cleaner_6allow_element(CYTHON_UNUS int __pyx_clineno = 0; __Pyx_RefNannySetupContext("allow_element", 0); - /* "lxml/html/clean.py":455 + /* "lxml/html/clean.py":460 * :return: true to accept the element or false to reject/discard it. * """ * if el.tag not in self._tag_link_attrs: # <<<<<<<<<<<<<< * return False * attr = self._tag_link_attrs[el.tag] */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_tag); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 455, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_tag); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 460, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_tag_link_attrs); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 455, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_tag_link_attrs); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 460, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); - __pyx_t_3 = (__Pyx_PySequence_ContainsTF(__pyx_t_1, __pyx_t_2, Py_NE)); if (unlikely(__pyx_t_3 < 0)) __PYX_ERR(0, 455, __pyx_L1_error) + __pyx_t_3 = (__Pyx_PySequence_ContainsTF(__pyx_t_1, __pyx_t_2, Py_NE)); if (unlikely(__pyx_t_3 < 0)) __PYX_ERR(0, 460, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __pyx_t_4 = (__pyx_t_3 != 0); if (__pyx_t_4) { - /* "lxml/html/clean.py":456 + /* "lxml/html/clean.py":461 * """ * if el.tag not in self._tag_link_attrs: * return False # <<<<<<<<<<<<<< @@ -6886,7 +6886,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_6allow_element(CYTHON_UNUS __pyx_r = Py_False; goto __pyx_L0; - /* "lxml/html/clean.py":455 + /* "lxml/html/clean.py":460 * :return: true to accept the element or false to reject/discard it. 
* """ * if el.tag not in self._tag_link_attrs: # <<<<<<<<<<<<<< @@ -6895,25 +6895,25 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_6allow_element(CYTHON_UNUS */ } - /* "lxml/html/clean.py":457 + /* "lxml/html/clean.py":462 * if el.tag not in self._tag_link_attrs: * return False * attr = self._tag_link_attrs[el.tag] # <<<<<<<<<<<<<< * if isinstance(attr, (list, tuple)): * for one_attr in attr: */ - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_tag_link_attrs); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 457, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_tag_link_attrs); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 462, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_tag); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 457, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_tag); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 462, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_5 = __Pyx_PyObject_GetItem(__pyx_t_2, __pyx_t_1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 457, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_GetItem(__pyx_t_2, __pyx_t_1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 462, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_attr = __pyx_t_5; __pyx_t_5 = 0; - /* "lxml/html/clean.py":458 + /* "lxml/html/clean.py":463 * return False * attr = self._tag_link_attrs[el.tag] * if isinstance(attr, (list, tuple)): # <<<<<<<<<<<<<< @@ -6934,7 +6934,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_6allow_element(CYTHON_UNUS __pyx_t_3 = (__pyx_t_4 != 0); if (__pyx_t_3) { - /* "lxml/html/clean.py":459 + /* "lxml/html/clean.py":464 * attr = self._tag_link_attrs[el.tag] * if isinstance(attr, (list, tuple)): * for one_attr in attr: # <<<<<<<<<<<<<< @@ -6945,26 +6945,26 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_6allow_element(CYTHON_UNUS __pyx_t_5 = 
__pyx_v_attr; __Pyx_INCREF(__pyx_t_5); __pyx_t_7 = 0; __pyx_t_8 = NULL; } else { - __pyx_t_7 = -1; __pyx_t_5 = PyObject_GetIter(__pyx_v_attr); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 459, __pyx_L1_error) + __pyx_t_7 = -1; __pyx_t_5 = PyObject_GetIter(__pyx_v_attr); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 464, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_8 = Py_TYPE(__pyx_t_5)->tp_iternext; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 459, __pyx_L1_error) + __pyx_t_8 = Py_TYPE(__pyx_t_5)->tp_iternext; if (unlikely(!__pyx_t_8)) __PYX_ERR(0, 464, __pyx_L1_error) } for (;;) { if (likely(!__pyx_t_8)) { if (likely(PyList_CheckExact(__pyx_t_5))) { if (__pyx_t_7 >= PyList_GET_SIZE(__pyx_t_5)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_1 = PyList_GET_ITEM(__pyx_t_5, __pyx_t_7); __Pyx_INCREF(__pyx_t_1); __pyx_t_7++; if (unlikely(0 < 0)) __PYX_ERR(0, 459, __pyx_L1_error) + __pyx_t_1 = PyList_GET_ITEM(__pyx_t_5, __pyx_t_7); __Pyx_INCREF(__pyx_t_1); __pyx_t_7++; if (unlikely(0 < 0)) __PYX_ERR(0, 464, __pyx_L1_error) #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_5, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 459, __pyx_L1_error) + __pyx_t_1 = PySequence_ITEM(__pyx_t_5, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 464, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } else { if (__pyx_t_7 >= PyTuple_GET_SIZE(__pyx_t_5)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_5, __pyx_t_7); __Pyx_INCREF(__pyx_t_1); __pyx_t_7++; if (unlikely(0 < 0)) __PYX_ERR(0, 459, __pyx_L1_error) + __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_5, __pyx_t_7); __Pyx_INCREF(__pyx_t_1); __pyx_t_7++; if (unlikely(0 < 0)) __PYX_ERR(0, 464, __pyx_L1_error) #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_5, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 459, __pyx_L1_error) + __pyx_t_1 = PySequence_ITEM(__pyx_t_5, __pyx_t_7); __pyx_t_7++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 
464, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } @@ -6974,7 +6974,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_6allow_element(CYTHON_UNUS PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 459, __pyx_L1_error) + else __PYX_ERR(0, 464, __pyx_L1_error) } break; } @@ -6983,14 +6983,14 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_6allow_element(CYTHON_UNUS __Pyx_XDECREF_SET(__pyx_v_one_attr, __pyx_t_1); __pyx_t_1 = 0; - /* "lxml/html/clean.py":460 + /* "lxml/html/clean.py":465 * if isinstance(attr, (list, tuple)): * for one_attr in attr: * url = el.get(one_attr) # <<<<<<<<<<<<<< * if not url: * return False */ - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_get); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 460, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_get); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 465, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_t_9 = NULL; if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_2))) { @@ -7004,24 +7004,24 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_6allow_element(CYTHON_UNUS } __pyx_t_1 = (__pyx_t_9) ? 
__Pyx_PyObject_Call2Args(__pyx_t_2, __pyx_t_9, __pyx_v_one_attr) : __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_v_one_attr); __Pyx_XDECREF(__pyx_t_9); __pyx_t_9 = 0; - if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 460, __pyx_L1_error) + if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 465, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_XDECREF_SET(__pyx_v_url, __pyx_t_1); __pyx_t_1 = 0; - /* "lxml/html/clean.py":461 + /* "lxml/html/clean.py":466 * for one_attr in attr: * url = el.get(one_attr) * if not url: # <<<<<<<<<<<<<< * return False * if not self.allow_embedded_url(el, url): */ - __pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_v_url); if (unlikely(__pyx_t_3 < 0)) __PYX_ERR(0, 461, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_v_url); if (unlikely(__pyx_t_3 < 0)) __PYX_ERR(0, 466, __pyx_L1_error) __pyx_t_4 = ((!__pyx_t_3) != 0); if (__pyx_t_4) { - /* "lxml/html/clean.py":462 + /* "lxml/html/clean.py":467 * url = el.get(one_attr) * if not url: * return False # <<<<<<<<<<<<<< @@ -7034,7 +7034,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_6allow_element(CYTHON_UNUS __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; goto __pyx_L0; - /* "lxml/html/clean.py":461 + /* "lxml/html/clean.py":466 * for one_attr in attr: * url = el.get(one_attr) * if not url: # <<<<<<<<<<<<<< @@ -7043,14 +7043,14 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_6allow_element(CYTHON_UNUS */ } - /* "lxml/html/clean.py":463 + /* "lxml/html/clean.py":468 * if not url: * return False * if not self.allow_embedded_url(el, url): # <<<<<<<<<<<<<< * return False * return True */ - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_allow_embedded_url); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 463, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_allow_embedded_url); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 468, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_t_9 = NULL; __pyx_t_10 = 0; @@ -7067,7 +7067,7 @@ static PyObject 
*__pyx_pf_4lxml_4html_5clean_7Cleaner_6allow_element(CYTHON_UNUS #if CYTHON_FAST_PYCALL if (PyFunction_Check(__pyx_t_2)) { PyObject *__pyx_temp[3] = {__pyx_t_9, __pyx_v_el, __pyx_v_url}; - __pyx_t_1 = __Pyx_PyFunction_FastCall(__pyx_t_2, __pyx_temp+1-__pyx_t_10, 2+__pyx_t_10); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 463, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyFunction_FastCall(__pyx_t_2, __pyx_temp+1-__pyx_t_10, 2+__pyx_t_10); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 468, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_9); __pyx_t_9 = 0; __Pyx_GOTREF(__pyx_t_1); } else @@ -7075,13 +7075,13 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_6allow_element(CYTHON_UNUS #if CYTHON_FAST_PYCCALL if (__Pyx_PyFastCFunction_Check(__pyx_t_2)) { PyObject *__pyx_temp[3] = {__pyx_t_9, __pyx_v_el, __pyx_v_url}; - __pyx_t_1 = __Pyx_PyCFunction_FastCall(__pyx_t_2, __pyx_temp+1-__pyx_t_10, 2+__pyx_t_10); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 463, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyCFunction_FastCall(__pyx_t_2, __pyx_temp+1-__pyx_t_10, 2+__pyx_t_10); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 468, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_9); __pyx_t_9 = 0; __Pyx_GOTREF(__pyx_t_1); } else #endif { - __pyx_t_11 = PyTuple_New(2+__pyx_t_10); if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 463, __pyx_L1_error) + __pyx_t_11 = PyTuple_New(2+__pyx_t_10); if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 468, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_11); if (__pyx_t_9) { __Pyx_GIVEREF(__pyx_t_9); PyTuple_SET_ITEM(__pyx_t_11, 0, __pyx_t_9); __pyx_t_9 = NULL; @@ -7092,17 +7092,17 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_6allow_element(CYTHON_UNUS __Pyx_INCREF(__pyx_v_url); __Pyx_GIVEREF(__pyx_v_url); PyTuple_SET_ITEM(__pyx_t_11, 1+__pyx_t_10, __pyx_v_url); - __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_11, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 463, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_11, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 468, __pyx_L1_error) 
__Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; } __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_4 < 0)) __PYX_ERR(0, 463, __pyx_L1_error) + __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_4 < 0)) __PYX_ERR(0, 468, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_t_3 = ((!__pyx_t_4) != 0); if (__pyx_t_3) { - /* "lxml/html/clean.py":464 + /* "lxml/html/clean.py":469 * return False * if not self.allow_embedded_url(el, url): * return False # <<<<<<<<<<<<<< @@ -7115,7 +7115,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_6allow_element(CYTHON_UNUS __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; goto __pyx_L0; - /* "lxml/html/clean.py":463 + /* "lxml/html/clean.py":468 * if not url: * return False * if not self.allow_embedded_url(el, url): # <<<<<<<<<<<<<< @@ -7124,7 +7124,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_6allow_element(CYTHON_UNUS */ } - /* "lxml/html/clean.py":459 + /* "lxml/html/clean.py":464 * attr = self._tag_link_attrs[el.tag] * if isinstance(attr, (list, tuple)): * for one_attr in attr: # <<<<<<<<<<<<<< @@ -7134,7 +7134,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_6allow_element(CYTHON_UNUS } __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - /* "lxml/html/clean.py":465 + /* "lxml/html/clean.py":470 * if not self.allow_embedded_url(el, url): * return False * return True # <<<<<<<<<<<<<< @@ -7146,7 +7146,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_6allow_element(CYTHON_UNUS __pyx_r = Py_True; goto __pyx_L0; - /* "lxml/html/clean.py":458 + /* "lxml/html/clean.py":463 * return False * attr = self._tag_link_attrs[el.tag] * if isinstance(attr, (list, tuple)): # <<<<<<<<<<<<<< @@ -7155,7 +7155,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_6allow_element(CYTHON_UNUS */ } - /* "lxml/html/clean.py":467 + /* "lxml/html/clean.py":472 * return True * else: * url = el.get(attr) # <<<<<<<<<<<<<< @@ 
-7163,7 +7163,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_6allow_element(CYTHON_UNUS * return False */ /*else*/ { - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_get); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 467, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_get); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 472, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_2 = NULL; if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_1))) { @@ -7177,24 +7177,24 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_6allow_element(CYTHON_UNUS } __pyx_t_5 = (__pyx_t_2) ? __Pyx_PyObject_Call2Args(__pyx_t_1, __pyx_t_2, __pyx_v_attr) : __Pyx_PyObject_CallOneArg(__pyx_t_1, __pyx_v_attr); __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; - if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 467, __pyx_L1_error) + if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 472, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __pyx_v_url = __pyx_t_5; __pyx_t_5 = 0; - /* "lxml/html/clean.py":468 + /* "lxml/html/clean.py":473 * else: * url = el.get(attr) * if not url: # <<<<<<<<<<<<<< * return False * return self.allow_embedded_url(el, url) */ - __pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_v_url); if (unlikely(__pyx_t_3 < 0)) __PYX_ERR(0, 468, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_IsTrue(__pyx_v_url); if (unlikely(__pyx_t_3 < 0)) __PYX_ERR(0, 473, __pyx_L1_error) __pyx_t_4 = ((!__pyx_t_3) != 0); if (__pyx_t_4) { - /* "lxml/html/clean.py":469 + /* "lxml/html/clean.py":474 * url = el.get(attr) * if not url: * return False # <<<<<<<<<<<<<< @@ -7206,7 +7206,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_6allow_element(CYTHON_UNUS __pyx_r = Py_False; goto __pyx_L0; - /* "lxml/html/clean.py":468 + /* "lxml/html/clean.py":473 * else: * url = el.get(attr) * if not url: # <<<<<<<<<<<<<< @@ -7215,7 +7215,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_6allow_element(CYTHON_UNUS */ } - /* "lxml/html/clean.py":470 + /* 
"lxml/html/clean.py":475 * if not url: * return False * return self.allow_embedded_url(el, url) # <<<<<<<<<<<<<< @@ -7223,7 +7223,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_6allow_element(CYTHON_UNUS * def allow_embedded_url(self, el, url): */ __Pyx_XDECREF(__pyx_r); - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_allow_embedded_url); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 470, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_allow_embedded_url); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 475, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_2 = NULL; __pyx_t_10 = 0; @@ -7240,7 +7240,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_6allow_element(CYTHON_UNUS #if CYTHON_FAST_PYCALL if (PyFunction_Check(__pyx_t_1)) { PyObject *__pyx_temp[3] = {__pyx_t_2, __pyx_v_el, __pyx_v_url}; - __pyx_t_5 = __Pyx_PyFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_10, 2+__pyx_t_10); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 470, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_10, 2+__pyx_t_10); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 475, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_GOTREF(__pyx_t_5); } else @@ -7248,13 +7248,13 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_6allow_element(CYTHON_UNUS #if CYTHON_FAST_PYCCALL if (__Pyx_PyFastCFunction_Check(__pyx_t_1)) { PyObject *__pyx_temp[3] = {__pyx_t_2, __pyx_v_el, __pyx_v_url}; - __pyx_t_5 = __Pyx_PyCFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_10, 2+__pyx_t_10); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 470, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyCFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_10, 2+__pyx_t_10); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 475, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_GOTREF(__pyx_t_5); } else #endif { - __pyx_t_11 = PyTuple_New(2+__pyx_t_10); if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 470, __pyx_L1_error) + __pyx_t_11 = 
PyTuple_New(2+__pyx_t_10); if (unlikely(!__pyx_t_11)) __PYX_ERR(0, 475, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_11); if (__pyx_t_2) { __Pyx_GIVEREF(__pyx_t_2); PyTuple_SET_ITEM(__pyx_t_11, 0, __pyx_t_2); __pyx_t_2 = NULL; @@ -7265,7 +7265,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_6allow_element(CYTHON_UNUS __Pyx_INCREF(__pyx_v_url); __Pyx_GIVEREF(__pyx_v_url); PyTuple_SET_ITEM(__pyx_t_11, 1+__pyx_t_10, __pyx_v_url); - __pyx_t_5 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_11, NULL); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 470, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_11, NULL); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 475, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_11); __pyx_t_11 = 0; } @@ -7275,7 +7275,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_6allow_element(CYTHON_UNUS goto __pyx_L0; } - /* "lxml/html/clean.py":448 + /* "lxml/html/clean.py":453 * return False * * def allow_element(self, el): # <<<<<<<<<<<<<< @@ -7301,7 +7301,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_6allow_element(CYTHON_UNUS return __pyx_r; } -/* "lxml/html/clean.py":472 +/* "lxml/html/clean.py":477 * return self.allow_embedded_url(el, url) * * def allow_embedded_url(self, el, url): # <<<<<<<<<<<<<< @@ -7348,17 +7348,17 @@ static PyObject *__pyx_pw_4lxml_4html_5clean_7Cleaner_9allow_embedded_url(PyObje case 1: if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_el)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("allow_embedded_url", 1, 3, 3, 1); __PYX_ERR(0, 472, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("allow_embedded_url", 1, 3, 3, 1); __PYX_ERR(0, 477, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 2: if (likely((values[2] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_url)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("allow_embedded_url", 1, 3, 3, 2); __PYX_ERR(0, 472, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("allow_embedded_url", 1, 3, 3, 2); __PYX_ERR(0, 477, 
__pyx_L3_error) } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "allow_embedded_url") < 0)) __PYX_ERR(0, 472, __pyx_L3_error) + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "allow_embedded_url") < 0)) __PYX_ERR(0, 477, __pyx_L3_error) } } else if (PyTuple_GET_SIZE(__pyx_args) != 3) { goto __pyx_L5_argtuple_error; @@ -7373,7 +7373,7 @@ static PyObject *__pyx_pw_4lxml_4html_5clean_7Cleaner_9allow_embedded_url(PyObje } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("allow_embedded_url", 1, 3, 3, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 472, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("allow_embedded_url", 1, 3, 3, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 477, __pyx_L3_error) __pyx_L3_error:; __Pyx_AddTraceback("lxml.html.clean.Cleaner.allow_embedded_url", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -7410,14 +7410,14 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_8allow_embedded_url(CYTHON int __pyx_clineno = 0; __Pyx_RefNannySetupContext("allow_embedded_url", 0); - /* "lxml/html/clean.py":481 + /* "lxml/html/clean.py":486 * :return: true to accept the URL and false to reject it. 
* """ * if self.whitelist_tags is not None and el.tag not in self.whitelist_tags: # <<<<<<<<<<<<<< * return False * scheme, netloc, path, query, fragment = urlsplit(url) */ - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_whitelist_tags); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 481, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_whitelist_tags); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 486, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_t_3 = (__pyx_t_2 != Py_None); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; @@ -7427,11 +7427,11 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_8allow_embedded_url(CYTHON __pyx_t_1 = __pyx_t_4; goto __pyx_L4_bool_binop_done; } - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_tag); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 481, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_tag); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 486, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_whitelist_tags); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 481, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_whitelist_tags); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 486, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); - __pyx_t_4 = (__Pyx_PySequence_ContainsTF(__pyx_t_2, __pyx_t_5, Py_NE)); if (unlikely(__pyx_t_4 < 0)) __PYX_ERR(0, 481, __pyx_L1_error) + __pyx_t_4 = (__Pyx_PySequence_ContainsTF(__pyx_t_2, __pyx_t_5, Py_NE)); if (unlikely(__pyx_t_4 < 0)) __PYX_ERR(0, 486, __pyx_L1_error) __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_3 = (__pyx_t_4 != 0); @@ -7439,7 +7439,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_8allow_embedded_url(CYTHON __pyx_L4_bool_binop_done:; if (__pyx_t_1) { - /* "lxml/html/clean.py":482 + /* "lxml/html/clean.py":487 * """ * if self.whitelist_tags is not None and el.tag not in self.whitelist_tags: * return False # 
<<<<<<<<<<<<<< @@ -7451,7 +7451,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_8allow_embedded_url(CYTHON __pyx_r = Py_False; goto __pyx_L0; - /* "lxml/html/clean.py":481 + /* "lxml/html/clean.py":486 * :return: true to accept the URL and false to reject it. * """ * if self.whitelist_tags is not None and el.tag not in self.whitelist_tags: # <<<<<<<<<<<<<< @@ -7460,14 +7460,14 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_8allow_embedded_url(CYTHON */ } - /* "lxml/html/clean.py":483 + /* "lxml/html/clean.py":488 * if self.whitelist_tags is not None and el.tag not in self.whitelist_tags: * return False * scheme, netloc, path, query, fragment = urlsplit(url) # <<<<<<<<<<<<<< * netloc = netloc.lower().split(':', 1)[0] * if scheme not in ('http', 'https'): */ - __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_urlsplit); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 483, __pyx_L1_error) + __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_urlsplit); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 488, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_t_6 = NULL; if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_2))) { @@ -7481,7 +7481,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_8allow_embedded_url(CYTHON } __pyx_t_5 = (__pyx_t_6) ? 
__Pyx_PyObject_Call2Args(__pyx_t_2, __pyx_t_6, __pyx_v_url) : __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_v_url); __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; - if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 483, __pyx_L1_error) + if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 488, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; if ((likely(PyTuple_CheckExact(__pyx_t_5))) || (PyList_CheckExact(__pyx_t_5))) { @@ -7490,7 +7490,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_8allow_embedded_url(CYTHON if (unlikely(size != 5)) { if (size > 5) __Pyx_RaiseTooManyValuesError(5); else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size); - __PYX_ERR(0, 483, __pyx_L1_error) + __PYX_ERR(0, 488, __pyx_L1_error) } #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS if (likely(PyTuple_CheckExact(sequence))) { @@ -7516,7 +7516,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_8allow_embedded_url(CYTHON Py_ssize_t i; PyObject** temps[5] = {&__pyx_t_2,&__pyx_t_6,&__pyx_t_7,&__pyx_t_8,&__pyx_t_9}; for (i=0; i < 5; i++) { - PyObject* item = PySequence_ITEM(sequence, i); if (unlikely(!item)) __PYX_ERR(0, 483, __pyx_L1_error) + PyObject* item = PySequence_ITEM(sequence, i); if (unlikely(!item)) __PYX_ERR(0, 488, __pyx_L1_error) __Pyx_GOTREF(item); *(temps[i]) = item; } @@ -7526,7 +7526,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_8allow_embedded_url(CYTHON } else { Py_ssize_t index = -1; PyObject** temps[5] = {&__pyx_t_2,&__pyx_t_6,&__pyx_t_7,&__pyx_t_8,&__pyx_t_9}; - __pyx_t_10 = PyObject_GetIter(__pyx_t_5); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 483, __pyx_L1_error) + __pyx_t_10 = PyObject_GetIter(__pyx_t_5); if (unlikely(!__pyx_t_10)) __PYX_ERR(0, 488, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_10); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __pyx_t_11 = Py_TYPE(__pyx_t_10)->tp_iternext; @@ -7535,7 +7535,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_8allow_embedded_url(CYTHON __Pyx_GOTREF(item); *(temps[index]) = item; } - 
if (__Pyx_IternextUnpackEndCheck(__pyx_t_11(__pyx_t_10), 5) < 0) __PYX_ERR(0, 483, __pyx_L1_error) + if (__Pyx_IternextUnpackEndCheck(__pyx_t_11(__pyx_t_10), 5) < 0) __PYX_ERR(0, 488, __pyx_L1_error) __pyx_t_11 = NULL; __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; goto __pyx_L7_unpacking_done; @@ -7543,7 +7543,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_8allow_embedded_url(CYTHON __Pyx_DECREF(__pyx_t_10); __pyx_t_10 = 0; __pyx_t_11 = NULL; if (__Pyx_IterFinish() == 0) __Pyx_RaiseNeedMoreValuesError(index); - __PYX_ERR(0, 483, __pyx_L1_error) + __PYX_ERR(0, 488, __pyx_L1_error) __pyx_L7_unpacking_done:; } __pyx_v_scheme = __pyx_t_2; @@ -7557,14 +7557,14 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_8allow_embedded_url(CYTHON __pyx_v_fragment = __pyx_t_9; __pyx_t_9 = 0; - /* "lxml/html/clean.py":484 + /* "lxml/html/clean.py":489 * return False * scheme, netloc, path, query, fragment = urlsplit(url) * netloc = netloc.lower().split(':', 1)[0] # <<<<<<<<<<<<<< * if scheme not in ('http', 'https'): * return False */ - __pyx_t_9 = __Pyx_PyObject_GetAttrStr(__pyx_v_netloc, __pyx_n_s_lower); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 484, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyObject_GetAttrStr(__pyx_v_netloc, __pyx_n_s_lower); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 489, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_9); __pyx_t_8 = NULL; if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_9))) { @@ -7578,22 +7578,22 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_8allow_embedded_url(CYTHON } __pyx_t_5 = (__pyx_t_8) ? 
__Pyx_PyObject_CallOneArg(__pyx_t_9, __pyx_t_8) : __Pyx_PyObject_CallNoArg(__pyx_t_9); __Pyx_XDECREF(__pyx_t_8); __pyx_t_8 = 0; - if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 484, __pyx_L1_error) + if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 489, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - __pyx_t_9 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_split); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 484, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyObject_GetAttrStr(__pyx_t_5, __pyx_n_s_split); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 489, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_9); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; - __pyx_t_5 = __Pyx_PyObject_Call(__pyx_t_9, __pyx_tuple__11, NULL); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 484, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_Call(__pyx_t_9, __pyx_tuple__11, NULL); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 489, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; - __pyx_t_9 = __Pyx_GetItemInt(__pyx_t_5, 0, long, 1, __Pyx_PyInt_From_long, 0, 0, 1); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 484, __pyx_L1_error) + __pyx_t_9 = __Pyx_GetItemInt(__pyx_t_5, 0, long, 1, __Pyx_PyInt_From_long, 0, 0, 1); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 489, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_9); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_DECREF_SET(__pyx_v_netloc, __pyx_t_9); __pyx_t_9 = 0; - /* "lxml/html/clean.py":485 + /* "lxml/html/clean.py":490 * scheme, netloc, path, query, fragment = urlsplit(url) * netloc = netloc.lower().split(':', 1)[0] * if scheme not in ('http', 'https'): # <<<<<<<<<<<<<< @@ -7602,20 +7602,20 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_8allow_embedded_url(CYTHON */ __Pyx_INCREF(__pyx_v_scheme); __pyx_t_9 = __pyx_v_scheme; - __pyx_t_3 = (__Pyx_PyString_Equals(__pyx_t_9, __pyx_n_s_http, Py_NE)); if (unlikely(__pyx_t_3 < 0)) __PYX_ERR(0, 485, __pyx_L1_error) + __pyx_t_3 = (__Pyx_PyString_Equals(__pyx_t_9, __pyx_n_s_http, Py_NE)); if (unlikely(__pyx_t_3 < 0)) 
__PYX_ERR(0, 490, __pyx_L1_error) if (__pyx_t_3) { } else { __pyx_t_1 = __pyx_t_3; goto __pyx_L9_bool_binop_done; } - __pyx_t_3 = (__Pyx_PyString_Equals(__pyx_t_9, __pyx_n_s_https, Py_NE)); if (unlikely(__pyx_t_3 < 0)) __PYX_ERR(0, 485, __pyx_L1_error) + __pyx_t_3 = (__Pyx_PyString_Equals(__pyx_t_9, __pyx_n_s_https, Py_NE)); if (unlikely(__pyx_t_3 < 0)) __PYX_ERR(0, 490, __pyx_L1_error) __pyx_t_1 = __pyx_t_3; __pyx_L9_bool_binop_done:; __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; __pyx_t_3 = (__pyx_t_1 != 0); if (__pyx_t_3) { - /* "lxml/html/clean.py":486 + /* "lxml/html/clean.py":491 * netloc = netloc.lower().split(':', 1)[0] * if scheme not in ('http', 'https'): * return False # <<<<<<<<<<<<<< @@ -7627,7 +7627,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_8allow_embedded_url(CYTHON __pyx_r = Py_False; goto __pyx_L0; - /* "lxml/html/clean.py":485 + /* "lxml/html/clean.py":490 * scheme, netloc, path, query, fragment = urlsplit(url) * netloc = netloc.lower().split(':', 1)[0] * if scheme not in ('http', 'https'): # <<<<<<<<<<<<<< @@ -7636,21 +7636,21 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_8allow_embedded_url(CYTHON */ } - /* "lxml/html/clean.py":487 + /* "lxml/html/clean.py":492 * if scheme not in ('http', 'https'): * return False * if netloc in self.host_whitelist: # <<<<<<<<<<<<<< * return True * return False */ - __pyx_t_9 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_host_whitelist); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 487, __pyx_L1_error) + __pyx_t_9 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_host_whitelist); if (unlikely(!__pyx_t_9)) __PYX_ERR(0, 492, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_9); - __pyx_t_3 = (__Pyx_PySequence_ContainsTF(__pyx_v_netloc, __pyx_t_9, Py_EQ)); if (unlikely(__pyx_t_3 < 0)) __PYX_ERR(0, 487, __pyx_L1_error) + __pyx_t_3 = (__Pyx_PySequence_ContainsTF(__pyx_v_netloc, __pyx_t_9, Py_EQ)); if (unlikely(__pyx_t_3 < 0)) __PYX_ERR(0, 492, __pyx_L1_error) __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0; 
__pyx_t_1 = (__pyx_t_3 != 0); if (__pyx_t_1) { - /* "lxml/html/clean.py":488 + /* "lxml/html/clean.py":493 * return False * if netloc in self.host_whitelist: * return True # <<<<<<<<<<<<<< @@ -7662,7 +7662,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_8allow_embedded_url(CYTHON __pyx_r = Py_True; goto __pyx_L0; - /* "lxml/html/clean.py":487 + /* "lxml/html/clean.py":492 * if scheme not in ('http', 'https'): * return False * if netloc in self.host_whitelist: # <<<<<<<<<<<<<< @@ -7671,7 +7671,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_8allow_embedded_url(CYTHON */ } - /* "lxml/html/clean.py":489 + /* "lxml/html/clean.py":494 * if netloc in self.host_whitelist: * return True * return False # <<<<<<<<<<<<<< @@ -7683,7 +7683,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_8allow_embedded_url(CYTHON __pyx_r = Py_False; goto __pyx_L0; - /* "lxml/html/clean.py":472 + /* "lxml/html/clean.py":477 * return self.allow_embedded_url(el, url) * * def allow_embedded_url(self, el, url): # <<<<<<<<<<<<<< @@ -7713,7 +7713,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_8allow_embedded_url(CYTHON return __pyx_r; } -/* "lxml/html/clean.py":491 +/* "lxml/html/clean.py":496 * return False * * def kill_conditional_comments(self, doc): # <<<<<<<<<<<<<< @@ -7757,11 +7757,11 @@ static PyObject *__pyx_pw_4lxml_4html_5clean_7Cleaner_11kill_conditional_comment case 1: if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_doc)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("kill_conditional_comments", 1, 2, 2, 1); __PYX_ERR(0, 491, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("kill_conditional_comments", 1, 2, 2, 1); __PYX_ERR(0, 496, __pyx_L3_error) } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "kill_conditional_comments") < 0)) __PYX_ERR(0, 491, __pyx_L3_error) + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, 
values, pos_args, "kill_conditional_comments") < 0)) __PYX_ERR(0, 496, __pyx_L3_error) } } else if (PyTuple_GET_SIZE(__pyx_args) != 2) { goto __pyx_L5_argtuple_error; @@ -7774,7 +7774,7 @@ static PyObject *__pyx_pw_4lxml_4html_5clean_7Cleaner_11kill_conditional_comment } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("kill_conditional_comments", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 491, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("kill_conditional_comments", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 496, __pyx_L3_error) __pyx_L3_error:; __Pyx_AddTraceback("lxml.html.clean.Cleaner.kill_conditional_comments", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -7787,7 +7787,7 @@ static PyObject *__pyx_pw_4lxml_4html_5clean_7Cleaner_11kill_conditional_comment return __pyx_r; } -/* "lxml/html/clean.py":499 +/* "lxml/html/clean.py":504 * has_conditional_comment = _conditional_comment_re.search * self._kill_elements( * doc, lambda el: has_conditional_comment(el.text), # <<<<<<<<<<<<<< @@ -7825,8 +7825,8 @@ static PyObject *__pyx_lambda_funcdef_lambda(PyObject *__pyx_self, PyObject *__p __pyx_outer_scope = (struct __pyx_obj_4lxml_4html_5clean___pyx_scope_struct__kill_conditional_comments *) __Pyx_CyFunction_GetClosure(__pyx_self); __pyx_cur_scope = __pyx_outer_scope; __Pyx_XDECREF(__pyx_r); - if (unlikely(!__pyx_cur_scope->__pyx_v_has_conditional_comment)) { __Pyx_RaiseClosureNameError("has_conditional_comment"); __PYX_ERR(0, 499, __pyx_L1_error) } - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_text); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 499, __pyx_L1_error) + if (unlikely(!__pyx_cur_scope->__pyx_v_has_conditional_comment)) { __Pyx_RaiseClosureNameError("has_conditional_comment"); __PYX_ERR(0, 504, __pyx_L1_error) } + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_text); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 504, __pyx_L1_error) 
__Pyx_GOTREF(__pyx_t_2); __Pyx_INCREF(__pyx_cur_scope->__pyx_v_has_conditional_comment); __pyx_t_3 = __pyx_cur_scope->__pyx_v_has_conditional_comment; __pyx_t_4 = NULL; @@ -7842,7 +7842,7 @@ static PyObject *__pyx_lambda_funcdef_lambda(PyObject *__pyx_self, PyObject *__p __pyx_t_1 = (__pyx_t_4) ? __Pyx_PyObject_Call2Args(__pyx_t_3, __pyx_t_4, __pyx_t_2) : __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_2); __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 499, __pyx_L1_error) + if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 504, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __pyx_r = __pyx_t_1; @@ -7863,7 +7863,7 @@ static PyObject *__pyx_lambda_funcdef_lambda(PyObject *__pyx_self, PyObject *__p return __pyx_r; } -/* "lxml/html/clean.py":491 +/* "lxml/html/clean.py":496 * return False * * def kill_conditional_comments(self, doc): # <<<<<<<<<<<<<< @@ -7890,57 +7890,57 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_10kill_conditional_comment if (unlikely(!__pyx_cur_scope)) { __pyx_cur_scope = ((struct __pyx_obj_4lxml_4html_5clean___pyx_scope_struct__kill_conditional_comments *)Py_None); __Pyx_INCREF(Py_None); - __PYX_ERR(0, 491, __pyx_L1_error) + __PYX_ERR(0, 496, __pyx_L1_error) } else { __Pyx_GOTREF(__pyx_cur_scope); } - /* "lxml/html/clean.py":497 + /* "lxml/html/clean.py":502 * we'll kill any comments that could be conditional. 
* """ * has_conditional_comment = _conditional_comment_re.search # <<<<<<<<<<<<<< * self._kill_elements( * doc, lambda el: has_conditional_comment(el.text), */ - __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_conditional_comment_re); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 497, __pyx_L1_error) + __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_conditional_comment_re); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 502, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_search); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 497, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_search); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 502, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_GIVEREF(__pyx_t_2); __pyx_cur_scope->__pyx_v_has_conditional_comment = __pyx_t_2; __pyx_t_2 = 0; - /* "lxml/html/clean.py":498 + /* "lxml/html/clean.py":503 * """ * has_conditional_comment = _conditional_comment_re.search * self._kill_elements( # <<<<<<<<<<<<<< * doc, lambda el: has_conditional_comment(el.text), * etree.Comment) */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_kill_elements); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 498, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_kill_elements); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 503, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - /* "lxml/html/clean.py":499 + /* "lxml/html/clean.py":504 * has_conditional_comment = _conditional_comment_re.search * self._kill_elements( * doc, lambda el: has_conditional_comment(el.text), # <<<<<<<<<<<<<< * etree.Comment) * */ - __pyx_t_3 = __Pyx_CyFunction_New(&__pyx_mdef_4lxml_4html_5clean_7Cleaner_25kill_conditional_comments_lambda, 0, __pyx_n_s_Cleaner_kill_conditional_comment, ((PyObject*)__pyx_cur_scope), __pyx_n_s_lxml_html_clean, __pyx_d, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 499, __pyx_L1_error) + __pyx_t_3 = 
__Pyx_CyFunction_New(&__pyx_mdef_4lxml_4html_5clean_7Cleaner_25kill_conditional_comments_lambda, 0, __pyx_n_s_Cleaner_kill_conditional_comment, ((PyObject*)__pyx_cur_scope), __pyx_n_s_lxml_html_clean, __pyx_d, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 504, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); - /* "lxml/html/clean.py":500 + /* "lxml/html/clean.py":505 * self._kill_elements( * doc, lambda el: has_conditional_comment(el.text), * etree.Comment) # <<<<<<<<<<<<<< * * def _kill_elements(self, doc, condition, iterate=None): */ - __Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_n_s_etree); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 500, __pyx_L1_error) + __Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_n_s_etree); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 505, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); - __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_Comment); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 500, __pyx_L1_error) + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_Comment); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 505, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __pyx_t_4 = NULL; @@ -7958,7 +7958,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_10kill_conditional_comment #if CYTHON_FAST_PYCALL if (PyFunction_Check(__pyx_t_1)) { PyObject *__pyx_temp[4] = {__pyx_t_4, __pyx_v_doc, __pyx_t_3, __pyx_t_5}; - __pyx_t_2 = __Pyx_PyFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_6, 3+__pyx_t_6); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 498, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_6, 3+__pyx_t_6); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 503, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; @@ -7968,7 +7968,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_10kill_conditional_comment #if CYTHON_FAST_PYCCALL if (__Pyx_PyFastCFunction_Check(__pyx_t_1)) { PyObject *__pyx_temp[4] = {__pyx_t_4, 
__pyx_v_doc, __pyx_t_3, __pyx_t_5}; - __pyx_t_2 = __Pyx_PyCFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_6, 3+__pyx_t_6); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 498, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyCFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_6, 3+__pyx_t_6); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 503, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; @@ -7976,7 +7976,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_10kill_conditional_comment } else #endif { - __pyx_t_7 = PyTuple_New(3+__pyx_t_6); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 498, __pyx_L1_error) + __pyx_t_7 = PyTuple_New(3+__pyx_t_6); if (unlikely(!__pyx_t_7)) __PYX_ERR(0, 503, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_7); if (__pyx_t_4) { __Pyx_GIVEREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_t_4); __pyx_t_4 = NULL; @@ -7990,14 +7990,14 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_10kill_conditional_comment PyTuple_SET_ITEM(__pyx_t_7, 2+__pyx_t_6, __pyx_t_5); __pyx_t_3 = 0; __pyx_t_5 = 0; - __pyx_t_2 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_7, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 498, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_7, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 503, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0; } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "lxml/html/clean.py":491 + /* "lxml/html/clean.py":496 * return False * * def kill_conditional_comments(self, doc): # <<<<<<<<<<<<<< @@ -8024,7 +8024,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_10kill_conditional_comment return __pyx_r; } -/* "lxml/html/clean.py":502 +/* "lxml/html/clean.py":507 * etree.Comment) * * def _kill_elements(self, doc, condition, iterate=None): # <<<<<<<<<<<<<< @@ -8074,13 +8074,13 @@ static PyObject *__pyx_pw_4lxml_4html_5clean_7Cleaner_13_kill_elements(PyObject case 1: if 
(likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_doc)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("_kill_elements", 0, 3, 4, 1); __PYX_ERR(0, 502, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("_kill_elements", 0, 3, 4, 1); __PYX_ERR(0, 507, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 2: if (likely((values[2] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_condition)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("_kill_elements", 0, 3, 4, 2); __PYX_ERR(0, 502, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("_kill_elements", 0, 3, 4, 2); __PYX_ERR(0, 507, __pyx_L3_error) } CYTHON_FALLTHROUGH; case 3: @@ -8090,7 +8090,7 @@ static PyObject *__pyx_pw_4lxml_4html_5clean_7Cleaner_13_kill_elements(PyObject } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "_kill_elements") < 0)) __PYX_ERR(0, 502, __pyx_L3_error) + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "_kill_elements") < 0)) __PYX_ERR(0, 507, __pyx_L3_error) } } else { switch (PyTuple_GET_SIZE(__pyx_args)) { @@ -8110,7 +8110,7 @@ static PyObject *__pyx_pw_4lxml_4html_5clean_7Cleaner_13_kill_elements(PyObject } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("_kill_elements", 0, 3, 4, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 502, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("_kill_elements", 0, 3, 4, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 507, __pyx_L3_error) __pyx_L3_error:; __Pyx_AddTraceback("lxml.html.clean.Cleaner._kill_elements", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -8141,26 +8141,26 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_12_kill_elements(CYTHON_UN int __pyx_clineno = 0; __Pyx_RefNannySetupContext("_kill_elements", 0); - /* "lxml/html/clean.py":503 + /* "lxml/html/clean.py":508 * * def _kill_elements(self, doc, condition, iterate=None): * bad 
= [] # <<<<<<<<<<<<<< * for el in doc.iter(iterate): * if condition(el): */ - __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 503, __pyx_L1_error) + __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 508, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_v_bad = ((PyObject*)__pyx_t_1); __pyx_t_1 = 0; - /* "lxml/html/clean.py":504 + /* "lxml/html/clean.py":509 * def _kill_elements(self, doc, condition, iterate=None): * bad = [] * for el in doc.iter(iterate): # <<<<<<<<<<<<<< * if condition(el): * bad.append(el) */ - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_doc, __pyx_n_s_iter); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 504, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_doc, __pyx_n_s_iter); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 509, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_t_3 = NULL; if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_2))) { @@ -8174,16 +8174,16 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_12_kill_elements(CYTHON_UN } __pyx_t_1 = (__pyx_t_3) ? 
__Pyx_PyObject_Call2Args(__pyx_t_2, __pyx_t_3, __pyx_v_iterate) : __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_v_iterate); __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; - if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 504, __pyx_L1_error) + if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 509, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; if (likely(PyList_CheckExact(__pyx_t_1)) || PyTuple_CheckExact(__pyx_t_1)) { __pyx_t_2 = __pyx_t_1; __Pyx_INCREF(__pyx_t_2); __pyx_t_4 = 0; __pyx_t_5 = NULL; } else { - __pyx_t_4 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 504, __pyx_L1_error) + __pyx_t_4 = -1; __pyx_t_2 = PyObject_GetIter(__pyx_t_1); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 509, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); - __pyx_t_5 = Py_TYPE(__pyx_t_2)->tp_iternext; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 504, __pyx_L1_error) + __pyx_t_5 = Py_TYPE(__pyx_t_2)->tp_iternext; if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 509, __pyx_L1_error) } __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; for (;;) { @@ -8191,17 +8191,17 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_12_kill_elements(CYTHON_UN if (likely(PyList_CheckExact(__pyx_t_2))) { if (__pyx_t_4 >= PyList_GET_SIZE(__pyx_t_2)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - __pyx_t_1 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_4); __Pyx_INCREF(__pyx_t_1); __pyx_t_4++; if (unlikely(0 < 0)) __PYX_ERR(0, 504, __pyx_L1_error) + __pyx_t_1 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_4); __Pyx_INCREF(__pyx_t_1); __pyx_t_4++; if (unlikely(0 < 0)) __PYX_ERR(0, 509, __pyx_L1_error) #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_2, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 504, __pyx_L1_error) + __pyx_t_1 = PySequence_ITEM(__pyx_t_2, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 509, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } else { if (__pyx_t_4 >= PyTuple_GET_SIZE(__pyx_t_2)) break; #if CYTHON_ASSUME_SAFE_MACROS && 
!CYTHON_AVOID_BORROWED_REFS - __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_4); __Pyx_INCREF(__pyx_t_1); __pyx_t_4++; if (unlikely(0 < 0)) __PYX_ERR(0, 504, __pyx_L1_error) + __pyx_t_1 = PyTuple_GET_ITEM(__pyx_t_2, __pyx_t_4); __Pyx_INCREF(__pyx_t_1); __pyx_t_4++; if (unlikely(0 < 0)) __PYX_ERR(0, 509, __pyx_L1_error) #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_2, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 504, __pyx_L1_error) + __pyx_t_1 = PySequence_ITEM(__pyx_t_2, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 509, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif } @@ -8211,7 +8211,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_12_kill_elements(CYTHON_UN PyObject* exc_type = PyErr_Occurred(); if (exc_type) { if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear(); - else __PYX_ERR(0, 504, __pyx_L1_error) + else __PYX_ERR(0, 509, __pyx_L1_error) } break; } @@ -8220,7 +8220,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_12_kill_elements(CYTHON_UN __Pyx_XDECREF_SET(__pyx_v_el, __pyx_t_1); __pyx_t_1 = 0; - /* "lxml/html/clean.py":505 + /* "lxml/html/clean.py":510 * bad = [] * for el in doc.iter(iterate): * if condition(el): # <<<<<<<<<<<<<< @@ -8240,23 +8240,23 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_12_kill_elements(CYTHON_UN } __pyx_t_1 = (__pyx_t_6) ? 
__Pyx_PyObject_Call2Args(__pyx_t_3, __pyx_t_6, __pyx_v_el) : __Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_v_el); __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; - if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 505, __pyx_L1_error) + if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 510, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; - __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 505, __pyx_L1_error) + __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 510, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; if (__pyx_t_7) { - /* "lxml/html/clean.py":506 + /* "lxml/html/clean.py":511 * for el in doc.iter(iterate): * if condition(el): * bad.append(el) # <<<<<<<<<<<<<< * for el in bad: * el.drop_tree() */ - __pyx_t_8 = __Pyx_PyList_Append(__pyx_v_bad, __pyx_v_el); if (unlikely(__pyx_t_8 == ((int)-1))) __PYX_ERR(0, 506, __pyx_L1_error) + __pyx_t_8 = __Pyx_PyList_Append(__pyx_v_bad, __pyx_v_el); if (unlikely(__pyx_t_8 == ((int)-1))) __PYX_ERR(0, 511, __pyx_L1_error) - /* "lxml/html/clean.py":505 + /* "lxml/html/clean.py":510 * bad = [] * for el in doc.iter(iterate): * if condition(el): # <<<<<<<<<<<<<< @@ -8265,7 +8265,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_12_kill_elements(CYTHON_UN */ } - /* "lxml/html/clean.py":504 + /* "lxml/html/clean.py":509 * def _kill_elements(self, doc, condition, iterate=None): * bad = [] * for el in doc.iter(iterate): # <<<<<<<<<<<<<< @@ -8275,7 +8275,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_12_kill_elements(CYTHON_UN } __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "lxml/html/clean.py":507 + /* "lxml/html/clean.py":512 * if condition(el): * bad.append(el) * for el in bad: # <<<<<<<<<<<<<< @@ -8286,22 +8286,22 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_12_kill_elements(CYTHON_UN for (;;) { if (__pyx_t_4 >= PyList_GET_SIZE(__pyx_t_2)) break; #if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS - 
__pyx_t_1 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_4); __Pyx_INCREF(__pyx_t_1); __pyx_t_4++; if (unlikely(0 < 0)) __PYX_ERR(0, 507, __pyx_L1_error) + __pyx_t_1 = PyList_GET_ITEM(__pyx_t_2, __pyx_t_4); __Pyx_INCREF(__pyx_t_1); __pyx_t_4++; if (unlikely(0 < 0)) __PYX_ERR(0, 512, __pyx_L1_error) #else - __pyx_t_1 = PySequence_ITEM(__pyx_t_2, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 507, __pyx_L1_error) + __pyx_t_1 = PySequence_ITEM(__pyx_t_2, __pyx_t_4); __pyx_t_4++; if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 512, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); #endif __Pyx_XDECREF_SET(__pyx_v_el, __pyx_t_1); __pyx_t_1 = 0; - /* "lxml/html/clean.py":508 + /* "lxml/html/clean.py":513 * bad.append(el) * for el in bad: * el.drop_tree() # <<<<<<<<<<<<<< * * def _remove_javascript_link(self, link): */ - __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_drop_tree); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 508, __pyx_L1_error) + __pyx_t_3 = __Pyx_PyObject_GetAttrStr(__pyx_v_el, __pyx_n_s_drop_tree); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 513, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __pyx_t_6 = NULL; if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_3))) { @@ -8315,12 +8315,12 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_12_kill_elements(CYTHON_UN } __pyx_t_1 = (__pyx_t_6) ? 
__Pyx_PyObject_CallOneArg(__pyx_t_3, __pyx_t_6) : __Pyx_PyObject_CallNoArg(__pyx_t_3); __Pyx_XDECREF(__pyx_t_6); __pyx_t_6 = 0; - if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 508, __pyx_L1_error) + if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 513, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; - /* "lxml/html/clean.py":507 + /* "lxml/html/clean.py":512 * if condition(el): * bad.append(el) * for el in bad: # <<<<<<<<<<<<<< @@ -8330,7 +8330,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_12_kill_elements(CYTHON_UN } __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - /* "lxml/html/clean.py":502 + /* "lxml/html/clean.py":507 * etree.Comment) * * def _kill_elements(self, doc, condition, iterate=None): # <<<<<<<<<<<<<< @@ -8356,7 +8356,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_12_kill_elements(CYTHON_UN return __pyx_r; } -/* "lxml/html/clean.py":510 +/* "lxml/html/clean.py":515 * el.drop_tree() * * def _remove_javascript_link(self, link): # <<<<<<<<<<<<<< @@ -8399,11 +8399,11 @@ static PyObject *__pyx_pw_4lxml_4html_5clean_7Cleaner_15_remove_javascript_link( case 1: if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_link)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("_remove_javascript_link", 1, 2, 2, 1); __PYX_ERR(0, 510, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("_remove_javascript_link", 1, 2, 2, 1); __PYX_ERR(0, 515, __pyx_L3_error) } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "_remove_javascript_link") < 0)) __PYX_ERR(0, 510, __pyx_L3_error) + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "_remove_javascript_link") < 0)) __PYX_ERR(0, 515, __pyx_L3_error) } } else if (PyTuple_GET_SIZE(__pyx_args) != 2) { goto __pyx_L5_argtuple_error; @@ -8416,7 +8416,7 @@ static PyObject 
*__pyx_pw_4lxml_4html_5clean_7Cleaner_15_remove_javascript_link( } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("_remove_javascript_link", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 510, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("_remove_javascript_link", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 515, __pyx_L3_error) __pyx_L3_error:; __Pyx_AddTraceback("lxml.html.clean.Cleaner._remove_javascript_link", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -8445,16 +8445,16 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_14_remove_javascript_link( int __pyx_clineno = 0; __Pyx_RefNannySetupContext("_remove_javascript_link", 0); - /* "lxml/html/clean.py":512 + /* "lxml/html/clean.py":517 * def _remove_javascript_link(self, link): * # links like "j a v a s c r i p t:" might be interpreted in IE * new = _substitute_whitespace('', unquote_plus(link)) # <<<<<<<<<<<<<< * if _is_javascript_scheme(new): * # FIXME: should this be None to delete? */ - __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_substitute_whitespace); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 512, __pyx_L1_error) + __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_substitute_whitespace); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 517, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); - __Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_n_s_unquote_plus); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 512, __pyx_L1_error) + __Pyx_GetModuleGlobalName(__pyx_t_4, __pyx_n_s_unquote_plus); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 517, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __pyx_t_5 = NULL; if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_4))) { @@ -8468,7 +8468,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_14_remove_javascript_link( } __pyx_t_3 = (__pyx_t_5) ? 
__Pyx_PyObject_Call2Args(__pyx_t_4, __pyx_t_5, __pyx_v_link) : __Pyx_PyObject_CallOneArg(__pyx_t_4, __pyx_v_link); __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; - if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 512, __pyx_L1_error) + if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 517, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; __pyx_t_4 = NULL; @@ -8486,7 +8486,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_14_remove_javascript_link( #if CYTHON_FAST_PYCALL if (PyFunction_Check(__pyx_t_2)) { PyObject *__pyx_temp[3] = {__pyx_t_4, __pyx_kp_s__2, __pyx_t_3}; - __pyx_t_1 = __Pyx_PyFunction_FastCall(__pyx_t_2, __pyx_temp+1-__pyx_t_6, 2+__pyx_t_6); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 512, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyFunction_FastCall(__pyx_t_2, __pyx_temp+1-__pyx_t_6, 2+__pyx_t_6); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 517, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; @@ -8495,14 +8495,14 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_14_remove_javascript_link( #if CYTHON_FAST_PYCCALL if (__Pyx_PyFastCFunction_Check(__pyx_t_2)) { PyObject *__pyx_temp[3] = {__pyx_t_4, __pyx_kp_s__2, __pyx_t_3}; - __pyx_t_1 = __Pyx_PyCFunction_FastCall(__pyx_t_2, __pyx_temp+1-__pyx_t_6, 2+__pyx_t_6); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 512, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyCFunction_FastCall(__pyx_t_2, __pyx_temp+1-__pyx_t_6, 2+__pyx_t_6); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 517, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_4); __pyx_t_4 = 0; __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; } else #endif { - __pyx_t_5 = PyTuple_New(2+__pyx_t_6); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 512, __pyx_L1_error) + __pyx_t_5 = PyTuple_New(2+__pyx_t_6); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 517, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); if (__pyx_t_4) { __Pyx_GIVEREF(__pyx_t_4); PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_4); __pyx_t_4 = NULL; @@ -8513,7 
+8513,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_14_remove_javascript_link( __Pyx_GIVEREF(__pyx_t_3); PyTuple_SET_ITEM(__pyx_t_5, 1+__pyx_t_6, __pyx_t_3); __pyx_t_3 = 0; - __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_5, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 512, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_5, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 517, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; } @@ -8521,14 +8521,14 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_14_remove_javascript_link( __pyx_v_new = __pyx_t_1; __pyx_t_1 = 0; - /* "lxml/html/clean.py":513 + /* "lxml/html/clean.py":518 * # links like "j a v a s c r i p t:" might be interpreted in IE * new = _substitute_whitespace('', unquote_plus(link)) * if _is_javascript_scheme(new): # <<<<<<<<<<<<<< * # FIXME: should this be None to delete? * return '' */ - __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_is_javascript_scheme); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 513, __pyx_L1_error) + __Pyx_GetModuleGlobalName(__pyx_t_2, __pyx_n_s_is_javascript_scheme); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 518, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_t_5 = NULL; if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_2))) { @@ -8542,14 +8542,14 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_14_remove_javascript_link( } __pyx_t_1 = (__pyx_t_5) ? 
__Pyx_PyObject_Call2Args(__pyx_t_2, __pyx_t_5, __pyx_v_new) : __Pyx_PyObject_CallOneArg(__pyx_t_2, __pyx_v_new); __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; - if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 513, __pyx_L1_error) + if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 518, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; - __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 513, __pyx_L1_error) + __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 518, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; if (__pyx_t_7) { - /* "lxml/html/clean.py":515 + /* "lxml/html/clean.py":520 * if _is_javascript_scheme(new): * # FIXME: should this be None to delete? * return '' # <<<<<<<<<<<<<< @@ -8561,7 +8561,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_14_remove_javascript_link( __pyx_r = __pyx_kp_s__2; goto __pyx_L0; - /* "lxml/html/clean.py":513 + /* "lxml/html/clean.py":518 * # links like "j a v a s c r i p t:" might be interpreted in IE * new = _substitute_whitespace('', unquote_plus(link)) * if _is_javascript_scheme(new): # <<<<<<<<<<<<<< @@ -8570,7 +8570,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_14_remove_javascript_link( */ } - /* "lxml/html/clean.py":516 + /* "lxml/html/clean.py":521 * # FIXME: should this be None to delete? 
* return '' * return link # <<<<<<<<<<<<<< @@ -8582,7 +8582,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_14_remove_javascript_link( __pyx_r = __pyx_v_link; goto __pyx_L0; - /* "lxml/html/clean.py":510 + /* "lxml/html/clean.py":515 * el.drop_tree() * * def _remove_javascript_link(self, link): # <<<<<<<<<<<<<< @@ -8606,7 +8606,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_14_remove_javascript_link( return __pyx_r; } -/* "lxml/html/clean.py":520 +/* "lxml/html/clean.py":525 * _substitute_comments = re.compile(r'/\*.*?\*[inserted by cython to avoid comment closer]/', re.S).sub * * def _has_sneaky_javascript(self, style): # <<<<<<<<<<<<<< @@ -8650,11 +8650,11 @@ static PyObject *__pyx_pw_4lxml_4html_5clean_7Cleaner_17_has_sneaky_javascript(P case 1: if (likely((values[1] = __Pyx_PyDict_GetItemStr(__pyx_kwds, __pyx_n_s_style)) != 0)) kw_args--; else { - __Pyx_RaiseArgtupleInvalid("_has_sneaky_javascript", 1, 2, 2, 1); __PYX_ERR(0, 520, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("_has_sneaky_javascript", 1, 2, 2, 1); __PYX_ERR(0, 525, __pyx_L3_error) } } if (unlikely(kw_args > 0)) { - if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "_has_sneaky_javascript") < 0)) __PYX_ERR(0, 520, __pyx_L3_error) + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "_has_sneaky_javascript") < 0)) __PYX_ERR(0, 525, __pyx_L3_error) } } else if (PyTuple_GET_SIZE(__pyx_args) != 2) { goto __pyx_L5_argtuple_error; @@ -8667,7 +8667,7 @@ static PyObject *__pyx_pw_4lxml_4html_5clean_7Cleaner_17_has_sneaky_javascript(P } goto __pyx_L4_argument_unpacking_done; __pyx_L5_argtuple_error:; - __Pyx_RaiseArgtupleInvalid("_has_sneaky_javascript", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 520, __pyx_L3_error) + __Pyx_RaiseArgtupleInvalid("_has_sneaky_javascript", 1, 2, 2, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 525, __pyx_L3_error) __pyx_L3_error:; 
__Pyx_AddTraceback("lxml.html.clean.Cleaner._has_sneaky_javascript", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); @@ -8696,14 +8696,14 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_16_has_sneaky_javascript(C __Pyx_RefNannySetupContext("_has_sneaky_javascript", 0); __Pyx_INCREF(__pyx_v_style); - /* "lxml/html/clean.py":531 + /* "lxml/html/clean.py":536 * more sneaky attempts. * """ * style = self._substitute_comments('', style) # <<<<<<<<<<<<<< * style = style.replace('\\', '') * style = _substitute_whitespace('', style) */ - __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_substitute_comments); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 531, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_v_self, __pyx_n_s_substitute_comments); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 536, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __pyx_t_3 = NULL; __pyx_t_4 = 0; @@ -8720,7 +8720,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_16_has_sneaky_javascript(C #if CYTHON_FAST_PYCALL if (PyFunction_Check(__pyx_t_2)) { PyObject *__pyx_temp[3] = {__pyx_t_3, __pyx_kp_s__2, __pyx_v_style}; - __pyx_t_1 = __Pyx_PyFunction_FastCall(__pyx_t_2, __pyx_temp+1-__pyx_t_4, 2+__pyx_t_4); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 531, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyFunction_FastCall(__pyx_t_2, __pyx_temp+1-__pyx_t_4, 2+__pyx_t_4); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 536, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_GOTREF(__pyx_t_1); } else @@ -8728,13 +8728,13 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_16_has_sneaky_javascript(C #if CYTHON_FAST_PYCCALL if (__Pyx_PyFastCFunction_Check(__pyx_t_2)) { PyObject *__pyx_temp[3] = {__pyx_t_3, __pyx_kp_s__2, __pyx_v_style}; - __pyx_t_1 = __Pyx_PyCFunction_FastCall(__pyx_t_2, __pyx_temp+1-__pyx_t_4, 2+__pyx_t_4); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 531, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyCFunction_FastCall(__pyx_t_2, __pyx_temp+1-__pyx_t_4, 
2+__pyx_t_4); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 536, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; __Pyx_GOTREF(__pyx_t_1); } else #endif { - __pyx_t_5 = PyTuple_New(2+__pyx_t_4); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 531, __pyx_L1_error) + __pyx_t_5 = PyTuple_New(2+__pyx_t_4); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 536, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_5); if (__pyx_t_3) { __Pyx_GIVEREF(__pyx_t_3); PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_3); __pyx_t_3 = NULL; @@ -8745,7 +8745,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_16_has_sneaky_javascript(C __Pyx_INCREF(__pyx_v_style); __Pyx_GIVEREF(__pyx_v_style); PyTuple_SET_ITEM(__pyx_t_5, 1+__pyx_t_4, __pyx_v_style); - __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_5, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 531, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_5, NULL); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 536, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; } @@ -8753,29 +8753,29 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_16_has_sneaky_javascript(C __Pyx_DECREF_SET(__pyx_v_style, __pyx_t_1); __pyx_t_1 = 0; - /* "lxml/html/clean.py":532 + /* "lxml/html/clean.py":537 * """ * style = self._substitute_comments('', style) * style = style.replace('\\', '') # <<<<<<<<<<<<<< * style = _substitute_whitespace('', style) * style = style.lower() */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_style, __pyx_n_s_replace); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 532, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_style, __pyx_n_s_replace); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 537, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); - __pyx_t_2 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__13, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 532, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_tuple__13, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 537, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); 
__Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF_SET(__pyx_v_style, __pyx_t_2); __pyx_t_2 = 0; - /* "lxml/html/clean.py":533 + /* "lxml/html/clean.py":538 * style = self._substitute_comments('', style) * style = style.replace('\\', '') * style = _substitute_whitespace('', style) # <<<<<<<<<<<<<< * style = style.lower() * if 'javascript:' in style: */ - __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_substitute_whitespace); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 533, __pyx_L1_error) + __Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_substitute_whitespace); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 538, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_5 = NULL; __pyx_t_4 = 0; @@ -8792,7 +8792,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_16_has_sneaky_javascript(C #if CYTHON_FAST_PYCALL if (PyFunction_Check(__pyx_t_1)) { PyObject *__pyx_temp[3] = {__pyx_t_5, __pyx_kp_s__2, __pyx_v_style}; - __pyx_t_2 = __Pyx_PyFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_4, 2+__pyx_t_4); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 533, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_4, 2+__pyx_t_4); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 538, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_GOTREF(__pyx_t_2); } else @@ -8800,13 +8800,13 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_16_has_sneaky_javascript(C #if CYTHON_FAST_PYCCALL if (__Pyx_PyFastCFunction_Check(__pyx_t_1)) { PyObject *__pyx_temp[3] = {__pyx_t_5, __pyx_kp_s__2, __pyx_v_style}; - __pyx_t_2 = __Pyx_PyCFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_4, 2+__pyx_t_4); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 533, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyCFunction_FastCall(__pyx_t_1, __pyx_temp+1-__pyx_t_4, 2+__pyx_t_4); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 538, __pyx_L1_error) __Pyx_XDECREF(__pyx_t_5); __pyx_t_5 = 0; __Pyx_GOTREF(__pyx_t_2); } else #endif { - __pyx_t_3 = PyTuple_New(2+__pyx_t_4); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 533, 
__pyx_L1_error) + __pyx_t_3 = PyTuple_New(2+__pyx_t_4); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 538, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_3); if (__pyx_t_5) { __Pyx_GIVEREF(__pyx_t_5); PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_5); __pyx_t_5 = NULL; @@ -8817,7 +8817,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_16_has_sneaky_javascript(C __Pyx_INCREF(__pyx_v_style); __Pyx_GIVEREF(__pyx_v_style); PyTuple_SET_ITEM(__pyx_t_3, 1+__pyx_t_4, __pyx_v_style); - __pyx_t_2 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_3, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 533, __pyx_L1_error) + __pyx_t_2 = __Pyx_PyObject_Call(__pyx_t_1, __pyx_t_3, NULL); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 538, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; } @@ -8825,14 +8825,14 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_16_has_sneaky_javascript(C __Pyx_DECREF_SET(__pyx_v_style, __pyx_t_2); __pyx_t_2 = 0; - /* "lxml/html/clean.py":534 + /* "lxml/html/clean.py":539 * style = style.replace('\\', '') * style = _substitute_whitespace('', style) * style = style.lower() # <<<<<<<<<<<<<< * if 'javascript:' in style: * return True */ - __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_style, __pyx_n_s_lower); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 534, __pyx_L1_error) + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(__pyx_v_style, __pyx_n_s_lower); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 539, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); __pyx_t_3 = NULL; if (CYTHON_UNPACK_METHODS && likely(PyMethod_Check(__pyx_t_1))) { @@ -8846,24 +8846,24 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_16_has_sneaky_javascript(C } __pyx_t_2 = (__pyx_t_3) ? 
__Pyx_PyObject_CallOneArg(__pyx_t_1, __pyx_t_3) : __Pyx_PyObject_CallNoArg(__pyx_t_1); __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; - if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 534, __pyx_L1_error) + if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 539, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_2); __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; __Pyx_DECREF_SET(__pyx_v_style, __pyx_t_2); __pyx_t_2 = 0; - /* "lxml/html/clean.py":535 + /* "lxml/html/clean.py":540 * style = _substitute_whitespace('', style) * style = style.lower() * if 'javascript:' in style: # <<<<<<<<<<<<<< * return True * if 'expression(' in style: */ - __pyx_t_6 = (__Pyx_PySequence_ContainsTF(__pyx_kp_s_javascript_2, __pyx_v_style, Py_EQ)); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(0, 535, __pyx_L1_error) + __pyx_t_6 = (__Pyx_PySequence_ContainsTF(__pyx_kp_s_javascript_2, __pyx_v_style, Py_EQ)); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(0, 540, __pyx_L1_error) __pyx_t_7 = (__pyx_t_6 != 0); if (__pyx_t_7) { - /* "lxml/html/clean.py":536 + /* "lxml/html/clean.py":541 * style = style.lower() * if 'javascript:' in style: * return True # <<<<<<<<<<<<<< @@ -8875,7 +8875,7 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_16_has_sneaky_javascript(C __pyx_r = Py_True; goto __pyx_L0; - /* "lxml/html/clean.py":535 + /* "lxml/html/clean.py":540 * style = _substitute_whitespace('', style) * style = style.lower() * if 'javascript:' in style: # <<<<<<<<<<<<<< @@ -8884,18 +8884,18 @@ static PyObject *__pyx_pf_4lxml_4html_5clean_7Cleaner_16_has_sneaky_javascript(C */ } - /* "lxml/html/clean.py":537 + /* "lxml/html/clean.py":542 * if 'javascript:' in style: * return True * if 'expression(' in style: # <<<<<<<<<<<<<< * return True * if '' */ __Pyx_XDECREF(__pyx_r); __Pyx_INCREF(Py_True); __pyx_r = Py_True; goto __pyx_L0; - /* "lxml/html/clean.py":539 + /* "lxml/html/clean.py":544 * if 'expression(' in style: * return True * if '' + * return True */ - __Pyx_XDECREF(__pyx_r); - __Pyx_INCREF(Py_False); - __pyx_r = Py_False; - goto __pyx_L0; + 
__Pyx_GetModuleGlobalName(__pyx_t_1, __pyx_n_s_looks_like_tag_content); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 547, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_3 = NULL; + if (CYTHON_UNPACK_METHODS && unlikely(PyMethod_Check(__pyx_t_1))) { + __pyx_t_3 = PyMethod_GET_SELF(__pyx_t_1); + if (likely(__pyx_t_3)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1); + __Pyx_INCREF(__pyx_t_3); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_1, function); + } + } + __pyx_t_2 = (__pyx_t_3) ? __Pyx_PyObject_Call2Args(__pyx_t_1, __pyx_t_3, __pyx_v_style) : __Pyx_PyObject_CallOneArg(__pyx_t_1, __pyx_v_style); + __Pyx_XDECREF(__pyx_t_3); __pyx_t_3 = 0; + if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 547, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_7 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely(__pyx_t_7 < 0)) __PYX_ERR(0, 547, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + if (__pyx_t_7) { - /* "lxml/html/clean.py":520 - * _substitute_comments = re.compile(r'/\*.*?\*[inserted by cython to avoid comment closer]/', re.S).sub + /* "lxml/html/clean.py":549 + * if _looks_like_tag_content(style): + * # e.g. '' + * return True # <<<<<<<<<<<<<< + * return False * - * def _has_sneaky_javascript(self, style): # <<<<<<<<<<<<<< - * """ - * Depending on the browser, stuff like ``e x p r e s s i o n(...)`` */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(Py_True); + __pyx_r = Py_True; + goto __pyx_L0; - /* function exit code */ - __pyx_L1_error:; + /* "lxml/html/clean.py":547 + * # e.g. '