From ff75f3c1d46d9d76a91ce066af4c77555f62616e Mon Sep 17 00:00:00 2001 From: JinWang An Date: Tue, 6 Apr 2021 15:38:05 +0900 Subject: [PATCH] [CVE-2020-27783] Prevent combinations of and ' + return True return False def clean_html(self, html): diff --git a/src/lxml/html/tests/test_clean.py b/src/lxml/html/tests/test_clean.py index a193d99..e1a7a26 100644 --- a/src/lxml/html/tests/test_clean.py +++ b/src/lxml/html/tests/test_clean.py @@ -68,6 +68,16 @@ class CleanerTest(unittest.TestCase): s = lxml.html.fromstring('child') self.assertEqual('child', clean_html(s).text_content()) + def test_sneaky_js_in_math_style(self): + # This gets parsed as -> + # thus passing any tag/script/whatever content through into the output. + html = '' + s = lxml.html.fragment_fromstring(html) + + self.assertEqual( + b'', + lxml.html.tostring(clean_html(s))) + def test_suite(): suite = unittest.TestSuite() diff --git a/src/lxml/html/tests/test_clean.txt b/src/lxml/html/tests/test_clean.txt index 2824f64..7df1f1d 100644 --- a/src/lxml/html/tests/test_clean.txt +++ b/src/lxml/html/tests/test_clean.txt @@ -104,7 +104,11 @@ >>> print(Cleaner(page_structure=False, safe_attrs_only=False).clean_html(doc)) - + a link @@ -168,7 +172,11 @@ - + a link -- 2.7.4