From: JinWang An Date: Wed, 18 Jan 2023 06:01:32 +0000 (+0900) Subject: Imported Upstream version 3.9.16 X-Git-Tag: upstream/3.9.16^0 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=5a56a36e81328ca3649f4ee56c9b7f22d4e97d67;p=platform%2Fupstream%2Fpython3.git Imported Upstream version 3.9.16 --- diff --git a/Doc/library/http.server.rst b/Doc/library/http.server.rst index 4aa10e26..94647e99 100644 --- a/Doc/library/http.server.rst +++ b/Doc/library/http.server.rst @@ -499,3 +499,12 @@ Security Considerations :class:`SimpleHTTPRequestHandler` will follow symbolic links when handling requests, this makes it possible for files outside of the specified directory to be served. + +Earlier versions of Python did not scrub control characters from the +log messages emitted to stderr from ``python -m http.server`` or the +default :class:`BaseHTTPRequestHandler` ``.log_message`` +implementation. This could allow remote clients connecting to your +server to send nefarious control codes to your terminal. + +.. versionadded:: 3.9.16 + scrubbing control characters from log messages diff --git a/Doc/library/mailcap.rst b/Doc/library/mailcap.rst index a22b5b9c..7aa3380f 100644 --- a/Doc/library/mailcap.rst +++ b/Doc/library/mailcap.rst @@ -60,6 +60,18 @@ standard. However, mailcap files are supported on most Unix systems. use) to determine whether or not the mailcap line applies. :func:`findmatch` will automatically check such conditions and skip the entry if the check fails. + .. versionchanged:: 3.11 + + To prevent security issues with shell metacharacters (symbols that have + special effects in a shell command line), ``findmatch`` will refuse + to inject ASCII characters other than alphanumerics and ``@+=:,./-_`` + into the returned command line. + + If a disallowed character appears in *filename*, ``findmatch`` will always + return ``(None, None)`` as if no entry was found. + If such a character appears elsewhere (a value in *plist* or in *MIMEtype*), + ``findmatch`` will ignore all mailcap entries which use that value. + A :mod:`warning ` will be raised in either case. .. function:: getcaps() diff --git a/Include/patchlevel.h b/Include/patchlevel.h index a30fa4aa..e33d9a1d 100644 --- a/Include/patchlevel.h +++ b/Include/patchlevel.h @@ -18,12 +18,12 @@ /*--start constants--*/ #define PY_MAJOR_VERSION 3 #define PY_MINOR_VERSION 9 -#define PY_MICRO_VERSION 15 +#define PY_MICRO_VERSION 16 #define PY_RELEASE_LEVEL PY_RELEASE_LEVEL_FINAL #define PY_RELEASE_SERIAL 0 /* Version as a string */ -#define PY_VERSION "3.9.15" +#define PY_VERSION "3.9.16" /*--end constants--*/ /* Version as a single 4-byte hex number, e.g. 0x010502B2 == 1.5.2b2. diff --git a/Lib/encodings/idna.py b/Lib/encodings/idna.py index ea405851..bf98f513 100644 --- a/Lib/encodings/idna.py +++ b/Lib/encodings/idna.py @@ -39,23 +39,21 @@ def nameprep(label): # Check bidi RandAL = [stringprep.in_table_d1(x) for x in label] - for c in RandAL: - if c: - # There is a RandAL char in the string. Must perform further - # tests: - # 1) The characters in section 5.8 MUST be prohibited. - # This is table C.8, which was already checked - # 2) If a string contains any RandALCat character, the string - # MUST NOT contain any LCat character. - if any(stringprep.in_table_d2(x) for x in label): - raise UnicodeError("Violation of BIDI requirement 2") - - # 3) If a string contains any RandALCat character, a - # RandALCat character MUST be the first character of the - # string, and a RandALCat character MUST be the last - # character of the string. - if not RandAL[0] or not RandAL[-1]: - raise UnicodeError("Violation of BIDI requirement 3") + if any(RandAL): + # There is a RandAL char in the string. Must perform further + # tests: + # 1) The characters in section 5.8 MUST be prohibited. + # This is table C.8, which was already checked + # 2) If a string contains any RandALCat character, the string + # MUST NOT contain any LCat character. + if any(stringprep.in_table_d2(x) for x in label): + raise UnicodeError("Violation of BIDI requirement 2") + # 3) If a string contains any RandALCat character, a + # RandALCat character MUST be the first character of the + # string, and a RandALCat character MUST be the last + # character of the string. + if not RandAL[0] or not RandAL[-1]: + raise UnicodeError("Violation of BIDI requirement 3") return label diff --git a/Lib/http/server.py b/Lib/http/server.py index 6bf90843..cf8933c3 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -93,6 +93,7 @@ import email.utils import html import http.client import io +import itertools import mimetypes import os import posixpath @@ -563,6 +564,11 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): self.log_message(format, *args) + # https://en.wikipedia.org/wiki/List_of_Unicode_characters#Control_codes + _control_char_table = str.maketrans( + {c: fr'\x{c:02x}' for c in itertools.chain(range(0x20), range(0x7f,0xa0))}) + _control_char_table[ord('\\')] = r'\\' + def log_message(self, format, *args): """Log an arbitrary message. @@ -578,12 +584,16 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): The client ip and current date/time are prefixed to every message. + Unicode control characters are replaced with escaped hex + before writing the output to stderr. + """ + message = format % args sys.stderr.write("%s - - [%s] %s\n" % (self.address_string(), self.log_date_time_string(), - format%args)) + message.translate(self._control_char_table))) def version_string(self): """Return the server software version string.""" diff --git a/Lib/mailcap.py b/Lib/mailcap.py index ae416a8e..444c6408 100644 --- a/Lib/mailcap.py +++ b/Lib/mailcap.py @@ -2,6 +2,7 @@ import os import warnings +import re __all__ = ["getcaps","findmatch"] @@ -13,6 +14,11 @@ def lineno_sort_key(entry): else: return 1, 0 +_find_unsafe = re.compile(r'[^\xa1-\U0010FFFF\w@+=:,./-]').search + +class UnsafeMailcapInput(Warning): + """Warning raised when refusing unsafe input""" + # Part 1: top-level interface. @@ -165,15 +171,22 @@ def findmatch(caps, MIMEtype, key='view', filename="/dev/null", plist=[]): entry to use. """ + if _find_unsafe(filename): + msg = "Refusing to use mailcap with filename %r. Use a safe temporary filename." % (filename,) + warnings.warn(msg, UnsafeMailcapInput) + return None, None entries = lookup(caps, MIMEtype, key) # XXX This code should somehow check for the needsterminal flag. for e in entries: if 'test' in e: test = subst(e['test'], filename, plist) + if test is None: + continue if test and os.system(test) != 0: continue command = subst(e[key], MIMEtype, filename, plist) - return command, e + if command is not None: + return command, e return None, None def lookup(caps, MIMEtype, key=None): @@ -206,6 +219,10 @@ def subst(field, MIMEtype, filename, plist=[]): elif c == 's': res = res + filename elif c == 't': + if _find_unsafe(MIMEtype): + msg = "Refusing to substitute MIME type %r into a shell command." % (MIMEtype,) + warnings.warn(msg, UnsafeMailcapInput) + return None res = res + MIMEtype elif c == '{': start = i @@ -213,7 +230,12 @@ def subst(field, MIMEtype, filename, plist=[]): i = i+1 name = field[start:i] i = i+1 - res = res + findparam(name, plist) + param = findparam(name, plist) + if _find_unsafe(param): + msg = "Refusing to substitute parameter %r (%s) into a shell command" % (param, name) + warnings.warn(msg, UnsafeMailcapInput) + return None + res = res + param # XXX To do: # %n == number of parts if type is multipart/* # %F == list of alternating type and filename for parts diff --git a/Lib/multiprocessing/connection.py b/Lib/multiprocessing/connection.py index 510e4b5a..8e2facf9 100644 --- a/Lib/multiprocessing/connection.py +++ b/Lib/multiprocessing/connection.py @@ -73,11 +73,6 @@ def arbitrary_address(family): if family == 'AF_INET': return ('localhost', 0) elif family == 'AF_UNIX': - # Prefer abstract sockets if possible to avoid problems with the address - # size. When coding portable applications, some implementations have - # sun_path as short as 92 bytes in the sockaddr_un struct. - if util.abstract_sockets_supported: - return f"\0listener-{os.getpid()}-{next(_mmap_counter)}" return tempfile.mktemp(prefix='listener-', dir=util.get_temp_dir()) elif family == 'AF_PIPE': return tempfile.mktemp(prefix=r'\\.\pipe\pyc-%d-%d-' % diff --git a/Lib/test/audit-tests.py b/Lib/test/audit-tests.py index 95216bcc..6de636a9 100644 --- a/Lib/test/audit-tests.py +++ b/Lib/test/audit-tests.py @@ -368,6 +368,17 @@ def test_gc(): gc.get_referents(y) +def test_not_in_gc(): + import gc + + hook = lambda *a: None + sys.addaudithook(hook) + + for o in gc.get_objects(): + if isinstance(o, list): + assert hook not in o + + if __name__ == "__main__": from test.support import suppress_msvcrt_asserts diff --git a/Lib/test/test_audit.py b/Lib/test/test_audit.py index 387a3122..e47e5569 100644 --- a/Lib/test/test_audit.py +++ b/Lib/test/test_audit.py @@ -132,6 +132,11 @@ class AuditTest(unittest.TestCase): ["gc.get_objects", "gc.get_referrers", "gc.get_referents"] ) + def test_not_in_gc(self): + returncode, _, stderr = self.run_python("test_not_in_gc") + if returncode: + self.fail(stderr) + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py index fc50e70d..3520cc00 100644 --- a/Lib/test/test_codecs.py +++ b/Lib/test/test_codecs.py @@ -1532,6 +1532,12 @@ class IDNACodecTest(unittest.TestCase): self.assertEqual("pyth\xf6n.org".encode("idna"), b"xn--pythn-mua.org") self.assertEqual("pyth\xf6n.org.".encode("idna"), b"xn--pythn-mua.org.") + def test_builtin_decode_length_limit(self): + with self.assertRaisesRegex(UnicodeError, "too long"): + (b"xn--016c"+b"a"*1100).decode("idna") + with self.assertRaisesRegex(UnicodeError, "too long"): + (b"xn--016c"+b"a"*70).decode("idna") + def test_stream(self): r = codecs.getreader("idna")(io.BytesIO(b"abc")) r.read(3) diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index f845c7a7..bc11a8d8 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -497,6 +497,15 @@ class HashLibTestCase(unittest.TestCase): def test_case_md5_uintmax(self, size): self.check('md5', b'A'*size, '28138d306ff1b8281f1a9067e1a1a2b3') + @unittest.skipIf(sys.maxsize < _4G - 1, 'test cannot run on 32-bit systems') + @bigmemtest(size=_4G - 1, memuse=1, dry_run=False) + def test_sha3_update_overflow(self, size): + """Regression test for gh-98517 CVE-2022-37454.""" + h = hashlib.sha3_224() + h.update(b'\x01') + h.update(b'\x01'*0xffff_ffff) + self.assertEqual(h.hexdigest(), '80762e8ce6700f114fec0f621fd97c4b9c00147fa052215294cceeed') + # use the three examples from Federal Information Processing Standards # Publication 180-1, Secure Hash Standard, 1995 April 17 # http://www.itl.nist.gov/div897/pubs/fip180-1.htm diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index 4acf7a6f..db9ee29e 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -26,7 +26,7 @@ import time import datetime import threading from unittest import mock -from io import BytesIO +from io import BytesIO, StringIO import unittest from test import support @@ -982,6 +982,25 @@ class BaseHTTPRequestHandlerTestCase(unittest.TestCase): match = self.HTTPResponseMatch.search(response) self.assertIsNotNone(match) + def test_unprintable_not_logged(self): + # We call the method from the class directly as our Socketless + # Handler subclass overrode it... nice for everything BUT this test. + self.handler.client_address = ('127.0.0.1', 1337) + log_message = BaseHTTPRequestHandler.log_message + with mock.patch.object(sys, 'stderr', StringIO()) as fake_stderr: + log_message(self.handler, '/foo') + log_message(self.handler, '/\033bar\000\033') + log_message(self.handler, '/spam %s.', 'a') + log_message(self.handler, '/spam %s.', '\033\x7f\x9f\xa0beans') + stderr = fake_stderr.getvalue() + self.assertNotIn('\033', stderr) # non-printable chars are caught. + self.assertNotIn('\000', stderr) # non-printable chars are caught. + lines = stderr.splitlines() + self.assertIn('/foo', lines[0]) + self.assertIn(r'/\x1bbar\x00\x1b', lines[1]) + self.assertIn('/spam a.', lines[2]) + self.assertIn('/spam \\x1b\\x7f\\x9f\xa0beans.', lines[3]) + def test_http_1_1(self): result = self.send_typical_request(b'GET / HTTP/1.1\r\n\r\n') self.verify_http_server_response(result[0]) diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py index cbbddf50..a8bb99a9 100644 --- a/Lib/test/test_int.py +++ b/Lib/test/test_int.py @@ -644,7 +644,8 @@ class IntStrDigitLimitsTests(unittest.TestCase): self.assertEqual(len(huge_decimal), digits) # Ensuring that we chose a slow enough conversion to measure. # It takes 0.1 seconds on a Zen based cloud VM in an opt build. - if seconds_to_convert < 0.005: + # Some OSes have a low res 1/64s timer, skip if hard to measure. + if seconds_to_convert < 1/64: raise unittest.SkipTest('"slow" conversion took only ' f'{seconds_to_convert} seconds.') @@ -656,7 +657,7 @@ class IntStrDigitLimitsTests(unittest.TestCase): str(huge_int) seconds_to_fail_huge = get_time() - start self.assertIn('conversion', str(err.exception)) - self.assertLess(seconds_to_fail_huge, seconds_to_convert/8) + self.assertLessEqual(seconds_to_fail_huge, seconds_to_convert/2) # Now we test that a conversion that would take 30x as long also fails # in a similarly fast fashion. @@ -667,7 +668,7 @@ class IntStrDigitLimitsTests(unittest.TestCase): str(extra_huge_int) seconds_to_fail_extra_huge = get_time() - start self.assertIn('conversion', str(err.exception)) - self.assertLess(seconds_to_fail_extra_huge, seconds_to_convert/8) + self.assertLess(seconds_to_fail_extra_huge, seconds_to_convert/2) def test_denial_of_service_prevented_str_to_int(self): """Regression test: ensure we fail before performing O(N**2) work.""" @@ -685,7 +686,8 @@ class IntStrDigitLimitsTests(unittest.TestCase): seconds_to_convert = get_time() - start # Ensuring that we chose a slow enough conversion to measure. # It takes 0.1 seconds on a Zen based cloud VM in an opt build. - if seconds_to_convert < 0.005: + # Some OSes have a low res 1/64s timer, skip if hard to measure. + if seconds_to_convert < 1/64: raise unittest.SkipTest('"slow" conversion took only ' f'{seconds_to_convert} seconds.') @@ -695,7 +697,7 @@ class IntStrDigitLimitsTests(unittest.TestCase): int(huge) seconds_to_fail_huge = get_time() - start self.assertIn('conversion', str(err.exception)) - self.assertLess(seconds_to_fail_huge, seconds_to_convert/8) + self.assertLessEqual(seconds_to_fail_huge, seconds_to_convert/2) # Now we test that a conversion that would take 30x as long also fails # in a similarly fast fashion. @@ -706,7 +708,7 @@ class IntStrDigitLimitsTests(unittest.TestCase): int(extra_huge) seconds_to_fail_extra_huge = get_time() - start self.assertIn('conversion', str(err.exception)) - self.assertLess(seconds_to_fail_extra_huge, seconds_to_convert/8) + self.assertLessEqual(seconds_to_fail_extra_huge, seconds_to_convert/2) def test_power_of_two_bases_unlimited(self): """The limit does not apply to power of 2 bases.""" diff --git a/Lib/test/test_mailcap.py b/Lib/test/test_mailcap.py index c08423c6..920283d9 100644 --- a/Lib/test/test_mailcap.py +++ b/Lib/test/test_mailcap.py @@ -121,7 +121,8 @@ class HelperFunctionTest(unittest.TestCase): (["", "audio/*", "foo.txt"], ""), (["echo foo", "audio/*", "foo.txt"], "echo foo"), (["echo %s", "audio/*", "foo.txt"], "echo foo.txt"), - (["echo %t", "audio/*", "foo.txt"], "echo audio/*"), + (["echo %t", "audio/*", "foo.txt"], None), + (["echo %t", "audio/wav", "foo.txt"], "echo audio/wav"), (["echo \\%t", "audio/*", "foo.txt"], "echo %t"), (["echo foo", "audio/*", "foo.txt", plist], "echo foo"), (["echo %{total}", "audio/*", "foo.txt", plist], "echo 3") @@ -205,7 +206,10 @@ class FindmatchTest(unittest.TestCase): ('"An audio fragment"', audio_basic_entry)), ([c, "audio/*"], {"filename": fname}, - ("/usr/local/bin/showaudio audio/*", audio_entry)), + (None, None)), + ([c, "audio/wav"], + {"filename": fname}, + ("/usr/local/bin/showaudio audio/wav", audio_entry)), ([c, "message/external-body"], {"plist": plist}, ("showexternal /dev/null default john python.org /tmp foo bar", message_entry)) diff --git a/Misc/NEWS b/Misc/NEWS index ee0452b4..d7e2fe42 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -2,6 +2,61 @@ Python News +++++++++++ +What's New in Python 3.9.16 final? +================================== + +*Release date: 2022-12-06* + +Security +-------- + +- gh-issue-100001: ``python -m http.server`` no longer allows terminal + control characters sent within a garbage request to be printed to the + stderr server log. + + This is done by changing the :mod:`http.server` + :class:`BaseHTTPRequestHandler` ``.log_message`` method to replace control + characters with a ``\xHH`` hex escape before printing. + +- gh-issue-87604: Avoid publishing list of active per-interpreter audit + hooks via the :mod:`gc` module + +- gh-issue-98433: The IDNA codec decoder used on DNS hostnames by + :mod:`socket` or :mod:`asyncio` related name resolution functions no + longer involves a quadratic algorithm. This prevents a potential CPU + denial of service if an out-of-spec excessive length hostname involving + bidirectional characters were decoded. Some protocols such as + :mod:`urllib` http ``3xx`` redirects potentially allow for an attacker to + supply such a name. + +- gh-issue-98739: Update bundled libexpat to 2.5.0 + +- gh-issue-98517: Port XKCP's fix for the buffer overflows in SHA-3 + (CVE-2022-37454). + +- gh-issue-97514: On Linux the :mod:`multiprocessing` module returns to + using filesystem backed unix domain sockets for communication with the + *forkserver* process instead of the Linux abstract socket namespace. Only + code that chooses to use the :ref:`"forkserver" start method + ` is affected. + + Abstract sockets have no permissions and could allow any user on the + system in the same `network namespace + `_ (often + the whole system) to inject code into the multiprocessing *forkserver* + process. This was a potential privilege escalation. Filesystem based + socket permissions restrict this to the *forkserver* process user as was + the default in Python 3.8 and earlier. + + This prevents Linux `CVE-2022-42919 + `_. + +- gh-issue-68966: The deprecated mailcap module now refuses to inject unsafe + text (filenames, MIME types, parameters) into shell commands. Instead of + using such text, it will warn and act as if a match was not found (or for + test commands, as if the test failed). + + What's New in Python 3.9.15 final? ================================== diff --git a/Modules/_sha3/kcp/KeccakSponge.inc b/Modules/_sha3/kcp/KeccakSponge.inc index e10739de..cf92e4db 100644 --- a/Modules/_sha3/kcp/KeccakSponge.inc +++ b/Modules/_sha3/kcp/KeccakSponge.inc @@ -171,7 +171,7 @@ int SpongeAbsorb(SpongeInstance *instance, const unsigned char *data, size_t dat i = 0; curData = data; while(i < dataByteLen) { - if ((instance->byteIOIndex == 0) && (dataByteLen >= (i + rateInBytes))) { + if ((instance->byteIOIndex == 0) && (dataByteLen-i >= rateInBytes)) { #ifdef SnP_FastLoop_Absorb /* processing full blocks first */ @@ -199,10 +199,10 @@ int SpongeAbsorb(SpongeInstance *instance, const unsigned char *data, size_t dat } else { /* normal lane: using the message queue */ - - partialBlock = (unsigned int)(dataByteLen - i); - if (partialBlock+instance->byteIOIndex > rateInBytes) + if (dataByteLen-i > rateInBytes-instance->byteIOIndex) partialBlock = rateInBytes-instance->byteIOIndex; + else + partialBlock = (unsigned int)(dataByteLen - i); #ifdef KeccakReference displayBytes(1, "Block to be absorbed (part)", curData, partialBlock); #endif @@ -281,7 +281,7 @@ int SpongeSqueeze(SpongeInstance *instance, unsigned char *data, size_t dataByte i = 0; curData = data; while(i < dataByteLen) { - if ((instance->byteIOIndex == rateInBytes) && (dataByteLen >= (i + rateInBytes))) { + if ((instance->byteIOIndex == rateInBytes) && (dataByteLen-i >= rateInBytes)) { for(j=dataByteLen-i; j>=rateInBytes; j-=rateInBytes) { SnP_Permute(instance->state); SnP_ExtractBytes(instance->state, curData, 0, rateInBytes); @@ -299,9 +299,10 @@ int SpongeSqueeze(SpongeInstance *instance, unsigned char *data, size_t dataByte SnP_Permute(instance->state); instance->byteIOIndex = 0; } - partialBlock = (unsigned int)(dataByteLen - i); - if (partialBlock+instance->byteIOIndex > rateInBytes) + if (dataByteLen-i > rateInBytes-instance->byteIOIndex) partialBlock = rateInBytes-instance->byteIOIndex; + else + partialBlock = (unsigned int)(dataByteLen - i); i += partialBlock; SnP_ExtractBytes(instance->state, curData, instance->byteIOIndex, partialBlock); diff --git a/Modules/expat/expat.h b/Modules/expat/expat.h index 2b47ce2a..1c83563c 100644 --- a/Modules/expat/expat.h +++ b/Modules/expat/expat.h @@ -1054,8 +1054,8 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold( See http://semver.org. */ #define XML_MAJOR_VERSION 2 -#define XML_MINOR_VERSION 4 -#define XML_MICRO_VERSION 9 +#define XML_MINOR_VERSION 5 +#define XML_MICRO_VERSION 0 #ifdef __cplusplus } diff --git a/Modules/expat/xmlparse.c b/Modules/expat/xmlparse.c index c0bece51..b6c2eca9 100644 --- a/Modules/expat/xmlparse.c +++ b/Modules/expat/xmlparse.c @@ -1,4 +1,4 @@ -/* 90815a2b2c80c03b2b889fe1d427bb2b9e3282aa065e42784e001db4f23de324 (2.4.9+) +/* 5ab094ffadd6edfc94c3eee53af44a86951f9f1f0933ada3114bbce2bfb02c99 (2.5.0+) __ __ _ ___\ \/ /_ __ __ _| |_ / _ \\ /| '_ \ / _` | __| @@ -35,6 +35,7 @@ Copyright (c) 2021 Dong-hee Na Copyright (c) 2022 Samanta Navarro Copyright (c) 2022 Jeffrey Walton + Copyright (c) 2022 Jann Horn Licensed under the MIT license: Permission is hereby granted, free of charge, to any person obtaining @@ -1068,6 +1069,14 @@ parserCreate(const XML_Char *encodingName, parserInit(parser, encodingName); if (encodingName && ! parser->m_protocolEncodingName) { + if (dtd) { + // We need to stop the upcoming call to XML_ParserFree from happily + // destroying parser->m_dtd because the DTD is shared with the parent + // parser and the only guard that keeps XML_ParserFree from destroying + // parser->m_dtd is parser->m_isParamEntity but it will be set to + // XML_TRUE only later in XML_ExternalEntityParserCreate (or not at all). + parser->m_dtd = NULL; + } XML_ParserFree(parser); return NULL; } @@ -3011,9 +3020,6 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, int len; const char *rawName; TAG *tag = parser->m_tagStack; - parser->m_tagStack = tag->parent; - tag->parent = parser->m_freeTagList; - parser->m_freeTagList = tag; rawName = s + enc->minBytesPerChar * 2; len = XmlNameLength(enc, rawName); if (len != tag->rawNameLength @@ -3021,6 +3027,9 @@ doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, *eventPP = rawName; return XML_ERROR_TAG_MISMATCH; } + parser->m_tagStack = tag->parent; + tag->parent = parser->m_freeTagList; + parser->m_freeTagList = tag; --parser->m_tagLevel; if (parser->m_endElementHandler) { const XML_Char *localPart; @@ -4975,10 +4984,10 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, parser->m_handlerArg, parser->m_declElementType->name, parser->m_declAttributeId->name, parser->m_declAttributeType, 0, role == XML_ROLE_REQUIRED_ATTRIBUTE_VALUE); - poolClear(&parser->m_tempPool); handleDefault = XML_FALSE; } } + poolClear(&parser->m_tempPool); break; case XML_ROLE_DEFAULT_ATTRIBUTE_VALUE: case XML_ROLE_FIXED_ATTRIBUTE_VALUE: @@ -5386,7 +5395,7 @@ doProlog(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, * * If 'standalone' is false, the DTD must have no * parameter entities or we wouldn't have passed the outer - * 'if' statement. That measn the only entity in the hash + * 'if' statement. That means the only entity in the hash * table is the external subset name "#" which cannot be * given as a parameter entity name in XML syntax, so the * lookup must have returned NULL and we don't even reach @@ -5798,19 +5807,27 @@ internalEntityProcessor(XML_Parser parser, const char *s, const char *end, if (result != XML_ERROR_NONE) return result; - else if (textEnd != next - && parser->m_parsingStatus.parsing == XML_SUSPENDED) { + + if (textEnd != next && parser->m_parsingStatus.parsing == XML_SUSPENDED) { entity->processed = (int)(next - (const char *)entity->textPtr); return result; - } else { + } + #ifdef XML_DTD - entityTrackingOnClose(parser, entity, __LINE__); + entityTrackingOnClose(parser, entity, __LINE__); #endif - entity->open = XML_FALSE; - parser->m_openInternalEntities = openEntity->next; - /* put openEntity back in list of free instances */ - openEntity->next = parser->m_freeInternalEntities; - parser->m_freeInternalEntities = openEntity; + entity->open = XML_FALSE; + parser->m_openInternalEntities = openEntity->next; + /* put openEntity back in list of free instances */ + openEntity->next = parser->m_freeInternalEntities; + parser->m_freeInternalEntities = openEntity; + + // If there are more open entities we want to stop right here and have the + // upcoming call to XML_ResumeParser continue with entity content, or it would + // be ignored altogether. + if (parser->m_openInternalEntities != NULL + && parser->m_parsingStatus.parsing == XML_SUSPENDED) { + return XML_ERROR_NONE; } #ifdef XML_DTD diff --git a/Modules/expat/xmltok_impl.h b/Modules/expat/xmltok_impl.h index c518aada..3469c4ae 100644 --- a/Modules/expat/xmltok_impl.h +++ b/Modules/expat/xmltok_impl.h @@ -45,7 +45,7 @@ enum { BT_LF, /* line feed = "\n" */ BT_GT, /* greater than = ">" */ BT_QUOT, /* quotation character = "\"" */ - BT_APOS, /* aposthrophe = "'" */ + BT_APOS, /* apostrophe = "'" */ BT_EQUALS, /* equal sign = "=" */ BT_QUEST, /* question mark = "?" */ BT_EXCL, /* exclamation mark = "!" */ diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 8efa850d..a8f2f021 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -440,6 +440,8 @@ sys_addaudithook_impl(PyObject *module, PyObject *hook) if (is->audit_hooks == NULL) { return NULL; } + /* Avoid having our list of hooks show up in the GC module */ + PyObject_GC_UnTrack(is->audit_hooks); } if (PyList_Append(is->audit_hooks, hook) < 0) { diff --git a/README.rst b/README.rst index d78b6bc9..4c804d67 100644 --- a/README.rst +++ b/README.rst @@ -1,4 +1,4 @@ -This is Python version 3.9.15 +This is Python version 3.9.16 ============================= .. image:: https://travis-ci.org/python/cpython.svg?branch=3.9