From: JinWang An Date: Wed, 18 Jan 2023 06:01:29 +0000 (+0900) Subject: Imported Upstream version 3.9.14 X-Git-Tag: upstream/3.9.14^0 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=b88ca1bc14415673dcc77d6f225317397819e8b0;p=platform%2Fupstream%2Fpython3.git Imported Upstream version 3.9.14 --- diff --git a/Doc/bugs.rst b/Doc/bugs.rst index 6654a23c..69d7c274 100644 --- a/Doc/bugs.rst +++ b/Doc/bugs.rst @@ -44,38 +44,39 @@ though it may take a while to be processed. Using the Python issue tracker ============================== -Bug reports for Python itself should be submitted via the Python Bug Tracker -(https://bugs.python.org/). The bug tracker offers a Web form which allows -pertinent information to be entered and submitted to the developers. +Issue reports for Python itself should be submitted via the GitHub issues +tracker (https://github.com/python/cpython/issues). +The GitHub issues tracker offers a web form which allows pertinent information +to be entered and submitted to the developers. The first step in filing a report is to determine whether the problem has already been reported. The advantage in doing so, aside from saving the -developers time, is that you learn what has been done to fix it; it may be that +developers' time, is that you learn what has been done to fix it; it may be that the problem has already been fixed for the next release, or additional information is needed (in which case you are welcome to provide it if you can!). -To do this, search the bug database using the search box on the top of the page. +To do this, search the tracker using the search box at the top of the page. -If the problem you're reporting is not already in the bug tracker, go back to -the Python Bug Tracker and log in. If you don't already have a tracker account, -select the "Register" link or, if you use OpenID, one of the OpenID provider -logos in the sidebar. It is not possible to submit a bug report anonymously. +If the problem you're reporting is not already in the list, log in to GitHub. +If you don't already have a GitHub account, create a new account using the +"Sign up" link. +It is not possible to submit a bug report anonymously. -Being now logged in, you can submit a bug. Select the "Create New" link in the -sidebar to open the bug reporting form. +Being now logged in, you can submit an issue. +Click on the "New issue" button in the top bar to report a new issue. -The submission form has a number of fields. For the "Title" field, enter a -*very* short description of the problem; less than ten words is good. In the -"Type" field, select the type of your problem; also select the "Component" and -"Versions" to which the bug relates. +The submission form has two fields, "Title" and "Comment". + +For the "Title" field, enter a *very* short description of the problem; +less than ten words is good. In the "Comment" field, describe the problem in detail, including what you expected to happen and what did happen. Be sure to include whether any extension modules were involved, and what hardware and software platform you were using (including version information as appropriate). -Each bug report will be assigned to a developer who will determine what needs to -be done to correct the problem. You will receive an update each time action is -taken on the bug. +Each issue report will be reviewed by a developer who will determine what needs to +be done to correct the problem. You will receive an update each time an action is +taken on the issue. .. seealso:: @@ -99,6 +100,6 @@ patching Python in the `Python Developer's Guide`_. If you have questions, the `core-mentorship mailing list`_ is a friendly place to get answers to any and all questions pertaining to the process of fixing issues in Python. -.. _Documentation bugs: https://bugs.python.org/issue?@filter=status&@filter=components&components=4&status=1&@columns=id,activity,title,status&@sort=-activity +.. _Documentation bugs: https://github.com/python/cpython/issues?q=is%3Aissue+is%3Aopen+label%3Adocs .. _Python Developer's Guide: https://devguide.python.org/ .. _core-mentorship mailing list: https://mail.python.org/mailman3/lists/core-mentorship.python.org/ diff --git a/Doc/c-api/datetime.rst b/Doc/c-api/datetime.rst index 66f148df..ad6ae653 100644 --- a/Doc/c-api/datetime.rst +++ b/Doc/c-api/datetime.rst @@ -191,6 +191,13 @@ must not be ``NULL``, and the type is not checked: Return the microsecond, as an int from 0 through 999999. +.. c:function:: int PyDateTime_DATE_GET_FOLD(PyDateTime_DateTime *o) + + Return the fold, as an int from 0 through 1. + + .. versionadded:: 3.6 + + Macros to extract fields from time objects. The argument must be an instance of :c:data:`PyDateTime_Time`, including subclasses. The argument must not be ``NULL``, and the type is not checked: @@ -215,6 +222,13 @@ and the type is not checked: Return the microsecond, as an int from 0 through 999999. +.. c:function:: int PyDateTime_TIME_GET_FOLD(PyDateTime_Time *o) + + Return the fold, as an int from 0 through 1. + + .. versionadded:: 3.6 + + Macros to extract fields from time delta objects. The argument must be an instance of :c:data:`PyDateTime_Delta`, including subclasses. The argument must not be ``NULL``, and the type is not checked: diff --git a/Doc/data/python3.9.abi b/Doc/data/python3.9.abi index e2037436..cca97796 100644 --- a/Doc/data/python3.9.abi +++ b/Doc/data/python3.9.abi @@ -5653,7 +5653,7 @@ - + @@ -5774,6 +5774,9 @@ + + + diff --git a/Doc/library/asyncio-subprocess.rst b/Doc/library/asyncio-subprocess.rst index fbe68f78..4f8b0b3e 100644 --- a/Doc/library/asyncio-subprocess.rst +++ b/Doc/library/asyncio-subprocess.rst @@ -127,6 +127,7 @@ Constants ========= .. data:: asyncio.subprocess.PIPE + :module: Can be passed to the *stdin*, *stdout* or *stderr* parameters. @@ -140,11 +141,13 @@ Constants attributes will point to :class:`StreamReader` instances. .. data:: asyncio.subprocess.STDOUT + :module: Special value that can be used as the *stderr* argument and indicates that standard error should be redirected into standard output. .. data:: asyncio.subprocess.DEVNULL + :module: Special value that can be used as the *stdin*, *stdout* or *stderr* argument to process creation functions. It indicates that the special file @@ -160,6 +163,7 @@ wrapper that allows communicating with subprocesses and watching for their completion. .. class:: asyncio.subprocess.Process + :module: An object that wraps OS processes created by the :func:`create_subprocess_exec` and :func:`create_subprocess_shell` diff --git a/Doc/library/doctest.rst b/Doc/library/doctest.rst index a77322f8..3d2bb27e 100644 --- a/Doc/library/doctest.rst +++ b/Doc/library/doctest.rst @@ -568,41 +568,35 @@ doctest decides whether actual output matches an example's expected output: .. data:: IGNORE_EXCEPTION_DETAIL - When specified, an example that expects an exception passes if an exception of - the expected type is raised, even if the exception detail does not match. For - example, an example expecting ``ValueError: 42`` will pass if the actual - exception raised is ``ValueError: 3*14``, but will fail, e.g., if - :exc:`TypeError` is raised. + When specified, doctests expecting exceptions pass so long as an exception + of the expected type is raised, even if the details + (message and fully-qualified exception name) don't match. - It will also ignore the module name used in Python 3 doctest reports. Hence - both of these variations will work with the flag specified, regardless of - whether the test is run under Python 2.7 or Python 3.2 (or later versions):: + For example, an example expecting ``ValueError: 42`` will pass if the actual + exception raised is ``ValueError: 3*14``, but will fail if, say, a + :exc:`TypeError` is raised instead. + It will also ignore any fully-qualified name included before the + exception class, which can vary between implementations and versions + of Python and the code/libraries in use. + Hence, all three of these variations will work with the flag specified: - >>> raise CustomError('message') + .. code-block:: pycon + + >>> raise Exception('message') Traceback (most recent call last): - CustomError: message + Exception: message - >>> raise CustomError('message') + >>> raise Exception('message') Traceback (most recent call last): - my_module.CustomError: message + builtins.Exception: message - Note that :const:`ELLIPSIS` can also be used to ignore the - details of the exception message, but such a test may still fail based - on whether or not the module details are printed as part of the - exception name. Using :const:`IGNORE_EXCEPTION_DETAIL` and the details - from Python 2.3 is also the only clear way to write a doctest that doesn't - care about the exception detail yet continues to pass under Python 2.3 or - earlier (those releases do not support :ref:`doctest directives - ` and ignore them as irrelevant comments). For example:: - - >>> (1, 2)[3] = 'moo' + >>> raise Exception('message') Traceback (most recent call last): - File "", line 1, in - TypeError: object doesn't support item assignment + __main__.Exception: message - passes under Python 2.3 and later Python versions with the flag specified, - even though the detail - changed in Python 2.4 to say "does not" instead of "doesn't". + Note that :const:`ELLIPSIS` can also be used to ignore the + details of the exception message, but such a test may still fail based + on whether the module name is present or matches exactly. .. versionchanged:: 3.2 :const:`IGNORE_EXCEPTION_DETAIL` now also ignores any information relating diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index 13d7d6e5..80b56fd7 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -844,6 +844,14 @@ are always available. They are listed here in alphabetical order. .. versionchanged:: 3.8 Falls back to :meth:`__index__` if :meth:`__int__` is not defined. + .. versionchanged:: 3.9.14 + :class:`int` string inputs and string representations can be limited to + help avoid denial of service attacks. A :exc:`ValueError` is raised when + the limit is exceeded while converting a string *x* to an :class:`int` or + when converting an :class:`int` into a string would exceed the limit. + See the :ref:`integer string conversion length limitation + ` documentation. + .. function:: isinstance(object, classinfo) diff --git a/Doc/library/http.server.rst b/Doc/library/http.server.rst index dea79108..4aa10e26 100644 --- a/Doc/library/http.server.rst +++ b/Doc/library/http.server.rst @@ -20,7 +20,7 @@ This module defines classes for implementing HTTP servers (Web servers). .. warning:: :mod:`http.server` is not recommended for production. It only implements - basic security checks. + :ref:`basic security checks `. One class, :class:`HTTPServer`, is a :class:`socketserver.TCPServer` subclass. It creates and listens at the HTTP socket, dispatching the requests to a @@ -488,3 +488,14 @@ the following command uses a specific directory:: the ``--cgi`` option:: python -m http.server --cgi + +.. _http.server-security: + +Security Considerations +----------------------- + +.. index:: pair: http.server; security + +:class:`SimpleHTTPRequestHandler` will follow symbolic links when handling +requests, this makes it possible for files outside of the specified directory +to be served. diff --git a/Doc/library/json.rst b/Doc/library/json.rst index 608e70df..4dc085fd 100644 --- a/Doc/library/json.rst +++ b/Doc/library/json.rst @@ -18,6 +18,11 @@ is a lightweight data interchange format inspired by `JavaScript `_ object literal syntax (although it is not a strict subset of JavaScript [#rfc-errata]_ ). +.. warning:: + Be cautious when parsing JSON data from untrusted sources. A malicious + JSON string may cause the decoder to consume considerable CPU and memory + resources. Limiting the size of data to be parsed is recommended. + :mod:`json` exposes an API familiar to users of the standard library :mod:`marshal` and :mod:`pickle` modules. @@ -248,6 +253,12 @@ Basic Usage be used to use another datatype or parser for JSON integers (e.g. :class:`float`). + .. versionchanged:: 3.9.14 + The default *parse_int* of :func:`int` now limits the maximum length of + the integer string via the interpreter's :ref:`integer string + conversion length limitation ` to help avoid denial + of service attacks. + *parse_constant*, if specified, will be called with one of the following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This can be used to raise an exception if invalid JSON numbers diff --git a/Doc/library/multiprocessing.rst b/Doc/library/multiprocessing.rst index b62e1f8d..1bde2ec2 100644 --- a/Doc/library/multiprocessing.rst +++ b/Doc/library/multiprocessing.rst @@ -1648,6 +1648,7 @@ different machines. A manager object controls a server process which manages proxies. .. function:: multiprocessing.Manager() + :module: Returns a started :class:`~multiprocessing.managers.SyncManager` object which can be used for sharing objects between processes. The returned manager diff --git a/Doc/library/security_warnings.rst b/Doc/library/security_warnings.rst index 61fd4e6e..8432248d 100644 --- a/Doc/library/security_warnings.rst +++ b/Doc/library/security_warnings.rst @@ -12,7 +12,7 @@ The following modules have specific security considerations: argument disabling known insecure and blocked algorithms ` * :mod:`http.server` is not suitable for production use, only implementing - basic security checks + basic security checks. See the :ref:`security considerations `. * :mod:`logging`: :ref:`Logging configuration uses eval() ` * :mod:`multiprocessing`: :ref:`Connection.recv() uses pickle diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst index 28924867..6eef5645 100644 --- a/Doc/library/stdtypes.rst +++ b/Doc/library/stdtypes.rst @@ -5244,6 +5244,165 @@ types, where they are relevant. Some of these are not reported by the [] +.. _int_max_str_digits: + +Integer string conversion length limitation +=========================================== + +CPython has a global limit for converting between :class:`int` and :class:`str` +to mitigate denial of service attacks. This limit *only* applies to decimal or +other non-power-of-two number bases. Hexadecimal, octal, and binary conversions +are unlimited. The limit can be configured. + +The :class:`int` type in CPython is an abitrary length number stored in binary +form (commonly known as a "bignum"). There exists no algorithm that can convert +a string to a binary integer or a binary integer to a string in linear time, +*unless* the base is a power of 2. Even the best known algorithms for base 10 +have sub-quadratic complexity. Converting a large value such as ``int('1' * +500_000)`` can take over a second on a fast CPU. + +Limiting conversion size offers a practical way to avoid `CVE-2020-10735 +`_. + +The limit is applied to the number of digit characters in the input or output +string when a non-linear conversion algorithm would be involved. Underscores +and the sign are not counted towards the limit. + +When an operation would exceed the limit, a :exc:`ValueError` is raised: + +.. doctest:: + + >>> import sys + >>> sys.set_int_max_str_digits(4300) # Illustrative, this is the default. + >>> _ = int('2' * 5432) + Traceback (most recent call last): + ... + ValueError: Exceeds the limit (4300) for integer string conversion: value has 5432 digits. + >>> i = int('2' * 4300) + >>> len(str(i)) + 4300 + >>> i_squared = i*i + >>> len(str(i_squared)) + Traceback (most recent call last): + ... + ValueError: Exceeds the limit (4300) for integer string conversion: value has 8599 digits. + >>> len(hex(i_squared)) + 7144 + >>> assert int(hex(i_squared), base=16) == i*i # Hexadecimal is unlimited. + +The default limit is 4300 digits as provided in +:data:`sys.int_info.default_max_str_digits `. +The lowest limit that can be configured is 640 digits as provided in +:data:`sys.int_info.str_digits_check_threshold `. + +Verification: + +.. doctest:: + + >>> import sys + >>> assert sys.int_info.default_max_str_digits == 4300, sys.int_info + >>> assert sys.int_info.str_digits_check_threshold == 640, sys.int_info + >>> msg = int('578966293710682886880994035146873798396722250538762761564' + ... '9252925514383915483333812743580549779436104706260696366600' + ... '571186405732').to_bytes(53, 'big') + ... + +.. versionadded:: 3.9.14 + +Affected APIs +------------- + +The limitation only applies to potentially slow conversions between :class:`int` +and :class:`str` or :class:`bytes`: + +* ``int(string)`` with default base 10. +* ``int(string, base)`` for all bases that are not a power of 2. +* ``str(integer)``. +* ``repr(integer)`` +* any other string conversion to base 10, for example ``f"{integer}"``, + ``"{}".format(integer)``, or ``b"%d" % integer``. + +The limitations do not apply to functions with a linear algorithm: + +* ``int(string, base)`` with base 2, 4, 8, 16, or 32. +* :func:`int.from_bytes` and :func:`int.to_bytes`. +* :func:`hex`, :func:`oct`, :func:`bin`. +* :ref:`formatspec` for hex, octal, and binary numbers. +* :class:`str` to :class:`float`. +* :class:`str` to :class:`decimal.Decimal`. + +Configuring the limit +--------------------- + +Before Python starts up you can use an environment variable or an interpreter +command line flag to configure the limit: + +* :envvar:`PYTHONINTMAXSTRDIGITS`, e.g. + ``PYTHONINTMAXSTRDIGITS=640 python3`` to set the limit to 640 or + ``PYTHONINTMAXSTRDIGITS=0 python3`` to disable the limitation. +* :option:`-X int_max_str_digits <-X>`, e.g. + ``python3 -X int_max_str_digits=640`` +* :data:`sys.flags.int_max_str_digits` contains the value of + :envvar:`PYTHONINTMAXSTRDIGITS` or :option:`-X int_max_str_digits <-X>`. + If both the env var and the ``-X`` option are set, the ``-X`` option takes + precedence. A value of *-1* indicates that both were unset, thus a value of + :data:`sys.int_info.default_max_str_digits` was used during initilization. + +From code, you can inspect the current limit and set a new one using these +:mod:`sys` APIs: + +* :func:`sys.get_int_max_str_digits` and :func:`sys.set_int_max_str_digits` are + a getter and setter for the interpreter-wide limit. Subinterpreters have + their own limit. + +Information about the default and minimum can be found in :attr:`sys.int_info`: + +* :data:`sys.int_info.default_max_str_digits ` is the compiled-in + default limit. +* :data:`sys.int_info.str_digits_check_threshold ` is the lowest + accepted value for the limit (other than 0 which disables it). + +.. versionadded:: 3.9.14 + +.. caution:: + + Setting a low limit *can* lead to problems. While rare, code exists that + contains integer constants in decimal in their source that exceed the + minimum threshold. A consequence of setting the limit is that Python source + code containing decimal integer literals longer than the limit will + encounter an error during parsing, usually at startup time or import time or + even at installation time - anytime an up to date ``.pyc`` does not already + exist for the code. A workaround for source that contains such large + constants is to convert them to ``0x`` hexadecimal form as it has no limit. + + Test your application thoroughly if you use a low limit. Ensure your tests + run with the limit set early via the environment or flag so that it applies + during startup and even during any installation step that may invoke Python + to precompile ``.py`` sources to ``.pyc`` files. + +Recommended configuration +------------------------- + +The default :data:`sys.int_info.default_max_str_digits` is expected to be +reasonable for most applications. If your application requires a different +limit, set it from your main entry point using Python version agnostic code as +these APIs were added in security patch releases in versions before 3.11. + +Example:: + + >>> import sys + >>> if hasattr(sys, "set_int_max_str_digits"): + ... upper_bound = 68000 + ... lower_bound = 4004 + ... current_limit = sys.get_int_max_str_digits() + ... if current_limit == 0 or current_limit > upper_bound: + ... sys.set_int_max_str_digits(upper_bound) + ... elif current_limit < lower_bound: + ... sys.set_int_max_str_digits(lower_bound) + +If you need to disable it entirely, set it to ``0``. + + .. rubric:: Footnotes .. [1] Additional information on these special methods may be found in the Python diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index 03986db1..97c450e7 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -443,9 +443,9 @@ always available. The :term:`named tuple` *flags* exposes the status of command line flags. The attributes are read only. - ============================= ================================================================ + ============================= ============================================================================================================== attribute flag - ============================= ================================================================ + ============================= ============================================================================================================== :const:`debug` :option:`-d` :const:`inspect` :option:`-i` :const:`interactive` :option:`-i` @@ -461,7 +461,8 @@ always available. :const:`hash_randomization` :option:`-R` :const:`dev_mode` :option:`-X dev <-X>` (:ref:`Python Development Mode `) :const:`utf8_mode` :option:`-X utf8 <-X>` - ============================= ================================================================ + :const:`int_max_str_digits` :option:`-X int_max_str_digits <-X>` (:ref:`integer string conversion length limitation `) + ============================= ============================================================================================================== .. versionchanged:: 3.2 Added ``quiet`` attribute for the new :option:`-q` flag. @@ -480,6 +481,9 @@ always available. Mode ` and the ``utf8_mode`` attribute for the new :option:`-X` ``utf8`` flag. + .. versionchanged:: 3.9.14 + Added the ``int_max_str_digits`` attribute. + .. data:: float_info @@ -658,6 +662,15 @@ always available. .. versionadded:: 3.6 + +.. function:: get_int_max_str_digits() + + Returns the current value for the :ref:`integer string conversion length + limitation `. See also :func:`set_int_max_str_digits`. + + .. versionadded:: 3.9.14 + + .. function:: getrefcount(object) Return the reference count of the *object*. The count returned is generally one @@ -931,19 +944,31 @@ always available. .. tabularcolumns:: |l|L| - +-------------------------+----------------------------------------------+ - | Attribute | Explanation | - +=========================+==============================================+ - | :const:`bits_per_digit` | number of bits held in each digit. Python | - | | integers are stored internally in base | - | | ``2**int_info.bits_per_digit`` | - +-------------------------+----------------------------------------------+ - | :const:`sizeof_digit` | size in bytes of the C type used to | - | | represent a digit | - +-------------------------+----------------------------------------------+ + +----------------------------------------+-----------------------------------------------+ + | Attribute | Explanation | + +========================================+===============================================+ + | :const:`bits_per_digit` | number of bits held in each digit. Python | + | | integers are stored internally in base | + | | ``2**int_info.bits_per_digit`` | + +----------------------------------------+-----------------------------------------------+ + | :const:`sizeof_digit` | size in bytes of the C type used to | + | | represent a digit | + +----------------------------------------+-----------------------------------------------+ + | :const:`default_max_str_digits` | default value for | + | | :func:`sys.get_int_max_str_digits` when it | + | | is not otherwise explicitly configured. | + +----------------------------------------+-----------------------------------------------+ + | :const:`str_digits_check_threshold` | minimum non-zero value for | + | | :func:`sys.set_int_max_str_digits`, | + | | :envvar:`PYTHONINTMAXSTRDIGITS`, or | + | | :option:`-X int_max_str_digits <-X>`. | + +----------------------------------------+-----------------------------------------------+ .. versionadded:: 3.1 + .. versionchanged:: 3.9.14 + Added ``default_max_str_digits`` and ``str_digits_check_threshold``. + .. data:: __interactivehook__ @@ -1221,6 +1246,14 @@ always available. .. availability:: Unix. +.. function:: set_int_max_str_digits(n) + + Set the :ref:`integer string conversion length limitation + ` used by this interpreter. See also + :func:`get_int_max_str_digits`. + + .. versionadded:: 3.9.14 + .. function:: setprofile(profilefunc) .. index:: diff --git a/Doc/library/test.rst b/Doc/library/test.rst index 16f908c8..563197f8 100644 --- a/Doc/library/test.rst +++ b/Doc/library/test.rst @@ -1302,6 +1302,16 @@ The :mod:`test.support` module defines the following functions: .. versionadded:: 3.6 +.. function:: adjust_int_max_str_digits(max_digits) + + This function returns a context manager that will change the global + :func:`sys.set_int_max_str_digits` setting for the duration of the + context to allow execution of test code that needs a different limit + on the number of digits when converting between an integer and string. + + .. versionadded:: 3.9.14 + + The :mod:`test.support` module defines the following classes: .. class:: TransientResource(exc, **kwargs) diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst index 92248e75..0d96a78c 100644 --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -808,6 +808,7 @@ Functions ^^^^^^^^^ .. function:: xml.etree.ElementInclude.default_loader( href, parse, encoding=None) + :module: Default loader. This default loader reads an included resource from disk. *href* is a URL. *parse* is for parse mode either "xml" or "text". *encoding* @@ -819,6 +820,7 @@ Functions .. function:: xml.etree.ElementInclude.include( elem, loader=None, base_url=None, \ max_depth=6) + :module: This function expands XInclude directives. *elem* is the root element. *loader* is an optional resource loader. If omitted, it defaults to :func:`default_loader`. diff --git a/Doc/reference/expressions.rst b/Doc/reference/expressions.rst index 502c0225..9f6f6f7d 100644 --- a/Doc/reference/expressions.rst +++ b/Doc/reference/expressions.rst @@ -1882,7 +1882,7 @@ precedence and have a left-to-right chaining feature as described in the | ``x[index]``, ``x[index:index]``, | Subscription, slicing, | | ``x(arguments...)``, ``x.attribute`` | call, attribute reference | +-----------------------------------------------+-------------------------------------+ -| :keyword:`await` ``x`` | Await expression | +| :keyword:`await x ` | Await expression | +-----------------------------------------------+-------------------------------------+ | ``**`` | Exponentiation [#]_ | +-----------------------------------------------+-------------------------------------+ @@ -1906,7 +1906,7 @@ precedence and have a left-to-right chaining feature as described in the | :keyword:`is`, :keyword:`is not`, ``<``, | tests and identity tests | | ``<=``, ``>``, ``>=``, ``!=``, ``==`` | | +-----------------------------------------------+-------------------------------------+ -| :keyword:`not` ``x`` | Boolean NOT | +| :keyword:`not x ` | Boolean NOT | +-----------------------------------------------+-------------------------------------+ | :keyword:`and` | Boolean AND | +-----------------------------------------------+-------------------------------------+ diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index 5739388e..66d8d57a 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -436,6 +436,9 @@ Miscellaneous options stored in a traceback of a trace. Use ``-X tracemalloc=NFRAME`` to start tracing with a traceback limit of *NFRAME* frames. See the :func:`tracemalloc.start` for more information. + * ``-X int_max_str_digits`` configures the :ref:`integer string conversion + length limitation `. See also + :envvar:`PYTHONINTMAXSTRDIGITS`. * ``-X importtime`` to show how long each import takes. It shows module name, cumulative time (including nested imports) and self time (excluding nested imports). Note that its output may be broken in multi-threaded @@ -480,6 +483,9 @@ Miscellaneous options The ``-X showalloccount`` option has been removed. + .. versionadded:: 3.9.14 + The ``-X int_max_str_digits`` option. + .. deprecated-removed:: 3.9 3.10 The ``-X oldparser`` option. @@ -659,6 +665,13 @@ conflict. .. versionadded:: 3.2.3 +.. envvar:: PYTHONINTMAXSTRDIGITS + + If this variable is set to an integer, it is used to configure the + interpreter's global :ref:`integer string conversion length limitation + `. + + .. versionadded:: 3.9.14 .. envvar:: PYTHONIOENCODING diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst index 92bdb455..58f44f75 100644 --- a/Doc/whatsnew/3.8.rst +++ b/Doc/whatsnew/3.8.rst @@ -2003,6 +2003,8 @@ Changes in the Python API ``replace()`` method of :class:`types.CodeType` can be used to make the code future-proof. +* The parameter ``digestmod`` for :func:`hmac.new` no longer uses the MD5 digest + by default. Changes in the C API -------------------- diff --git a/Doc/whatsnew/3.9.rst b/Doc/whatsnew/3.9.rst index 6aed8d2b..dab4746a 100644 --- a/Doc/whatsnew/3.9.rst +++ b/Doc/whatsnew/3.9.rst @@ -1587,3 +1587,17 @@ URL by the parser in :mod:`urllib.parse` preventing such attacks. The removal characters are controlled by a new module level variable ``urllib.parse._UNSAFE_URL_BYTES_TO_REMOVE``. (See :issue:`43882`) +Notable security feature in 3.9.14 +================================== + +Converting between :class:`int` and :class:`str` in bases other than 2 +(binary), 4, 8 (octal), 16 (hexadecimal), or 32 such as base 10 (decimal) +now raises a :exc:`ValueError` if the number of digits in string form is +above a limit to avoid potential denial of service attacks due to the +algorithmic complexity. This is a mitigation for `CVE-2020-10735 +`_. +This limit can be configured or disabled by environment variable, command +line flag, or :mod:`sys` APIs. See the :ref:`integer string conversion +length limitation ` documentation. The default limit +is 4300 digits in string form. + diff --git a/Include/internal/pycore_hamt.h b/Include/internal/pycore_hamt.h index aaf65590..357d9661 100644 --- a/Include/internal/pycore_hamt.h +++ b/Include/internal/pycore_hamt.h @@ -5,7 +5,19 @@ # error "this header requires Py_BUILD_CORE define" #endif -#define _Py_HAMT_MAX_TREE_DEPTH 7 + +/* +HAMT tree is shaped by hashes of keys. Every group of 5 bits of a hash denotes +the exact position of the key in one level of the tree. Since we're using +32 bit hashes, we can have at most 7 such levels. Although if there are +two distinct keys with equal hashes, they will have to occupy the same +cell in the 7th level of the tree -- so we'd put them in a "collision" node. +Which brings the total possible tree depth to 8. Read more about the actual +layout of the HAMT tree in `hamt.c`. + +This constant is used to define a datastucture for storing iteration state. +*/ +#define _Py_HAMT_MAX_TREE_DEPTH 8 #define PyHamt_Check(o) Py_IS_TYPE(o, &_PyHamt_Type) diff --git a/Include/internal/pycore_initconfig.h b/Include/internal/pycore_initconfig.h index 457a0058..ad1b7e55 100644 --- a/Include/internal/pycore_initconfig.h +++ b/Include/internal/pycore_initconfig.h @@ -156,6 +156,8 @@ extern PyStatus _PyConfig_SetPyArgv( PyConfig *config, const _PyArgv *args); +extern int _Py_global_config_int_max_str_digits; + /* --- Function used for testing ---------------------------------- */ diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 551ad833..304d704a 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -154,6 +154,8 @@ struct _is { */ PyLongObject* small_ints[_PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS]; #endif + + int int_max_str_digits; }; /* Used by _PyImport_Cleanup() */ diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h new file mode 100644 index 00000000..ae04332a --- /dev/null +++ b/Include/internal/pycore_long.h @@ -0,0 +1,49 @@ +#ifndef Py_INTERNAL_LONG_H +#define Py_INTERNAL_LONG_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +/* + * Default int base conversion size limitation: Denial of Service prevention. + * + * Chosen such that this isn't wildly slow on modern hardware and so that + * everyone's existing deployed numpy test suite passes before + * https://github.com/numpy/numpy/issues/22098 is widely available. + * + * $ python -m timeit -s 's = "1"*4300' 'int(s)' + * 2000 loops, best of 5: 125 usec per loop + * $ python -m timeit -s 's = "1"*4300; v = int(s)' 'str(v)' + * 1000 loops, best of 5: 311 usec per loop + * (zen2 cloud VM) + * + * 4300 decimal digits fits a ~14284 bit number. + */ +#define _PY_LONG_DEFAULT_MAX_STR_DIGITS 4300 +/* + * Threshold for max digits check. For performance reasons int() and + * int.__str__() don't checks values that are smaller than this + * threshold. Acts as a guaranteed minimum size limit for bignums that + * applications can expect from CPython. + * + * % python -m timeit -s 's = "1"*640; v = int(s)' 'str(int(s))' + * 20000 loops, best of 5: 12 usec per loop + * + * "640 digits should be enough for anyone." - gps + * fits a ~2126 bit decimal number. + */ +#define _PY_LONG_MAX_STR_DIGITS_THRESHOLD 640 + +#if ((_PY_LONG_DEFAULT_MAX_STR_DIGITS != 0) && \ + (_PY_LONG_DEFAULT_MAX_STR_DIGITS < _PY_LONG_MAX_STR_DIGITS_THRESHOLD)) +# error "_PY_LONG_DEFAULT_MAX_STR_DIGITS smaller than threshold." +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_LONG_H */ diff --git a/Include/patchlevel.h b/Include/patchlevel.h index cf352947..3d48e977 100644 --- a/Include/patchlevel.h +++ b/Include/patchlevel.h @@ -18,12 +18,12 @@ /*--start constants--*/ #define PY_MAJOR_VERSION 3 #define PY_MINOR_VERSION 9 -#define PY_MICRO_VERSION 13 +#define PY_MICRO_VERSION 14 #define PY_RELEASE_LEVEL PY_RELEASE_LEVEL_FINAL #define PY_RELEASE_SERIAL 0 /* Version as a string */ -#define PY_VERSION "3.9.13" +#define PY_VERSION "3.9.14" /*--end constants--*/ /* Version as a single 4-byte hex number, e.g. 0x010502B2 == 1.5.2b2. diff --git a/Lib/ensurepip/__init__.py b/Lib/ensurepip/__init__.py index e510cc7f..981534c4 100644 --- a/Lib/ensurepip/__init__.py +++ b/Lib/ensurepip/__init__.py @@ -31,7 +31,12 @@ sys.path = {additional_paths or []} + sys.path sys.argv[1:] = {args} runpy.run_module("pip", run_name="__main__", alter_sys=True) """ - return subprocess.run([sys.executable, "-c", code], check=True).returncode + + cmd = [sys.executable, '-c', code] + if sys.flags.isolated: + # run code in isolated mode if currently running isolated + cmd.insert(1, '-I') + return subprocess.run(cmd, check=True).returncode def version(): diff --git a/Lib/http/server.py b/Lib/http/server.py index 2d2300c2..6bf90843 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -330,6 +330,13 @@ class BaseHTTPRequestHandler(socketserver.StreamRequestHandler): return False self.command, self.path = command, path + # gh-87389: The purpose of replacing '//' with '/' is to protect + # against open redirect attacks possibly triggered if the path starts + # with '//' because http clients treat //path as an absolute URI + # without scheme (similar to http://path) rather than a path. + if self.path.startswith('//'): + self.path = '/' + self.path.lstrip('/') # Reduce to a single / + # Examine the headers and look for a Connection directive. try: self.headers = http.client.parse_headers(self.rfile, diff --git a/Lib/pydoc_data/topics.py b/Lib/pydoc_data/topics.py index 318e8566..bfb5db0f 100644 --- a/Lib/pydoc_data/topics.py +++ b/Lib/pydoc_data/topics.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Autogenerated by Sphinx on Tue May 17 12:57:36 2022 +# Autogenerated by Sphinx on Tue Sep 6 19:25:22 2022 topics = {'assert': 'The "assert" statement\n' '**********************\n' '\n' @@ -7319,7 +7319,7 @@ topics = {'assert': 'The "assert" statement\n' '| "x(arguments...)", "x.attribute" | ' 'attribute reference |\n' '+-------------------------------------------------+---------------------------------------+\n' - '| "await" "x" | ' + '| "await x" | ' 'Await expression |\n' '+-------------------------------------------------+---------------------------------------+\n' '| "**" | ' @@ -7355,7 +7355,7 @@ topics = {'assert': 'The "assert" statement\n' '| ">=", "!=", "==" | ' 'tests and identity tests |\n' '+-------------------------------------------------+---------------------------------------+\n' - '| "not" "x" | ' + '| "not x" | ' 'Boolean NOT |\n' '+-------------------------------------------------+---------------------------------------+\n' '| "and" | ' @@ -8044,31 +8044,7 @@ topics = {'assert': 'The "assert" statement\n' ' still alive. The list is in definition order. Example:\n' '\n' ' >>> int.__subclasses__()\n' - " []\n" - '\n' - '-[ Footnotes ]-\n' - '\n' - '[1] Additional information on these special methods may be ' - 'found in\n' - ' the Python Reference Manual (Basic customization).\n' - '\n' - '[2] As a consequence, the list "[1, 2]" is considered equal ' - 'to "[1.0,\n' - ' 2.0]", and similarly for tuples.\n' - '\n' - '[3] They must have since the parser can’t tell the type of ' - 'the\n' - ' operands.\n' - '\n' - '[4] Cased characters are those with general category ' - 'property being\n' - ' one of “Lu” (Letter, uppercase), “Ll” (Letter, ' - 'lowercase), or “Lt”\n' - ' (Letter, titlecase).\n' - '\n' - '[5] To format only a tuple you should therefore provide a ' - 'singleton\n' - ' tuple whose only element is the tuple to be formatted.\n', + " []\n", 'specialnames': 'Special method names\n' '********************\n' '\n' diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 86ac8f09..6dc08135 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -3294,3 +3294,14 @@ def clear_ignored_deprecations(*tokens: object) -> None: if warnings.filters != new_filters: warnings.filters[:] = new_filters warnings._filters_mutated() + + +@contextlib.contextmanager +def adjust_int_max_str_digits(max_digits): + """Temporarily change the integer string conversion length limit.""" + current = sys.get_int_max_str_digits() + try: + sys.set_int_max_str_digits(max_digits) + yield + finally: + sys.set_int_max_str_digits(current) diff --git a/Lib/test/test_ast.py b/Lib/test/test_ast.py index c3e3be63..a048d389 100644 --- a/Lib/test/test_ast.py +++ b/Lib/test/test_ast.py @@ -978,6 +978,14 @@ Module( self.assertRaises(ValueError, ast.literal_eval, '+True') self.assertRaises(ValueError, ast.literal_eval, '2+3') + def test_literal_eval_str_int_limit(self): + with support.adjust_int_max_str_digits(4000): + ast.literal_eval('3'*4000) # no error + with self.assertRaises(SyntaxError) as err_ctx: + ast.literal_eval('3'*4001) + self.assertIn('Exceeds the limit ', str(err_ctx.exception)) + self.assertIn(' Consider hexadecimal ', str(err_ctx.exception)) + def test_literal_eval_complex(self): # Issue #4907 self.assertEqual(ast.literal_eval('6j'), 6j) diff --git a/Lib/test/test_cmd_line.py b/Lib/test/test_cmd_line.py index 4b3e33c4..e38fc698 100644 --- a/Lib/test/test_cmd_line.py +++ b/Lib/test/test_cmd_line.py @@ -815,6 +815,39 @@ class CmdLineTest(unittest.TestCase): self.assertTrue(proc.stderr.startswith(err_msg), proc.stderr) self.assertNotEqual(proc.returncode, 0) + def test_int_max_str_digits(self): + code = "import sys; print(sys.flags.int_max_str_digits, sys.get_int_max_str_digits())" + + assert_python_failure('-X', 'int_max_str_digits', '-c', code) + assert_python_failure('-X', 'int_max_str_digits=foo', '-c', code) + assert_python_failure('-X', 'int_max_str_digits=100', '-c', code) + + assert_python_failure('-c', code, PYTHONINTMAXSTRDIGITS='foo') + assert_python_failure('-c', code, PYTHONINTMAXSTRDIGITS='100') + + def res2int(res): + out = res.out.strip().decode("utf-8") + return tuple(int(i) for i in out.split()) + + res = assert_python_ok('-c', code) + self.assertEqual(res2int(res), (-1, sys.get_int_max_str_digits())) + res = assert_python_ok('-X', 'int_max_str_digits=0', '-c', code) + self.assertEqual(res2int(res), (0, 0)) + res = assert_python_ok('-X', 'int_max_str_digits=4000', '-c', code) + self.assertEqual(res2int(res), (4000, 4000)) + res = assert_python_ok('-X', 'int_max_str_digits=100000', '-c', code) + self.assertEqual(res2int(res), (100000, 100000)) + + res = assert_python_ok('-c', code, PYTHONINTMAXSTRDIGITS='0') + self.assertEqual(res2int(res), (0, 0)) + res = assert_python_ok('-c', code, PYTHONINTMAXSTRDIGITS='4000') + self.assertEqual(res2int(res), (4000, 4000)) + res = assert_python_ok( + '-X', 'int_max_str_digits=6000', '-c', code, + PYTHONINTMAXSTRDIGITS='4000' + ) + self.assertEqual(res2int(res), (6000, 6000)) + @unittest.skipIf(interpreter_requires_environment(), 'Cannot run -I tests when PYTHON env vars are required.') diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index 55716fd4..ec776b9e 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -189,6 +189,19 @@ if 1: self.assertEqual(eval("0o777"), 511) self.assertEqual(eval("-0o0000010"), -8) + def test_int_literals_too_long(self): + n = 3000 + source = f"a = 1\nb = 2\nc = {'3'*n}\nd = 4" + with support.adjust_int_max_str_digits(n): + compile(source, "", "exec") # no errors. + with support.adjust_int_max_str_digits(n-1): + with self.assertRaises(SyntaxError) as err_ctx: + compile(source, "", "exec") + exc = err_ctx.exception + self.assertEqual(exc.lineno, 3) + self.assertIn('Exceeds the limit ', str(exc)) + self.assertIn(' Consider hexadecimal ', str(exc)) + def test_unary_minus(self): # Verify treatment of unary minus on negative numbers SF bug #660455 if sys.maxsize == 2147483647: diff --git a/Lib/test/test_context.py b/Lib/test/test_context.py index 2d8b63a1..689e3d4d 100644 --- a/Lib/test/test_context.py +++ b/Lib/test/test_context.py @@ -533,6 +533,41 @@ class HamtTest(unittest.TestCase): self.assertEqual(len(h4), 2) self.assertEqual(len(h5), 3) + def test_hamt_collision_3(self): + # Test that iteration works with the deepest tree possible. + # https://github.com/python/cpython/issues/93065 + + C = HashKey(0b10000000_00000000_00000000_00000000, 'C') + D = HashKey(0b10000000_00000000_00000000_00000000, 'D') + + E = HashKey(0b00000000_00000000_00000000_00000000, 'E') + + h = hamt() + h = h.set(C, 'C') + h = h.set(D, 'D') + h = h.set(E, 'E') + + # BitmapNode(size=2 count=1 bitmap=0b1): + # NULL: + # BitmapNode(size=2 count=1 bitmap=0b1): + # NULL: + # BitmapNode(size=2 count=1 bitmap=0b1): + # NULL: + # BitmapNode(size=2 count=1 bitmap=0b1): + # NULL: + # BitmapNode(size=2 count=1 bitmap=0b1): + # NULL: + # BitmapNode(size=2 count=1 bitmap=0b1): + # NULL: + # BitmapNode(size=4 count=2 bitmap=0b101): + # : 'E' + # NULL: + # CollisionNode(size=4 id=0x107a24520): + # : 'C' + # : 'D' + + self.assertEqual({k.name for k in h.keys()}, {'C', 'D', 'E'}) + def test_hamt_stress(self): COLLECTION_SIZE = 7000 TEST_ITERS_EVERY = 647 diff --git a/Lib/test/test_decimal.py b/Lib/test/test_decimal.py index 58f4df30..7b1488f5 100644 --- a/Lib/test/test_decimal.py +++ b/Lib/test/test_decimal.py @@ -2452,6 +2452,15 @@ class CUsabilityTest(UsabilityTest): class PyUsabilityTest(UsabilityTest): decimal = P + def setUp(self): + super().setUp() + self._previous_int_limit = sys.get_int_max_str_digits() + sys.set_int_max_str_digits(7000) + + def tearDown(self): + sys.set_int_max_str_digits(self._previous_int_limit) + super().tearDown() + class PythonAPItests(unittest.TestCase): def test_abc(self): @@ -4509,6 +4518,15 @@ class CCoverage(Coverage): class PyCoverage(Coverage): decimal = P + def setUp(self): + super().setUp() + self._previous_int_limit = sys.get_int_max_str_digits() + sys.set_int_max_str_digits(7000) + + def tearDown(self): + sys.set_int_max_str_digits(self._previous_int_limit) + super().tearDown() + class PyFunctionality(unittest.TestCase): """Extra functionality in decimal.py""" diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index c1494d29..4acf7a6f 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -331,7 +331,7 @@ class SimpleHTTPServerTestCase(BaseTestCase): pass def setUp(self): - BaseTestCase.setUp(self) + super().setUp() self.cwd = os.getcwd() basetempdir = tempfile.gettempdir() os.chdir(basetempdir) @@ -359,7 +359,7 @@ class SimpleHTTPServerTestCase(BaseTestCase): except: pass finally: - BaseTestCase.tearDown(self) + super().tearDown() def check_status_and_reason(self, response, status, data=None): def close_conn(): @@ -415,6 +415,55 @@ class SimpleHTTPServerTestCase(BaseTestCase): self.check_status_and_reason(response, HTTPStatus.OK, data=support.TESTFN_UNDECODABLE) + def test_get_dir_redirect_location_domain_injection_bug(self): + """Ensure //evil.co/..%2f../../X does not put //evil.co/ in Location. + + //netloc/ in a Location header is a redirect to a new host. + https://github.com/python/cpython/issues/87389 + + This checks that a path resolving to a directory on our server cannot + resolve into a redirect to another server. + """ + os.mkdir(os.path.join(self.tempdir, 'existing_directory')) + url = f'/python.org/..%2f..%2f..%2f..%2f..%2f../%0a%0d/../{self.tempdir_name}/existing_directory' + expected_location = f'{url}/' # /python.org.../ single slash single prefix, trailing slash + # Canonicalizes to /tmp/tempdir_name/existing_directory which does + # exist and is a dir, triggering the 301 redirect logic. + response = self.request(url) + self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY) + location = response.getheader('Location') + self.assertEqual(location, expected_location, msg='non-attack failed!') + + # //python.org... multi-slash prefix, no trailing slash + attack_url = f'/{url}' + response = self.request(attack_url) + self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY) + location = response.getheader('Location') + self.assertFalse(location.startswith('//'), msg=location) + self.assertEqual(location, expected_location, + msg='Expected Location header to start with a single / and ' + 'end with a / as this is a directory redirect.') + + # ///python.org... triple-slash prefix, no trailing slash + attack3_url = f'//{url}' + response = self.request(attack3_url) + self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY) + self.assertEqual(response.getheader('Location'), expected_location) + + # If the second word in the http request (Request-URI for the http + # method) is a full URI, we don't worry about it, as that'll be parsed + # and reassembled as a full URI within BaseHTTPRequestHandler.send_head + # so no errant scheme-less //netloc//evil.co/ domain mixup can happen. + attack_scheme_netloc_2slash_url = f'https://pypi.org/{url}' + expected_scheme_netloc_location = f'{attack_scheme_netloc_2slash_url}/' + response = self.request(attack_scheme_netloc_2slash_url) + self.check_status_and_reason(response, HTTPStatus.MOVED_PERMANENTLY) + location = response.getheader('Location') + # We're just ensuring that the scheme and domain make it through, if + # there are or aren't multiple slashes at the start of the path that + # follows that isn't important in this Location: header. + self.assertTrue(location.startswith('https://pypi.org/'), msg=location) + def test_get(self): #constructs the path relative to the root directory of the HTTPServer response = self.request(self.base_url + '/test') diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py index 6fdf52ef..cbbddf50 100644 --- a/Lib/test/test_int.py +++ b/Lib/test/test_int.py @@ -1,4 +1,5 @@ import sys +import time import unittest from test import support @@ -571,5 +572,200 @@ class IntTestCases(unittest.TestCase): self.assertEqual(int('1_2_3_4_5_6_7', 32), 1144132807) +class IntStrDigitLimitsTests(unittest.TestCase): + + int_class = int # Override this in subclasses to reuse the suite. + + def setUp(self): + super().setUp() + self._previous_limit = sys.get_int_max_str_digits() + sys.set_int_max_str_digits(2048) + + def tearDown(self): + sys.set_int_max_str_digits(self._previous_limit) + super().tearDown() + + def test_disabled_limit(self): + self.assertGreater(sys.get_int_max_str_digits(), 0) + self.assertLess(sys.get_int_max_str_digits(), 20_000) + with support.adjust_int_max_str_digits(0): + self.assertEqual(sys.get_int_max_str_digits(), 0) + i = self.int_class('1' * 20_000) + str(i) + self.assertGreater(sys.get_int_max_str_digits(), 0) + + def test_max_str_digits_edge_cases(self): + """Ignore the +/- sign and space padding.""" + int_class = self.int_class + maxdigits = sys.get_int_max_str_digits() + + int_class('1' * maxdigits) + int_class(' ' + '1' * maxdigits) + int_class('1' * maxdigits + ' ') + int_class('+' + '1' * maxdigits) + int_class('-' + '1' * maxdigits) + self.assertEqual(len(str(10 ** (maxdigits - 1))), maxdigits) + + def check(self, i, base=None): + with self.assertRaises(ValueError): + if base is None: + self.int_class(i) + else: + self.int_class(i, base) + + def test_max_str_digits(self): + maxdigits = sys.get_int_max_str_digits() + + self.check('1' * (maxdigits + 1)) + self.check(' ' + '1' * (maxdigits + 1)) + self.check('1' * (maxdigits + 1) + ' ') + self.check('+' + '1' * (maxdigits + 1)) + self.check('-' + '1' * (maxdigits + 1)) + self.check('1' * (maxdigits + 1)) + + i = 10 ** maxdigits + with self.assertRaises(ValueError): + str(i) + + def test_denial_of_service_prevented_int_to_str(self): + """Regression test: ensure we fail before performing O(N**2) work.""" + maxdigits = sys.get_int_max_str_digits() + assert maxdigits < 50_000, maxdigits # A test prerequisite. + get_time = time.process_time + if get_time() <= 0: # some platforms like WASM lack process_time() + get_time = time.monotonic + + huge_int = int(f'0x{"c"*65_000}', base=16) # 78268 decimal digits. + digits = 78_268 + with support.adjust_int_max_str_digits(digits): + start = get_time() + huge_decimal = str(huge_int) + seconds_to_convert = get_time() - start + self.assertEqual(len(huge_decimal), digits) + # Ensuring that we chose a slow enough conversion to measure. + # It takes 0.1 seconds on a Zen based cloud VM in an opt build. + if seconds_to_convert < 0.005: + raise unittest.SkipTest('"slow" conversion took only ' + f'{seconds_to_convert} seconds.') + + # We test with the limit almost at the size needed to check performance. + # The performant limit check is slightly fuzzy, give it a some room. + with support.adjust_int_max_str_digits(int(.995 * digits)): + with self.assertRaises(ValueError) as err: + start = get_time() + str(huge_int) + seconds_to_fail_huge = get_time() - start + self.assertIn('conversion', str(err.exception)) + self.assertLess(seconds_to_fail_huge, seconds_to_convert/8) + + # Now we test that a conversion that would take 30x as long also fails + # in a similarly fast fashion. + extra_huge_int = int(f'0x{"c"*500_000}', base=16) # 602060 digits. + with self.assertRaises(ValueError) as err: + start = get_time() + # If not limited, 8 seconds said Zen based cloud VM. + str(extra_huge_int) + seconds_to_fail_extra_huge = get_time() - start + self.assertIn('conversion', str(err.exception)) + self.assertLess(seconds_to_fail_extra_huge, seconds_to_convert/8) + + def test_denial_of_service_prevented_str_to_int(self): + """Regression test: ensure we fail before performing O(N**2) work.""" + maxdigits = sys.get_int_max_str_digits() + assert maxdigits < 100_000, maxdigits # A test prerequisite. + get_time = time.process_time + if get_time() <= 0: # some platforms like WASM lack process_time() + get_time = time.monotonic + + digits = 133700 + huge = '8'*digits + with support.adjust_int_max_str_digits(digits): + start = get_time() + int(huge) + seconds_to_convert = get_time() - start + # Ensuring that we chose a slow enough conversion to measure. + # It takes 0.1 seconds on a Zen based cloud VM in an opt build. + if seconds_to_convert < 0.005: + raise unittest.SkipTest('"slow" conversion took only ' + f'{seconds_to_convert} seconds.') + + with support.adjust_int_max_str_digits(digits - 1): + with self.assertRaises(ValueError) as err: + start = get_time() + int(huge) + seconds_to_fail_huge = get_time() - start + self.assertIn('conversion', str(err.exception)) + self.assertLess(seconds_to_fail_huge, seconds_to_convert/8) + + # Now we test that a conversion that would take 30x as long also fails + # in a similarly fast fashion. + extra_huge = '7'*1_200_000 + with self.assertRaises(ValueError) as err: + start = get_time() + # If not limited, 8 seconds in the Zen based cloud VM. + int(extra_huge) + seconds_to_fail_extra_huge = get_time() - start + self.assertIn('conversion', str(err.exception)) + self.assertLess(seconds_to_fail_extra_huge, seconds_to_convert/8) + + def test_power_of_two_bases_unlimited(self): + """The limit does not apply to power of 2 bases.""" + maxdigits = sys.get_int_max_str_digits() + + for base in (2, 4, 8, 16, 32): + with self.subTest(base=base): + self.int_class('1' * (maxdigits + 1), base) + assert maxdigits < 100_000 + self.int_class('1' * 100_000, base) + + def test_underscores_ignored(self): + maxdigits = sys.get_int_max_str_digits() + + triples = maxdigits // 3 + s = '111' * triples + s_ = '1_11' * triples + self.int_class(s) # succeeds + self.int_class(s_) # succeeds + self.check(f'{s}111') + self.check(f'{s_}_111') + + def test_sign_not_counted(self): + int_class = self.int_class + max_digits = sys.get_int_max_str_digits() + s = '5' * max_digits + i = int_class(s) + pos_i = int_class(f'+{s}') + assert i == pos_i + neg_i = int_class(f'-{s}') + assert -pos_i == neg_i + str(pos_i) + str(neg_i) + + def _other_base_helper(self, base): + int_class = self.int_class + max_digits = sys.get_int_max_str_digits() + s = '2' * max_digits + i = int_class(s, base) + if base > 10: + with self.assertRaises(ValueError): + str(i) + elif base < 10: + str(i) + with self.assertRaises(ValueError) as err: + int_class(f'{s}1', base) + + def test_int_from_other_bases(self): + base = 3 + with self.subTest(base=base): + self._other_base_helper(base) + base = 36 + with self.subTest(base=base): + self._other_base_helper(base) + + +class IntSubclassStrDigitLimitsTests(IntStrDigitLimitsTests): + int_class = IntSubclass + + if __name__ == "__main__": unittest.main() diff --git a/Lib/test/test_json/test_decode.py b/Lib/test/test_json/test_decode.py index fdb9e621..124045b1 100644 --- a/Lib/test/test_json/test_decode.py +++ b/Lib/test/test_json/test_decode.py @@ -2,6 +2,7 @@ import decimal from io import StringIO from collections import OrderedDict from test.test_json import PyTest, CTest +from test import support class TestDecode: @@ -95,5 +96,13 @@ class TestDecode: d = self.json.JSONDecoder() self.assertRaises(ValueError, d.raw_decode, 'a'*42, -50000) + def test_limit_int(self): + maxdigits = 5000 + with support.adjust_int_max_str_digits(maxdigits): + self.loads('1' * maxdigits) + with self.assertRaises(ValueError): + self.loads('1' * (maxdigits + 1)) + + class TestPyDecode(TestDecode, PyTest): pass class TestCDecode(TestDecode, CTest): pass diff --git a/Lib/test/test_socket.py b/Lib/test/test_socket.py index 5712b46f..127d61cb 100755 --- a/Lib/test/test_socket.py +++ b/Lib/test/test_socket.py @@ -5417,6 +5417,20 @@ class TestLinuxAbstractNamespace(unittest.TestCase): s.bind(bytearray(b"\x00python\x00test\x00")) self.assertEqual(s.getsockname(), b"\x00python\x00test\x00") + def testAutobind(self): + # Check that binding to an empty string binds to an available address + # in the abstract namespace as specified in unix(7) "Autobind feature". + abstract_address = b"^\0[0-9a-f]{5}" + with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as s1: + s1.bind("") + self.assertRegex(s1.getsockname(), abstract_address) + # Each socket is bound to a different abstract address. + with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as s2: + s2.bind("") + self.assertRegex(s2.getsockname(), abstract_address) + self.assertNotEqual(s1.getsockname(), s2.getsockname()) + + @unittest.skipUnless(hasattr(socket, 'AF_UNIX'), 'test needs socket.AF_UNIX') class TestUnixDomain(unittest.TestCase): @@ -5486,6 +5500,11 @@ class TestUnixDomain(unittest.TestCase): self.addCleanup(support.unlink, path) self.assertEqual(self.sock.getsockname(), path) + @unittest.skipIf(sys.platform == 'linux', 'Linux specific test') + def testEmptyAddress(self): + # Test that binding empty address fails. + self.assertRaises(OSError, self.sock.bind, "") + class BufferIOTest(SocketConnectedTest): """ diff --git a/Lib/test/test_ssl.py b/Lib/test/test_ssl.py index b5eb40f8..6faa2ee0 100644 --- a/Lib/test/test_ssl.py +++ b/Lib/test/test_ssl.py @@ -4,7 +4,7 @@ import sys import unittest import unittest.mock from test import support -from test.support import socket_helper +from test.support import socket_helper, warnings_helper import socket import select import time @@ -1129,8 +1129,12 @@ class ContextTests(unittest.TestCase): def test_constructor(self): for protocol in PROTOCOLS: - ssl.SSLContext(protocol) - ctx = ssl.SSLContext() + if has_tls_protocol(protocol): + with warnings_helper.check_warnings(): + ctx = ssl.SSLContext(protocol) + self.assertEqual(ctx.protocol, protocol) + with warnings_helper.check_warnings(): + ctx = ssl.SSLContext() self.assertEqual(ctx.protocol, ssl.PROTOCOL_TLS) self.assertRaises(ValueError, ssl.SSLContext, -1) self.assertRaises(ValueError, ssl.SSLContext, 42) @@ -1165,8 +1169,20 @@ class ContextTests(unittest.TestCase): ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) ctx.set_ciphers('AESGCM') names = set(d['name'] for d in ctx.get_ciphers()) - self.assertIn('AES256-GCM-SHA384', names) - self.assertIn('AES128-GCM-SHA256', names) + expected = { + 'AES128-GCM-SHA256', + 'ECDHE-ECDSA-AES128-GCM-SHA256', + 'ECDHE-RSA-AES128-GCM-SHA256', + 'DHE-RSA-AES128-GCM-SHA256', + 'AES256-GCM-SHA384', + 'ECDHE-ECDSA-AES256-GCM-SHA384', + 'ECDHE-RSA-AES256-GCM-SHA384', + 'DHE-RSA-AES256-GCM-SHA384', + } + intersection = names.intersection(expected) + self.assertGreaterEqual( + len(intersection), 2, f"\ngot: {sorted(names)}\nexpected: {sorted(expected)}" + ) def test_options(self): ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) @@ -1281,7 +1297,7 @@ class ContextTests(unittest.TestCase): ctx.maximum_version = ssl.TLSVersion.MINIMUM_SUPPORTED self.assertIn( ctx.maximum_version, - {ssl.TLSVersion.TLSv1, ssl.TLSVersion.SSLv3} + {ssl.TLSVersion.TLSv1, ssl.TLSVersion.TLSv1_1, ssl.TLSVersion.SSLv3} ) ctx.minimum_version = ssl.TLSVersion.MAXIMUM_SUPPORTED @@ -1293,19 +1309,19 @@ class ContextTests(unittest.TestCase): with self.assertRaises(ValueError): ctx.minimum_version = 42 - ctx = ssl.SSLContext(ssl.PROTOCOL_TLSv1_1) - - self.assertIn( - ctx.minimum_version, minimum_range - ) - self.assertEqual( - ctx.maximum_version, ssl.TLSVersion.MAXIMUM_SUPPORTED - ) - with self.assertRaises(ValueError): - ctx.minimum_version = ssl.TLSVersion.MINIMUM_SUPPORTED - with self.assertRaises(ValueError): - ctx.maximum_version = ssl.TLSVersion.TLSv1 + if has_tls_protocol(ssl.PROTOCOL_TLSv1_1): + ctx = ssl.SSLContext(ssl.PROTOCOL_TLSv1_1) + self.assertIn( + ctx.minimum_version, minimum_range + ) + self.assertEqual( + ctx.maximum_version, ssl.TLSVersion.MAXIMUM_SUPPORTED + ) + with self.assertRaises(ValueError): + ctx.minimum_version = ssl.TLSVersion.MINIMUM_SUPPORTED + with self.assertRaises(ValueError): + ctx.maximum_version = ssl.TLSVersion.TLSv1 @unittest.skipUnless(have_verify_flags(), "verify_flags need OpenSSL > 0.9.8") @@ -1692,10 +1708,12 @@ class ContextTests(unittest.TestCase): self.assertFalse(ctx.check_hostname) self._assert_context_options(ctx) - ctx = ssl._create_stdlib_context(ssl.PROTOCOL_TLSv1) - self.assertEqual(ctx.protocol, ssl.PROTOCOL_TLSv1) - self.assertEqual(ctx.verify_mode, ssl.CERT_NONE) - self._assert_context_options(ctx) + if has_tls_protocol(ssl.PROTOCOL_TLSv1): + with warnings_helper.check_warnings(): + ctx = ssl._create_stdlib_context(ssl.PROTOCOL_TLSv1) + self.assertEqual(ctx.protocol, ssl.PROTOCOL_TLSv1) + self.assertEqual(ctx.verify_mode, ssl.CERT_NONE) + self._assert_context_options(ctx) ctx = ssl._create_stdlib_context(ssl.PROTOCOL_TLSv1, cert_reqs=ssl.CERT_REQUIRED, @@ -3411,10 +3429,12 @@ class ThreadedTests(unittest.TestCase): client_options=ssl.OP_NO_TLSv1_2) try_protocol_combo(ssl.PROTOCOL_TLS, ssl.PROTOCOL_TLSv1_2, 'TLSv1.2') - try_protocol_combo(ssl.PROTOCOL_TLSv1_2, ssl.PROTOCOL_TLSv1, False) - try_protocol_combo(ssl.PROTOCOL_TLSv1, ssl.PROTOCOL_TLSv1_2, False) - try_protocol_combo(ssl.PROTOCOL_TLSv1_2, ssl.PROTOCOL_TLSv1_1, False) - try_protocol_combo(ssl.PROTOCOL_TLSv1_1, ssl.PROTOCOL_TLSv1_2, False) + if has_tls_protocol(ssl.PROTOCOL_TLSv1): + try_protocol_combo(ssl.PROTOCOL_TLSv1_2, ssl.PROTOCOL_TLSv1, False) + try_protocol_combo(ssl.PROTOCOL_TLSv1, ssl.PROTOCOL_TLSv1_2, False) + if has_tls_protocol(ssl.PROTOCOL_TLSv1_1): + try_protocol_combo(ssl.PROTOCOL_TLSv1_2, ssl.PROTOCOL_TLSv1_1, False) + try_protocol_combo(ssl.PROTOCOL_TLSv1_1, ssl.PROTOCOL_TLSv1_2, False) def test_starttls(self): """Switching from clear text to encrypted and back again.""" diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index ed85d185..ef32424e 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -409,11 +409,17 @@ class SysModuleTest(unittest.TestCase): self.assertIsInstance(sys.executable, str) self.assertEqual(len(sys.float_info), 11) self.assertEqual(sys.float_info.radix, 2) - self.assertEqual(len(sys.int_info), 2) + self.assertEqual(len(sys.int_info), 4) self.assertTrue(sys.int_info.bits_per_digit % 5 == 0) self.assertTrue(sys.int_info.sizeof_digit >= 1) + self.assertGreaterEqual(sys.int_info.default_max_str_digits, 500) + self.assertGreaterEqual(sys.int_info.str_digits_check_threshold, 100) + self.assertGreater(sys.int_info.default_max_str_digits, + sys.int_info.str_digits_check_threshold) self.assertEqual(type(sys.int_info.bits_per_digit), int) self.assertEqual(type(sys.int_info.sizeof_digit), int) + self.assertIsInstance(sys.int_info.default_max_str_digits, int) + self.assertIsInstance(sys.int_info.str_digits_check_threshold, int) self.assertIsInstance(sys.hexversion, int) self.assertEqual(len(sys.hash_info), 9) @@ -517,7 +523,8 @@ class SysModuleTest(unittest.TestCase): "inspect", "interactive", "optimize", "dont_write_bytecode", "no_user_site", "no_site", "ignore_environment", "verbose", "bytes_warning", "quiet", - "hash_randomization", "isolated", "dev_mode", "utf8_mode") + "hash_randomization", "isolated", "dev_mode", "utf8_mode", + "int_max_str_digits") for attr in attrs: self.assertTrue(hasattr(sys.flags, attr), attr) attr_type = bool if attr == "dev_mode" else int diff --git a/Lib/test/test_xml_etree.py b/Lib/test/test_xml_etree.py index 956d4c58..7c346f27 100644 --- a/Lib/test/test_xml_etree.py +++ b/Lib/test/test_xml_etree.py @@ -3680,13 +3680,7 @@ class IOTest(unittest.TestCase): tree = ET.ElementTree(ET.XML('''\xf8''')) tree.write(TESTFN, encoding='unicode') with open(TESTFN, 'rb') as f: - data = f.read() - expected = "\xf8".encode(encoding, 'xmlcharrefreplace') - if encoding.lower() in ('utf-8', 'ascii'): - self.assertEqual(data, expected) - else: - self.assertIn(b"\xc3\xb8") def test_write_to_text_file(self): self.addCleanup(support.unlink, TESTFN) @@ -3701,17 +3695,13 @@ class IOTest(unittest.TestCase): tree.write(f, encoding='unicode') self.assertFalse(f.closed) with open(TESTFN, 'rb') as f: - self.assertEqual(f.read(), convlinesep( - b'''\n''' - b'''ø''')) + self.assertEqual(f.read(), b'''ø''') with open(TESTFN, 'w', encoding='ISO-8859-1') as f: tree.write(f, encoding='unicode') self.assertFalse(f.closed) with open(TESTFN, 'rb') as f: - self.assertEqual(f.read(), convlinesep( - b'''\n''' - b'''\xf8''')) + self.assertEqual(f.read(), b'''\xf8''') def test_write_to_binary_file(self): self.addCleanup(support.unlink, TESTFN) diff --git a/Lib/test/test_xmlrpc.py b/Lib/test/test_xmlrpc.py index f714b773..d12da2f0 100644 --- a/Lib/test/test_xmlrpc.py +++ b/Lib/test/test_xmlrpc.py @@ -285,6 +285,16 @@ class XMLRPCTestCase(unittest.TestCase): check('9876543210.0123456789', decimal.Decimal('9876543210.0123456789')) + def test_limit_int(self): + check = self.check_loads + maxdigits = 5000 + with support.adjust_int_max_str_digits(maxdigits): + s = '1' * (maxdigits + 1) + with self.assertRaises(ValueError): + check(f'{s}', None) + with self.assertRaises(ValueError): + check(f'{s}', None) + def test_get_host_info(self): # see bug #3613, this raised a TypeError transp = xmlrpc.client.Transport() diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index 66c43c2d..46368bb2 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -731,6 +731,7 @@ class ElementTree: with _get_writer(file_or_filename, encoding) as (write, declared_encoding): if method == "xml" and (xml_declaration or (xml_declaration is None and + encoding.lower() != "unicode" and declared_encoding.lower() not in ("utf-8", "us-ascii"))): write("\n" % ( declared_encoding,)) @@ -757,13 +758,10 @@ def _get_writer(file_or_filename, encoding): except AttributeError: # file_or_filename is a file name if encoding.lower() == "unicode": - file = open(file_or_filename, "w", - errors="xmlcharrefreplace") - else: - file = open(file_or_filename, "w", encoding=encoding, - errors="xmlcharrefreplace") - with file: - yield file.write, file.encoding + encoding="utf-8" + with open(file_or_filename, "w", encoding=encoding, + errors="xmlcharrefreplace") as file: + yield file.write, encoding else: # file_or_filename is a file-like object # encoding determines if it is a text or binary writer diff --git a/Makefile.pre.in b/Makefile.pre.in index 42b1ec62..c0272bfc 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1159,6 +1159,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_import.h \ $(srcdir)/Include/internal/pycore_initconfig.h \ $(srcdir)/Include/internal/pycore_interp.h \ + $(srcdir)/Include/internal/pycore_long.h \ $(srcdir)/Include/internal/pycore_object.h \ $(srcdir)/Include/internal/pycore_pathconfig.h \ $(srcdir)/Include/internal/pycore_pyerrors.h \ diff --git a/Misc/ACKS b/Misc/ACKS index a9f15b4f..c6e7c3a0 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1033,6 +1033,7 @@ Robert Li Xuanji Li Zekun Li Zheao Li +Eli Libman Dan Lidral-Porter Robert van Liere Ross Light diff --git a/Misc/NEWS b/Misc/NEWS index 55d7f5ba..1ac3ba12 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -2,6 +2,74 @@ Python News +++++++++++ +What's New in Python 3.9.14 final? +================================== + +*Release date: 2022-09-06* + +Security +-------- + +- gh-issue-95778: Converting between :class:`int` and :class:`str` in bases + other than 2 (binary), 4, 8 (octal), 16 (hexadecimal), or 32 such as base + 10 (decimal) now raises a :exc:`ValueError` if the number of digits in + string form is above a limit to avoid potential denial of service attacks + due to the algorithmic complexity. This is a mitigation for + `CVE-2020-10735 + `_. + + This new limit can be configured or disabled by environment variable, + command line flag, or :mod:`sys` APIs. See the :ref:`integer string + conversion length limitation ` documentation. The + default limit is 4300 digits in string form. + + Patch by Gregory P. Smith [Google] and Christian Heimes [Red Hat] with + feedback from Victor Stinner, Thomas Wouters, Steve Dower, Ned Deily, and + Mark Dickinson. + +- gh-issue-87389: :mod:`http.server`: Fix an open redirection vulnerability + in the HTTP server when an URI path starts with ``//``. Vulnerability + discovered, and initial fix proposed, by Hamza Avvan. + +Core and Builtins +----------------- + +- gh-issue-93065: Fix contextvars HAMT implementation to handle iteration + over deep trees. + + The bug was discovered and fixed by Eli Libman. See + `MagicStack/immutables#84 + `_ for more details. + +Library +------- + +- gh-issue-94821: Fix binding of unix socket to empty address on Linux to + use an available address from the abstract namespace, instead of "\0". + +- gh-issue-91810: Suppress writing an XML declaration in open files in + ``ElementTree.write()`` with ``encoding='unicode'`` and + ``xml_declaration=None``. + +- bpo-45393: Fix the formatting for ``await x`` and ``not x`` in the + operator precedence table when using the :func:`help` system. + +- bpo-46197: Fix :mod:`ensurepip` environment isolation for subprocess + running ``pip``. + +Tests +----- + +- gh-issue-95280: Fix problem with ``test_ssl`` ``test_get_ciphers`` on + systems that require perfect forward secrecy (PFS) ciphers. + +- gh-issue-94208: ``test_ssl`` is now checking for supported TLS version and + protocols in more tests. + +- bpo-47016: Create a GitHub Actions workflow for verifying bundled pip and + setuptools. Patch by Illia Volochii and Adam Turner. + + What's New in Python 3.9.13 final? ================================== diff --git a/Modules/socketmodule.c b/Modules/socketmodule.c index 6d6c92e4..133470f9 100644 --- a/Modules/socketmodule.c +++ b/Modules/socketmodule.c @@ -1715,8 +1715,10 @@ getsockaddrarg(PySocketSockObject *s, PyObject *args, struct sockaddr_un* addr = &addrbuf->un; #ifdef __linux__ - if (path.len > 0 && *(const char *)path.buf == 0) { - /* Linux abstract namespace extension */ + if (path.len == 0 || *(const char *)path.buf == 0) { + /* Linux abstract namespace extension: + - Empty address auto-binding to an abstract address + - Address that starts with null byte */ if ((size_t)path.len > sizeof addr->sun_path) { PyErr_SetString(PyExc_OSError, "AF_UNIX path too long"); diff --git a/Objects/longobject.c b/Objects/longobject.c index cf13b2c4..ec18ec32 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -3,7 +3,9 @@ /* XXX The functional organization of this file is terrible */ #include "Python.h" +#include "pycore_initconfig.h" // _Py_global_config_int_max_str_digits #include "pycore_interp.h" // _PY_NSMALLPOSINTS +#include "pycore_long.h" #include "pycore_pystate.h" // _Py_IsMainInterpreter() #include "longintrepr.h" @@ -36,6 +38,9 @@ PyObject *_PyLong_One = NULL; #define IS_SMALL_INT(ival) (-NSMALLNEGINTS <= (ival) && (ival) < NSMALLPOSINTS) #define IS_SMALL_UINT(ival) ((ival) < NSMALLPOSINTS) +#define _MAX_STR_DIGITS_ERROR_FMT_TO_INT "Exceeds the limit (%d) for integer string conversion: value has %zd digits" +#define _MAX_STR_DIGITS_ERROR_FMT_TO_STR "Exceeds the limit (%d) for integer string conversion" + static PyObject * get_small_int(sdigit ival) { @@ -1718,6 +1723,23 @@ long_to_decimal_string_internal(PyObject *aa, size_a = Py_ABS(Py_SIZE(a)); negative = Py_SIZE(a) < 0; + /* quick and dirty pre-check for overflowing the decimal digit limit, + based on the inequality 10/3 >= log2(10) + + explanation in https://github.com/python/cpython/pull/96537 + */ + if (size_a >= 10 * _PY_LONG_MAX_STR_DIGITS_THRESHOLD + / (3 * PyLong_SHIFT) + 2) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + int max_str_digits = interp->int_max_str_digits; + if ((max_str_digits > 0) && + (max_str_digits / (3 * PyLong_SHIFT) <= (size_a - 11) / 10)) { + PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_STR, + max_str_digits); + return -1; + } + } + /* quick and dirty upper bound for the number of digits required to express a in base _PyLong_DECIMAL_BASE: @@ -1777,6 +1799,17 @@ long_to_decimal_string_internal(PyObject *aa, tenpow *= 10; strlen++; } + if (strlen > _PY_LONG_MAX_STR_DIGITS_THRESHOLD) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + int max_str_digits = interp->int_max_str_digits; + Py_ssize_t strlen_nosign = strlen - negative; + if ((max_str_digits > 0) && (strlen_nosign > max_str_digits)) { + Py_DECREF(scratch); + PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_STR, + max_str_digits); + return -1; + } + } if (writer) { if (_PyUnicodeWriter_Prepare(writer, strlen, '9') == -1) { Py_DECREF(scratch); @@ -2290,6 +2323,7 @@ PyLong_FromString(const char *str, char **pend, int base) start = str; if ((base & (base - 1)) == 0) { + /* binary bases are not limited by int_max_str_digits */ int res = long_from_binary_base(&str, base, &z); if (res < 0) { /* Syntax error. */ @@ -2441,6 +2475,17 @@ digit beyond the first. goto onError; } + /* Limit the size to avoid excessive computation attacks. */ + if (digits > _PY_LONG_MAX_STR_DIGITS_THRESHOLD) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + int max_str_digits = interp->int_max_str_digits; + if ((max_str_digits > 0) && (digits > max_str_digits)) { + PyErr_Format(PyExc_ValueError, _MAX_STR_DIGITS_ERROR_FMT_TO_INT, + max_str_digits, digits); + return NULL; + } + } + /* Create an int object that can contain the largest possible * integer with this base and length. Note that there's no * need to initialize z->ob_digit -- no slot is read up before @@ -5071,6 +5116,7 @@ long_new_impl(PyTypeObject *type, PyObject *x, PyObject *obase) } return PyLong_FromLong(0L); } + /* default base and limit, forward to standard implementation */ if (obase == NULL) return PyNumber_Long(x); @@ -5723,6 +5769,8 @@ internal representation of integers. The attributes are read only."); static PyStructSequence_Field int_info_fields[] = { {"bits_per_digit", "size of a digit in bits"}, {"sizeof_digit", "size in bytes of the C type used to represent a digit"}, + {"default_max_str_digits", "maximum string conversion digits limitation"}, + {"str_digits_check_threshold", "minimum positive value for int_max_str_digits"}, {NULL, NULL} }; @@ -5730,7 +5778,7 @@ static PyStructSequence_Desc int_info_desc = { "sys.int_info", /* name */ int_info__doc__, /* doc */ int_info_fields, /* fields */ - 2 /* number of fields */ + 4 /* number of fields */ }; PyObject * @@ -5745,6 +5793,17 @@ PyLong_GetInfo(void) PyLong_FromLong(PyLong_SHIFT)); PyStructSequence_SET_ITEM(int_info, field++, PyLong_FromLong(sizeof(digit))); + /* + * The following two fields were added after investigating uses of + * sys.int_info in the wild: Exceedingly rarely used. The ONLY use found was + * numba using sys.int_info.bits_per_digit as attribute access rather than + * sequence unpacking. Cython and sympy also refer to sys.int_info but only + * as info for debugging. No concern about adding these in a backport. + */ + PyStructSequence_SET_ITEM(int_info, field++, + PyLong_FromLong(_PY_LONG_DEFAULT_MAX_STR_DIGITS)); + PyStructSequence_SET_ITEM(int_info, field++, + PyLong_FromLong(_PY_LONG_MAX_STR_DIGITS_THRESHOLD)); if (PyErr_Occurred()) { Py_CLEAR(int_info); return NULL; @@ -5790,6 +5849,10 @@ _PyLong_Init(PyThreadState *tstate) } } } + tstate->interp->int_max_str_digits = _Py_global_config_int_max_str_digits; + if (tstate->interp->int_max_str_digits == -1) { + tstate->interp->int_max_str_digits = _PY_LONG_DEFAULT_MAX_STR_DIGITS; + } return 1; } diff --git a/Parser/pegen/pegen.c b/Parser/pegen/pegen.c index cdfbc12d..15b06ce6 100644 --- a/Parser/pegen/pegen.c +++ b/Parser/pegen/pegen.c @@ -967,6 +967,24 @@ _PyPegen_number_token(Parser *p) if (c == NULL) { p->error_indicator = 1; + PyObject *exc_type, *exc_value, *exc_tb; + PyErr_Fetch(&exc_type, &exc_value, &exc_tb); + // The only way a ValueError should happen in _this_ code is via + // PyLong_FromString hitting a length limit. + if (exc_type == PyExc_ValueError && exc_value != NULL) { + // The Fetch acted as PyErr_Clear(), we're replacing the exception. + Py_XDECREF(exc_tb); + Py_DECREF(exc_type); + RAISE_ERROR_KNOWN_LOCATION( + p, PyExc_SyntaxError, + t->lineno, 0 /* col_offset */, + "%S - Consider hexadecimal for huge integer literals " + "to avoid decimal conversion limits.", + exc_value); + Py_DECREF(exc_value); + } else { + PyErr_Restore(exc_type, exc_value, exc_tb); + } return NULL; } diff --git a/Python/clinic/sysmodule.c.h b/Python/clinic/sysmodule.c.h index 4615ebaa..41444080 100644 --- a/Python/clinic/sysmodule.c.h +++ b/Python/clinic/sysmodule.c.h @@ -667,6 +667,64 @@ exit: #endif /* defined(USE_MALLOPT) */ +PyDoc_STRVAR(sys_get_int_max_str_digits__doc__, +"get_int_max_str_digits($module, /)\n" +"--\n" +"\n" +"Set the maximum string digits limit for non-binary int<->str conversions."); + +#define SYS_GET_INT_MAX_STR_DIGITS_METHODDEF \ + {"get_int_max_str_digits", (PyCFunction)sys_get_int_max_str_digits, METH_NOARGS, sys_get_int_max_str_digits__doc__}, + +static PyObject * +sys_get_int_max_str_digits_impl(PyObject *module); + +static PyObject * +sys_get_int_max_str_digits(PyObject *module, PyObject *Py_UNUSED(ignored)) +{ + return sys_get_int_max_str_digits_impl(module); +} + +PyDoc_STRVAR(sys_set_int_max_str_digits__doc__, +"set_int_max_str_digits($module, /, maxdigits)\n" +"--\n" +"\n" +"Set the maximum string digits limit for non-binary int<->str conversions."); + +#define SYS_SET_INT_MAX_STR_DIGITS_METHODDEF \ + {"set_int_max_str_digits", (PyCFunction)(void(*)(void))sys_set_int_max_str_digits, METH_FASTCALL|METH_KEYWORDS, sys_set_int_max_str_digits__doc__}, + +static PyObject * +sys_set_int_max_str_digits_impl(PyObject *module, int maxdigits); + +static PyObject * +sys_set_int_max_str_digits(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = {"maxdigits", NULL}; + static _PyArg_Parser _parser = {NULL, _keywords, "set_int_max_str_digits", 0}; + PyObject *argsbuf[1]; + int maxdigits; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); + if (!args) { + goto exit; + } + if (PyFloat_Check(args[0])) { + PyErr_SetString(PyExc_TypeError, + "integer argument expected, got float" ); + goto exit; + } + maxdigits = _PyLong_AsInt(args[0]); + if (maxdigits == -1 && PyErr_Occurred()) { + goto exit; + } + return_value = sys_set_int_max_str_digits_impl(module, maxdigits); + +exit: + return return_value; +} + PyDoc_STRVAR(sys_getrefcount__doc__, "getrefcount($module, object, /)\n" "--\n" @@ -970,4 +1028,4 @@ sys_getandroidapilevel(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef SYS_GETANDROIDAPILEVEL_METHODDEF #define SYS_GETANDROIDAPILEVEL_METHODDEF #endif /* !defined(SYS_GETANDROIDAPILEVEL_METHODDEF) */ -/*[clinic end generated code: output=39eb34a01fb9a919 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=401254a595859ac6 input=a9049054013a1b77]*/ diff --git a/Python/hamt.c b/Python/hamt.c index 8801c5ea..3296109f 100644 --- a/Python/hamt.c +++ b/Python/hamt.c @@ -407,14 +407,22 @@ hamt_hash(PyObject *o) return -1; } - /* While it's suboptimal to reduce Python's 64 bit hash to + /* While it's somewhat suboptimal to reduce Python's 64 bit hash to 32 bits via XOR, it seems that the resulting hash function is good enough (this is also how Long type is hashed in Java.) Storing 10, 100, 1000 Python strings results in a relatively shallow and uniform tree structure. - Please don't change this hashing algorithm, as there are many - tests that test some exact tree shape to cover all code paths. + Also it's worth noting that it would be possible to adapt the tree + structure to 64 bit hashes, but that would increase memory pressure + and provide little to no performance benefits for collections with + fewer than billions of key/value pairs. + + Important: do not change this hash reducing function. There are many + tests that need an exact tree shape to cover all code paths and + we do that by specifying concrete values for test data's `__hash__`. + If this function is changed most of the regression tests would + become useless. */ int32_t xored = (int32_t)(hash & 0xffffffffl) ^ (int32_t)(hash >> 32); return xored == -1 ? -2 : xored; diff --git a/Python/initconfig.c b/Python/initconfig.c index 116ee33f..a2c435f3 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -3,6 +3,7 @@ #include "pycore_getopt.h" // _PyOS_GetOpt() #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_interp.h" // _PyInterpreterState.runtime +#include "pycore_long.h" // _PY_LONG_MAX_STR_DIGITS_THRESHOLD #include "pycore_pathconfig.h" // _Py_path_config #include "pycore_pyerrors.h" // _PyErr_Fetch() #include "pycore_pylifecycle.h" // _Py_PreInitializeFromConfig() @@ -99,6 +100,9 @@ static const char usage_3[] = "\ otherwise activate automatically)\n\ -X pycache_prefix=PATH: enable writing .pyc files to a parallel tree rooted at the\n\ given directory instead of to the code tree\n\ + -X int_max_str_digits=number: limit the size of int<->str conversions.\n\ + This helps avoid denial of service attacks when parsing untrusted data.\n\ + The default is sys.int_info.default_max_str_digits. 0 disables.\n\ \n\ --check-hash-based-pycs always|default|never:\n\ control how Python invalidates hash-based .pyc files\n\ @@ -125,6 +129,10 @@ static const char usage_6[] = " to seed the hashes of str and bytes objects. It can also be set to an\n" " integer in the range [0,4294967295] to get hash values with a\n" " predictable seed.\n" +"PYTHONINTMAXSTRDIGITS: limits the maximum digit characters in an int value\n" +" when converting from a string and when converting an int back to a str.\n" +" A value of 0 disables the limit. Conversions to or from bases 2, 4, 8,\n" +" 16, and 32 are never limited.\n" "PYTHONMALLOC: set the Python memory allocators and/or install debug hooks\n" " on Python memory allocators. Use PYTHONMALLOC=debug to install debug\n" " hooks.\n" @@ -646,6 +654,10 @@ _PyConfig_InitCompatConfig(PyConfig *config) config->_use_peg_parser = 1; } +/* Excluded from public struct PyConfig for backporting reasons. */ +/* default to unconfigured, _PyLong_Init() does the rest */ +int _Py_global_config_int_max_str_digits = -1; + static void config_init_defaults(PyConfig *config) @@ -1410,6 +1422,48 @@ config_init_tracemalloc(PyConfig *config) return _PyStatus_OK(); } +static PyStatus +config_init_int_max_str_digits(PyConfig *config) +{ + int maxdigits; + int valid = 0; + + const char *env = config_get_env(config, "PYTHONINTMAXSTRDIGITS"); + if (env) { + if (!_Py_str_to_int(env, &maxdigits)) { + valid = ((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_STR_DIGITS_THRESHOLD)); + } + if (!valid) { +#define STRINGIFY(VAL) _STRINGIFY(VAL) +#define _STRINGIFY(VAL) #VAL + return _PyStatus_ERR( + "PYTHONINTMAXSTRDIGITS: invalid limit; must be >= " + STRINGIFY(_PY_LONG_MAX_STR_DIGITS_THRESHOLD) + " or 0 for unlimited."); + } + _Py_global_config_int_max_str_digits = maxdigits; + } + + const wchar_t *xoption = config_get_xoption(config, L"int_max_str_digits"); + if (xoption) { + const wchar_t *sep = wcschr(xoption, L'='); + if (sep) { + if (!config_wstr_to_int(sep + 1, &maxdigits)) { + valid = ((maxdigits == 0) || (maxdigits >= _PY_LONG_MAX_STR_DIGITS_THRESHOLD)); + } + } + if (!valid) { + return _PyStatus_ERR( + "-X int_max_str_digits: invalid limit; must be >= " + STRINGIFY(_PY_LONG_MAX_STR_DIGITS_THRESHOLD) + " or 0 for unlimited."); +#undef _STRINGIFY +#undef STRINGIFY + } + _Py_global_config_int_max_str_digits = maxdigits; + } + return _PyStatus_OK(); +} static PyStatus config_init_pycache_prefix(PyConfig *config) @@ -1466,6 +1520,12 @@ config_read_complex_options(PyConfig *config) return status; } } + if (_Py_global_config_int_max_str_digits < 0) { + status = config_init_int_max_str_digits(config); + if (_PyStatus_EXCEPTION(status)) { + return status; + } + } if (config->pycache_prefix == NULL) { status = config_init_pycache_prefix(config); diff --git a/Python/sysmodule.c b/Python/sysmodule.c index a52b2994..8efa850d 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -19,6 +19,7 @@ Data members: #include "frameobject.h" // PyFrame_GetBack() #include "pycore_ceval.h" #include "pycore_initconfig.h" +#include "pycore_long.h" // _PY_LONG_MAX_STR_DIGITS_THRESHOLD #include "pycore_object.h" #include "pycore_pathconfig.h" #include "pycore_pyerrors.h" @@ -1636,6 +1637,45 @@ sys_mdebug_impl(PyObject *module, int flag) } #endif /* USE_MALLOPT */ + +/*[clinic input] +sys.get_int_max_str_digits + +Set the maximum string digits limit for non-binary int<->str conversions. +[clinic start generated code]*/ + +static PyObject * +sys_get_int_max_str_digits_impl(PyObject *module) +/*[clinic end generated code: output=0042f5e8ae0e8631 input=8dab13e2023e60d5]*/ +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + return PyLong_FromSsize_t(interp->int_max_str_digits); +} + +/*[clinic input] +sys.set_int_max_str_digits + + maxdigits: int + +Set the maximum string digits limit for non-binary int<->str conversions. +[clinic start generated code]*/ + +static PyObject * +sys_set_int_max_str_digits_impl(PyObject *module, int maxdigits) +/*[clinic end generated code: output=734d4c2511f2a56d input=d7e3f325db6910c5]*/ +{ + PyThreadState *tstate = _PyThreadState_GET(); + if ((!maxdigits) || (maxdigits >= _PY_LONG_MAX_STR_DIGITS_THRESHOLD)) { + tstate->interp->int_max_str_digits = maxdigits; + Py_RETURN_NONE; + } else { + PyErr_Format( + PyExc_ValueError, "maxdigits must be 0 or larger than %d", + _PY_LONG_MAX_STR_DIGITS_THRESHOLD); + return NULL; + } +} + size_t _PySys_GetSizeOf(PyObject *o) { @@ -1980,6 +2020,8 @@ static PyMethodDef sys_methods[] = { SYS_GET_ASYNCGEN_HOOKS_METHODDEF SYS_GETANDROIDAPILEVEL_METHODDEF SYS_UNRAISABLEHOOK_METHODDEF + SYS_GET_INT_MAX_STR_DIGITS_METHODDEF + SYS_SET_INT_MAX_STR_DIGITS_METHODDEF {NULL, NULL} /* sentinel */ }; @@ -2440,6 +2482,7 @@ static PyStructSequence_Field flags_fields[] = { {"isolated", "-I"}, {"dev_mode", "-X dev"}, {"utf8_mode", "-X utf8"}, + {"int_max_str_digits", "-X int_max_str_digits"}, {0} }; @@ -2447,7 +2490,7 @@ static PyStructSequence_Desc flags_desc = { "sys.flags", /* name */ flags__doc__, /* doc */ flags_fields, /* fields */ - 15 + 16 }; static PyObject* @@ -2483,6 +2526,7 @@ make_flags(PyThreadState *tstate) SetFlag(config->isolated); PyStructSequence_SET_ITEM(seq, pos++, PyBool_FromLong(config->dev_mode)); SetFlag(preconfig->utf8_mode); + SetFlag(_Py_global_config_int_max_str_digits); #undef SetFlag if (_PyErr_Occurred(tstate)) { diff --git a/README.rst b/README.rst index 7a842390..ae3e983b 100644 --- a/README.rst +++ b/README.rst @@ -1,4 +1,4 @@ -This is Python version 3.9.13 +This is Python version 3.9.14 ============================= .. image:: https://travis-ci.org/python/cpython.svg?branch=3.9 diff --git a/Tools/scripts/verify_ensurepip_wheels.py b/Tools/scripts/verify_ensurepip_wheels.py new file mode 100755 index 00000000..044d1fd6 --- /dev/null +++ b/Tools/scripts/verify_ensurepip_wheels.py @@ -0,0 +1,98 @@ +#! /usr/bin/env python3 + +""" +Compare checksums for wheels in :mod:`ensurepip` against the Cheeseshop. + +When GitHub Actions executes the script, output is formatted accordingly. +https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-a-notice-message +""" + +import hashlib +import json +import os +import re +from pathlib import Path +from urllib.request import urlopen + +PACKAGE_NAMES = ("pip", "setuptools") +ENSURE_PIP_ROOT = Path(__file__).parent.parent.parent / "Lib/ensurepip" +WHEEL_DIR = ENSURE_PIP_ROOT / "_bundled" +ENSURE_PIP_INIT_PY_TEXT = (ENSURE_PIP_ROOT / "__init__.py").read_text(encoding="utf-8") +GITHUB_ACTIONS = os.getenv("GITHUB_ACTIONS") == "true" + + +def print_notice(file_path: str, message: str) -> None: + if GITHUB_ACTIONS: + message = f"::notice file={file_path}::{message}" + print(message, end="\n\n") + + +def print_error(file_path: str, message: str) -> None: + if GITHUB_ACTIONS: + message = f"::error file={file_path}::{message}" + print(message, end="\n\n") + + +def verify_wheel(package_name: str) -> bool: + # Find the package on disk + package_path = next(WHEEL_DIR.glob(f"{package_name}*.whl"), None) + if not package_path: + print_error("", f"Could not find a {package_name} wheel on disk.") + return False + + print(f"Verifying checksum for {package_path}.") + + # Find the version of the package used by ensurepip + package_version_match = re.search( + f'_{package_name.upper()}_VERSION = "([^"]+)', ENSURE_PIP_INIT_PY_TEXT + ) + if not package_version_match: + print_error( + package_path, + f"No {package_name} version found in Lib/ensurepip/__init__.py.", + ) + return False + package_version = package_version_match[1] + + # Get the SHA 256 digest from the Cheeseshop + try: + raw_text = urlopen(f"https://pypi.org/pypi/{package_name}/json").read() + except (OSError, ValueError): + print_error(package_path, f"Could not fetch JSON metadata for {package_name}.") + return False + + release_files = json.loads(raw_text)["releases"][package_version] + for release_info in release_files: + if package_path.name != release_info["filename"]: + continue + expected_digest = release_info["digests"].get("sha256", "") + break + else: + print_error(package_path, f"No digest for {package_name} found from PyPI.") + return False + + # Compute the SHA 256 digest of the wheel on disk + actual_digest = hashlib.sha256(package_path.read_bytes()).hexdigest() + + print(f"Expected digest: {expected_digest}") + print(f"Actual digest: {actual_digest}") + + if actual_digest != expected_digest: + print_error( + package_path, f"Failed to verify the checksum of the {package_name} wheel." + ) + return False + + print_notice( + package_path, + f"Successfully verified the checksum of the {package_name} wheel.", + ) + return True + + +if __name__ == "__main__": + exit_status = 0 + for package_name in PACKAGE_NAMES: + if not verify_wheel(package_name): + exit_status = 1 + raise SystemExit(exit_status)