From cc5cd4d93e3e079e897da9ceb1732ef16d79d01b Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Fri, 31 May 2024 17:08:55 -0500 Subject: [PATCH 01/72] statistics.fmean(): speed-up code path for non-sizeable inputs. (gh-119876) --- Lib/statistics.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/Lib/statistics.py b/Lib/statistics.py index c2f4fe8e054d3d..450edfaabe8def 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -505,13 +505,11 @@ def fmean(data, weights=None): n = len(data) except TypeError: # Handle iterators that do not define __len__(). - n = 0 - def count(iterable): - nonlocal n - for n, x in enumerate(iterable, start=1): - yield x - data = count(data) - total = fsum(data) + counter = count() + total = fsum(map(itemgetter(0), zip(data, counter))) + n = next(counter) + else: + total = fsum(data) if not n: raise StatisticsError('fmean requires at least one data point') return total / n From 3859e09e3d92d004978dd838f0511364e7edfb94 Mon Sep 17 00:00:00 2001 From: Alyssa Coghlan Date: Sat, 1 Jun 2024 13:59:35 +1000 Subject: [PATCH 02/72] gh-74929: PEP 667 C API documentation (gh-119379) * Add docs for new APIs * Add soft-deprecation notices * Add What's New porting entries * Update comments referencing `PyFrame_LocalsToFast()` to mention the proxy instead * Other related cleanups found when looking for refs to the deprecated APIs --- Doc/c-api/reflection.rst | 38 ++++++++++++++++++++++++++++++++++++++ Doc/data/refcounts.dat | 32 ++++++++++++++++++++++++++++++++ Doc/whatsnew/3.13.rst | 16 +++++++++++++++- Lib/test/test_sys.py | 13 +++++++++---- Objects/frameobject.c | 9 +++++++-- Python/bytecodes.c | 2 +- Python/executor_cases.c.h | 2 +- Python/generated_cases.c.h | 2 +- Python/sysmodule.c | 1 - 9 files changed, 104 insertions(+), 11 deletions(-) diff --git a/Doc/c-api/reflection.rst b/Doc/c-api/reflection.rst index 4b1c4770848a30..5dcfe40c2ce92b 100644 --- a/Doc/c-api/reflection.rst +++ b/Doc/c-api/reflection.rst @@ -7,18 +7,30 @@ Reflection .. c:function:: PyObject* PyEval_GetBuiltins(void) + .. deprecated:: 3.13 + + Use :c:func:`PyEval_GetFrameBuiltins` instead. + Return a dictionary of the builtins in the current execution frame, or the interpreter of the thread state if no frame is currently executing. .. c:function:: PyObject* PyEval_GetLocals(void) + .. deprecated:: 3.13 + + Use :c:func:`PyEval_GetFrameLocals` instead. + Return a dictionary of the local variables in the current execution frame, or ``NULL`` if no frame is currently executing. .. c:function:: PyObject* PyEval_GetGlobals(void) + .. deprecated:: 3.13 + + Use :c:func:`PyEval_GetFrameGlobals` instead. + Return a dictionary of the global variables in the current execution frame, or ``NULL`` if no frame is currently executing. @@ -31,6 +43,32 @@ Reflection See also :c:func:`PyThreadState_GetFrame`. +.. c:function:: PyObject* PyEval_GetFrameBuiltins(void) + + Return a dictionary of the builtins in the current execution frame, + or the interpreter of the thread state if no frame is currently executing. + + .. versionadded:: 3.13 + + +.. c:function:: PyObject* PyEval_GetFrameLocals(void) + + Return a dictionary of the local variables in the current execution frame, + or ``NULL`` if no frame is currently executing. Equivalent to calling + :func:`locals` in Python code. + + .. versionadded:: 3.13 + + +.. c:function:: PyObject* PyEval_GetFrameGlobals(void) + + Return a dictionary of the global variables in the current execution frame, + or ``NULL`` if no frame is currently executing. Equivalent to calling + :func:`globals` in Python code. + + .. versionadded:: 3.13 + + .. c:function:: const char* PyEval_GetFuncName(PyObject *func) Return the name of *func* if it is a function, class or instance object, else the diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat index 62a96146d605ff..a7d06e076a1b55 100644 --- a/Doc/data/refcounts.dat +++ b/Doc/data/refcounts.dat @@ -790,6 +790,12 @@ PyEval_GetGlobals:PyObject*::0: PyEval_GetFrame:PyObject*::0: +PyEval_GetFrameBuiltins:PyObject*::+1: + +PyEval_GetFrameLocals:PyObject*::+1: + +PyEval_GetFrameGlobals:PyObject*::+1: + PyEval_GetFuncDesc:const char*::: PyEval_GetFuncDesc:PyObject*:func:0: @@ -916,6 +922,32 @@ PyFloat_FromString:PyObject*:str:0: PyFloat_GetInfo:PyObject*::+1: PyFloat_GetInfo::void:: +PyFrame_GetBack:PyObject*::+1: +PyFrame_GetBack:PyFrameObject*:frame:0: + +PyFrame_GetBuiltins:PyObject*::+1: +PyFrame_GetBuiltins:PyFrameObject*:frame:0: + +PyFrame_GetCode:PyObject*::+1: +PyFrame_GetCode:PyFrameObject*:frame:0: + +PyFrame_GetGenerator:PyObject*::+1: +PyFrame_GetGenerator:PyFrameObject*:frame:0: + +PyFrame_GetGlobals:PyObject*::+1: +PyFrame_GetGlobals:PyFrameObject*:frame:0: + +PyFrame_GetLocals:PyObject*::+1: +PyFrame_GetLocals:PyFrameObject*:frame:0: + +PyFrame_GetVar:PyObject*::+1: +PyFrame_GetVar:PyFrameObject*:frame:0: +PyFrame_GetVar:PyObject*:name:0: + +PyFrame_GetVarString:PyObject*::+1: +PyFrame_GetVarString:PyFrameObject*:frame:0: +PyFrame_GetVarString:const char*:name:: + PyFrozenSet_Check:int::: PyFrozenSet_Check:PyObject*:p:0: diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 29bb3b81f6323c..3a52baf71310a3 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -97,7 +97,7 @@ Interpreter improvements: * :pep:`667`: The :func:`locals` builtin now has :ref:`defined semantics ` when mutating the returned mapping. Python debuggers and similar tools may now more reliably - update local variables in optimized frames even during concurrent code + update local variables in optimized scopes even during concurrent code execution. New typing features: @@ -2143,6 +2143,11 @@ New Features destruction the same way the :mod:`tracemalloc` module does. (Contributed by Pablo Galindo in :gh:`93502`.) +* Add :c:func:`PyEval_GetFrameBuiltins`, :c:func:`PyEval_GetFrameGlobals`, and + :c:func:`PyEval_GetFrameLocals` to the C API. These replacements for + :c:func:`PyEval_GetBuiltins`, :c:func:`PyEval_GetGlobals`, and + :c:func:`PyEval_GetLocals` return :term:`strong references ` + rather than borrowed references. (Added as part of :pep:`667`.) Build Changes ============= @@ -2318,6 +2323,15 @@ Changes in the C API to :c:func:`PyUnstable_Code_GetFirstFree`. (Contributed by Bogdan Romanyuk in :gh:`115781`.) +* :c:func:`!PyFrame_FastToLocals` and :c:func:`!PyFrame_FastToLocalsWithError` + no longer have any effect. Calling these functions has been redundant since + Python 3.11, when :c:func:`PyFrame_GetLocals` was first introduced. + (Changed as part of :pep:`667`.) + +* :c:func:`!PyFrame_LocalsToFast` no longer has any effect. Calling this function + is redundant now that :c:func:`PyFrame_GetLocals` returns a write-through proxy + for :term:`optimized scopes `. (Changed as part of :pep:`667`.) + Removed C APIs -------------- diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 8fe1d77756866a..1e5823f8883957 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -394,10 +394,15 @@ def test_dlopenflags(self): @test.support.refcount_test def test_refcount(self): - # n here must be a global in order for this test to pass while - # tracing with a python function. Tracing calls PyFrame_FastToLocals - # which will add a copy of any locals to the frame object, causing - # the reference count to increase by 2 instead of 1. + # n here originally had to be a global in order for this test to pass + # while tracing with a python function. Tracing used to call + # PyFrame_FastToLocals, which would add a copy of any locals to the + # frame object, causing the ref count to increase by 2 instead of 1. + # While that no longer happens (due to PEP 667), this test case retains + # its original global-based implementation + # PEP 683's immortal objects also made this point moot, since the + # refcount for None doesn't change anyway. Maybe this test should be + # using a different constant value? (e.g. an integer) global n self.assertRaises(TypeError, sys.getrefcount) c = sys.getrefcount(None) diff --git a/Objects/frameobject.c b/Objects/frameobject.c index fc8d6c7a7aee89..5c65007dae46d2 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -1888,8 +1888,7 @@ frame_get_var(_PyInterpreterFrame *frame, PyCodeObject *co, int i, } // (likely) Otherwise it is an arg (kind & CO_FAST_LOCAL), // with the initial value set when the frame was created... - // (unlikely) ...or it was set to some initial value by - // an earlier call to PyFrame_LocalsToFast(). + // (unlikely) ...or it was set via the f_locals proxy. } } } @@ -2002,18 +2001,24 @@ PyFrame_GetVarString(PyFrameObject *frame, const char *name) int PyFrame_FastToLocalsWithError(PyFrameObject *f) { + // Nothing to do here, as f_locals is now a write-through proxy in + // optimized frames. Soft-deprecated, since there's no maintenance hassle. return 0; } void PyFrame_FastToLocals(PyFrameObject *f) { + // Nothing to do here, as f_locals is now a write-through proxy in + // optimized frames. Soft-deprecated, since there's no maintenance hassle. return; } void PyFrame_LocalsToFast(PyFrameObject *f, int clear) { + // Nothing to do here, as f_locals is now a write-through proxy in + // optimized frames. Soft-deprecated, since there's no maintenance hassle. return; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 1c12e1cddbbc10..413ad1105f9428 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1570,7 +1570,7 @@ dummy_func( inst(MAKE_CELL, (--)) { // "initial" is probably NULL but not if it's an arg (or set - // via PyFrame_LocalsToFast() before MAKE_CELL has run). + // via the f_locals proxy before MAKE_CELL has run). PyObject *initial = GETLOCAL(oparg); PyObject *cell = PyCell_New(initial); if (cell == NULL) { diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 0dfe490cb37047..bab629684c53f6 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1541,7 +1541,7 @@ case _MAKE_CELL: { oparg = CURRENT_OPARG(); // "initial" is probably NULL but not if it's an arg (or set - // via PyFrame_LocalsToFast() before MAKE_CELL has run). + // via the f_locals proxy before MAKE_CELL has run). PyObject *initial = GETLOCAL(oparg); PyObject *cell = PyCell_New(initial); if (cell == NULL) { diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 1a991608385405..355be966cbb84a 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -4784,7 +4784,7 @@ next_instr += 1; INSTRUCTION_STATS(MAKE_CELL); // "initial" is probably NULL but not if it's an arg (or set - // via PyFrame_LocalsToFast() before MAKE_CELL has run). + // via the f_locals proxy before MAKE_CELL has run). PyObject *initial = GETLOCAL(oparg); PyObject *cell = PyCell_New(initial); if (cell == NULL) { diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 4da13e4552e786..00aa95531026b5 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -35,7 +35,6 @@ Data members: #include "pycore_sysmodule.h" // export _PySys_GetSizeOf() #include "pycore_tuple.h" // _PyTuple_FromArray() -#include "frameobject.h" // PyFrame_FastToLocalsWithError() #include "pydtrace.h" // PyDTrace_AUDIT() #include "osdefs.h" // DELIM #include "stdlib_module_names.h" // _Py_stdlib_module_names From 2180991ea3d50f56595edae241cc92dd4e7de642 Mon Sep 17 00:00:00 2001 From: Alyssa Coghlan Date: Sat, 1 Jun 2024 16:21:48 +1000 Subject: [PATCH 03/72] gh-118888: Further PEP 667 docs updates (gh-119893) * Clarify impact on default behaviour of exec, eval, etc * Update documentation for changes to PyEval_GetLocals (gh-74929) Closes gh-11888 --- Doc/c-api/reflection.rst | 21 +++++++++++++++++++-- Doc/whatsnew/3.13.rst | 26 +++++++++++++++++++++++++- 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/Doc/c-api/reflection.rst b/Doc/c-api/reflection.rst index 5dcfe40c2ce92b..af9a1a74ec137e 100644 --- a/Doc/c-api/reflection.rst +++ b/Doc/c-api/reflection.rst @@ -19,11 +19,24 @@ Reflection .. deprecated:: 3.13 - Use :c:func:`PyEval_GetFrameLocals` instead. + To avoid creating a reference cycle in :term:`optimized scopes `, + use either :c:func:`PyEval_GetFrameLocals` to obtain the same behaviour as calling + :func:`locals` in Python code, or else call :c:func:`PyFrame_GetLocals` on the result + of :c:func:`PyEval_GetFrame` to get the same result as this function without having to + cache the proxy instance on the underlying frame. - Return a dictionary of the local variables in the current execution frame, + Return the :attr:`~frame.f_locals` attribute of the currently executing frame, or ``NULL`` if no frame is currently executing. + If the frame refers to an :term:`optimized scope`, this returns a + write-through proxy object that allows modifying the locals. + In all other cases (classes, modules, :func:`exec`, :func:`eval`) it returns + the mapping representing the frame locals directly (as described for + :func:`locals`). + + .. versionchanged:: 3.13 + As part of :pep:`667`, return a proxy object for optimized scopes. + .. c:function:: PyObject* PyEval_GetGlobals(void) @@ -57,6 +70,10 @@ Reflection or ``NULL`` if no frame is currently executing. Equivalent to calling :func:`locals` in Python code. + To access :attr:`~frame.f_locals` on the current frame without making an independent + snapshot in :term:`optimized scopes `, call :c:func:`PyFrame_GetLocals` + on the result of :c:func:`PyEval_GetFrame`. + .. versionadded:: 3.13 diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 3a52baf71310a3..ab260bf2a2d740 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -266,6 +266,21 @@ comprehensions, and generator expressions) to explicitly return independent snapshots of the currently assigned local variables, including locally referenced nonlocal variables captured in closures. +This change to the semantics of :func:`locals` in optimized scopes also affects the default +behaviour of code execution functions that implicitly target ``locals()`` if no explicit +namespace is provided (such as :func:`exec` and :func:`eval`). In previous versions, whether +or not changes could be accessed by calling ``locals()`` after calling the code execution +function was implementation dependent. In CPython specifically, such code would typically +appear to work as desired, but could sometimes fail in optimized scopes based on other code +(including debuggers and code execution tracing tools) potentially resetting the shared +snapshot in that scope. Now, the code will always run against an independent snapshot of the +local variables in optimized scopes, and hence the changes will never be visible in +subsequent calls to ``locals()``. To access the changes made in these cases, an explicit +namespace reference must now be passed to the relevant function. Alternatively, it may make +sense to update affected code to use a higher level code execution API that returns the +resulting code execution namespace (e.g. :func:`runpy.run_path` when executing Python +files from disk). + To ensure debuggers and similar tools can reliably update local variables in scopes affected by this change, :attr:`FrameType.f_locals ` now returns a write-through proxy to the frame's local and locally referenced @@ -2235,7 +2250,10 @@ Changes in the Python API independent snapshot on each call, and hence no longer implicitly updates previously returned references. Obtaining the legacy CPython behaviour now requires explicit calls to update the initially returned dictionary with the - results of subsequent calls to ``locals()``. (Changed as part of :pep:`667`.) + results of subsequent calls to ``locals()``. Code execution functions that + implicitly target ``locals()`` (such as ``exec`` and ``eval``) must be + passed an explicit namespace to access their results in an optimized scope. + (Changed as part of :pep:`667`.) * Calling :func:`locals` from a comprehension at module or class scope (including via ``exec`` or ``eval``) once more behaves as if the comprehension @@ -2323,6 +2341,12 @@ Changes in the C API to :c:func:`PyUnstable_Code_GetFirstFree`. (Contributed by Bogdan Romanyuk in :gh:`115781`.) +* Calling :c:func:`PyFrame_GetLocals` or :c:func:`PyEval_GetLocals` in an + :term:`optimized scope` now returns a write-through proxy rather than a + snapshot that gets updated at ill-specified times. If a snapshot is desired, + it must be created explicitly (e.g. with :c:func:`PyDict_Copy`) or by calling + the new :c:func:`PyEval_GetFrameLocals` API. (Changed as part of :pep:`667`.) + * :c:func:`!PyFrame_FastToLocals` and :c:func:`!PyFrame_FastToLocalsWithError` no longer have any effect. Calling these functions has been redundant since Python 3.11, when :c:func:`PyFrame_GetLocals` was first introduced. From 51191dbfdd115b6de481a1cb5c86b952d622c2d7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 1 Jun 2024 10:11:53 +0000 Subject: [PATCH 04/72] build(deps-dev): bump types-setuptools from 69.5.0.20240423 to 70.0.0.20240524 in /Tools (#119899) --- Tools/requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/requirements-dev.txt b/Tools/requirements-dev.txt index 1767727373918f..f430ddedb9f856 100644 --- a/Tools/requirements-dev.txt +++ b/Tools/requirements-dev.txt @@ -4,4 +4,4 @@ mypy==1.10.0 # needed for peg_generator: types-psutil==5.9.5.20240423 -types-setuptools==69.5.0.20240423 +types-setuptools==70.0.0.20240524 From 5152120ae746516670c77e7feed5c4a8912f2bbb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 1 Jun 2024 10:38:13 +0000 Subject: [PATCH 05/72] Bump types-psutil from 5.9.5.20240423 to 5.9.5.20240516 in /Tools (#119900) --- Tools/requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/requirements-dev.txt b/Tools/requirements-dev.txt index f430ddedb9f856..44316e3d7d8ac5 100644 --- a/Tools/requirements-dev.txt +++ b/Tools/requirements-dev.txt @@ -3,5 +3,5 @@ mypy==1.10.0 # needed for peg_generator: -types-psutil==5.9.5.20240423 +types-psutil==5.9.5.20240516 types-setuptools==70.0.0.20240524 From 60593b2052ca275559c11028d50e19f8e5dfee13 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Sat, 1 Jun 2024 10:04:05 -0400 Subject: [PATCH 06/72] gh-117657: Fix TSAN race in free-threaded GC (#119883) Only call `gc_restore_tid()` from stop-the-world contexts. `worklist_pop()` can be called while other threads are running, so use a relaxed atomic to modify `ob_tid`. --- Python/gc_free_threading.c | 5 ++--- Tools/tsan/suppressions_free_threading.txt | 3 --- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index ee006bb4aa12b7..e6bd012c40ee82 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -86,7 +86,7 @@ worklist_pop(struct worklist *worklist) PyObject *op = (PyObject *)worklist->head; if (op != NULL) { worklist->head = op->ob_tid; - op->ob_tid = 0; + _Py_atomic_store_uintptr_relaxed(&op->ob_tid, 0); } return op; } @@ -189,6 +189,7 @@ merge_refcount(PyObject *op, Py_ssize_t extra) static void gc_restore_tid(PyObject *op) { + assert(_PyInterpreterState_GET()->stoptheworld.world_stopped); mi_segment_t *segment = _mi_ptr_segment(op); if (_Py_REF_IS_MERGED(op->ob_ref_shared)) { op->ob_tid = 0; @@ -676,7 +677,6 @@ call_weakref_callbacks(struct collection_state *state) Py_DECREF(temp); } - gc_restore_tid(op); Py_DECREF(op); // drop worklist reference } } @@ -986,7 +986,6 @@ cleanup_worklist(struct worklist *worklist) { PyObject *op; while ((op = worklist_pop(worklist)) != NULL) { - gc_restore_tid(op); gc_clear_unreachable(op); Py_DECREF(op); } diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index cda57d78067bb3..951635e7c6533d 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -37,7 +37,6 @@ race_top:_PyImport_ReleaseLock race_top:_PyParkingLot_Park race_top:_PyType_HasFeature race_top:assign_version_tag -race_top:gc_restore_tid race_top:insertdict race_top:lookup_tp_dict race_top:mi_heap_visit_pages @@ -64,7 +63,6 @@ race_top:list_get_item_ref race_top:make_pending_calls race_top:set_add_entry race_top:should_intern_string -race_top:worklist_pop race_top:_PyEval_IsGILEnabled race_top:llist_insert_tail race_top:_Py_slot_tp_getattr_hook @@ -86,7 +84,6 @@ race_top:sock_close race_top:tstate_delete_common race_top:tstate_is_freed race_top:type_modified_unlocked -race_top:update_refs race_top:write_thread_id race_top:PyThreadState_Clear From 90ec19fd33e2452902b9788d4821f1fbf6542304 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Sat, 1 Jun 2024 10:04:38 -0400 Subject: [PATCH 07/72] gh-117657: Fix TSAN race in QSBR assertion (#119887) Due to a limitation in TSAN, all reads from `PyThreadState.state` must be atomic to avoid reported races. --- Python/qsbr.c | 3 ++- Tools/tsan/suppressions_free_threading.txt | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Python/qsbr.c b/Python/qsbr.c index 9cbce9044e2941..a7321154a62ffc 100644 --- a/Python/qsbr.c +++ b/Python/qsbr.c @@ -160,7 +160,8 @@ qsbr_poll_scan(struct _qsbr_shared *shared) bool _Py_qsbr_poll(struct _qsbr_thread_state *qsbr, uint64_t goal) { - assert(_PyThreadState_GET()->state == _Py_THREAD_ATTACHED); + assert(_Py_atomic_load_int_relaxed(&_PyThreadState_GET()->state) == _Py_THREAD_ATTACHED); + if (_Py_qbsr_goal_reached(qsbr, goal)) { return true; } diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index 951635e7c6533d..9a53990f8b2ff8 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -75,8 +75,6 @@ race_top:_PyFrame_GetCode race_top:_PyFrame_Initialize race_top:PyInterpreterState_ThreadHead race_top:_PyObject_TryGetInstanceAttribute -race_top:_Py_qsbr_unregister -race_top:_Py_qsbr_poll race_top:PyThreadState_Next race_top:Py_TYPE race_top:PyUnstable_InterpreterFrame_GetLine From ce2ea7d629788fd051cbec099b5947ecbe50e819 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 1 Jun 2024 10:49:14 -0500 Subject: [PATCH 08/72] Minor speed/accuracy improvement for kde() (gh-119910) --- Lib/statistics.py | 17 +++++++++-------- Lib/test/test_statistics.py | 2 +- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/Lib/statistics.py b/Lib/statistics.py index 450edfaabe8def..c36145fe7f2a79 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -953,12 +953,14 @@ def kde(data, h, kernel='normal', *, cumulative=False): case 'quartic' | 'biweight': K = lambda t: 15/16 * (1.0 - t * t) ** 2 - W = lambda t: 3/16 * t**5 - 5/8 * t**3 + 15/16 * t + 1/2 + W = lambda t: sumprod((3/16, -5/8, 15/16, 1/2), + (t**5, t**3, t, 1.0)) support = 1.0 case 'triweight': K = lambda t: 35/32 * (1.0 - t * t) ** 3 - W = lambda t: 35/32 * (-1/7*t**7 + 3/5*t**5 - t**3 + t) + 1/2 + W = lambda t: sumprod((-5/32, 21/32, -35/32, 35/32, 1/2), + (t**7, t**5, t**3, t, 1.0)) support = 1.0 case 'cosine': @@ -974,12 +976,10 @@ def kde(data, h, kernel='normal', *, cumulative=False): if support is None: def pdf(x): - n = len(data) - return sum(K((x - x_i) / h) for x_i in data) / (n * h) + return sum(K((x - x_i) / h) for x_i in data) / (len(data) * h) def cdf(x): - n = len(data) - return sum(W((x - x_i) / h) for x_i in data) / n + return sum(W((x - x_i) / h) for x_i in data) / len(data) else: @@ -1732,7 +1732,7 @@ def _quartic_invcdf_estimate(p): _quartic_invcdf = _newton_raphson( f_inv_estimate = _quartic_invcdf_estimate, - f = lambda t: 3/16 * t**5 - 5/8 * t**3 + 15/16 * t + 1/2, + f = lambda t: sumprod((3/16, -5/8, 15/16, 1/2), (t**5, t**3, t, 1.0)), f_prime = lambda t: 15/16 * (1.0 - t * t) ** 2) def _triweight_invcdf_estimate(p): @@ -1742,7 +1742,8 @@ def _triweight_invcdf_estimate(p): _triweight_invcdf = _newton_raphson( f_inv_estimate = _triweight_invcdf_estimate, - f = lambda t: 35/32 * (-1/7*t**7 + 3/5*t**5 - t**3 + t) + 1/2, + f = lambda t: sumprod((-5/32, 21/32, -35/32, 35/32, 1/2), + (t**7, t**5, t**3, t, 1.0)), f_prime = lambda t: 35/32 * (1.0 - t * t) ** 3) _kernel_invcdfs = { diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py index 6f68edd447c953..cded8aba6e8cd7 100644 --- a/Lib/test/test_statistics.py +++ b/Lib/test/test_statistics.py @@ -2444,7 +2444,7 @@ def test_kde_kernel_invcdfs(self): with self.subTest(kernel=kernel): cdf = kde([0.0], h=1.0, kernel=kernel, cumulative=True) for x in xarr: - self.assertAlmostEqual(invcdf(cdf(x)), x, places=5) + self.assertAlmostEqual(invcdf(cdf(x)), x, places=6) @support.requires_resource('cpu') def test_kde_random(self): From cf3bba3f0671d2c9fee099e3ab0f78b98b176131 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Sat, 1 Jun 2024 19:05:19 +0300 Subject: [PATCH 09/72] gh-113892: Add a extra check to `ProactorEventLoop.sock_connect` to ensure that the given socket is in non-blocking mode (#119519) --- Lib/asyncio/proactor_events.py | 2 ++ Lib/test/test_asyncio/test_proactor_events.py | 9 +++++++-- .../2024-05-24-21-54-55.gh-issue-113892.JKDFqq.rst | 3 +++ 3 files changed, 12 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-05-24-21-54-55.gh-issue-113892.JKDFqq.rst diff --git a/Lib/asyncio/proactor_events.py b/Lib/asyncio/proactor_events.py index 397a8cda757895..7eb55bd63ddb73 100644 --- a/Lib/asyncio/proactor_events.py +++ b/Lib/asyncio/proactor_events.py @@ -721,6 +721,8 @@ async def sock_sendto(self, sock, data, address): return await self._proactor.sendto(sock, data, 0, address) async def sock_connect(self, sock, address): + if self._debug and sock.gettimeout() != 0: + raise ValueError("the socket must be non-blocking") return await self._proactor.connect(sock, address) async def sock_accept(self, sock): diff --git a/Lib/test/test_asyncio/test_proactor_events.py b/Lib/test/test_asyncio/test_proactor_events.py index fcaa2f6ade2b76..4b3d551dd7b3a2 100644 --- a/Lib/test/test_asyncio/test_proactor_events.py +++ b/Lib/test/test_asyncio/test_proactor_events.py @@ -1018,9 +1018,9 @@ def setUp(self): self.addCleanup(self.file.close) super().setUp() - def make_socket(self, cleanup=True): + def make_socket(self, cleanup=True, blocking=False): sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - sock.setblocking(False) + sock.setblocking(blocking) sock.setsockopt(socket.SOL_SOCKET, socket.SO_SNDBUF, 1024) sock.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, 1024) if cleanup: @@ -1082,6 +1082,11 @@ def test_sock_sendfile_not_regular_file(self): 0, None)) self.assertEqual(self.file.tell(), 0) + def test_blocking_socket(self): + self.loop.set_debug(True) + sock = self.make_socket(blocking=True) + with self.assertRaisesRegex(ValueError, "must be non-blocking"): + self.run_loop(self.loop.sock_sendfile(sock, self.file)) if __name__ == '__main__': unittest.main() diff --git a/Misc/NEWS.d/next/Library/2024-05-24-21-54-55.gh-issue-113892.JKDFqq.rst b/Misc/NEWS.d/next/Library/2024-05-24-21-54-55.gh-issue-113892.JKDFqq.rst new file mode 100644 index 00000000000000..639d5abe878344 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-24-21-54-55.gh-issue-113892.JKDFqq.rst @@ -0,0 +1,3 @@ +Now, the method ``sock_connect`` of :class:`asyncio.ProactorEventLoop` +raises a :exc:`ValueError` if given socket is not in +non-blocking mode, as well as in other loop implementations. From 7dc745d1f5d9558047a52cad5e01df7567533269 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Sat, 1 Jun 2024 12:15:58 -0400 Subject: [PATCH 10/72] gh-117657: Add TSAN suppression for `set_discard_entry` (#119908) Seen in CI occasionally when running `test_weakref`. --- Tools/tsan/suppressions_free_threading.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index 9a53990f8b2ff8..f855e9ce2698a5 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -44,6 +44,8 @@ race_top:PyMember_GetOne race_top:PyMember_SetOne race_top:new_reference race_top:set_contains_key +# https://gist.github.com/colesbury/d13d033f413b4ad07929d044bed86c35 +race_top:set_discard_entry race_top:set_inheritable race_top:start_the_world race_top:tstate_set_detached From 63111bfcf021db29ce6ef9ffa4117ffb7a2cb868 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 1 Jun 2024 11:30:24 -0500 Subject: [PATCH 11/72] Add unique() recipe to itertools docs (gh-119911) --- Doc/library/itertools.rst | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst index 121bfd3de343c4..3dc3f60923a0ba 100644 --- a/Doc/library/itertools.rst +++ b/Doc/library/itertools.rst @@ -857,7 +857,7 @@ and :term:`generators ` which incur interpreter overhead. return len(take(2, groupby(iterable, key))) <= 1 def unique_justseen(iterable, key=None): - "List unique elements, preserving order. Remember only the element just seen." + "Yield unique elements, preserving order. Remember only the element just seen." # unique_justseen('AAAABBBCCDAABBB') → A B C D A B # unique_justseen('ABBcCAD', str.casefold) → A B c A D if key is None: @@ -865,7 +865,7 @@ and :term:`generators ` which incur interpreter overhead. return map(next, map(operator.itemgetter(1), groupby(iterable, key))) def unique_everseen(iterable, key=None): - "List unique elements, preserving order. Remember all elements ever seen." + "Yield unique elements, preserving order. Remember all elements ever seen." # unique_everseen('AAAABBBCCDAABBB') → A B C D # unique_everseen('ABBcCAD', str.casefold) → A B c D seen = set() @@ -880,6 +880,11 @@ and :term:`generators ` which incur interpreter overhead. seen.add(k) yield element + def unique(iterable, key=None, reverse=False): + "Yield unique elements in sorted order. Supports unhashable inputs." + # unique([[1, 2], [3, 4], [1, 2]]) → [1, 2] [3, 4] + return unique_justseen(sorted(iterable, key=key, reverse=reverse), key=key) + def sliding_window(iterable, n): "Collect data into overlapping fixed-length chunks or blocks." # sliding_window('ABCDEFG', 4) → ABCD BCDE CDEF DEFG @@ -1605,6 +1610,13 @@ The following recipes have a more mathematical flavor: >>> ''.join(input_iterator) 'AAABBBCCDAABBB' + >>> list(unique([[1, 2], [3, 4], [1, 2]])) + [[1, 2], [3, 4]] + >>> list(unique('ABBcCAD', str.casefold)) + ['A', 'B', 'c', 'D'] + >>> list(unique('ABBcCAD', str.casefold, reverse=True)) + ['D', 'c', 'B', 'A'] + >>> d = dict(a=1, b=2, c=3) >>> it = iter_except(d.popitem, KeyError) >>> d['d'] = 4 From 53b1981fb0cda6c656069e992f172fc6aad7c99c Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sat, 1 Jun 2024 19:49:12 +0100 Subject: [PATCH 12/72] GH-89727: Fix `shutil.rmtree()` recursion error on deep trees (#119808) Implement `shutil._rmtree_safe_fd()` using a list as a stack to avoid emitting recursion errors on deeply nested trees. `shutil._rmtree_unsafe()` was fixed in a150679f90. --- Lib/shutil.py | 162 +++++++----------- Lib/test/test_shutil.py | 1 - ...4-05-30-21-37-05.gh-issue-89727.D6S9ig.rst | 2 + 3 files changed, 68 insertions(+), 97 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-05-30-21-37-05.gh-issue-89727.D6S9ig.rst diff --git a/Lib/shutil.py b/Lib/shutil.py index 03a9d756030430..b0d49e98cfe5f9 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -635,81 +635,76 @@ def onerror(err): onexc(os.rmdir, path, err) # Version using fd-based APIs to protect against races -def _rmtree_safe_fd(topfd, path, onexc): +def _rmtree_safe_fd(stack, onexc): + # Each stack item has four elements: + # * func: The first operation to perform: os.lstat, os.close or os.rmdir. + # Walking a directory starts with an os.lstat() to detect symlinks; in + # this case, func is updated before subsequent operations and passed to + # onexc() if an error occurs. + # * dirfd: Open file descriptor, or None if we're processing the top-level + # directory given to rmtree() and the user didn't supply dir_fd. + # * path: Path of file to operate upon. This is passed to onexc() if an + # error occurs. + # * orig_entry: os.DirEntry, or None if we're processing the top-level + # directory given to rmtree(). We used the cached stat() of the entry to + # save a call to os.lstat() when walking subdirectories. + func, dirfd, path, orig_entry = stack.pop() + name = path if orig_entry is None else orig_entry.name try: + if func is os.close: + os.close(dirfd) + return + if func is os.rmdir: + os.rmdir(name, dir_fd=dirfd) + return + + # Note: To guard against symlink races, we use the standard + # lstat()/open()/fstat() trick. + assert func is os.lstat + if orig_entry is None: + orig_st = os.lstat(name, dir_fd=dirfd) + else: + orig_st = orig_entry.stat(follow_symlinks=False) + + func = os.open # For error reporting. + topfd = os.open(name, os.O_RDONLY | os.O_NONBLOCK, dir_fd=dirfd) + + func = os.path.islink # For error reporting. + try: + if not os.path.samestat(orig_st, os.fstat(topfd)): + # Symlinks to directories are forbidden, see GH-46010. + raise OSError("Cannot call rmtree on a symbolic link") + stack.append((os.rmdir, dirfd, path, orig_entry)) + finally: + stack.append((os.close, topfd, path, orig_entry)) + + func = os.scandir # For error reporting. with os.scandir(topfd) as scandir_it: entries = list(scandir_it) - except FileNotFoundError: - return - except OSError as err: - err.filename = path - onexc(os.scandir, path, err) - return - for entry in entries: - fullname = os.path.join(path, entry.name) - try: - is_dir = entry.is_dir(follow_symlinks=False) - except FileNotFoundError: - continue - except OSError: - is_dir = False - else: - if is_dir: - try: - orig_st = entry.stat(follow_symlinks=False) - is_dir = stat.S_ISDIR(orig_st.st_mode) - except FileNotFoundError: - continue - except OSError as err: - onexc(os.lstat, fullname, err) - continue - if is_dir: + for entry in entries: + fullname = os.path.join(path, entry.name) try: - dirfd = os.open(entry.name, os.O_RDONLY | os.O_NONBLOCK, dir_fd=topfd) - dirfd_closed = False + if entry.is_dir(follow_symlinks=False): + # Traverse into sub-directory. + stack.append((os.lstat, topfd, fullname, entry)) + continue except FileNotFoundError: continue - except OSError as err: - onexc(os.open, fullname, err) - else: - try: - if os.path.samestat(orig_st, os.fstat(dirfd)): - _rmtree_safe_fd(dirfd, fullname, onexc) - try: - os.close(dirfd) - except OSError as err: - # close() should not be retried after an error. - dirfd_closed = True - onexc(os.close, fullname, err) - dirfd_closed = True - try: - os.rmdir(entry.name, dir_fd=topfd) - except FileNotFoundError: - continue - except OSError as err: - onexc(os.rmdir, fullname, err) - else: - try: - # This can only happen if someone replaces - # a directory with a symlink after the call to - # os.scandir or stat.S_ISDIR above. - raise OSError("Cannot call rmtree on a symbolic " - "link") - except OSError as err: - onexc(os.path.islink, fullname, err) - finally: - if not dirfd_closed: - try: - os.close(dirfd) - except OSError as err: - onexc(os.close, fullname, err) - else: + except OSError: + pass try: os.unlink(entry.name, dir_fd=topfd) except FileNotFoundError: continue except OSError as err: onexc(os.unlink, fullname, err) + except FileNotFoundError as err: + if orig_entry is None or func is os.close: + err.filename = path + onexc(func, path, err) + except OSError as err: + err.filename = path + onexc(func, path, err) _use_fd_functions = ({os.open, os.stat, os.unlink, os.rmdir} <= os.supports_dir_fd and @@ -762,41 +757,16 @@ def onexc(*args): # While the unsafe rmtree works fine on bytes, the fd based does not. if isinstance(path, bytes): path = os.fsdecode(path) - # Note: To guard against symlink races, we use the standard - # lstat()/open()/fstat() trick. - try: - orig_st = os.lstat(path, dir_fd=dir_fd) - except OSError as err: - onexc(os.lstat, path, err) - return + stack = [(os.lstat, dir_fd, path, None)] try: - fd = os.open(path, os.O_RDONLY | os.O_NONBLOCK, dir_fd=dir_fd) - fd_closed = False - except OSError as err: - onexc(os.open, path, err) - return - try: - if os.path.samestat(orig_st, os.fstat(fd)): - _rmtree_safe_fd(fd, path, onexc) - try: - os.close(fd) - except OSError as err: - # close() should not be retried after an error. - fd_closed = True - onexc(os.close, path, err) - fd_closed = True - try: - os.rmdir(path, dir_fd=dir_fd) - except OSError as err: - onexc(os.rmdir, path, err) - else: - try: - # symlinks to directories are forbidden, see bug #1669 - raise OSError("Cannot call rmtree on a symbolic link") - except OSError as err: - onexc(os.path.islink, path, err) + while stack: + _rmtree_safe_fd(stack, onexc) finally: - if not fd_closed: + # Close any file descriptors still on the stack. + while stack: + func, fd, path, entry = stack.pop() + if func is not os.close: + continue try: os.close(fd) except OSError as err: diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index 01f139073dcd97..bccb81e0737c57 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -741,7 +741,6 @@ def _onexc(fn, path, exc): shutil.rmtree(TESTFN) raise - @unittest.skipIf(shutil._use_fd_functions, "fd-based functions remain unfixed (GH-89727)") def test_rmtree_above_recursion_limit(self): recursion_limit = 40 # directory_depth > recursion_limit diff --git a/Misc/NEWS.d/next/Library/2024-05-30-21-37-05.gh-issue-89727.D6S9ig.rst b/Misc/NEWS.d/next/Library/2024-05-30-21-37-05.gh-issue-89727.D6S9ig.rst new file mode 100644 index 00000000000000..854c56609acb8c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-30-21-37-05.gh-issue-89727.D6S9ig.rst @@ -0,0 +1,2 @@ +Fix issue with :func:`shutil.rmtree` where a :exc:`RecursionError` is raised +on deep directory trees. From c618f7d80e78f83cc24b6bdead33ca38cbd4d27f Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Sat, 1 Jun 2024 23:20:00 +0200 Subject: [PATCH 13/72] gh-119016: Remove outdated sentences from the "classes" tutorial (#119130) Co-authored-by: Alex Waygood --- Doc/tutorial/classes.rst | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Doc/tutorial/classes.rst b/Doc/tutorial/classes.rst index 7ab528acb370f2..1b64741c349ee9 100644 --- a/Doc/tutorial/classes.rst +++ b/Doc/tutorial/classes.rst @@ -338,11 +338,7 @@ code will print the value ``16``, without leaving a trace:: del x.counter The other kind of instance attribute reference is a *method*. A method is a -function that "belongs to" an object. (In Python, the term method is not unique -to class instances: other object types can have methods as well. For example, -list objects have methods called append, insert, remove, sort, and so on. -However, in the following discussion, we'll use the term method exclusively to -mean methods of class instance objects, unless explicitly stated otherwise.) +function that "belongs to" an object. .. index:: pair: object; method From e378dc15b52985724b6ae4782c4ef0afc3393ca9 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 1 Jun 2024 22:07:46 -0500 Subject: [PATCH 14/72] Refactor (mostly rearrange) the statistics module (gh-119930) --- Lib/statistics.py | 1851 ++++++++++++++++++----------------- Lib/test/test_statistics.py | 5 +- 2 files changed, 952 insertions(+), 904 deletions(-) diff --git a/Lib/statistics.py b/Lib/statistics.py index c36145fe7f2a79..c64c6fae4ab010 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -147,445 +147,148 @@ _SQRT2 = sqrt(2.0) _random = random -# === Exceptions === +## Exceptions ############################################################## class StatisticsError(ValueError): pass -# === Private utilities === +## Measures of central tendency (averages) ################################# -def _sum(data): - """_sum(data) -> (type, sum, count) - - Return a high-precision sum of the given numeric data as a fraction, - together with the type to be converted to and the count of items. - - Examples - -------- - - >>> _sum([3, 2.25, 4.5, -0.5, 0.25]) - (, Fraction(19, 2), 5) - - Some sources of round-off error will be avoided: - - # Built-in sum returns zero. - >>> _sum([1e50, 1, -1e50] * 1000) - (, Fraction(1000, 1), 3000) +def mean(data): + """Return the sample arithmetic mean of data. - Fractions and Decimals are also supported: + >>> mean([1, 2, 3, 4, 4]) + 2.8 >>> from fractions import Fraction as F - >>> _sum([F(2, 3), F(7, 5), F(1, 4), F(5, 6)]) - (, Fraction(63, 20), 4) + >>> mean([F(3, 7), F(1, 21), F(5, 3), F(1, 3)]) + Fraction(13, 21) >>> from decimal import Decimal as D - >>> data = [D("0.1375"), D("0.2108"), D("0.3061"), D("0.0419")] - >>> _sum(data) - (, Fraction(6963, 10000), 4) + >>> mean([D("0.5"), D("0.75"), D("0.625"), D("0.375")]) + Decimal('0.5625') + + If ``data`` is empty, StatisticsError will be raised. - Mixed types are currently treated as an error, except that int is - allowed. """ - count = 0 - types = set() - types_add = types.add - partials = {} - partials_get = partials.get - for typ, values in groupby(data, type): - types_add(typ) - for n, d in map(_exact_ratio, values): - count += 1 - partials[d] = partials_get(d, 0) + n - if None in partials: - # The sum will be a NAN or INF. We can ignore all the finite - # partials, and just look at this special one. - total = partials[None] - assert not _isfinite(total) - else: - # Sum all the partial sums using builtin sum. - total = sum(Fraction(n, d) for d, n in partials.items()) - T = reduce(_coerce, types, int) # or raise TypeError - return (T, total, count) + T, total, n = _sum(data) + if n < 1: + raise StatisticsError('mean requires at least one data point') + return _convert(total / n, T) -def _ss(data, c=None): - """Return the exact mean and sum of square deviations of sequence data. +def fmean(data, weights=None): + """Convert data to floats and compute the arithmetic mean. - Calculations are done in a single pass, allowing the input to be an iterator. + This runs faster than the mean() function and it always returns a float. + If the input dataset is empty, it raises a StatisticsError. - If given *c* is used the mean; otherwise, it is calculated from the data. - Use the *c* argument with care, as it can lead to garbage results. + >>> fmean([3.5, 4.0, 5.25]) + 4.25 """ - if c is not None: - T, ssd, count = _sum((d := x - c) * d for x in data) - return (T, ssd, c, count) - count = 0 - types = set() - types_add = types.add - sx_partials = defaultdict(int) - sxx_partials = defaultdict(int) - for typ, values in groupby(data, type): - types_add(typ) - for n, d in map(_exact_ratio, values): - count += 1 - sx_partials[d] += n - sxx_partials[d] += n * n - if not count: - ssd = c = Fraction(0) - elif None in sx_partials: - # The sum will be a NAN or INF. We can ignore all the finite - # partials, and just look at this special one. - ssd = c = sx_partials[None] - assert not _isfinite(ssd) - else: - sx = sum(Fraction(n, d) for d, n in sx_partials.items()) - sxx = sum(Fraction(n, d*d) for d, n in sxx_partials.items()) - # This formula has poor numeric properties for floats, - # but with fractions it is exact. - ssd = (count * sxx - sx * sx) / count - c = sx / count - T = reduce(_coerce, types, int) # or raise TypeError - return (T, ssd, c, count) + if weights is None: + try: + n = len(data) + except TypeError: + # Handle iterators that do not define __len__(). + counter = count() + total = fsum(map(itemgetter(0), zip(data, counter))) + n = next(counter) + else: + total = fsum(data) -def _isfinite(x): - try: - return x.is_finite() # Likely a Decimal. - except AttributeError: - return math.isfinite(x) # Coerces to float first. + if not n: + raise StatisticsError('fmean requires at least one data point') + return total / n -def _coerce(T, S): - """Coerce types T and S to a common type, or raise TypeError. + if not isinstance(weights, (list, tuple)): + weights = list(weights) - Coercion rules are currently an implementation detail. See the CoerceTest - test class in test_statistics for details. - """ - # See http://bugs.python.org/issue24068. - assert T is not bool, "initial type T is bool" - # If the types are the same, no need to coerce anything. Put this - # first, so that the usual case (no coercion needed) happens as soon - # as possible. - if T is S: return T - # Mixed int & other coerce to the other type. - if S is int or S is bool: return T - if T is int: return S - # If one is a (strict) subclass of the other, coerce to the subclass. - if issubclass(S, T): return S - if issubclass(T, S): return T - # Ints coerce to the other type. - if issubclass(T, int): return S - if issubclass(S, int): return T - # Mixed fraction & float coerces to float (or float subclass). - if issubclass(T, Fraction) and issubclass(S, float): - return S - if issubclass(T, float) and issubclass(S, Fraction): - return T - # Any other combination is disallowed. - msg = "don't know how to coerce %s and %s" - raise TypeError(msg % (T.__name__, S.__name__)) + try: + num = sumprod(data, weights) + except ValueError: + raise StatisticsError('data and weights must be the same length') + den = fsum(weights) -def _exact_ratio(x): - """Return Real number x to exact (numerator, denominator) pair. + if not den: + raise StatisticsError('sum of weights must be non-zero') - >>> _exact_ratio(0.25) - (1, 4) + return num / den - x is expected to be an int, Fraction, Decimal or float. - """ - # XXX We should revisit whether using fractions to accumulate exact - # ratios is the right way to go. +def geometric_mean(data): + """Convert data to floats and compute the geometric mean. - # The integer ratios for binary floats can have numerators or - # denominators with over 300 decimal digits. The problem is more - # acute with decimal floats where the default decimal context - # supports a huge range of exponents from Emin=-999999 to - # Emax=999999. When expanded with as_integer_ratio(), numbers like - # Decimal('3.14E+5000') and Decimal('3.14E-5000') have large - # numerators or denominators that will slow computation. + Raises a StatisticsError if the input dataset is empty + or if it contains a negative value. - # When the integer ratios are accumulated as fractions, the size - # grows to cover the full range from the smallest magnitude to the - # largest. For example, Fraction(3.14E+300) + Fraction(3.14E-300), - # has a 616 digit numerator. Likewise, - # Fraction(Decimal('3.14E+5000')) + Fraction(Decimal('3.14E-5000')) - # has 10,003 digit numerator. + Returns zero if the product of inputs is zero. - # This doesn't seem to have been problem in practice, but it is a - # potential pitfall. + No special efforts are made to achieve exact results. + (However, this may change in the future.) - try: - return x.as_integer_ratio() - except AttributeError: - pass - except (OverflowError, ValueError): - # float NAN or INF. - assert not _isfinite(x) - return (x, None) - try: - # x may be an Integral ABC. - return (x.numerator, x.denominator) - except AttributeError: - msg = f"can't convert type '{type(x).__name__}' to numerator/denominator" - raise TypeError(msg) + >>> round(geometric_mean([54, 24, 36]), 9) + 36.0 + """ + n = 0 + found_zero = False -def _convert(value, T): - """Convert value to given numeric type T.""" - if type(value) is T: - # This covers the cases where T is Fraction, or where value is - # a NAN or INF (Decimal or float). - return value - if issubclass(T, int) and value.denominator != 1: - T = float - try: - # FIXME: what do we do if this overflows? - return T(value) - except TypeError: - if issubclass(T, Decimal): - return T(value.numerator) / T(value.denominator) - else: - raise + def count_positive(iterable): + nonlocal n, found_zero + for n, x in enumerate(iterable, start=1): + if x > 0.0 or math.isnan(x): + yield x + elif x == 0.0: + found_zero = True + else: + raise StatisticsError('No negative inputs allowed', x) + total = fsum(map(log, count_positive(data))) + if not n: + raise StatisticsError('Must have a non-empty dataset') + if math.isnan(total): + return math.nan + if found_zero: + return math.nan if total == math.inf else 0.0 -def _fail_neg(values, errmsg='negative value'): - """Iterate over values, failing if any are less than zero.""" - for x in values: - if x < 0: - raise StatisticsError(errmsg) - yield x + return exp(total / n) -def _rank(data, /, *, key=None, reverse=False, ties='average', start=1) -> list[float]: - """Rank order a dataset. The lowest value has rank 1. +def harmonic_mean(data, weights=None): + """Return the harmonic mean of data. - Ties are averaged so that equal values receive the same rank: + The harmonic mean is the reciprocal of the arithmetic mean of the + reciprocals of the data. It can be used for averaging ratios or + rates, for example speeds. - >>> data = [31, 56, 31, 25, 75, 18] - >>> _rank(data) - [3.5, 5.0, 3.5, 2.0, 6.0, 1.0] + Suppose a car travels 40 km/hr for 5 km and then speeds-up to + 60 km/hr for another 5 km. What is the average speed? - The operation is idempotent: + >>> harmonic_mean([40, 60]) + 48.0 - >>> _rank([3.5, 5.0, 3.5, 2.0, 6.0, 1.0]) - [3.5, 5.0, 3.5, 2.0, 6.0, 1.0] + Suppose a car travels 40 km/hr for 5 km, and when traffic clears, + speeds-up to 60 km/hr for the remaining 30 km of the journey. What + is the average speed? - It is possible to rank the data in reverse order so that the - highest value has rank 1. Also, a key-function can extract - the field to be ranked: - - >>> goals = [('eagles', 45), ('bears', 48), ('lions', 44)] - >>> _rank(goals, key=itemgetter(1), reverse=True) - [2.0, 1.0, 3.0] - - Ranks are conventionally numbered starting from one; however, - setting *start* to zero allows the ranks to be used as array indices: - - >>> prize = ['Gold', 'Silver', 'Bronze', 'Certificate'] - >>> scores = [8.1, 7.3, 9.4, 8.3] - >>> [prize[int(i)] for i in _rank(scores, start=0, reverse=True)] - ['Bronze', 'Certificate', 'Gold', 'Silver'] - - """ - # If this function becomes public at some point, more thought - # needs to be given to the signature. A list of ints is - # plausible when ties is "min" or "max". When ties is "average", - # either list[float] or list[Fraction] is plausible. - - # Default handling of ties matches scipy.stats.mstats.spearmanr. - if ties != 'average': - raise ValueError(f'Unknown tie resolution method: {ties!r}') - if key is not None: - data = map(key, data) - val_pos = sorted(zip(data, count()), reverse=reverse) - i = start - 1 - result = [0] * len(val_pos) - for _, g in groupby(val_pos, key=itemgetter(0)): - group = list(g) - size = len(group) - rank = i + (size + 1) / 2 - for value, orig_pos in group: - result[orig_pos] = rank - i += size - return result - - -def _integer_sqrt_of_frac_rto(n: int, m: int) -> int: - """Square root of n/m, rounded to the nearest integer using round-to-odd.""" - # Reference: https://www.lri.fr/~melquion/doc/05-imacs17_1-expose.pdf - a = math.isqrt(n // m) - return a | (a*a*m != n) - - -# For 53 bit precision floats, the bit width used in -# _float_sqrt_of_frac() is 109. -_sqrt_bit_width: int = 2 * sys.float_info.mant_dig + 3 - - -def _float_sqrt_of_frac(n: int, m: int) -> float: - """Square root of n/m as a float, correctly rounded.""" - # See principle and proof sketch at: https://bugs.python.org/msg407078 - q = (n.bit_length() - m.bit_length() - _sqrt_bit_width) // 2 - if q >= 0: - numerator = _integer_sqrt_of_frac_rto(n, m << 2 * q) << q - denominator = 1 - else: - numerator = _integer_sqrt_of_frac_rto(n << -2 * q, m) - denominator = 1 << -q - return numerator / denominator # Convert to float - - -def _decimal_sqrt_of_frac(n: int, m: int) -> Decimal: - """Square root of n/m as a Decimal, correctly rounded.""" - # Premise: For decimal, computing (n/m).sqrt() can be off - # by 1 ulp from the correctly rounded result. - # Method: Check the result, moving up or down a step if needed. - if n <= 0: - if not n: - return Decimal('0.0') - n, m = -n, -m - - root = (Decimal(n) / Decimal(m)).sqrt() - nr, dr = root.as_integer_ratio() - - plus = root.next_plus() - np, dp = plus.as_integer_ratio() - # test: n / m > ((root + plus) / 2) ** 2 - if 4 * n * (dr*dp)**2 > m * (dr*np + dp*nr)**2: - return plus - - minus = root.next_minus() - nm, dm = minus.as_integer_ratio() - # test: n / m < ((root + minus) / 2) ** 2 - if 4 * n * (dr*dm)**2 < m * (dr*nm + dm*nr)**2: - return minus - - return root - - -# === Measures of central tendency (averages) === - -def mean(data): - """Return the sample arithmetic mean of data. - - >>> mean([1, 2, 3, 4, 4]) - 2.8 - - >>> from fractions import Fraction as F - >>> mean([F(3, 7), F(1, 21), F(5, 3), F(1, 3)]) - Fraction(13, 21) - - >>> from decimal import Decimal as D - >>> mean([D("0.5"), D("0.75"), D("0.625"), D("0.375")]) - Decimal('0.5625') - - If ``data`` is empty, StatisticsError will be raised. - """ - T, total, n = _sum(data) - if n < 1: - raise StatisticsError('mean requires at least one data point') - return _convert(total / n, T) - - -def fmean(data, weights=None): - """Convert data to floats and compute the arithmetic mean. - - This runs faster than the mean() function and it always returns a float. - If the input dataset is empty, it raises a StatisticsError. - - >>> fmean([3.5, 4.0, 5.25]) - 4.25 - """ - if weights is None: - try: - n = len(data) - except TypeError: - # Handle iterators that do not define __len__(). - counter = count() - total = fsum(map(itemgetter(0), zip(data, counter))) - n = next(counter) - else: - total = fsum(data) - if not n: - raise StatisticsError('fmean requires at least one data point') - return total / n - if not isinstance(weights, (list, tuple)): - weights = list(weights) - try: - num = sumprod(data, weights) - except ValueError: - raise StatisticsError('data and weights must be the same length') - den = fsum(weights) - if not den: - raise StatisticsError('sum of weights must be non-zero') - return num / den - - -def geometric_mean(data): - """Convert data to floats and compute the geometric mean. - - Raises a StatisticsError if the input dataset is empty - or if it contains a negative value. - - Returns zero if the product of inputs is zero. - - No special efforts are made to achieve exact results. - (However, this may change in the future.) - - >>> round(geometric_mean([54, 24, 36]), 9) - 36.0 - """ - n = 0 - found_zero = False - def count_positive(iterable): - nonlocal n, found_zero - for n, x in enumerate(iterable, start=1): - if x > 0.0 or math.isnan(x): - yield x - elif x == 0.0: - found_zero = True - else: - raise StatisticsError('No negative inputs allowed', x) - total = fsum(map(log, count_positive(data))) - if not n: - raise StatisticsError('Must have a non-empty dataset') - if math.isnan(total): - return math.nan - if found_zero: - return math.nan if total == math.inf else 0.0 - return exp(total / n) - - -def harmonic_mean(data, weights=None): - """Return the harmonic mean of data. - - The harmonic mean is the reciprocal of the arithmetic mean of the - reciprocals of the data. It can be used for averaging ratios or - rates, for example speeds. - - Suppose a car travels 40 km/hr for 5 km and then speeds-up to - 60 km/hr for another 5 km. What is the average speed? - - >>> harmonic_mean([40, 60]) - 48.0 - - Suppose a car travels 40 km/hr for 5 km, and when traffic clears, - speeds-up to 60 km/hr for the remaining 30 km of the journey. What - is the average speed? - - >>> harmonic_mean([40, 60], weights=[5, 30]) - 56.0 + >>> harmonic_mean([40, 60], weights=[5, 30]) + 56.0 If ``data`` is empty, or any element is less than zero, ``harmonic_mean`` will raise ``StatisticsError``. + """ if iter(data) is data: data = list(data) + errmsg = 'harmonic mean does not support negative values' + n = len(data) if n < 1: raise StatisticsError('harmonic_mean requires at least one data point') @@ -597,6 +300,7 @@ def harmonic_mean(data, weights=None): return x else: raise TypeError('unsupported type') + if weights is None: weights = repeat(1, n) sum_weights = n @@ -606,16 +310,19 @@ def harmonic_mean(data, weights=None): if len(weights) != n: raise StatisticsError('Number of weights does not match data size') _, sum_weights, _ = _sum(w for w in _fail_neg(weights, errmsg)) + try: data = _fail_neg(data, errmsg) T, total, count = _sum(w / x if w else 0 for w, x in zip(weights, data)) except ZeroDivisionError: return 0 + if total <= 0: raise StatisticsError('Weighted sum must be positive') + return _convert(sum_weights / total, T) -# FIXME: investigate ways to calculate medians without sorting? Quickselect? + def median(data): """Return the median (middle value) of numeric data. @@ -652,6 +359,9 @@ def median_low(data): 3 """ + # Potentially the sorting step could be replaced with a quickselect. + # However, it would require an excellent implementation to beat our + # highly optimized builtin sort. data = sorted(data) n = len(data) if n == 0: @@ -795,6 +505,7 @@ def multimode(data): ['b', 'd', 'f'] >>> multimode('') [] + """ counts = Counter(iter(data)) if not counts: @@ -803,337 +514,36 @@ def multimode(data): return [value for value, count in counts.items() if count == maxcount] -def kde(data, h, kernel='normal', *, cumulative=False): - """Kernel Density Estimation: Create a continuous probability density - function or cumulative distribution function from discrete samples. - - The basic idea is to smooth the data using a kernel function - to help draw inferences about a population from a sample. +## Measures of spread ###################################################### - The degree of smoothing is controlled by the scaling parameter h - which is called the bandwidth. Smaller values emphasize local - features while larger values give smoother results. +def variance(data, xbar=None): + """Return the sample variance of data. - The kernel determines the relative weights of the sample data - points. Generally, the choice of kernel shape does not matter - as much as the more influential bandwidth smoothing parameter. + data should be an iterable of Real-valued numbers, with at least two + values. The optional argument xbar, if given, should be the mean of + the data. If it is missing or None, the mean is automatically calculated. - Kernels that give some weight to every sample point: + Use this function when your data is a sample from a population. To + calculate the variance from the entire population, see ``pvariance``. - normal (gauss) - logistic - sigmoid + Examples: - Kernels that only give weight to sample points within - the bandwidth: + >>> data = [2.75, 1.75, 1.25, 0.25, 0.5, 1.25, 3.5] + >>> variance(data) + 1.3720238095238095 - rectangular (uniform) - triangular - parabolic (epanechnikov) - quartic (biweight) - triweight - cosine + If you have already calculated the mean of your data, you can pass it as + the optional second argument ``xbar`` to avoid recalculating it: - If *cumulative* is true, will return a cumulative distribution function. + >>> m = mean(data) + >>> variance(data, m) + 1.3720238095238095 - A StatisticsError will be raised if the data sequence is empty. + This function does not check that ``xbar`` is actually the mean of + ``data``. Giving arbitrary values for ``xbar`` may lead to invalid or + impossible results. - Example - ------- - - Given a sample of six data points, construct a continuous - function that estimates the underlying probability density: - - >>> sample = [-2.1, -1.3, -0.4, 1.9, 5.1, 6.2] - >>> f_hat = kde(sample, h=1.5) - - Compute the area under the curve: - - >>> area = sum(f_hat(x) for x in range(-20, 20)) - >>> round(area, 4) - 1.0 - - Plot the estimated probability density function at - evenly spaced points from -6 to 10: - - >>> for x in range(-6, 11): - ... density = f_hat(x) - ... plot = ' ' * int(density * 400) + 'x' - ... print(f'{x:2}: {density:.3f} {plot}') - ... - -6: 0.002 x - -5: 0.009 x - -4: 0.031 x - -3: 0.070 x - -2: 0.111 x - -1: 0.125 x - 0: 0.110 x - 1: 0.086 x - 2: 0.068 x - 3: 0.059 x - 4: 0.066 x - 5: 0.082 x - 6: 0.082 x - 7: 0.058 x - 8: 0.028 x - 9: 0.009 x - 10: 0.002 x - - Estimate P(4.5 < X <= 7.5), the probability that a new sample value - will be between 4.5 and 7.5: - - >>> cdf = kde(sample, h=1.5, cumulative=True) - >>> round(cdf(7.5) - cdf(4.5), 2) - 0.22 - - References - ---------- - - Kernel density estimation and its application: - https://www.itm-conferences.org/articles/itmconf/pdf/2018/08/itmconf_sam2018_00037.pdf - - Kernel functions in common use: - https://en.wikipedia.org/wiki/Kernel_(statistics)#kernel_functions_in_common_use - - Interactive graphical demonstration and exploration: - https://demonstrations.wolfram.com/KernelDensityEstimation/ - - Kernel estimation of cumulative distribution function of a random variable with bounded support - https://www.econstor.eu/bitstream/10419/207829/1/10.21307_stattrans-2016-037.pdf - - """ - - n = len(data) - if not n: - raise StatisticsError('Empty data sequence') - - if not isinstance(data[0], (int, float)): - raise TypeError('Data sequence must contain ints or floats') - - if h <= 0.0: - raise StatisticsError(f'Bandwidth h must be positive, not {h=!r}') - - match kernel: - - case 'normal' | 'gauss': - sqrt2pi = sqrt(2 * pi) - sqrt2 = sqrt(2) - K = lambda t: exp(-1/2 * t * t) / sqrt2pi - W = lambda t: 1/2 * (1.0 + erf(t / sqrt2)) - support = None - - case 'logistic': - # 1.0 / (exp(t) + 2.0 + exp(-t)) - K = lambda t: 1/2 / (1.0 + cosh(t)) - W = lambda t: 1.0 - 1.0 / (exp(t) + 1.0) - support = None - - case 'sigmoid': - # (2/pi) / (exp(t) + exp(-t)) - c1 = 1 / pi - c2 = 2 / pi - K = lambda t: c1 / cosh(t) - W = lambda t: c2 * atan(exp(t)) - support = None - - case 'rectangular' | 'uniform': - K = lambda t: 1/2 - W = lambda t: 1/2 * t + 1/2 - support = 1.0 - - case 'triangular': - K = lambda t: 1.0 - abs(t) - W = lambda t: t*t * (1/2 if t < 0.0 else -1/2) + t + 1/2 - support = 1.0 - - case 'parabolic' | 'epanechnikov': - K = lambda t: 3/4 * (1.0 - t * t) - W = lambda t: -1/4 * t**3 + 3/4 * t + 1/2 - support = 1.0 - - case 'quartic' | 'biweight': - K = lambda t: 15/16 * (1.0 - t * t) ** 2 - W = lambda t: sumprod((3/16, -5/8, 15/16, 1/2), - (t**5, t**3, t, 1.0)) - support = 1.0 - - case 'triweight': - K = lambda t: 35/32 * (1.0 - t * t) ** 3 - W = lambda t: sumprod((-5/32, 21/32, -35/32, 35/32, 1/2), - (t**7, t**5, t**3, t, 1.0)) - support = 1.0 - - case 'cosine': - c1 = pi / 4 - c2 = pi / 2 - K = lambda t: c1 * cos(c2 * t) - W = lambda t: 1/2 * sin(c2 * t) + 1/2 - support = 1.0 - - case _: - raise StatisticsError(f'Unknown kernel name: {kernel!r}') - - if support is None: - - def pdf(x): - return sum(K((x - x_i) / h) for x_i in data) / (len(data) * h) - - def cdf(x): - return sum(W((x - x_i) / h) for x_i in data) / len(data) - - else: - - sample = sorted(data) - bandwidth = h * support - - def pdf(x): - nonlocal n, sample - if len(data) != n: - sample = sorted(data) - n = len(data) - i = bisect_left(sample, x - bandwidth) - j = bisect_right(sample, x + bandwidth) - supported = sample[i : j] - return sum(K((x - x_i) / h) for x_i in supported) / (n * h) - - def cdf(x): - nonlocal n, sample - if len(data) != n: - sample = sorted(data) - n = len(data) - i = bisect_left(sample, x - bandwidth) - j = bisect_right(sample, x + bandwidth) - supported = sample[i : j] - return sum((W((x - x_i) / h) for x_i in supported), i) / n - - if cumulative: - cdf.__doc__ = f'CDF estimate with {h=!r} and {kernel=!r}' - return cdf - - else: - pdf.__doc__ = f'PDF estimate with {h=!r} and {kernel=!r}' - return pdf - - -# Notes on methods for computing quantiles -# ---------------------------------------- -# -# There is no one perfect way to compute quantiles. Here we offer -# two methods that serve common needs. Most other packages -# surveyed offered at least one or both of these two, making them -# "standard" in the sense of "widely-adopted and reproducible". -# They are also easy to explain, easy to compute manually, and have -# straight-forward interpretations that aren't surprising. - -# The default method is known as "R6", "PERCENTILE.EXC", or "expected -# value of rank order statistics". The alternative method is known as -# "R7", "PERCENTILE.INC", or "mode of rank order statistics". - -# For sample data where there is a positive probability for values -# beyond the range of the data, the R6 exclusive method is a -# reasonable choice. Consider a random sample of nine values from a -# population with a uniform distribution from 0.0 to 1.0. The -# distribution of the third ranked sample point is described by -# betavariate(alpha=3, beta=7) which has mode=0.250, median=0.286, and -# mean=0.300. Only the latter (which corresponds with R6) gives the -# desired cut point with 30% of the population falling below that -# value, making it comparable to a result from an inv_cdf() function. -# The R6 exclusive method is also idempotent. - -# For describing population data where the end points are known to -# be included in the data, the R7 inclusive method is a reasonable -# choice. Instead of the mean, it uses the mode of the beta -# distribution for the interior points. Per Hyndman & Fan, "One nice -# property is that the vertices of Q7(p) divide the range into n - 1 -# intervals, and exactly 100p% of the intervals lie to the left of -# Q7(p) and 100(1 - p)% of the intervals lie to the right of Q7(p)." - -# If needed, other methods could be added. However, for now, the -# position is that fewer options make for easier choices and that -# external packages can be used for anything more advanced. - -def quantiles(data, *, n=4, method='exclusive'): - """Divide *data* into *n* continuous intervals with equal probability. - - Returns a list of (n - 1) cut points separating the intervals. - - Set *n* to 4 for quartiles (the default). Set *n* to 10 for deciles. - Set *n* to 100 for percentiles which gives the 99 cuts points that - separate *data* in to 100 equal sized groups. - - The *data* can be any iterable containing sample. - The cut points are linearly interpolated between data points. - - If *method* is set to *inclusive*, *data* is treated as population - data. The minimum value is treated as the 0th percentile and the - maximum value is treated as the 100th percentile. - """ - if n < 1: - raise StatisticsError('n must be at least 1') - data = sorted(data) - ld = len(data) - if ld < 2: - if ld == 1: - return data * (n - 1) - raise StatisticsError('must have at least one data point') - - if method == 'inclusive': - m = ld - 1 - result = [] - for i in range(1, n): - j, delta = divmod(i * m, n) - interpolated = (data[j] * (n - delta) + data[j + 1] * delta) / n - result.append(interpolated) - return result - - if method == 'exclusive': - m = ld + 1 - result = [] - for i in range(1, n): - j = i * m // n # rescale i to m/n - j = 1 if j < 1 else ld-1 if j > ld-1 else j # clamp to 1 .. ld-1 - delta = i*m - j*n # exact integer math - interpolated = (data[j - 1] * (n - delta) + data[j] * delta) / n - result.append(interpolated) - return result - - raise ValueError(f'Unknown method: {method!r}') - - -# === Measures of spread === - -# See http://mathworld.wolfram.com/Variance.html -# http://mathworld.wolfram.com/SampleVariance.html - - -def variance(data, xbar=None): - """Return the sample variance of data. - - data should be an iterable of Real-valued numbers, with at least two - values. The optional argument xbar, if given, should be the mean of - the data. If it is missing or None, the mean is automatically calculated. - - Use this function when your data is a sample from a population. To - calculate the variance from the entire population, see ``pvariance``. - - Examples: - - >>> data = [2.75, 1.75, 1.25, 0.25, 0.5, 1.25, 3.5] - >>> variance(data) - 1.3720238095238095 - - If you have already calculated the mean of your data, you can pass it as - the optional second argument ``xbar`` to avoid recalculating it: - - >>> m = mean(data) - >>> variance(data, m) - 1.3720238095238095 - - This function does not check that ``xbar`` is actually the mean of - ``data``. Giving arbitrary values for ``xbar`` may lead to invalid or - impossible results. - - Decimals and Fractions are supported: + Decimals and Fractions are supported: >>> from decimal import Decimal as D >>> variance([D("27.5"), D("30.25"), D("30.25"), D("34.5"), D("41.75")]) @@ -1144,6 +554,8 @@ def variance(data, xbar=None): Fraction(67, 108) """ + # http://mathworld.wolfram.com/SampleVariance.html + T, ss, c, n = _ss(data, xbar) if n < 2: raise StatisticsError('variance requires at least two data points') @@ -1185,6 +597,8 @@ def pvariance(data, mu=None): Fraction(13, 72) """ + # http://mathworld.wolfram.com/Variance.html + T, ss, c, n = _ss(data, mu) if n < 1: raise StatisticsError('pvariance requires at least one data point') @@ -1227,46 +641,7 @@ def pstdev(data, mu=None): return _float_sqrt_of_frac(mss.numerator, mss.denominator) -def _mean_stdev(data): - """In one pass, compute the mean and sample standard deviation as floats.""" - T, ss, xbar, n = _ss(data) - if n < 2: - raise StatisticsError('stdev requires at least two data points') - mss = ss / (n - 1) - try: - return float(xbar), _float_sqrt_of_frac(mss.numerator, mss.denominator) - except AttributeError: - # Handle Nans and Infs gracefully - return float(xbar), float(xbar) / float(ss) - -def _sqrtprod(x: float, y: float) -> float: - "Return sqrt(x * y) computed with improved accuracy and without overflow/underflow." - h = sqrt(x * y) - if not isfinite(h): - if isinf(h) and not isinf(x) and not isinf(y): - # Finite inputs overflowed, so scale down, and recompute. - scale = 2.0 ** -512 # sqrt(1 / sys.float_info.max) - return _sqrtprod(scale * x, scale * y) / scale - return h - if not h: - if x and y: - # Non-zero inputs underflowed, so scale up, and recompute. - # Scale: 1 / sqrt(sys.float_info.min * sys.float_info.epsilon) - scale = 2.0 ** 537 - return _sqrtprod(scale * x, scale * y) / scale - return h - # Improve accuracy with a differential correction. - # https://www.wolframalpha.com/input/?i=Maclaurin+series+sqrt%28h**2+%2B+x%29+at+x%3D0 - d = sumprod((x, h), (y, -h)) - return h + d / (2.0 * h) - - -# === Statistics for relations between two inputs === - -# See https://en.wikipedia.org/wiki/Covariance -# https://en.wikipedia.org/wiki/Pearson_correlation_coefficient -# https://en.wikipedia.org/wiki/Simple_linear_regression - +## Statistics for relations between two inputs ############################# def covariance(x, y, /): """Covariance @@ -1285,6 +660,7 @@ def covariance(x, y, /): -7.5 """ + # https://en.wikipedia.org/wiki/Covariance n = len(x) if len(y) != n: raise StatisticsError('covariance requires that both inputs have same number of data points') @@ -1318,7 +694,10 @@ def correlation(x, y, /, *, method='linear'): Spearman's rank correlation coefficient is appropriate for ordinal data or for continuous data that doesn't meet the linear proportion requirement for Pearson's correlation coefficient. + """ + # https://en.wikipedia.org/wiki/Pearson_correlation_coefficient + # https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient n = len(x) if len(y) != n: raise StatisticsError('correlation requires that both inputs have same number of data points') @@ -1326,6 +705,7 @@ def correlation(x, y, /, *, method='linear'): raise StatisticsError('correlation requires at least two data points') if method not in {'linear', 'ranked'}: raise ValueError(f'Unknown method: {method!r}') + if method == 'ranked': start = (n - 1) / -2 # Center rankings around zero x = _rank(x, start=start) @@ -1335,9 +715,11 @@ def correlation(x, y, /, *, method='linear'): ybar = fsum(y) / n x = [xi - xbar for xi in x] y = [yi - ybar for yi in y] + sxy = sumprod(x, y) sxx = sumprod(x, x) syy = sumprod(y, y) + try: return sxy / _sqrtprod(sxx, syy) except ZeroDivisionError: @@ -1384,30 +766,447 @@ def linear_regression(x, y, /, *, proportional=False): >>> linear_regression(x, y, proportional=True) #doctest: +ELLIPSIS LinearRegression(slope=2.90475..., intercept=0.0) - """ - n = len(x) - if len(y) != n: - raise StatisticsError('linear regression requires that both inputs have same number of data points') - if n < 2: - raise StatisticsError('linear regression requires at least two data points') - if not proportional: - xbar = fsum(x) / n - ybar = fsum(y) / n - x = [xi - xbar for xi in x] # List because used three times below - y = (yi - ybar for yi in y) # Generator because only used once below - sxy = sumprod(x, y) + 0.0 # Add zero to coerce result to a float - sxx = sumprod(x, x) - try: - slope = sxy / sxx # equivalent to: covariance(x, y) / variance(x) - except ZeroDivisionError: - raise StatisticsError('x is constant') - intercept = 0.0 if proportional else ybar - slope * xbar - return LinearRegression(slope=slope, intercept=intercept) + """ + # https://en.wikipedia.org/wiki/Simple_linear_regression + n = len(x) + if len(y) != n: + raise StatisticsError('linear regression requires that both inputs have same number of data points') + if n < 2: + raise StatisticsError('linear regression requires at least two data points') + + if not proportional: + xbar = fsum(x) / n + ybar = fsum(y) / n + x = [xi - xbar for xi in x] # List because used three times below + y = (yi - ybar for yi in y) # Generator because only used once below + + sxy = sumprod(x, y) + 0.0 # Add zero to coerce result to a float + sxx = sumprod(x, x) + + try: + slope = sxy / sxx # equivalent to: covariance(x, y) / variance(x) + except ZeroDivisionError: + raise StatisticsError('x is constant') + + intercept = 0.0 if proportional else ybar - slope * xbar + return LinearRegression(slope=slope, intercept=intercept) + + +## Kernel Density Estimation ############################################### + +_kernel_specs = {} + +def register(*kernels): + "Load the kernel's pdf, cdf, invcdf, and support into _kernel_specs." + def deco(builder): + spec = dict(zip(('pdf', 'cdf', 'invcdf', 'support'), builder())) + for kernel in kernels: + _kernel_specs[kernel] = spec + return builder + return deco + +@register('normal', 'gauss') +def normal_kernel(): + sqrt2pi = sqrt(2 * pi) + sqrt2 = sqrt(2) + pdf = lambda t: exp(-1/2 * t * t) / sqrt2pi + cdf = lambda t: 1/2 * (1.0 + erf(t / sqrt2)) + invcdf = lambda t: _normal_dist_inv_cdf(t, 0.0, 1.0) + support = None + return pdf, cdf, invcdf, support + +@register('logistic') +def logistic_kernel(): + # 1.0 / (exp(t) + 2.0 + exp(-t)) + pdf = lambda t: 1/2 / (1.0 + cosh(t)) + cdf = lambda t: 1.0 - 1.0 / (exp(t) + 1.0) + invcdf = lambda p: log(p / (1.0 - p)) + support = None + return pdf, cdf, invcdf, support + +@register('sigmoid') +def sigmoid_kernel(): + # (2/pi) / (exp(t) + exp(-t)) + c1 = 1 / pi + c2 = 2 / pi + c3 = pi / 2 + pdf = lambda t: c1 / cosh(t) + cdf = lambda t: c2 * atan(exp(t)) + invcdf = lambda p: log(tan(p * c3)) + support = None + return pdf, cdf, invcdf, support + +@register('rectangular', 'uniform') +def rectangular_kernel(): + pdf = lambda t: 1/2 + cdf = lambda t: 1/2 * t + 1/2 + invcdf = lambda p: 2.0 * p - 1.0 + support = 1.0 + return pdf, cdf, invcdf, support + +@register('triangular') +def triangular_kernel(): + pdf = lambda t: 1.0 - abs(t) + cdf = lambda t: t*t * (1/2 if t < 0.0 else -1/2) + t + 1/2 + invcdf = lambda p: sqrt(2.0*p) - 1.0 if p < 1/2 else 1.0 - sqrt(2.0 - 2.0*p) + support = 1.0 + return pdf, cdf, invcdf, support + +@register('parabolic', 'epanechnikov') +def parabolic_kernel(): + pdf = lambda t: 3/4 * (1.0 - t * t) + cdf = lambda t: sumprod((-1/4, 3/4, 1/2), (t**3, t, 1.0)) + invcdf = lambda p: 2.0 * cos((acos(2.0*p - 1.0) + pi) / 3.0) + support = 1.0 + return pdf, cdf, invcdf, support + +def _newton_raphson(f_inv_estimate, f, f_prime, tolerance=1e-12): + def f_inv(y): + "Return x such that f(x) ≈ y within the specified tolerance." + x = f_inv_estimate(y) + while abs(diff := f(x) - y) > tolerance: + x -= diff / f_prime(x) + return x + return f_inv + +def _quartic_invcdf_estimate(p): + sign, p = (1.0, p) if p <= 1/2 else (-1.0, 1.0 - p) + x = (2.0 * p) ** 0.4258865685331 - 1.0 + if p >= 0.004 < 0.499: + x += 0.026818732 * sin(7.101753784 * p + 2.73230839482953) + return x * sign + +@register('quartic', 'biweight') +def quartic_kernel(): + pdf = lambda t: 15/16 * (1.0 - t * t) ** 2 + cdf = lambda t: sumprod((3/16, -5/8, 15/16, 1/2), + (t**5, t**3, t, 1.0)) + invcdf = _newton_raphson(_quartic_invcdf_estimate, f=cdf, f_prime=pdf) + support = 1.0 + return pdf, cdf, invcdf, support + +def _triweight_invcdf_estimate(p): + sign, p = (1.0, p) if p <= 1/2 else (-1.0, 1.0 - p) + x = (2.0 * p) ** 0.3400218741872791 - 1.0 + return x * sign + +@register('triweight') +def triweight_kernel(): + pdf = lambda t: 35/32 * (1.0 - t * t) ** 3 + cdf = lambda t: sumprod((-5/32, 21/32, -35/32, 35/32, 1/2), + (t**7, t**5, t**3, t, 1.0)) + invcdf = _newton_raphson(_triweight_invcdf_estimate, f=cdf, f_prime=pdf) + support = 1.0 + return pdf, cdf, invcdf, support + +@register('cosine') +def cosine_kernel(): + c1 = pi / 4 + c2 = pi / 2 + pdf = lambda t: c1 * cos(c2 * t) + cdf = lambda t: 1/2 * sin(c2 * t) + 1/2 + invcdf = lambda p: 2.0 * asin(2.0 * p - 1.0) / pi + support = 1.0 + return pdf, cdf, invcdf, support + +del register, normal_kernel, logistic_kernel, sigmoid_kernel +del rectangular_kernel, triangular_kernel, parabolic_kernel +del quartic_kernel, triweight_kernel, cosine_kernel + + +def kde(data, h, kernel='normal', *, cumulative=False): + """Kernel Density Estimation: Create a continuous probability density + function or cumulative distribution function from discrete samples. + + The basic idea is to smooth the data using a kernel function + to help draw inferences about a population from a sample. + + The degree of smoothing is controlled by the scaling parameter h + which is called the bandwidth. Smaller values emphasize local + features while larger values give smoother results. + + The kernel determines the relative weights of the sample data + points. Generally, the choice of kernel shape does not matter + as much as the more influential bandwidth smoothing parameter. + + Kernels that give some weight to every sample point: + + normal (gauss) + logistic + sigmoid + + Kernels that only give weight to sample points within + the bandwidth: + + rectangular (uniform) + triangular + parabolic (epanechnikov) + quartic (biweight) + triweight + cosine + + If *cumulative* is true, will return a cumulative distribution function. + + A StatisticsError will be raised if the data sequence is empty. + + Example + ------- + + Given a sample of six data points, construct a continuous + function that estimates the underlying probability density: + + >>> sample = [-2.1, -1.3, -0.4, 1.9, 5.1, 6.2] + >>> f_hat = kde(sample, h=1.5) + + Compute the area under the curve: + + >>> area = sum(f_hat(x) for x in range(-20, 20)) + >>> round(area, 4) + 1.0 + + Plot the estimated probability density function at + evenly spaced points from -6 to 10: + + >>> for x in range(-6, 11): + ... density = f_hat(x) + ... plot = ' ' * int(density * 400) + 'x' + ... print(f'{x:2}: {density:.3f} {plot}') + ... + -6: 0.002 x + -5: 0.009 x + -4: 0.031 x + -3: 0.070 x + -2: 0.111 x + -1: 0.125 x + 0: 0.110 x + 1: 0.086 x + 2: 0.068 x + 3: 0.059 x + 4: 0.066 x + 5: 0.082 x + 6: 0.082 x + 7: 0.058 x + 8: 0.028 x + 9: 0.009 x + 10: 0.002 x + + Estimate P(4.5 < X <= 7.5), the probability that a new sample value + will be between 4.5 and 7.5: + + >>> cdf = kde(sample, h=1.5, cumulative=True) + >>> round(cdf(7.5) - cdf(4.5), 2) + 0.22 + + References + ---------- + + Kernel density estimation and its application: + https://www.itm-conferences.org/articles/itmconf/pdf/2018/08/itmconf_sam2018_00037.pdf + + Kernel functions in common use: + https://en.wikipedia.org/wiki/Kernel_(statistics)#kernel_functions_in_common_use + + Interactive graphical demonstration and exploration: + https://demonstrations.wolfram.com/KernelDensityEstimation/ + + Kernel estimation of cumulative distribution function of a random variable with bounded support + https://www.econstor.eu/bitstream/10419/207829/1/10.21307_stattrans-2016-037.pdf + + """ + + n = len(data) + if not n: + raise StatisticsError('Empty data sequence') + + if not isinstance(data[0], (int, float)): + raise TypeError('Data sequence must contain ints or floats') + + if h <= 0.0: + raise StatisticsError(f'Bandwidth h must be positive, not {h=!r}') + + kernel_spec = _kernel_specs.get(kernel) + if kernel_spec is None: + raise StatisticsError(f'Unknown kernel name: {kernel!r}') + K = kernel_spec['pdf'] + W = kernel_spec['cdf'] + support = kernel_spec['support'] + + if support is None: + + def pdf(x): + return sum(K((x - x_i) / h) for x_i in data) / (len(data) * h) + + def cdf(x): + return sum(W((x - x_i) / h) for x_i in data) / len(data) + + else: + + sample = sorted(data) + bandwidth = h * support + + def pdf(x): + nonlocal n, sample + if len(data) != n: + sample = sorted(data) + n = len(data) + i = bisect_left(sample, x - bandwidth) + j = bisect_right(sample, x + bandwidth) + supported = sample[i : j] + return sum(K((x - x_i) / h) for x_i in supported) / (n * h) + + def cdf(x): + nonlocal n, sample + if len(data) != n: + sample = sorted(data) + n = len(data) + i = bisect_left(sample, x - bandwidth) + j = bisect_right(sample, x + bandwidth) + supported = sample[i : j] + return sum((W((x - x_i) / h) for x_i in supported), i) / n + + if cumulative: + cdf.__doc__ = f'CDF estimate with {h=!r} and {kernel=!r}' + return cdf + + else: + pdf.__doc__ = f'PDF estimate with {h=!r} and {kernel=!r}' + return pdf + + +def kde_random(data, h, kernel='normal', *, seed=None): + """Return a function that makes a random selection from the estimated + probability density function created by kde(data, h, kernel). + + Providing a *seed* allows reproducible selections within a single + thread. The seed may be an integer, float, str, or bytes. + + A StatisticsError will be raised if the *data* sequence is empty. + + Example: + + >>> data = [-2.1, -1.3, -0.4, 1.9, 5.1, 6.2] + >>> rand = kde_random(data, h=1.5, seed=8675309) + >>> new_selections = [rand() for i in range(10)] + >>> [round(x, 1) for x in new_selections] + [0.7, 6.2, 1.2, 6.9, 7.0, 1.8, 2.5, -0.5, -1.8, 5.6] + + """ + n = len(data) + if not n: + raise StatisticsError('Empty data sequence') + + if not isinstance(data[0], (int, float)): + raise TypeError('Data sequence must contain ints or floats') + + if h <= 0.0: + raise StatisticsError(f'Bandwidth h must be positive, not {h=!r}') + + kernel_spec = _kernel_specs.get(kernel) + if kernel_spec is None: + raise StatisticsError(f'Unknown kernel name: {kernel!r}') + invcdf = kernel_spec['invcdf'] + + prng = _random.Random(seed) + random = prng.random + choice = prng.choice + + def rand(): + return choice(data) + h * invcdf(random()) + + rand.__doc__ = f'Random KDE selection with {h=!r} and {kernel=!r}' + + return rand + + +## Quantiles ############################################################### + +# There is no one perfect way to compute quantiles. Here we offer +# two methods that serve common needs. Most other packages +# surveyed offered at least one or both of these two, making them +# "standard" in the sense of "widely-adopted and reproducible". +# They are also easy to explain, easy to compute manually, and have +# straight-forward interpretations that aren't surprising. + +# The default method is known as "R6", "PERCENTILE.EXC", or "expected +# value of rank order statistics". The alternative method is known as +# "R7", "PERCENTILE.INC", or "mode of rank order statistics". + +# For sample data where there is a positive probability for values +# beyond the range of the data, the R6 exclusive method is a +# reasonable choice. Consider a random sample of nine values from a +# population with a uniform distribution from 0.0 to 1.0. The +# distribution of the third ranked sample point is described by +# betavariate(alpha=3, beta=7) which has mode=0.250, median=0.286, and +# mean=0.300. Only the latter (which corresponds with R6) gives the +# desired cut point with 30% of the population falling below that +# value, making it comparable to a result from an inv_cdf() function. +# The R6 exclusive method is also idempotent. + +# For describing population data where the end points are known to +# be included in the data, the R7 inclusive method is a reasonable +# choice. Instead of the mean, it uses the mode of the beta +# distribution for the interior points. Per Hyndman & Fan, "One nice +# property is that the vertices of Q7(p) divide the range into n - 1 +# intervals, and exactly 100p% of the intervals lie to the left of +# Q7(p) and 100(1 - p)% of the intervals lie to the right of Q7(p)." + +# If needed, other methods could be added. However, for now, the +# position is that fewer options make for easier choices and that +# external packages can be used for anything more advanced. + +def quantiles(data, *, n=4, method='exclusive'): + """Divide *data* into *n* continuous intervals with equal probability. + + Returns a list of (n - 1) cut points separating the intervals. + + Set *n* to 4 for quartiles (the default). Set *n* to 10 for deciles. + Set *n* to 100 for percentiles which gives the 99 cuts points that + separate *data* in to 100 equal sized groups. + + The *data* can be any iterable containing sample. + The cut points are linearly interpolated between data points. + + If *method* is set to *inclusive*, *data* is treated as population + data. The minimum value is treated as the 0th percentile and the + maximum value is treated as the 100th percentile. + + """ + if n < 1: + raise StatisticsError('n must be at least 1') + + data = sorted(data) + + ld = len(data) + if ld < 2: + if ld == 1: + return data * (n - 1) + raise StatisticsError('must have at least one data point') + + if method == 'inclusive': + m = ld - 1 + result = [] + for i in range(1, n): + j, delta = divmod(i * m, n) + interpolated = (data[j] * (n - delta) + data[j + 1] * delta) / n + result.append(interpolated) + return result + + if method == 'exclusive': + m = ld + 1 + result = [] + for i in range(1, n): + j = i * m // n # rescale i to m/n + j = 1 if j < 1 else ld-1 if j > ld-1 else j # clamp to 1 .. ld-1 + delta = i*m - j*n # exact integer math + interpolated = (data[j - 1] * (n - delta) + data[j] * delta) / n + result.append(interpolated) + return result + + raise ValueError(f'Unknown method: {method!r}') ## Normal Distribution ##################################################### - def _normal_dist_inv_cdf(p, mu, sigma): # There is no closed-form solution to the inverse CDF for the normal # distribution, so we use a rational approximation instead: @@ -1415,6 +1214,7 @@ def _normal_dist_inv_cdf(p, mu, sigma): # Normal Distribution". Applied Statistics. Blackwell Publishing. 37 # (3): 477–484. doi:10.2307/2347330. JSTOR 2347330. q = p - 0.5 + if fabs(q) <= 0.425: r = 0.180625 - q * q # Hash sum: 55.88319_28806_14901_4439 @@ -1436,6 +1236,7 @@ def _normal_dist_inv_cdf(p, mu, sigma): 1.0) x = num / den return mu + (x * sigma) + r = p if q <= 0.0 else 1.0 - p r = sqrt(-log(r)) if r <= 5.0: @@ -1476,9 +1277,11 @@ def _normal_dist_inv_cdf(p, mu, sigma): 1.36929_88092_27358_05310e-1) * r + 5.99832_20655_58879_37690e-1) * r + 1.0) + x = num / den if q < 0.0: x = -x + return mu + (x * sigma) @@ -1635,172 +1438,416 @@ def variance(self): def __add__(x1, x2): """Add a constant or another NormalDist instance. - If *other* is a constant, translate mu by the constant, - leaving sigma unchanged. + If *other* is a constant, translate mu by the constant, + leaving sigma unchanged. + + If *other* is a NormalDist, add both the means and the variances. + Mathematically, this works only if the two distributions are + independent or if they are jointly normally distributed. + """ + if isinstance(x2, NormalDist): + return NormalDist(x1._mu + x2._mu, hypot(x1._sigma, x2._sigma)) + return NormalDist(x1._mu + x2, x1._sigma) + + def __sub__(x1, x2): + """Subtract a constant or another NormalDist instance. + + If *other* is a constant, translate by the constant mu, + leaving sigma unchanged. + + If *other* is a NormalDist, subtract the means and add the variances. + Mathematically, this works only if the two distributions are + independent or if they are jointly normally distributed. + """ + if isinstance(x2, NormalDist): + return NormalDist(x1._mu - x2._mu, hypot(x1._sigma, x2._sigma)) + return NormalDist(x1._mu - x2, x1._sigma) + + def __mul__(x1, x2): + """Multiply both mu and sigma by a constant. + + Used for rescaling, perhaps to change measurement units. + Sigma is scaled with the absolute value of the constant. + """ + return NormalDist(x1._mu * x2, x1._sigma * fabs(x2)) + + def __truediv__(x1, x2): + """Divide both mu and sigma by a constant. + + Used for rescaling, perhaps to change measurement units. + Sigma is scaled with the absolute value of the constant. + """ + return NormalDist(x1._mu / x2, x1._sigma / fabs(x2)) + + def __pos__(x1): + "Return a copy of the instance." + return NormalDist(x1._mu, x1._sigma) + + def __neg__(x1): + "Negates mu while keeping sigma the same." + return NormalDist(-x1._mu, x1._sigma) + + __radd__ = __add__ + + def __rsub__(x1, x2): + "Subtract a NormalDist from a constant or another NormalDist." + return -(x1 - x2) + + __rmul__ = __mul__ + + def __eq__(x1, x2): + "Two NormalDist objects are equal if their mu and sigma are both equal." + if not isinstance(x2, NormalDist): + return NotImplemented + return x1._mu == x2._mu and x1._sigma == x2._sigma + + def __hash__(self): + "NormalDist objects hash equal if their mu and sigma are both equal." + return hash((self._mu, self._sigma)) + + def __repr__(self): + return f'{type(self).__name__}(mu={self._mu!r}, sigma={self._sigma!r})' + + def __getstate__(self): + return self._mu, self._sigma + + def __setstate__(self, state): + self._mu, self._sigma = state + + +## Private utilities ####################################################### + +def _sum(data): + """_sum(data) -> (type, sum, count) + + Return a high-precision sum of the given numeric data as a fraction, + together with the type to be converted to and the count of items. + + Examples + -------- + + >>> _sum([3, 2.25, 4.5, -0.5, 0.25]) + (, Fraction(19, 2), 5) + + Some sources of round-off error will be avoided: + + # Built-in sum returns zero. + >>> _sum([1e50, 1, -1e50] * 1000) + (, Fraction(1000, 1), 3000) + + Fractions and Decimals are also supported: + + >>> from fractions import Fraction as F + >>> _sum([F(2, 3), F(7, 5), F(1, 4), F(5, 6)]) + (, Fraction(63, 20), 4) + + >>> from decimal import Decimal as D + >>> data = [D("0.1375"), D("0.2108"), D("0.3061"), D("0.0419")] + >>> _sum(data) + (, Fraction(6963, 10000), 4) + + Mixed types are currently treated as an error, except that int is + allowed. + + """ + count = 0 + types = set() + types_add = types.add + partials = {} + partials_get = partials.get + for typ, values in groupby(data, type): + types_add(typ) + for n, d in map(_exact_ratio, values): + count += 1 + partials[d] = partials_get(d, 0) + n + if None in partials: + # The sum will be a NAN or INF. We can ignore all the finite + # partials, and just look at this special one. + total = partials[None] + assert not _isfinite(total) + else: + # Sum all the partial sums using builtin sum. + total = sum(Fraction(n, d) for d, n in partials.items()) + T = reduce(_coerce, types, int) # or raise TypeError + return (T, total, count) + + +def _ss(data, c=None): + """Return the exact mean and sum of square deviations of sequence data. + + Calculations are done in a single pass, allowing the input to be an iterator. + + If given *c* is used the mean; otherwise, it is calculated from the data. + Use the *c* argument with care, as it can lead to garbage results. + + """ + if c is not None: + T, ssd, count = _sum((d := x - c) * d for x in data) + return (T, ssd, c, count) + + count = 0 + types = set() + types_add = types.add + sx_partials = defaultdict(int) + sxx_partials = defaultdict(int) + for typ, values in groupby(data, type): + types_add(typ) + for n, d in map(_exact_ratio, values): + count += 1 + sx_partials[d] += n + sxx_partials[d] += n * n + + if not count: + ssd = c = Fraction(0) + elif None in sx_partials: + # The sum will be a NAN or INF. We can ignore all the finite + # partials, and just look at this special one. + ssd = c = sx_partials[None] + assert not _isfinite(ssd) + else: + sx = sum(Fraction(n, d) for d, n in sx_partials.items()) + sxx = sum(Fraction(n, d*d) for d, n in sxx_partials.items()) + # This formula has poor numeric properties for floats, + # but with fractions it is exact. + ssd = (count * sxx - sx * sx) / count + c = sx / count + + T = reduce(_coerce, types, int) # or raise TypeError + return (T, ssd, c, count) + + +def _isfinite(x): + try: + return x.is_finite() # Likely a Decimal. + except AttributeError: + return math.isfinite(x) # Coerces to float first. + + +def _coerce(T, S): + """Coerce types T and S to a common type, or raise TypeError. + + Coercion rules are currently an implementation detail. See the CoerceTest + test class in test_statistics for details. + + """ + # See http://bugs.python.org/issue24068. + assert T is not bool, "initial type T is bool" + # If the types are the same, no need to coerce anything. Put this + # first, so that the usual case (no coercion needed) happens as soon + # as possible. + if T is S: return T + # Mixed int & other coerce to the other type. + if S is int or S is bool: return T + if T is int: return S + # If one is a (strict) subclass of the other, coerce to the subclass. + if issubclass(S, T): return S + if issubclass(T, S): return T + # Ints coerce to the other type. + if issubclass(T, int): return S + if issubclass(S, int): return T + # Mixed fraction & float coerces to float (or float subclass). + if issubclass(T, Fraction) and issubclass(S, float): + return S + if issubclass(T, float) and issubclass(S, Fraction): + return T + # Any other combination is disallowed. + msg = "don't know how to coerce %s and %s" + raise TypeError(msg % (T.__name__, S.__name__)) + + +def _exact_ratio(x): + """Return Real number x to exact (numerator, denominator) pair. + + >>> _exact_ratio(0.25) + (1, 4) + + x is expected to be an int, Fraction, Decimal or float. - If *other* is a NormalDist, add both the means and the variances. - Mathematically, this works only if the two distributions are - independent or if they are jointly normally distributed. - """ - if isinstance(x2, NormalDist): - return NormalDist(x1._mu + x2._mu, hypot(x1._sigma, x2._sigma)) - return NormalDist(x1._mu + x2, x1._sigma) + """ + try: + return x.as_integer_ratio() + except AttributeError: + pass + except (OverflowError, ValueError): + # float NAN or INF. + assert not _isfinite(x) + return (x, None) - def __sub__(x1, x2): - """Subtract a constant or another NormalDist instance. + try: + # x may be an Integral ABC. + return (x.numerator, x.denominator) + except AttributeError: + msg = f"can't convert type '{type(x).__name__}' to numerator/denominator" + raise TypeError(msg) - If *other* is a constant, translate by the constant mu, - leaving sigma unchanged. - If *other* is a NormalDist, subtract the means and add the variances. - Mathematically, this works only if the two distributions are - independent or if they are jointly normally distributed. - """ - if isinstance(x2, NormalDist): - return NormalDist(x1._mu - x2._mu, hypot(x1._sigma, x2._sigma)) - return NormalDist(x1._mu - x2, x1._sigma) +def _convert(value, T): + """Convert value to given numeric type T.""" + if type(value) is T: + # This covers the cases where T is Fraction, or where value is + # a NAN or INF (Decimal or float). + return value + if issubclass(T, int) and value.denominator != 1: + T = float + try: + # FIXME: what do we do if this overflows? + return T(value) + except TypeError: + if issubclass(T, Decimal): + return T(value.numerator) / T(value.denominator) + else: + raise - def __mul__(x1, x2): - """Multiply both mu and sigma by a constant. - Used for rescaling, perhaps to change measurement units. - Sigma is scaled with the absolute value of the constant. - """ - return NormalDist(x1._mu * x2, x1._sigma * fabs(x2)) +def _fail_neg(values, errmsg='negative value'): + """Iterate over values, failing if any are less than zero.""" + for x in values: + if x < 0: + raise StatisticsError(errmsg) + yield x - def __truediv__(x1, x2): - """Divide both mu and sigma by a constant. - Used for rescaling, perhaps to change measurement units. - Sigma is scaled with the absolute value of the constant. - """ - return NormalDist(x1._mu / x2, x1._sigma / fabs(x2)) +def _rank(data, /, *, key=None, reverse=False, ties='average', start=1) -> list[float]: + """Rank order a dataset. The lowest value has rank 1. - def __pos__(x1): - "Return a copy of the instance." - return NormalDist(x1._mu, x1._sigma) + Ties are averaged so that equal values receive the same rank: - def __neg__(x1): - "Negates mu while keeping sigma the same." - return NormalDist(-x1._mu, x1._sigma) + >>> data = [31, 56, 31, 25, 75, 18] + >>> _rank(data) + [3.5, 5.0, 3.5, 2.0, 6.0, 1.0] - __radd__ = __add__ + The operation is idempotent: - def __rsub__(x1, x2): - "Subtract a NormalDist from a constant or another NormalDist." - return -(x1 - x2) + >>> _rank([3.5, 5.0, 3.5, 2.0, 6.0, 1.0]) + [3.5, 5.0, 3.5, 2.0, 6.0, 1.0] - __rmul__ = __mul__ + It is possible to rank the data in reverse order so that the + highest value has rank 1. Also, a key-function can extract + the field to be ranked: - def __eq__(x1, x2): - "Two NormalDist objects are equal if their mu and sigma are both equal." - if not isinstance(x2, NormalDist): - return NotImplemented - return x1._mu == x2._mu and x1._sigma == x2._sigma + >>> goals = [('eagles', 45), ('bears', 48), ('lions', 44)] + >>> _rank(goals, key=itemgetter(1), reverse=True) + [2.0, 1.0, 3.0] - def __hash__(self): - "NormalDist objects hash equal if their mu and sigma are both equal." - return hash((self._mu, self._sigma)) + Ranks are conventionally numbered starting from one; however, + setting *start* to zero allows the ranks to be used as array indices: - def __repr__(self): - return f'{type(self).__name__}(mu={self._mu!r}, sigma={self._sigma!r})' + >>> prize = ['Gold', 'Silver', 'Bronze', 'Certificate'] + >>> scores = [8.1, 7.3, 9.4, 8.3] + >>> [prize[int(i)] for i in _rank(scores, start=0, reverse=True)] + ['Bronze', 'Certificate', 'Gold', 'Silver'] - def __getstate__(self): - return self._mu, self._sigma + """ + # If this function becomes public at some point, more thought + # needs to be given to the signature. A list of ints is + # plausible when ties is "min" or "max". When ties is "average", + # either list[float] or list[Fraction] is plausible. - def __setstate__(self, state): - self._mu, self._sigma = state + # Default handling of ties matches scipy.stats.mstats.spearmanr. + if ties != 'average': + raise ValueError(f'Unknown tie resolution method: {ties!r}') + if key is not None: + data = map(key, data) + val_pos = sorted(zip(data, count()), reverse=reverse) + i = start - 1 + result = [0] * len(val_pos) + for _, g in groupby(val_pos, key=itemgetter(0)): + group = list(g) + size = len(group) + rank = i + (size + 1) / 2 + for value, orig_pos in group: + result[orig_pos] = rank + i += size + return result -## kde_random() ############################################################## +def _integer_sqrt_of_frac_rto(n: int, m: int) -> int: + """Square root of n/m, rounded to the nearest integer using round-to-odd.""" + # Reference: https://www.lri.fr/~melquion/doc/05-imacs17_1-expose.pdf + a = math.isqrt(n // m) + return a | (a*a*m != n) -def _newton_raphson(f_inv_estimate, f, f_prime, tolerance=1e-12): - def f_inv(y): - "Return x such that f(x) ≈ y within the specified tolerance." - x = f_inv_estimate(y) - while abs(diff := f(x) - y) > tolerance: - x -= diff / f_prime(x) - return x - return f_inv -def _quartic_invcdf_estimate(p): - sign, p = (1.0, p) if p <= 1/2 else (-1.0, 1.0 - p) - x = (2.0 * p) ** 0.4258865685331 - 1.0 - if p >= 0.004 < 0.499: - x += 0.026818732 * sin(7.101753784 * p + 2.73230839482953) - return x * sign +# For 53 bit precision floats, the bit width used in +# _float_sqrt_of_frac() is 109. +_sqrt_bit_width: int = 2 * sys.float_info.mant_dig + 3 -_quartic_invcdf = _newton_raphson( - f_inv_estimate = _quartic_invcdf_estimate, - f = lambda t: sumprod((3/16, -5/8, 15/16, 1/2), (t**5, t**3, t, 1.0)), - f_prime = lambda t: 15/16 * (1.0 - t * t) ** 2) -def _triweight_invcdf_estimate(p): - sign, p = (1.0, p) if p <= 1/2 else (-1.0, 1.0 - p) - x = (2.0 * p) ** 0.3400218741872791 - 1.0 - return x * sign +def _float_sqrt_of_frac(n: int, m: int) -> float: + """Square root of n/m as a float, correctly rounded.""" + # See principle and proof sketch at: https://bugs.python.org/msg407078 + q = (n.bit_length() - m.bit_length() - _sqrt_bit_width) // 2 + if q >= 0: + numerator = _integer_sqrt_of_frac_rto(n, m << 2 * q) << q + denominator = 1 + else: + numerator = _integer_sqrt_of_frac_rto(n << -2 * q, m) + denominator = 1 << -q + return numerator / denominator # Convert to float -_triweight_invcdf = _newton_raphson( - f_inv_estimate = _triweight_invcdf_estimate, - f = lambda t: sumprod((-5/32, 21/32, -35/32, 35/32, 1/2), - (t**7, t**5, t**3, t, 1.0)), - f_prime = lambda t: 35/32 * (1.0 - t * t) ** 3) - -_kernel_invcdfs = { - 'normal': NormalDist().inv_cdf, - 'logistic': lambda p: log(p / (1 - p)), - 'sigmoid': lambda p: log(tan(p * pi/2)), - 'rectangular': lambda p: 2*p - 1, - 'parabolic': lambda p: 2 * cos((acos(2*p-1) + pi) / 3), - 'quartic': _quartic_invcdf, - 'triweight': _triweight_invcdf, - 'triangular': lambda p: sqrt(2*p) - 1 if p < 1/2 else 1 - sqrt(2 - 2*p), - 'cosine': lambda p: 2 * asin(2*p - 1) / pi, -} -_kernel_invcdfs['gauss'] = _kernel_invcdfs['normal'] -_kernel_invcdfs['uniform'] = _kernel_invcdfs['rectangular'] -_kernel_invcdfs['epanechnikov'] = _kernel_invcdfs['parabolic'] -_kernel_invcdfs['biweight'] = _kernel_invcdfs['quartic'] -def kde_random(data, h, kernel='normal', *, seed=None): - """Return a function that makes a random selection from the estimated - probability density function created by kde(data, h, kernel). +def _decimal_sqrt_of_frac(n: int, m: int) -> Decimal: + """Square root of n/m as a Decimal, correctly rounded.""" + # Premise: For decimal, computing (n/m).sqrt() can be off + # by 1 ulp from the correctly rounded result. + # Method: Check the result, moving up or down a step if needed. + if n <= 0: + if not n: + return Decimal('0.0') + n, m = -n, -m - Providing a *seed* allows reproducible selections within a single - thread. The seed may be an integer, float, str, or bytes. + root = (Decimal(n) / Decimal(m)).sqrt() + nr, dr = root.as_integer_ratio() - A StatisticsError will be raised if the *data* sequence is empty. + plus = root.next_plus() + np, dp = plus.as_integer_ratio() + # test: n / m > ((root + plus) / 2) ** 2 + if 4 * n * (dr*dp)**2 > m * (dr*np + dp*nr)**2: + return plus - Example: + minus = root.next_minus() + nm, dm = minus.as_integer_ratio() + # test: n / m < ((root + minus) / 2) ** 2 + if 4 * n * (dr*dm)**2 < m * (dr*nm + dm*nr)**2: + return minus - >>> data = [-2.1, -1.3, -0.4, 1.9, 5.1, 6.2] - >>> rand = kde_random(data, h=1.5, seed=8675309) - >>> new_selections = [rand() for i in range(10)] - >>> [round(x, 1) for x in new_selections] - [0.7, 6.2, 1.2, 6.9, 7.0, 1.8, 2.5, -0.5, -1.8, 5.6] + return root - """ - n = len(data) - if not n: - raise StatisticsError('Empty data sequence') - if not isinstance(data[0], (int, float)): - raise TypeError('Data sequence must contain ints or floats') +def _mean_stdev(data): + """In one pass, compute the mean and sample standard deviation as floats.""" + T, ss, xbar, n = _ss(data) + if n < 2: + raise StatisticsError('stdev requires at least two data points') + mss = ss / (n - 1) + try: + return float(xbar), _float_sqrt_of_frac(mss.numerator, mss.denominator) + except AttributeError: + # Handle Nans and Infs gracefully + return float(xbar), float(xbar) / float(ss) - if h <= 0.0: - raise StatisticsError(f'Bandwidth h must be positive, not {h=!r}') - kernel_invcdf = _kernel_invcdfs.get(kernel) - if kernel_invcdf is None: - raise StatisticsError(f'Unknown kernel name: {kernel!r}') +def _sqrtprod(x: float, y: float) -> float: + "Return sqrt(x * y) computed with improved accuracy and without overflow/underflow." - prng = _random.Random(seed) - random = prng.random - choice = prng.choice + h = sqrt(x * y) - def rand(): - return choice(data) + h * kernel_invcdf(random()) + if not isfinite(h): + if isinf(h) and not isinf(x) and not isinf(y): + # Finite inputs overflowed, so scale down, and recompute. + scale = 2.0 ** -512 # sqrt(1 / sys.float_info.max) + return _sqrtprod(scale * x, scale * y) / scale + return h - rand.__doc__ = f'Random KDE selection with {h=!r} and {kernel=!r}' + if not h: + if x and y: + # Non-zero inputs underflowed, so scale up, and recompute. + # Scale: 1 / sqrt(sys.float_info.min * sys.float_info.epsilon) + scale = 2.0 ** 537 + return _sqrtprod(scale * x, scale * y) / scale + return h - return rand + # Improve accuracy with a differential correction. + # https://www.wolframalpha.com/input/?i=Maclaurin+series+sqrt%28h**2+%2B+x%29+at+x%3D0 + d = sumprod((x, h), (y, -h)) + return h + d / (2.0 * h) diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py index cded8aba6e8cd7..0b28459f03d86a 100644 --- a/Lib/test/test_statistics.py +++ b/Lib/test/test_statistics.py @@ -2435,12 +2435,13 @@ def integrate(func, low, high, steps=10_000): self.assertGreater(f_hat(100), 0.0) def test_kde_kernel_invcdfs(self): - kernel_invcdfs = statistics._kernel_invcdfs + kernel_specs = statistics._kernel_specs kde = statistics.kde # Verify that cdf / invcdf will round trip xarr = [i/100 for i in range(-100, 101)] - for kernel, invcdf in kernel_invcdfs.items(): + for kernel, spec in kernel_specs.items(): + invcdf = spec['invcdf'] with self.subTest(kernel=kernel): cdf = kde([0.0], h=1.0, kernel=kernel, cumulative=True) for x in xarr: From fd6cd621e0cce6ba2e737103d2a62b5ade90f41f Mon Sep 17 00:00:00 2001 From: Alyssa Coghlan Date: Sun, 2 Jun 2024 14:44:29 +1000 Subject: [PATCH 15/72] gh-118934: Fix PyEval_GetLocals docs (PEP 667) (#119932) PEP 667's description of the planned changes to PyEval_GetLocals was internally inconsistent when accepted, so the docs added for gh-74929 didn't match either the current behaviour or the intended behaviour once gh-118934 is fixed. This PR updates the documentation and 3.13 What's New to match the intended behaviour (once gh-118934 is fixed). It also tidies up lingering references to `f_locals` always being a dictionary (this hasn't been true since at least when custom namespace support for class statement execution was added) --- Doc/c-api/reflection.rst | 27 ++++++++++++++++----------- Doc/reference/datamodel.rst | 4 ++-- Doc/whatsnew/3.13.rst | 36 ++++++++++++++++++++++++++++-------- 3 files changed, 46 insertions(+), 21 deletions(-) diff --git a/Doc/c-api/reflection.rst b/Doc/c-api/reflection.rst index af9a1a74ec137e..038e6977104560 100644 --- a/Doc/c-api/reflection.rst +++ b/Doc/c-api/reflection.rst @@ -19,23 +19,28 @@ Reflection .. deprecated:: 3.13 - To avoid creating a reference cycle in :term:`optimized scopes `, - use either :c:func:`PyEval_GetFrameLocals` to obtain the same behaviour as calling + Use either :c:func:`PyEval_GetFrameLocals` to obtain the same behaviour as calling :func:`locals` in Python code, or else call :c:func:`PyFrame_GetLocals` on the result - of :c:func:`PyEval_GetFrame` to get the same result as this function without having to - cache the proxy instance on the underlying frame. + of :c:func:`PyEval_GetFrame` to access the :attr:`~frame.f_locals` attribute of the + currently executing frame. - Return the :attr:`~frame.f_locals` attribute of the currently executing frame, + Return a mapping providing access to the local variables in the current execution frame, or ``NULL`` if no frame is currently executing. - If the frame refers to an :term:`optimized scope`, this returns a - write-through proxy object that allows modifying the locals. - In all other cases (classes, modules, :func:`exec`, :func:`eval`) it returns - the mapping representing the frame locals directly (as described for - :func:`locals`). + Refer to :func:`locals` for details of the mapping returned at different scopes. + + As this function returns a :term:`borrowed reference`, the dictionary returned for + :term:`optimized scopes ` is cached on the frame object and will remain + alive as long as the frame object does. Unlike :c:func:`PyEval_GetFrameLocals` and + :func:`locals`, subsequent calls to this function in the same frame will update the + contents of the cached dictionary to reflect changes in the state of the local variables + rather than returning a new snapshot. .. versionchanged:: 3.13 - As part of :pep:`667`, return a proxy object for optimized scopes. + As part of :pep:`667`, :c:func:`PyFrame_GetLocals`, :func:`locals`, and + :attr:`FrameType.f_locals ` no longer make use of the shared cache + dictionary. Refer to the :ref:`What's New entry ` for + additional details. .. c:function:: PyObject* PyEval_GetGlobals(void) diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index 134385ed2f1860..9110060a6177e5 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -1347,13 +1347,13 @@ Special read-only attributes ``object.__getattr__`` with arguments ``obj`` and ``"f_code"``. * - .. attribute:: frame.f_locals - - The dictionary used by the frame to look up + - The mapping used by the frame to look up :ref:`local variables `. If the frame refers to an :term:`optimized scope`, this may return a write-through proxy object. .. versionchanged:: 3.13 - Return a proxy for functions and comprehensions. + Return a proxy for optimized scopes. * - .. attribute:: frame.f_globals - The dictionary used by the frame to look up diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index ab260bf2a2d740..66626ac06428b9 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -287,8 +287,9 @@ returns a write-through proxy to the frame's local and locally referenced nonlocal variables in these scopes, rather than returning an inconsistently updated shared ``dict`` instance with undefined runtime semantics. -See :pep:`667` for more details, including related C API changes and -deprecations. +See :pep:`667` for more details, including related C API changes and deprecations. Porting +notes are also provided below for the affected :ref:`Python APIs ` +and :ref:`C APIs `. (PEP and implementation contributed by Mark Shannon and Tian Gao in :gh:`74929`. Documentation updates provided by Guido van Rossum and @@ -2246,6 +2247,8 @@ Changes in the Python API returned by :meth:`zipfile.ZipFile.open` was changed from ``'r'`` to ``'rb'``. (Contributed by Serhiy Storchaka in :gh:`115961`.) +.. _pep667-porting-notes-py: + * Calling :func:`locals` in an :term:`optimized scope` now produces an independent snapshot on each call, and hence no longer implicitly updates previously returned references. Obtaining the legacy CPython behaviour now @@ -2341,15 +2344,27 @@ Changes in the C API to :c:func:`PyUnstable_Code_GetFirstFree`. (Contributed by Bogdan Romanyuk in :gh:`115781`.) -* Calling :c:func:`PyFrame_GetLocals` or :c:func:`PyEval_GetLocals` in an - :term:`optimized scope` now returns a write-through proxy rather than a - snapshot that gets updated at ill-specified times. If a snapshot is desired, - it must be created explicitly (e.g. with :c:func:`PyDict_Copy`) or by calling - the new :c:func:`PyEval_GetFrameLocals` API. (Changed as part of :pep:`667`.) +.. _pep667-porting-notes-c: + +* The effects of mutating the dictionary returned from :c:func:`PyEval_GetLocals` in an + :term:`optimized scope` have changed. New dict entries added this way will now *only* be + visible to subsequent :c:func:`PyEval_GetLocals` calls in that frame, as + :c:func:`PyFrame_GetLocals`, :func:`locals`, and + :attr:`FrameType.f_locals ` no longer access the same underlying cached + dictionary. Changes made to entries for actual variable names and names added via the + write-through proxy interfaces will be overwritten on subsequent calls to + :c:func:`PyEval_GetLocals` in that frame. The recommended code update depends on how the + function was being used, so refer to the deprecation notice on the function for details. + (Changed as part of :pep:`667`.) + +* Calling :c:func:`PyFrame_GetLocals` in an :term:`optimized scope` now returns a + write-through proxy rather than a snapshot that gets updated at ill-specified times. + If a snapshot is desired, it must be created explicitly (e.g. with :c:func:`PyDict_Copy`) + or by calling the new :c:func:`PyEval_GetFrameLocals` API. (Changed as part of :pep:`667`.) * :c:func:`!PyFrame_FastToLocals` and :c:func:`!PyFrame_FastToLocalsWithError` no longer have any effect. Calling these functions has been redundant since - Python 3.11, when :c:func:`PyFrame_GetLocals` was first introduced. + Python 3.11, when :c:func:`PyFrame_GetLocals` was first introduced. (Changed as part of :pep:`667`.) * :c:func:`!PyFrame_LocalsToFast` no longer has any effect. Calling this function @@ -2509,6 +2524,11 @@ Deprecated C APIs :c:func:`PyWeakref_GetRef` on Python 3.12 and older. (Contributed by Victor Stinner in :gh:`105927`.) +* Deprecate the :c:func:`PyEval_GetBuiltins`, :c:func:`PyEval_GetGlobals`, and + :c:func:`PyEval_GetLocals` functions, which return a :term:`borrowed reference`. + Refer to the deprecation notices on each function for their recommended replacements. + (Soft deprecated as part of :pep:`667`.) + Pending Removal in Python 3.14 ------------------------------ From 4aed319a8eb63b205d6007c36713cacdbf1ce8a3 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Sun, 2 Jun 2024 10:27:20 +0300 Subject: [PATCH 16/72] gh-119775: Remove ability to create immutable types with mutable bases (#119776) --- Doc/whatsnew/3.14.rst | 2 ++ Lib/test/test_capi/test_misc.py | 28 ++----------------- ...-05-30-12-51-21.gh-issue-119775.CBq9IG.rst | 2 ++ Objects/typeobject.c | 16 ++++------- 4 files changed, 13 insertions(+), 35 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2024-05-30-12-51-21.gh-issue-119775.CBq9IG.rst diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index d443cf9bc56b98..45ffb281fcc032 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -258,3 +258,5 @@ Deprecated Removed ------- +* Creating :c:data:`immutable types ` with mutable + bases was deprecated since 3.12 and now raises a :exc:`TypeError`. diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index f3d16e4a2fc92a..0dc0b530aec971 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -777,33 +777,11 @@ def test_pytype_fromspec_with_repeated_slots(self): with self.assertRaises(SystemError): _testcapi.create_type_from_repeated_slots(variant) - @warnings_helper.ignore_warnings(category=DeprecationWarning) def test_immutable_type_with_mutable_base(self): - # Add deprecation warning here so it's removed in 3.14 - warnings._deprecated( - 'creating immutable classes with mutable bases', remove=(3, 14)) - - class MutableBase: - def meth(self): - return 'original' - - with self.assertWarns(DeprecationWarning): - ImmutableSubclass = _testcapi.make_immutable_type_with_base( - MutableBase) - instance = ImmutableSubclass() + class MutableBase: ... - self.assertEqual(instance.meth(), 'original') - - # Cannot override the static type's method - with self.assertRaisesRegex( - TypeError, - "cannot set 'meth' attribute of immutable type"): - ImmutableSubclass.meth = lambda self: 'overridden' - self.assertEqual(instance.meth(), 'original') - - # Can change the method on the mutable base - MutableBase.meth = lambda self: 'changed' - self.assertEqual(instance.meth(), 'changed') + with self.assertRaisesRegex(TypeError, 'Creating immutable type'): + _testcapi.make_immutable_type_with_base(MutableBase) def test_pynumber_tobase(self): from _testcapi import pynumber_tobase diff --git a/Misc/NEWS.d/next/C API/2024-05-30-12-51-21.gh-issue-119775.CBq9IG.rst b/Misc/NEWS.d/next/C API/2024-05-30-12-51-21.gh-issue-119775.CBq9IG.rst new file mode 100644 index 00000000000000..c342a3814ed5db --- /dev/null +++ b/Misc/NEWS.d/next/C API/2024-05-30-12-51-21.gh-issue-119775.CBq9IG.rst @@ -0,0 +1,2 @@ +Creating :c:data:`immutable types ` with mutable +bases was deprecated since 3.12 and now raises a :exc:`TypeError`. diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 290306cdb677e5..0095a79a2cafec 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -4613,16 +4613,12 @@ _PyType_FromMetaclass_impl( goto finally; } if (!_PyType_HasFeature(b, Py_TPFLAGS_IMMUTABLETYPE)) { - if (PyErr_WarnFormat( - PyExc_DeprecationWarning, - 0, - "Creating immutable type %s from mutable base %s is " - "deprecated, and slated to be disallowed in Python 3.14.", - spec->name, - b->tp_name)) - { - goto finally; - } + PyErr_Format( + PyExc_TypeError, + "Creating immutable type %s from mutable base %N", + spec->name, b + ); + goto finally; } } } From f79ffc879b919604ed5de22ece83825006cf9a17 Mon Sep 17 00:00:00 2001 From: Mark Dickinson Date: Sun, 2 Jun 2024 10:16:49 +0100 Subject: [PATCH 17/72] gh-119740: Remove deprecated trunc delegation (#119743) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the delegation of `int` to the `__trunc__` special method: `int` will now only delegate to `__int__` and `__index__` (in that order). `__trunc__` continues to exist, but its sole purpose is to support `math.trunc`. --------- Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> Co-authored-by: Serhiy Storchaka --- Doc/library/functions.rst | 11 +-- Doc/reference/datamodel.rst | 7 +- Doc/whatsnew/3.14.rst | 5 + .../pycore_global_objects_fini_generated.h | 1 - Include/internal/pycore_global_strings.h | 1 - .../internal/pycore_runtime_init_generated.h | 1 - .../internal/pycore_unicodeobject_generated.h | 3 - Lib/test/test_int.py | 96 +------------------ Lib/test/test_long.py | 9 -- ...-05-29-18-53-43.gh-issue-119740.zP2JNM.rst | 2 + Objects/abstract.c | 32 ------- 11 files changed, 16 insertions(+), 152 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-05-29-18-53-43.gh-issue-119740.zP2JNM.rst diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index 7291461c69acd2..4617767a71be18 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -1004,9 +1004,8 @@ are always available. They are listed here in alphabetical order. 115 If the argument defines :meth:`~object.__int__`, - ``int(x)`` returns ``x.__int__()``. If the argument defines :meth:`~object.__index__`, - it returns ``x.__index__()``. If the argument defines :meth:`~object.__trunc__`, - it returns ``x.__trunc__()``. + ``int(x)`` returns ``x.__int__()``. If the argument defines + :meth:`~object.__index__`, it returns ``x.__index__()``. For floating point numbers, this truncates towards zero. If the argument is not a number or if *base* is given, then it must be a string, @@ -1044,9 +1043,6 @@ are always available. They are listed here in alphabetical order. .. versionchanged:: 3.8 Falls back to :meth:`~object.__index__` if :meth:`~object.__int__` is not defined. - .. versionchanged:: 3.11 - The delegation to :meth:`~object.__trunc__` is deprecated. - .. versionchanged:: 3.11 :class:`int` string inputs and string representations can be limited to help avoid denial of service attacks. A :exc:`ValueError` is raised when @@ -1055,6 +1051,9 @@ are always available. They are listed here in alphabetical order. See the :ref:`integer string conversion length limitation ` documentation. + .. versionchanged:: 3.14 + :func:`int` no longer delegates to the :meth:`~object.__trunc__` method. + .. function:: isinstance(object, classinfo) Return ``True`` if the *object* argument is an instance of the *classinfo* diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index 9110060a6177e5..af4c585e1c3e2f 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -3127,11 +3127,8 @@ left undefined. return the value of the object truncated to an :class:`~numbers.Integral` (typically an :class:`int`). - The built-in function :func:`int` falls back to :meth:`__trunc__` if neither - :meth:`__int__` nor :meth:`__index__` is defined. - - .. versionchanged:: 3.11 - The delegation of :func:`int` to :meth:`__trunc__` is deprecated. + .. versionchanged:: 3.14 + :func:`int` no longer delegates to the :meth:`~object.__trunc__` method. .. _context-managers: diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 45ffb281fcc032..9f471d24909215 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -225,6 +225,11 @@ Others It had previously raised a :exc:`DeprecationWarning` since Python 3.9. (Contributed by Jelle Zijlstra in :gh:`118767`.) +* The :func:`int` built-in no longer delegates to + :meth:`~object.__trunc__`. Classes that want to support conversion to + integer must implement either :meth:`~object.__int__` or + :meth:`~object.__index__`. (Contributed by Mark Dickinson in :gh:`119743`.) + Porting to Python 3.14 ====================== diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index a0f8fb71c1ff37..b9fae11dfaa85c 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -732,7 +732,6 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__subclasscheck__)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__subclasshook__)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__truediv__)); - _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__trunc__)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__type_params__)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__typing_is_unpacked_typevartuple__)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__typing_prepare_subst__)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 57d85020f14e05..aa66b20859a472 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -221,7 +221,6 @@ struct _Py_global_strings { STRUCT_FOR_ID(__subclasscheck__) STRUCT_FOR_ID(__subclasshook__) STRUCT_FOR_ID(__truediv__) - STRUCT_FOR_ID(__trunc__) STRUCT_FOR_ID(__type_params__) STRUCT_FOR_ID(__typing_is_unpacked_typevartuple__) STRUCT_FOR_ID(__typing_prepare_subst__) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index e62ebd659d30e8..b27720e9ff6ecf 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -730,7 +730,6 @@ extern "C" { INIT_ID(__subclasscheck__), \ INIT_ID(__subclasshook__), \ INIT_ID(__truediv__), \ - INIT_ID(__trunc__), \ INIT_ID(__type_params__), \ INIT_ID(__typing_is_unpacked_typevartuple__), \ INIT_ID(__typing_prepare_subst__), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index 892f580e8a6846..c61c556b758769 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -504,9 +504,6 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(__truediv__); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); - string = &_Py_ID(__trunc__); - assert(_PyUnicode_CheckConsistency(string, 1)); - _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(__type_params__); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); diff --git a/Lib/test/test_int.py b/Lib/test/test_int.py index caeccbe1fed026..ce9febd741bba2 100644 --- a/Lib/test/test_int.py +++ b/Lib/test/test_int.py @@ -402,68 +402,8 @@ def __trunc__(self): class JustTrunc(base): def __trunc__(self): return 42 - with self.assertWarns(DeprecationWarning): - self.assertEqual(int(JustTrunc()), 42) - - class ExceptionalTrunc(base): - def __trunc__(self): - 1 / 0 - with self.assertRaises(ZeroDivisionError), \ - self.assertWarns(DeprecationWarning): - int(ExceptionalTrunc()) - - for trunc_result_base in (object, Classic): - class Index(trunc_result_base): - def __index__(self): - return 42 - - class TruncReturnsNonInt(base): - def __trunc__(self): - return Index() - with self.assertWarns(DeprecationWarning): - self.assertEqual(int(TruncReturnsNonInt()), 42) - - class Intable(trunc_result_base): - def __int__(self): - return 42 - - class TruncReturnsNonIndex(base): - def __trunc__(self): - return Intable() - with self.assertWarns(DeprecationWarning): - self.assertEqual(int(TruncReturnsNonInt()), 42) - - class NonIntegral(trunc_result_base): - def __trunc__(self): - # Check that we avoid infinite recursion. - return NonIntegral() - - class TruncReturnsNonIntegral(base): - def __trunc__(self): - return NonIntegral() - try: - with self.assertWarns(DeprecationWarning): - int(TruncReturnsNonIntegral()) - except TypeError as e: - self.assertEqual(str(e), - "__trunc__ returned non-Integral" - " (type NonIntegral)") - else: - self.fail("Failed to raise TypeError with %s" % - ((base, trunc_result_base),)) - - # Regression test for bugs.python.org/issue16060. - class BadInt(trunc_result_base): - def __int__(self): - return 42.0 - - class TruncReturnsBadInt(base): - def __trunc__(self): - return BadInt() - - with self.assertRaises(TypeError), \ - self.assertWarns(DeprecationWarning): - int(TruncReturnsBadInt()) + with self.assertRaises(TypeError): + int(JustTrunc()) def test_int_subclass_with_index(self): class MyIndex(int): @@ -514,18 +454,6 @@ class BadInt2(int): def __int__(self): return True - class TruncReturnsBadIndex: - def __trunc__(self): - return BadIndex() - - class TruncReturnsBadInt: - def __trunc__(self): - return BadInt() - - class TruncReturnsIntSubclass: - def __trunc__(self): - return True - bad_int = BadIndex() with self.assertWarns(DeprecationWarning): n = int(bad_int) @@ -549,26 +477,6 @@ def __trunc__(self): self.assertEqual(n, 1) self.assertIs(type(n), int) - bad_int = TruncReturnsBadIndex() - with self.assertWarns(DeprecationWarning): - n = int(bad_int) - self.assertEqual(n, 1) - self.assertIs(type(n), int) - - bad_int = TruncReturnsBadInt() - with self.assertWarns(DeprecationWarning): - self.assertRaises(TypeError, int, bad_int) - - good_int = TruncReturnsIntSubclass() - with self.assertWarns(DeprecationWarning): - n = int(good_int) - self.assertEqual(n, 1) - self.assertIs(type(n), int) - with self.assertWarns(DeprecationWarning): - n = IntSubclass(good_int) - self.assertEqual(n, 1) - self.assertIs(type(n), IntSubclass) - def test_error_message(self): def check(s, base=None): with self.assertRaises(ValueError, diff --git a/Lib/test/test_long.py b/Lib/test/test_long.py index 41b973da2c7df0..3b2e7c4e71d10d 100644 --- a/Lib/test/test_long.py +++ b/Lib/test/test_long.py @@ -386,15 +386,6 @@ def __long__(self): return 42 self.assertRaises(TypeError, int, JustLong()) - class LongTrunc: - # __long__ should be ignored in 3.x - def __long__(self): - return 42 - def __trunc__(self): - return 1729 - with self.assertWarns(DeprecationWarning): - self.assertEqual(int(LongTrunc()), 1729) - def check_float_conversion(self, n): # Check that int -> float conversion behaviour matches # that of the pure Python version above. diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-05-29-18-53-43.gh-issue-119740.zP2JNM.rst b/Misc/NEWS.d/next/Core and Builtins/2024-05-29-18-53-43.gh-issue-119740.zP2JNM.rst new file mode 100644 index 00000000000000..111e096d262ea0 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-05-29-18-53-43.gh-issue-119740.zP2JNM.rst @@ -0,0 +1,2 @@ +Remove the previously-deprecated delegation of :func:`int` to +:meth:`~object.__trunc__`. diff --git a/Objects/abstract.c b/Objects/abstract.c index 8357175aa5591e..200817064e3cda 100644 --- a/Objects/abstract.c +++ b/Objects/abstract.c @@ -1521,7 +1521,6 @@ PyNumber_Long(PyObject *o) { PyObject *result; PyNumberMethods *m; - PyObject *trunc_func; Py_buffer view; if (o == NULL) { @@ -1563,37 +1562,6 @@ PyNumber_Long(PyObject *o) if (m && m->nb_index) { return PyNumber_Index(o); } - trunc_func = _PyObject_LookupSpecial(o, &_Py_ID(__trunc__)); - if (trunc_func) { - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "The delegation of int() to __trunc__ is deprecated.", 1)) { - Py_DECREF(trunc_func); - return NULL; - } - result = _PyObject_CallNoArgs(trunc_func); - Py_DECREF(trunc_func); - if (result == NULL || PyLong_CheckExact(result)) { - return result; - } - if (PyLong_Check(result)) { - Py_SETREF(result, _PyLong_Copy((PyLongObject *)result)); - return result; - } - /* __trunc__ is specified to return an Integral type, - but int() needs to return an int. */ - if (!PyIndex_Check(result)) { - PyErr_Format( - PyExc_TypeError, - "__trunc__ returned non-Integral (type %.200s)", - Py_TYPE(result)->tp_name); - Py_DECREF(result); - return NULL; - } - Py_SETREF(result, PyNumber_Index(result)); - return result; - } - if (PyErr_Occurred()) - return NULL; if (PyUnicode_Check(o)) /* The below check is done in PyLong_FromUnicodeObject(). */ From f3b89a63cbb6d46e5ed40d5cd9813cdf9189ce35 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Sun, 2 Jun 2024 10:19:02 -0400 Subject: [PATCH 18/72] gh-117657: Fix TSAN reported race in `_PyEval_IsGILEnabled`. (#119921) The GIL may be disabled concurrently with this call so we need to use a relaxed atomic load. --- Include/internal/pycore_ceval.h | 3 ++- Tools/tsan/suppressions_free_threading.txt | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index bd3ba1225f2597..26ede31b1904b4 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -145,7 +145,8 @@ extern void _PyEval_ReleaseLock(PyInterpreterState *, PyThreadState *, static inline int _PyEval_IsGILEnabled(PyThreadState *tstate) { - return tstate->interp->ceval.gil->enabled != 0; + struct _gil_runtime_state *gil = tstate->interp->ceval.gil; + return _Py_atomic_load_int_relaxed(&gil->enabled) != 0; } // Enable or disable the GIL used by the interpreter that owns tstate, which diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index f855e9ce2698a5..78dac6ee0c9068 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -65,7 +65,6 @@ race_top:list_get_item_ref race_top:make_pending_calls race_top:set_add_entry race_top:should_intern_string -race_top:_PyEval_IsGILEnabled race_top:llist_insert_tail race_top:_Py_slot_tp_getattr_hook race_top:add_threadstate From aa9fe98e0649f0a151942914ef4e2810ca6126c2 Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Sun, 2 Jun 2024 08:13:24 -0700 Subject: [PATCH 19/72] Improve documentation for typing.get_type_hints (#119928) - Explicit list of what it does that is different from "just return __annotations__" - Remove reference to PEP 563; adding the future import doesn't do anything to type aliases, and in general it will never make get_type_hints() less likely to fail. - Remove example, as the Annotated docs already have a similar example, and it's unbalanced to have one example about this one edge case but not about other behaviors of the function. Co-authored-by: Alex Waygood --- Doc/library/typing.rst | 54 ++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/Doc/library/typing.rst b/Doc/library/typing.rst index a8068609fcfbe7..94de64fcf835fc 100644 --- a/Doc/library/typing.rst +++ b/Doc/library/typing.rst @@ -3080,35 +3080,37 @@ Introspection helpers Return a dictionary containing type hints for a function, method, module or class object. - This is often the same as ``obj.__annotations__``. In addition, - forward references encoded as string literals are handled by evaluating - them in ``globals``, ``locals`` and (where applicable) - :ref:`type parameter ` namespaces. - For a class ``C``, return - a dictionary constructed by merging all the ``__annotations__`` along - ``C.__mro__`` in reverse order. - - The function recursively replaces all ``Annotated[T, ...]`` with ``T``, - unless ``include_extras`` is set to ``True`` (see :class:`Annotated` for - more information). For example: - - .. testcode:: - - class Student(NamedTuple): - name: Annotated[str, 'some marker'] - - assert get_type_hints(Student) == {'name': str} - assert get_type_hints(Student, include_extras=False) == {'name': str} - assert get_type_hints(Student, include_extras=True) == { - 'name': Annotated[str, 'some marker'] - } + This is often the same as ``obj.__annotations__``, but this function makes + the following changes to the annotations dictionary: + + * Forward references encoded as string literals or :class:`ForwardRef` + objects are handled by evaluating them in *globalns*, *localns*, and + (where applicable) *obj*'s :ref:`type parameter ` namespace. + If *globalns* or *localns* is not given, appropriate namespace + dictionaries are inferred from *obj*. + * ``None`` is replaced with :class:`types.NoneType`. + * If :func:`@no_type_check ` has been applied to *obj*, an + empty dictionary is returned. + * If *obj* is a class ``C``, the function returns a dictionary that merges + annotations from ``C``'s base classes with those on ``C`` directly. This + is done by traversing ``C.__mro__`` and iteratively combining + ``__annotations__`` dictionaries. Annotations on classes appearing + earlier in the :term:`method resolution order` always take precedence over + annotations on classes appearing later in the method resolution order. + * The function recursively replaces all occurrences of ``Annotated[T, ...]`` + with ``T``, unless *include_extras* is set to ``True`` (see + :class:`Annotated` for more information). + + See also :func:`inspect.get_annotations`, a lower-level function that + returns annotations more directly. .. note:: - :func:`get_type_hints` does not work with imported - :ref:`type aliases ` that include forward references. - Enabling postponed evaluation of annotations (:pep:`563`) may remove - the need for most forward references. + If any forward references in the annotations of *obj* are not resolvable + or are not valid Python code, this function will raise an exception + such as :exc:`NameError`. For example, this can happen with imported + :ref:`type aliases ` that include forward references, + or with names imported under :data:`if TYPE_CHECKING `. .. versionchanged:: 3.9 Added ``include_extras`` parameter as part of :pep:`593`. From bd6d4ed6454378e48dab06f50a9be0bae6baa3a2 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sun, 2 Jun 2024 20:39:19 +0100 Subject: [PATCH 20/72] GH-119054: Add "Reading and writing files" section to pathlib docs (#119524) Add a dedicated subsection for `open()`, `read_text()`, `read_bytes()`, `write_text()` and `write_bytes()`. --- Doc/library/pathlib.rst | 163 +++++++++++++++++++++------------------- 1 file changed, 84 insertions(+), 79 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index c72d409a8eb2d6..f37bb33321fa53 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1067,6 +1067,90 @@ Querying file type and status .. versionadded:: 3.5 +Reading and writing files +^^^^^^^^^^^^^^^^^^^^^^^^^ + + +.. method:: Path.open(mode='r', buffering=-1, encoding=None, errors=None, newline=None) + + Open the file pointed to by the path, like the built-in :func:`open` + function does:: + + >>> p = Path('setup.py') + >>> with p.open() as f: + ... f.readline() + ... + '#!/usr/bin/env python3\n' + + +.. method:: Path.read_text(encoding=None, errors=None, newline=None) + + Return the decoded contents of the pointed-to file as a string:: + + >>> p = Path('my_text_file') + >>> p.write_text('Text file contents') + 18 + >>> p.read_text() + 'Text file contents' + + The file is opened and then closed. The optional parameters have the same + meaning as in :func:`open`. + + .. versionadded:: 3.5 + + .. versionchanged:: 3.13 + The *newline* parameter was added. + + +.. method:: Path.read_bytes() + + Return the binary contents of the pointed-to file as a bytes object:: + + >>> p = Path('my_binary_file') + >>> p.write_bytes(b'Binary file contents') + 20 + >>> p.read_bytes() + b'Binary file contents' + + .. versionadded:: 3.5 + + +.. method:: Path.write_text(data, encoding=None, errors=None, newline=None) + + Open the file pointed to in text mode, write *data* to it, and close the + file:: + + >>> p = Path('my_text_file') + >>> p.write_text('Text file contents') + 18 + >>> p.read_text() + 'Text file contents' + + An existing file of the same name is overwritten. The optional parameters + have the same meaning as in :func:`open`. + + .. versionadded:: 3.5 + + .. versionchanged:: 3.10 + The *newline* parameter was added. + + +.. method:: Path.write_bytes(data) + + Open the file pointed to in bytes mode, write *data* to it, and close the + file:: + + >>> p = Path('my_binary_file') + >>> p.write_bytes(b'Binary file contents') + 20 + >>> p.read_bytes() + b'Binary file contents' + + An existing file of the same name is overwritten. + + .. versionadded:: 3.5 + + Other methods ^^^^^^^^^^^^^ @@ -1360,18 +1444,6 @@ example because the path doesn't exist). The *exist_ok* parameter was added. -.. method:: Path.open(mode='r', buffering=-1, encoding=None, errors=None, newline=None) - - Open the file pointed to by the path, like the built-in :func:`open` - function does:: - - >>> p = Path('setup.py') - >>> with p.open() as f: - ... f.readline() - ... - '#!/usr/bin/env python3\n' - - .. method:: Path.owner(*, follow_symlinks=True) Return the name of the user owning the file. :exc:`KeyError` is raised @@ -1388,37 +1460,6 @@ example because the path doesn't exist). The *follow_symlinks* parameter was added. -.. method:: Path.read_bytes() - - Return the binary contents of the pointed-to file as a bytes object:: - - >>> p = Path('my_binary_file') - >>> p.write_bytes(b'Binary file contents') - 20 - >>> p.read_bytes() - b'Binary file contents' - - .. versionadded:: 3.5 - - -.. method:: Path.read_text(encoding=None, errors=None, newline=None) - - Return the decoded contents of the pointed-to file as a string:: - - >>> p = Path('my_text_file') - >>> p.write_text('Text file contents') - 18 - >>> p.read_text() - 'Text file contents' - - The file is opened and then closed. The optional parameters have the same - meaning as in :func:`open`. - - .. versionadded:: 3.5 - - .. versionchanged:: 3.13 - The *newline* parameter was added. - .. method:: Path.readlink() Return the path to which the symbolic link points (as returned by @@ -1593,42 +1634,6 @@ example because the path doesn't exist). The *missing_ok* parameter was added. -.. method:: Path.write_bytes(data) - - Open the file pointed to in bytes mode, write *data* to it, and close the - file:: - - >>> p = Path('my_binary_file') - >>> p.write_bytes(b'Binary file contents') - 20 - >>> p.read_bytes() - b'Binary file contents' - - An existing file of the same name is overwritten. - - .. versionadded:: 3.5 - - -.. method:: Path.write_text(data, encoding=None, errors=None, newline=None) - - Open the file pointed to in text mode, write *data* to it, and close the - file:: - - >>> p = Path('my_text_file') - >>> p.write_text('Text file contents') - 18 - >>> p.read_text() - 'Text file contents' - - An existing file of the same name is overwritten. The optional parameters - have the same meaning as in :func:`open`. - - .. versionadded:: 3.5 - - .. versionchanged:: 3.10 - The *newline* parameter was added. - - .. _pathlib-pattern-language: Pattern language From 117a8acdab997b73ada822cce97815a86f839e15 Mon Sep 17 00:00:00 2001 From: Solomon Himelbloom <7608183+TechSolomon@users.noreply.github.com> Date: Sun, 2 Jun 2024 16:43:03 -0800 Subject: [PATCH 21/72] gh-109975: What's New in Python 3.13: fix broken link for `telnetlib` alternative (#119958) --- Doc/whatsnew/3.13.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 66626ac06428b9..903de3c04b4a07 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1334,7 +1334,7 @@ PEP 594: dead batteries (and other module removals) * :mod:`!sunau`. (Contributed by Victor Stinner in :gh:`104773`.) - * :mod:`!telnetlib`, use the projects :pypi:`telnetlib3 ` or + * :mod:`!telnetlib`, use the projects :pypi:`telnetlib3` or :pypi:`Exscript` instead. (Contributed by Victor Stinner in :gh:`104773`.) From 0594a27e5f1d87d59fa8a761dd8ca9df4e42816d Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Mon, 3 Jun 2024 12:22:41 +0900 Subject: [PATCH 22/72] gh-117657: Fix data races report by TSAN unicode-hash (gh-119907) --- Objects/unicodeobject.c | 19 +++++++++++-------- Tools/tsan/suppressions_free_threading.txt | 1 - 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index eb37b478cc4de1..12782754753ef5 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -1633,7 +1633,7 @@ unicode_modifiable(PyObject *unicode) assert(_PyUnicode_CHECK(unicode)); if (Py_REFCNT(unicode) != 1) return 0; - if (_PyUnicode_HASH(unicode) != -1) + if (FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyUnicode_HASH(unicode)) != -1) return 0; if (PyUnicode_CHECK_INTERNED(unicode)) return 0; @@ -10901,9 +10901,10 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right) if (PyUnicode_CHECK_INTERNED(left)) return 0; - assert(_PyUnicode_HASH(right_uni) != -1); - Py_hash_t hash = _PyUnicode_HASH(left); - if (hash != -1 && hash != _PyUnicode_HASH(right_uni)) { + Py_hash_t right_hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyUnicode_HASH(right_uni)); + assert(right_hash != -1); + Py_hash_t hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyUnicode_HASH(left)); + if (hash != -1 && hash != right_hash) { return 0; } @@ -11388,12 +11389,14 @@ unicode_hash(PyObject *self) #ifdef Py_DEBUG assert(_Py_HashSecret_Initialized); #endif - if (_PyUnicode_HASH(self) != -1) - return _PyUnicode_HASH(self); - + Py_hash_t hash = FT_ATOMIC_LOAD_SSIZE_RELAXED(_PyUnicode_HASH(self)); + if (hash != -1) { + return hash; + } x = _Py_HashBytes(PyUnicode_DATA(self), PyUnicode_GET_LENGTH(self) * PyUnicode_KIND(self)); - _PyUnicode_HASH(self) = x; + + FT_ATOMIC_STORE_SSIZE_RELAXED(_PyUnicode_HASH(self), x); return x; } diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index 78dac6ee0c9068..ff9c8036e92fe7 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -49,7 +49,6 @@ race_top:set_discard_entry race_top:set_inheritable race_top:start_the_world race_top:tstate_set_detached -race_top:unicode_hash race_top:Py_SET_TYPE race_top:_PyDict_CheckConsistency race_top:_PyImport_AcquireLock From 8e6321efd72d12263398994e59c5216edcada3c0 Mon Sep 17 00:00:00 2001 From: wookie184 Date: Mon, 3 Jun 2024 07:05:57 +0100 Subject: [PATCH 23/72] gh-119961: Fix test workflow status badge in README (#119962) Co-authored-by: Nikita Sobolev --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index e3163c5ff636ab..7dd3660b198784 100644 --- a/README.rst +++ b/README.rst @@ -1,7 +1,7 @@ This is Python version 3.14.0 alpha 0 ===================================== -.. image:: https://github.com/python/cpython/workflows/Tests/badge.svg +.. image:: https://github.com/python/cpython/actions/workflows/build.yml/badge.svg?branch=main&event=push :alt: CPython build status on GitHub Actions :target: https://github.com/python/cpython/actions From 3ea9b92086240b2f38a74c6945e7a723b480cefe Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 3 Jun 2024 08:45:20 +0200 Subject: [PATCH 24/72] gh-119396: Optimize unicode_decode_utf8_writer() (#119957) Optimize unicode_decode_utf8_writer() Take the ascii_decode() fast-path even if dest is not aligned on size_t bytes. --- Objects/unicodeobject.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 12782754753ef5..53160f1799f2cc 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -4702,8 +4702,9 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest) const char *p = start; #if SIZEOF_SIZE_T <= SIZEOF_VOID_P - assert(_Py_IS_ALIGNED(dest, ALIGNOF_SIZE_T)); - if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) { + if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T) + && _Py_IS_ALIGNED(dest, ALIGNOF_SIZE_T)) + { /* Fast path, see in STRINGLIB(utf8_decode) for an explanation. */ /* Help allocation */ @@ -4948,9 +4949,7 @@ unicode_decode_utf8_writer(_PyUnicodeWriter *writer, const char *end = s + size; Py_ssize_t decoded = 0; Py_UCS1 *dest = (Py_UCS1*)writer->data + writer->pos * writer->kind; - if (writer->kind == PyUnicode_1BYTE_KIND - && _Py_IS_ALIGNED(dest, ALIGNOF_SIZE_T)) - { + if (writer->kind == PyUnicode_1BYTE_KIND) { decoded = ascii_decode(s, end, dest); writer->pos += decoded; From 52586f930f62bd80374f0f240a4ecce0c0238174 Mon Sep 17 00:00:00 2001 From: Radislav Chugunov <52372310+chgnrdv@users.noreply.github.com> Date: Mon, 3 Jun 2024 10:47:36 +0300 Subject: [PATCH 25/72] gh-119506: fix `_io.TextIOWrapper.write()` write during flush (#119507) Co-authored-by: Inada Naoki --- Lib/test/test_io.py | 22 +++++++++++++ ...-05-24-14-32-24.gh-issue-119506.-nMNqq.rst | 1 + Modules/_io/textio.c | 31 +++++++++++++------ 3 files changed, 45 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index e5cb08c2cdd04c..1ca3edac8c8dc9 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -4016,6 +4016,28 @@ def write(self, data): t.write("x"*chunk_size) self.assertEqual([b"abcdef", b"ghi", b"x"*chunk_size], buf._write_stack) + def test_issue119506(self): + chunk_size = 8192 + + class MockIO(self.MockRawIO): + written = False + def write(self, data): + if not self.written: + self.written = True + t.write("middle") + return super().write(data) + + buf = MockIO() + t = self.TextIOWrapper(buf) + t.write("abc") + t.write("def") + # writing data which size >= chunk_size cause flushing buffer before write. + t.write("g" * chunk_size) + t.flush() + + self.assertEqual([b"abcdef", b"middle", b"g"*chunk_size], + buf._write_stack) + class PyTextIOWrapperTest(TextIOWrapperTest): io = pyio diff --git a/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst b/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst new file mode 100644 index 00000000000000..f9b764ae0c49b3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-24-14-32-24.gh-issue-119506.-nMNqq.rst @@ -0,0 +1 @@ +Fix :meth:`!io.TextIOWrapper.write` method breaks internal buffer when the method is called again during flushing internal buffer. diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 9dff8eafb2560f..c162d8106ec1fd 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1719,16 +1719,26 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text) bytes_len = PyBytes_GET_SIZE(b); } - if (self->pending_bytes == NULL) { - self->pending_bytes_count = 0; - self->pending_bytes = b; - } - else if (self->pending_bytes_count + bytes_len > self->chunk_size) { - // Prevent to concatenate more than chunk_size data. - if (_textiowrapper_writeflush(self) < 0) { - Py_DECREF(b); - return NULL; + // We should avoid concatinating huge data. + // Flush the buffer before adding b to the buffer if b is not small. + // https://github.com/python/cpython/issues/87426 + if (bytes_len >= self->chunk_size) { + // _textiowrapper_writeflush() calls buffer.write(). + // self->pending_bytes can be appended during buffer->write() + // or other thread. + // We need to loop until buffer becomes empty. + // https://github.com/python/cpython/issues/118138 + // https://github.com/python/cpython/issues/119506 + while (self->pending_bytes != NULL) { + if (_textiowrapper_writeflush(self) < 0) { + Py_DECREF(b); + return NULL; + } } + } + + if (self->pending_bytes == NULL) { + assert(self->pending_bytes_count == 0); self->pending_bytes = b; } else if (!PyList_CheckExact(self->pending_bytes)) { @@ -1737,6 +1747,9 @@ _io_TextIOWrapper_write_impl(textio *self, PyObject *text) Py_DECREF(b); return NULL; } + // Since Python 3.12, allocating GC object won't trigger GC and release + // GIL. See https://github.com/python/cpython/issues/97922 + assert(!PyList_CheckExact(self->pending_bytes)); PyList_SET_ITEM(list, 0, self->pending_bytes); PyList_SET_ITEM(list, 1, b); self->pending_bytes = list; From 84c3191954b40e090db15da49a59fcc40afe34fd Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Mon, 3 Jun 2024 10:50:29 +0300 Subject: [PATCH 26/72] gh-118827: Remove `Quoter` from `urllib.parse` (#118828) Co-authored-by: Shantanu <12621235+hauntsaninja@users.noreply.github.com> Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Doc/whatsnew/3.14.rst | 7 +++++++ Lib/test/test_urlparse.py | 7 ------- Lib/urllib/parse.py | 8 -------- .../2024-05-09-12-33-25.gh-issue-118827.JrzHz1.rst | 3 +++ 4 files changed, 10 insertions(+), 15 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-05-09-12-33-25.gh-issue-118827.JrzHz1.rst diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 9f471d24909215..47f3e30942397f 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -218,6 +218,13 @@ typing * Remove :class:`!typing.ByteString`. It had previously raised a :exc:`DeprecationWarning` since Python 3.12. +urllib +------ + +* Remove deprecated :class:`!Quoter` class from :mod:`urllib.parse`. + It had previously raised a :exc:`DeprecationWarning` since Python 3.11. + (Contributed by Nikita Sobolev in :gh:`118827`.) + Others ------ diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 4faad733245df9..d6c83a75c1c03a 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -1507,13 +1507,6 @@ def test_unwrap(self): class DeprecationTest(unittest.TestCase): - - def test_Quoter_deprecation(self): - with self.assertWarns(DeprecationWarning) as cm: - old_class = urllib.parse.Quoter - self.assertIs(old_class, urllib.parse._Quoter) - self.assertIn('Quoter will be removed', str(cm.warning)) - def test_splittype_deprecation(self): with self.assertWarns(DeprecationWarning) as cm: urllib.parse.splittype('') diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py index 3932bb99c7e7d1..8f724f907d4217 100644 --- a/Lib/urllib/parse.py +++ b/Lib/urllib/parse.py @@ -822,14 +822,6 @@ def unquote_plus(string, encoding='utf-8', errors='replace'): b'_.-~') _ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE) -def __getattr__(name): - if name == 'Quoter': - warnings.warn('Deprecated in 3.11. ' - 'urllib.parse.Quoter will be removed in Python 3.14. ' - 'It was not intended to be a public API.', - DeprecationWarning, stacklevel=2) - return _Quoter - raise AttributeError(f'module {__name__!r} has no attribute {name!r}') class _Quoter(dict): """A mapping from bytes numbers (in range(0,256)) to strings. diff --git a/Misc/NEWS.d/next/Library/2024-05-09-12-33-25.gh-issue-118827.JrzHz1.rst b/Misc/NEWS.d/next/Library/2024-05-09-12-33-25.gh-issue-118827.JrzHz1.rst new file mode 100644 index 00000000000000..40612dd93bd6da --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-09-12-33-25.gh-issue-118827.JrzHz1.rst @@ -0,0 +1,3 @@ +Remove deprecated :class:`!Quoter` class from :mod:`urllib.parse`. It had +previously raised a :exc:`DeprecationWarning` since Python 3.11. +Patch by Nikita Sobolev. From 1e5f615086d23c71a9701abe641b5241e4345234 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Mon, 3 Jun 2024 10:52:35 +0300 Subject: [PATCH 27/72] gh-116991: Improve `pygen --help` for `python` subparser (#116992) --- Tools/peg_generator/pegen/__main__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Tools/peg_generator/pegen/__main__.py b/Tools/peg_generator/pegen/__main__.py index 262c8a6db68f6e..0b0b4b291c2b0e 100755 --- a/Tools/peg_generator/pegen/__main__.py +++ b/Tools/peg_generator/pegen/__main__.py @@ -107,7 +107,10 @@ def generate_python_code( help="Suppress code emission for rule actions", ) -python_parser = subparsers.add_parser("python", help="Generate Python code") +python_parser = subparsers.add_parser( + "python", + help="Generate Python code, needs grammar definition with Python actions", +) python_parser.set_defaults(func=generate_python_code) python_parser.add_argument("grammar_filename", help="Grammar description") python_parser.add_argument( From 4223f1d828d3a3e1c8d803e3fdd420afd7d85faf Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 3 Jun 2024 10:15:04 +0200 Subject: [PATCH 28/72] gh-119856: Support exiting help() with just "exit" (#119858) --- Lib/pydoc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/pydoc.py b/Lib/pydoc.py index 5d854c50f40d6e..2ba597d01f245e 100644 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -2007,7 +2007,7 @@ def interact(self): if (len(request) > 2 and request[0] == request[-1] in ("'", '"') and request[0] not in request[1:-1]): request = request[1:-1] - if request.lower() in ('q', 'quit'): break + if request.lower() in ('q', 'quit', 'exit'): break if request == 'help': self.intro() else: @@ -2059,7 +2059,7 @@ def intro(self): enter "modules spam". To quit this help utility and return to the interpreter, -enter "q" or "quit". +enter "q", "quit" or "exit". '''.format('%d.%d' % sys.version_info[:2])) def list(self, items, columns=4, width=80): From 70934fb46982ad2ae677cca485a730b39635919c Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 3 Jun 2024 10:26:13 +0200 Subject: [PATCH 29/72] gh-112026: Deprecate _PyDict_GetItemStringWithError() function (#119855) --- Include/cpython/dictobject.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Include/cpython/dictobject.h b/Include/cpython/dictobject.h index 3fd23b9313c453..e2861c963266ea 100644 --- a/Include/cpython/dictobject.h +++ b/Include/cpython/dictobject.h @@ -37,7 +37,8 @@ typedef struct { PyAPI_FUNC(PyObject *) _PyDict_GetItem_KnownHash(PyObject *mp, PyObject *key, Py_hash_t hash); -PyAPI_FUNC(PyObject *) _PyDict_GetItemStringWithError(PyObject *, const char *); +// PyDict_GetItemStringRef() can be used instead +Py_DEPRECATED(3.14) PyAPI_FUNC(PyObject *) _PyDict_GetItemStringWithError(PyObject *, const char *); PyAPI_FUNC(PyObject *) PyDict_SetDefault( PyObject *mp, PyObject *key, PyObject *defaultobj); From d7fcaa73b71f4c49c1b24cac04c9b6f1cf69b944 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 3 Jun 2024 12:29:01 +0300 Subject: [PATCH 30/72] gh-119838: Treat Fraction as a real value in mixed arithmetic operations with complex (GH-119839) --- Lib/fractions.py | 4 ++-- Lib/test/test_fractions.py | 5 +---- .../Library/2024-05-31-13-56-21.gh-issue-119838.H6XHlE.rst | 3 +++ 3 files changed, 6 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-05-31-13-56-21.gh-issue-119838.H6XHlE.rst diff --git a/Lib/fractions.py b/Lib/fractions.py index 95adccd86e33a0..565503911bbe97 100644 --- a/Lib/fractions.py +++ b/Lib/fractions.py @@ -668,7 +668,7 @@ def forward(a, b): elif isinstance(b, float): return fallback_operator(float(a), b) elif handle_complex and isinstance(b, complex): - return fallback_operator(complex(a), b) + return fallback_operator(float(a), b) else: return NotImplemented forward.__name__ = '__' + fallback_operator.__name__ + '__' @@ -681,7 +681,7 @@ def reverse(b, a): elif isinstance(a, numbers.Real): return fallback_operator(float(a), float(b)) elif handle_complex and isinstance(a, numbers.Complex): - return fallback_operator(complex(a), complex(b)) + return fallback_operator(complex(a), float(b)) else: return NotImplemented reverse.__name__ = '__r' + fallback_operator.__name__ + '__' diff --git a/Lib/test/test_fractions.py b/Lib/test/test_fractions.py index 3c7780e40db096..71865f68eb0f12 100644 --- a/Lib/test/test_fractions.py +++ b/Lib/test/test_fractions.py @@ -806,10 +806,7 @@ def testMixedMultiplication(self): self.assertTypedEquals(F(3, 2) * Polar(4, 2), Polar(F(6, 1), 2)) self.assertTypedEquals(F(3, 2) * Polar(4.0, 2), Polar(6.0, 2)) self.assertTypedEquals(F(3, 2) * Rect(4, 3), Rect(F(6, 1), F(9, 2))) - with self.assertWarnsRegex(DeprecationWarning, - "argument 'real' must be a real number, not complex"): - self.assertTypedEquals(F(3, 2) * RectComplex(4, 3), - RectComplex(6.0+0j, 4.5+0j)) + self.assertTypedEquals(F(3, 2) * RectComplex(4, 3), RectComplex(6.0, 4.5)) self.assertRaises(TypeError, operator.mul, Polar(4, 2), F(3, 2)) self.assertTypedEquals(Rect(4, 3) * F(3, 2), 6.0 + 4.5j) self.assertEqual(F(3, 2) * SymbolicComplex('X'), SymbolicComplex('3/2 * X')) diff --git a/Misc/NEWS.d/next/Library/2024-05-31-13-56-21.gh-issue-119838.H6XHlE.rst b/Misc/NEWS.d/next/Library/2024-05-31-13-56-21.gh-issue-119838.H6XHlE.rst new file mode 100644 index 00000000000000..17a87327b5b1d6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-31-13-56-21.gh-issue-119838.H6XHlE.rst @@ -0,0 +1,3 @@ +In mixed arithmetic operations with :class:`~fractions.Fraction` and +complex, the fraction is now converted to :class:`float` instead of +:class:`complex`. From cae4c80714e7266772025676977e2a1b98cdcd7b Mon Sep 17 00:00:00 2001 From: Awbert <119314310+SweetyAngel@users.noreply.github.com> Date: Mon, 3 Jun 2024 12:31:02 +0300 Subject: [PATCH 31/72] gh-119968: Improved monitoring c-api docs (#119969) --- Doc/c-api/monitoring.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/c-api/monitoring.rst b/Doc/c-api/monitoring.rst index ec743b98ba7024..b34035b5548f02 100644 --- a/Doc/c-api/monitoring.rst +++ b/Doc/c-api/monitoring.rst @@ -2,7 +2,7 @@ .. _monitoring: -Monitorong C API +Monitoring C API ================ Added in version 3.13. From 367adc91fb9834eb35b168048fd54705621c3f21 Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Mon, 3 Jun 2024 10:36:20 +0100 Subject: [PATCH 32/72] gh-119786: move exception handling doc to InternalDocs (#119815) --- InternalDocs/{index.md => README.md} | 4 + InternalDocs/exception_handling.md | 201 +++++++++++++++++++++++++++ Objects/exception_handling_notes.txt | 182 ------------------------ 3 files changed, 205 insertions(+), 182 deletions(-) rename InternalDocs/{index.md => README.md} (92%) create mode 100644 InternalDocs/exception_handling.md delete mode 100644 Objects/exception_handling_notes.txt diff --git a/InternalDocs/index.md b/InternalDocs/README.md similarity index 92% rename from InternalDocs/index.md rename to InternalDocs/README.md index 32b66a254bcf2c..e69e27d1542990 100644 --- a/InternalDocs/index.md +++ b/InternalDocs/README.md @@ -10,3 +10,7 @@ to hold for other implementations of the Python language. The core dev team attempts to keep this documentation up to date. If it is not, please report that through the [issue tracker](https://github.com/python/cpython/issues). + + +[Exception Handling](exception_handling.md) + diff --git a/InternalDocs/exception_handling.md b/InternalDocs/exception_handling.md new file mode 100644 index 00000000000000..22d9c3bf7933f1 --- /dev/null +++ b/InternalDocs/exception_handling.md @@ -0,0 +1,201 @@ +Description of exception handling +--------------------------------- + +Python uses a technique known as "zero-cost" exception handling, which +minimizes the cost of supporting exceptions. In the common case (where +no exception is raised) the cost is reduced to zero (or close to zero). +The cost of raising an exception is increased, but not by much. + +The following code: + +``` +try: + g(0) +except: + res = "fail" + +``` + +compiles into intermediate code like the following: + +``` + RESUME 0 + + 1 SETUP_FINALLY 8 (to L1) + + 2 LOAD_NAME 0 (g) + PUSH_NULL + LOAD_CONST 0 (0) + CALL 1 + POP_TOP + POP_BLOCK + + -- L1: PUSH_EXC_INFO + + 3 POP_TOP + + 4 LOAD_CONST 1 ('fail') + STORE_NAME 1 (res) +``` + +`SETUP_FINALLY` and `POP_BLOCK` are pseudo-instructions. This means +that they can appear in intermediate code but they are not bytecode +instructions. `SETUP_FINALLY` specifies that henceforth, exceptions +are handled by the code at label L1. The `POP_BLOCK` instruction +reverses the effect of the last `SETUP` instruction, so that the +active exception handler reverts to what it was before. + +`SETUP_FINALLY` and `POP_BLOCK` have no effect when no exceptions +are raised. The idea of zero-cost exception handling is to replace +these pseudo-instructions by metadata which is stored alongside the +bytecode, and which is inspected only when an exception occurs. +This metadata is the exception table, and it is stored in the code +object's `co_exceptiontable` field. + +When the pseudo-instructions are translated into bytecode, +`SETUP_FINALLY` and `POP_BLOCK` are removed, and the exception +table is constructed, mapping each instruction to the exception +handler that covers it, if any. Instructions which are not +covered by any exception handler within the same code object's +bytecode, do not appear in the exception table at all. + +For the code object in our example above, the table has a single +entry specifying that all instructions that were between the +`SETUP_FINALLY` and the `POP_BLOCK` are covered by the exception +handler located at label `L1`. + +Handling Exceptions +------------------- + +At runtime, when an exception occurs, the interpreter looks up +the offset of the current instruction in the exception table. If +it finds a handler, control flow transfers to it. Otherwise, the +exception bubbles up to the caller, and the caller's frame is +checked for a handler covering the `CALL` instruction. This +repeats until a handler is found or the topmost frame is reached. +If no handler is found, the program terminates. During unwinding, +the traceback is constructed as each frame is added to it. + +Along with the location of an exception handler, each entry of the +exception table also contains the stack depth of the `try` instruction +and a boolean `lasti` value, which indicates whether the instruction +offset of the raising instruction should be pushed to the stack. + +Handling an exception, once an exception table entry is found, consists +of the following steps: + + 1. pop values from the stack until it matches the stack depth for the handler. + 2. if `lasti` is true, then push the offset that the exception was raised at. + 3. push the exception to the stack. + 4. jump to the target offset and resume execution. + + +Reraising Exceptions and `lasti` +-------------------------------- + +The purpose of pushing `lasti` to the stack is for cases where an exception +needs to be re-raised, and be associated with the original instruction that +raised it. This happens, for example, at the end of a `finally` block, when +any in-flight exception needs to be propagated on. As the frame's instruction +pointer now points into the finally block, a `RERAISE` instruction +(with `oparg > 0`) sets it to the `lasti` value from the stack. + +Format of the exception table +----------------------------- + +Conceptually, the exception table consists of a sequence of 5-tuples: +``` + 1. `start-offset` (inclusive) + 2. `end-offset` (exclusive) + 3. `target` + 4. `stack-depth` + 5. `push-lasti` (boolean) +``` + +All offsets and lengths are in code units, not bytes. + +We want the format to be compact, but quickly searchable. +For it to be compact, it needs to have variable sized entries so that we can store common (small) offsets compactly, but handle large offsets if needed. +For it to be searchable quickly, we need to support binary search giving us log(n) performance in all cases. +Binary search typically assumes fixed size entries, but that is not necessary, as long as we can identify the start of an entry. + +It is worth noting that the size (end-start) is always smaller than the end, so we encode the entries as: + `start, size, target, depth, push-lasti`. + +Also, sizes are limited to 2**30 as the code length cannot exceed 2**31 and each code unit takes 2 bytes. +It also happens that depth is generally quite small. + +So, we need to encode: +``` + `start` (up to 30 bits) + `size` (up to 30 bits) + `target` (up to 30 bits) + `depth` (up to ~8 bits) + `lasti` (1 bit) +``` + +We need a marker for the start of the entry, so the first byte of entry will have the most significant bit set. +Since the most significant bit is reserved for marking the start of an entry, we have 7 bits per byte to encode offsets. +Encoding uses a standard varint encoding, but with only 7 bits instead of the usual 8. +The 8 bits of a byte are (msb left) SXdddddd where S is the start bit. X is the extend bit meaning that the next byte is required to extend the offset. + +In addition, we combine `depth` and `lasti` into a single value, `((depth<<1)+lasti)`, before encoding. + +For example, the exception entry: +``` + `start`: 20 + `end`: 28 + `target`: 100 + `depth`: 3 + `lasti`: False +``` + +is encoded by first converting to the more compact four value form: +``` + `start`: 20 + `size`: 8 + `target`: 100 + `depth<<1+lasti`: 6 +``` + +which is then encoded as: +``` + 148 (MSB + 20 for start) + 8 (size) + 65 (Extend bit + 1) + 36 (Remainder of target, 100 == (1<<6)+36) + 6 +``` + +for a total of five bytes. + + +Script to parse the exception table +----------------------------------- + +``` +def parse_varint(iterator): + b = next(iterator) + val = b & 63 + while b&64: + val <<= 6 + b = next(iterator) + val |= b&63 + return val +``` +``` +def parse_exception_table(code): + iterator = iter(code.co_exceptiontable) + try: + while True: + start = parse_varint(iterator)*2 + length = parse_varint(iterator)*2 + end = start + length - 2 # Present as inclusive, not exclusive + target = parse_varint(iterator)*2 + dl = parse_varint(iterator) + depth = dl >> 1 + lasti = bool(dl&1) + yield start, end, target, depth, lasti + except StopIteration: + return +``` diff --git a/Objects/exception_handling_notes.txt b/Objects/exception_handling_notes.txt deleted file mode 100644 index 387ef935ce739e..00000000000000 --- a/Objects/exception_handling_notes.txt +++ /dev/null @@ -1,182 +0,0 @@ -Description of exception handling in Python 3.11 ------------------------------------------------- - -Python 3.11 uses what is known as "zero-cost" exception handling. -Prior to 3.11, exceptions were handled by a runtime stack of "blocks". - -In zero-cost exception handling, the cost of supporting exceptions is minimized. -In the common case (where no exception is raised) the cost is reduced -to zero (or close to zero). -The cost of raising an exception is increased, but not by much. - -The following code: - -def f(): - try: - g(0) - except: - return "fail" - -compiles as follows in 3.10: - - 2 0 SETUP_FINALLY 7 (to 16) - - 3 2 LOAD_GLOBAL 0 (g) - 4 LOAD_CONST 1 (0) - 6 CALL_NO_KW 1 - 8 POP_TOP - 10 POP_BLOCK - 12 LOAD_CONST 0 (None) - 14 RETURN_VALUE - - 4 >> 16 POP_TOP - 18 POP_TOP - 20 POP_TOP - - 5 22 POP_EXCEPT - 24 LOAD_CONST 3 ('fail') - 26 RETURN_VALUE - -Note the explicit instructions to push and pop from the "block" stack: -SETUP_FINALLY and POP_BLOCK. - -In 3.11, the SETUP_FINALLY and POP_BLOCK are eliminated, replaced with -a table to determine where to jump to when an exception is raised. - - 1 0 RESUME 0 - - 2 2 NOP - - 3 4 LOAD_GLOBAL 1 (g + NULL) - 16 LOAD_CONST 1 (0) - 18 PRECALL 1 - 22 CALL 1 - 32 POP_TOP - 34 LOAD_CONST 0 (None) - 36 RETURN_VALUE - >> 38 PUSH_EXC_INFO - - 4 40 POP_TOP - - 5 42 POP_EXCEPT - 44 LOAD_CONST 2 ('fail') - 46 RETURN_VALUE - >> 48 COPY 3 - 50 POP_EXCEPT - 52 RERAISE 1 -ExceptionTable: - 4 to 32 -> 38 [0] - 38 to 40 -> 48 [1] lasti - -(Note this code is from 3.11, later versions may have slightly different bytecode.) - -If an instruction raises an exception then its offset is used to find the target to jump to. -For example, the CALL at offset 22, falls into the range 4 to 32. -So, if g() raises an exception, then control jumps to offset 38. - - -Unwinding ---------- - -When an exception is raised, the current instruction offset is used to find following: -target to jump to, stack depth, and 'lasti', which determines whether the instruction -offset of the raising instruction should be pushed. - -This information is stored in the exception table, described below. - -If there is no relevant entry, the exception bubbles up to the caller. - -If there is an entry, then: - 1. pop values from the stack until it matches the stack depth for the handler. - 2. if 'lasti' is true, then push the offset that the exception was raised at. - 3. push the exception to the stack. - 4. jump to the target offset and resume execution. - - -Format of the exception table ------------------------------ - -Conceptually, the exception table consists of a sequence of 5-tuples: - 1. start-offset (inclusive) - 2. end-offset (exclusive) - 3. target - 4. stack-depth - 5. push-lasti (boolean) - -All offsets and lengths are in instructions, not bytes. - -We want the format to be compact, but quickly searchable. -For it to be compact, it needs to have variable sized entries so that we can store common (small) offsets compactly, but handle large offsets if needed. -For it to be searchable quickly, we need to support binary search giving us log(n) performance in all cases. -Binary search typically assumes fixed size entries, but that is not necessary, as long as we can identify the start of an entry. - -It is worth noting that the size (end-start) is always smaller than the end, so we encode the entries as: - start, size, target, depth, push-lasti - -Also, sizes are limited to 2**30 as the code length cannot exceed 2**31 and each instruction takes 2 bytes. -It also happens that depth is generally quite small. - -So, we need to encode: - start (up to 30 bits) - size (up to 30 bits) - target (up to 30 bits) - depth (up to ~8 bits) - lasti (1 bit) - -We need a marker for the start of the entry, so the first byte of entry will have the most significant bit set. -Since the most significant bit is reserved for marking the start of an entry, we have 7 bits per byte to encode offsets. -Encoding uses a standard varint encoding, but with only 7 bits instead of the usual 8. -The 8 bits of a bit are (msb left) SXdddddd where S is the start bit. X is the extend bit meaning that the next byte is required to extend the offset. - -In addition, we will combine depth and lasti into a single value, ((depth<<1)+lasti), before encoding. - -For example, the exception entry: - start: 20 - end: 28 - target: 100 - depth: 3 - lasti: False - -is encoded first by converting to the more compact four value form: - start: 20 - size: 8 - target: 100 - depth<<1+lasti: 6 - -which is then encoded as: - 148 (MSB + 20 for start) - 8 (size) - 65 (Extend bit + 1) - 36 (Remainder of target, 100 == (1<<6)+36) - 6 - -for a total of five bytes. - - - -Script to parse the exception table ------------------------------------ - -def parse_varint(iterator): - b = next(iterator) - val = b & 63 - while b&64: - val <<= 6 - b = next(iterator) - val |= b&63 - return val - -def parse_exception_table(code): - iterator = iter(code.co_exceptiontable) - try: - while True: - start = parse_varint(iterator)*2 - length = parse_varint(iterator)*2 - end = start + length - 2 # Present as inclusive, not exclusive - target = parse_varint(iterator)*2 - dl = parse_varint(iterator) - depth = dl >> 1 - lasti = bool(dl&1) - yield start, end, target, depth, lasti - except StopIteration: - return From 61d3ab32da92e70bb97a544d76ef2b837501024f Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Mon, 3 Jun 2024 15:06:31 +0300 Subject: [PATCH 33/72] gh-116560: Add PyLong_GetSign() public function (#116561) Co-authored-by: Victor Stinner --- Doc/c-api/long.rst | 13 +++++++++++++ Doc/whatsnew/3.14.rst | 3 +++ Include/cpython/longobject.h | 10 +++++++--- Lib/test/test_capi/test_long.py | 16 ++++++++++++++++ ...024-03-10-14-55-51.gh-issue-116560.x2mZaO.rst | 1 + Modules/_testcapi/long.c | 14 ++++++++++++++ Objects/longobject.c | 12 ++++++++++++ 7 files changed, 66 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2024-03-10-14-55-51.gh-issue-116560.x2mZaO.rst diff --git a/Doc/c-api/long.rst b/Doc/c-api/long.rst index 522c028cfb8d40..a0e111af5996d7 100644 --- a/Doc/c-api/long.rst +++ b/Doc/c-api/long.rst @@ -494,6 +494,19 @@ distinguished from a number. Use :c:func:`PyErr_Occurred` to disambiguate. .. versionadded:: 3.13 +.. c:function:: int PyLong_GetSign(PyObject *obj, int *sign) + + Get the sign of the integer object *obj*. + + On success, set *\*sign* to the integer sign (0, -1 or +1 for zero, negative or + positive integer, respectively) and return 0. + + On failure, return -1 with an exception set. This function always succeeds + if *obj* is a :c:type:`PyLongObject` or its subtype. + + .. versionadded:: 3.14 + + .. c:function:: int PyUnstable_Long_IsCompact(const PyLongObject* op) Return 1 if *op* is compact, 0 otherwise. diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 47f3e30942397f..b2dd80b64a691a 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -255,6 +255,9 @@ C API Changes New Features ------------ +* Add :c:func:`PyLong_GetSign` function to get the sign of :class:`int` objects. + (Contributed by Sergey B Kirpichev in :gh:`116560`.) + Porting to Python 3.14 ---------------------- diff --git a/Include/cpython/longobject.h b/Include/cpython/longobject.h index 96815938c8277a..19a6722d07734a 100644 --- a/Include/cpython/longobject.h +++ b/Include/cpython/longobject.h @@ -55,9 +55,13 @@ PyAPI_FUNC(PyObject*) PyLong_FromUnsignedNativeBytes(const void* buffer, PyAPI_FUNC(int) PyUnstable_Long_IsCompact(const PyLongObject* op); PyAPI_FUNC(Py_ssize_t) PyUnstable_Long_CompactValue(const PyLongObject* op); -// _PyLong_Sign. Return 0 if v is 0, -1 if v < 0, +1 if v > 0. -// v must not be NULL, and must be a normalized long. -// There are no error cases. +/* PyLong_GetSign. Get the sign of an integer object: + 0, -1 or +1 for zero, negative or positive integer, respectively. + + - On success, set '*sign' to the integer sign, and return 0. + - On failure, set an exception, and return -1. */ +PyAPI_FUNC(int) PyLong_GetSign(PyObject *v, int *sign); + PyAPI_FUNC(int) _PyLong_Sign(PyObject *v); /* _PyLong_NumBits. Return the number of bits needed to represent the diff --git a/Lib/test/test_capi/test_long.py b/Lib/test/test_capi/test_long.py index 83f894e552f983..06a29b5a0505b4 100644 --- a/Lib/test/test_capi/test_long.py +++ b/Lib/test/test_capi/test_long.py @@ -721,6 +721,22 @@ def test_long_fromnativebytes(self): self.assertEqual(expect_u, fromnativebytes(v_be, n, 4, 1), f"PyLong_FromNativeBytes(buffer, {n}, )") + def test_long_getsign(self): + # Test PyLong_GetSign() + getsign = _testcapi.pylong_getsign + self.assertEqual(getsign(1), 1) + self.assertEqual(getsign(123456), 1) + self.assertEqual(getsign(-2), -1) + self.assertEqual(getsign(0), 0) + self.assertEqual(getsign(True), 1) + self.assertEqual(getsign(IntSubclass(-11)), -1) + self.assertEqual(getsign(False), 0) + + self.assertRaises(TypeError, getsign, 1.0) + self.assertRaises(TypeError, getsign, Index(123)) + + # CRASHES getsign(NULL) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/C API/2024-03-10-14-55-51.gh-issue-116560.x2mZaO.rst b/Misc/NEWS.d/next/C API/2024-03-10-14-55-51.gh-issue-116560.x2mZaO.rst new file mode 100644 index 00000000000000..9bcadfd9247f78 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2024-03-10-14-55-51.gh-issue-116560.x2mZaO.rst @@ -0,0 +1 @@ +Add :c:func:`PyLong_GetSign` function. Patch by Sergey B Kirpichev. diff --git a/Modules/_testcapi/long.c b/Modules/_testcapi/long.c index 769c3909ea3fb1..2b5e85d5707522 100644 --- a/Modules/_testcapi/long.c +++ b/Modules/_testcapi/long.c @@ -92,6 +92,19 @@ pylong_fromnativebytes(PyObject *module, PyObject *args) return res; } + +static PyObject * +pylong_getsign(PyObject *module, PyObject *arg) +{ + int sign; + NULLABLE(arg); + if (PyLong_GetSign(arg, &sign) == -1) { + return NULL; + } + return PyLong_FromLong(sign); +} + + static PyObject * pylong_aspid(PyObject *module, PyObject *arg) { @@ -109,6 +122,7 @@ static PyMethodDef test_methods[] = { {"pylong_fromunicodeobject", pylong_fromunicodeobject, METH_VARARGS}, {"pylong_asnativebytes", pylong_asnativebytes, METH_VARARGS}, {"pylong_fromnativebytes", pylong_fromnativebytes, METH_VARARGS}, + {"pylong_getsign", pylong_getsign, METH_O}, {"pylong_aspid", pylong_aspid, METH_O}, {NULL}, }; diff --git a/Objects/longobject.c b/Objects/longobject.c index 2dc2cb7a47b460..054689471e7aa9 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -770,6 +770,18 @@ _PyLong_Sign(PyObject *vv) return _PyLong_NonCompactSign(v); } +int +PyLong_GetSign(PyObject *vv, int *sign) +{ + if (!PyLong_Check(vv)) { + PyErr_Format(PyExc_TypeError, "expect int, got %T", vv); + return -1; + } + + *sign = _PyLong_Sign(vv); + return 0; +} + static int bit_length_digit(digit x) { From 4765e1fa292007f8ddc59f33454b747312506a7a Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Mon, 3 Jun 2024 15:27:44 +0200 Subject: [PATCH 34/72] gh-102511: Amend 3.13.0b1.rst (GH-119895) --- Misc/NEWS.d/3.13.0b1.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/Misc/NEWS.d/3.13.0b1.rst b/Misc/NEWS.d/3.13.0b1.rst index 525491a2603416..09b62c8377aabd 100644 --- a/Misc/NEWS.d/3.13.0b1.rst +++ b/Misc/NEWS.d/3.13.0b1.rst @@ -300,6 +300,7 @@ Improve :exc:`SyntaxError` message for empty type param brackets. .. nonce: qDEB66 .. section: Core and Builtins +Fix :func:`os.path.normpath` for UNC paths on Windows. Speed up :func:`os.path.splitroot` with a native implementation. .. From fd01271366abefa8f991e53f090387882fbd6bdd Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Mon, 3 Jun 2024 15:42:45 +0100 Subject: [PATCH 35/72] gh-119679: Ensures correct import libraries are included in Windows install packages (GH-119790) --- ...-05-30-17-39-25.gh-issue-119679.mZC87w.rst | 1 + PC/layout/main.py | 14 +++++++----- Tools/msi/freethreaded/freethreaded_files.wxs | 22 ++++++++++++------- 3 files changed, 23 insertions(+), 14 deletions(-) create mode 100644 Misc/NEWS.d/next/Windows/2024-05-30-17-39-25.gh-issue-119679.mZC87w.rst diff --git a/Misc/NEWS.d/next/Windows/2024-05-30-17-39-25.gh-issue-119679.mZC87w.rst b/Misc/NEWS.d/next/Windows/2024-05-30-17-39-25.gh-issue-119679.mZC87w.rst new file mode 100644 index 00000000000000..db9e798d3ddcb8 --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2024-05-30-17-39-25.gh-issue-119679.mZC87w.rst @@ -0,0 +1 @@ +Ensures correct import libraries are included in Windows installs. diff --git a/PC/layout/main.py b/PC/layout/main.py index 1c4842f8588a5b..716f01097fe3b0 100644 --- a/PC/layout/main.py +++ b/PC/layout/main.py @@ -121,7 +121,7 @@ def get_tcltk_lib(ns): def get_layout(ns): - def in_build(f, dest="", new_name=None): + def in_build(f, dest="", new_name=None, no_lib=False): n, _, x = f.rpartition(".") n = new_name or n src = ns.build / f @@ -136,7 +136,7 @@ def in_build(f, dest="", new_name=None): pdb = src.with_suffix(".pdb") if pdb.is_file(): yield dest + n + ".pdb", pdb - if ns.include_dev: + if ns.include_dev and not no_lib: lib = src.with_suffix(".lib") if lib.is_file(): yield "libs/" + n + ".lib", lib @@ -202,7 +202,9 @@ def in_build(f, dest="", new_name=None): yield "LICENSE.txt", ns.build / "LICENSE.txt" - for dest, src in rglob(ns.build, "*.pyd"): + dest="" if ns.flat_dlls else "DLLs/" + + for _, src in rglob(ns.build, "*.pyd"): if ns.include_freethreaded: if not src.match("*.cp*t-win*.pyd"): continue @@ -217,14 +219,14 @@ def in_build(f, dest="", new_name=None): continue if src in TCLTK_PYDS_ONLY and not ns.include_tcltk: continue - yield from in_build(src.name, dest="" if ns.flat_dlls else "DLLs/") + yield from in_build(src.name, dest=dest, no_lib=True) - for dest, src in rglob(ns.build, "*.dll"): + for _, src in rglob(ns.build, "*.dll"): if src.stem.endswith("_d") != bool(ns.debug) and src not in REQUIRED_DLLS: continue if src in EXCLUDE_FROM_DLLS: continue - yield from in_build(src.name, dest="" if ns.flat_dlls else "DLLs/") + yield from in_build(src.name, no_lib=True) if ns.zip_lib: zip_name = PYTHON_ZIP_NAME diff --git a/Tools/msi/freethreaded/freethreaded_files.wxs b/Tools/msi/freethreaded/freethreaded_files.wxs index adaf63c69d5ade..49ecb3429ad8f3 100644 --- a/Tools/msi/freethreaded/freethreaded_files.wxs +++ b/Tools/msi/freethreaded/freethreaded_files.wxs @@ -48,6 +48,12 @@ + + + + + + @@ -69,8 +75,14 @@ - - + + + + + + + + @@ -147,12 +159,6 @@ - - - - - - From 42a34ddb0b63e638905b01e17a7254623a0de427 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Mon, 3 Jun 2024 11:13:07 -0400 Subject: [PATCH 36/72] gh-119588: Implement zipfile.Path.is_symlink (zipp 3.19.0). (#119591) --- Doc/library/zipfile.rst | 9 +++++++ Lib/test/test_zipfile/_path/test_path.py | 27 ++++++++++++------- Lib/zipfile/_path/__init__.py | 7 +++-- ...-05-26-21-28-11.gh-issue-119588.wlLBK5.rst | 1 + 4 files changed, 32 insertions(+), 12 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-05-26-21-28-11.gh-issue-119588.wlLBK5.rst diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst index aad2028523dc34..a4d9a1852f8f0d 100644 --- a/Doc/library/zipfile.rst +++ b/Doc/library/zipfile.rst @@ -585,6 +585,15 @@ Path objects are traversable using the ``/`` operator or ``joinpath``. Return ``True`` if the current context references a file. +.. method:: Path.is_symlink() + + Return ``True`` if the current context references a symbolic link. + + .. versionadded:: 3.12 + + .. versionchanged:: 3.12.4 + Prior to 3.12.4, ``is_symlink`` would unconditionally return ``False``. + .. method:: Path.exists() Return ``True`` if the current context references a file or diff --git a/Lib/test/test_zipfile/_path/test_path.py b/Lib/test/test_zipfile/_path/test_path.py index e5d2acf39a10f8..99842ffd63a64e 100644 --- a/Lib/test/test_zipfile/_path/test_path.py +++ b/Lib/test/test_zipfile/_path/test_path.py @@ -3,6 +3,7 @@ import contextlib import pathlib import pickle +import stat import sys import unittest import zipfile @@ -21,12 +22,17 @@ class itertools: Counter = Counter +def _make_link(info: zipfile.ZipInfo): # type: ignore[name-defined] + info.external_attr |= stat.S_IFLNK << 16 + + def build_alpharep_fixture(): """ Create a zip file with this structure: . ├── a.txt + ├── n.txt (-> a.txt) ├── b │ ├── c.txt │ ├── d @@ -47,6 +53,7 @@ def build_alpharep_fixture(): - multiple files in a directory (b/c, b/f) - a directory containing only a directory (g/h) - a directory with files of different extensions (j/klm) + - a symlink (n) pointing to (a) "alpha" because it uses alphabet "rep" because it's a representative example @@ -61,6 +68,9 @@ def build_alpharep_fixture(): zf.writestr("j/k.bin", b"content of k") zf.writestr("j/l.baz", b"content of l") zf.writestr("j/m.bar", b"content of m") + zf.writestr("n.txt", b"a.txt") + _make_link(zf.infolist()[-1]) + zf.filename = "alpharep.zip" return zf @@ -91,7 +101,7 @@ def zipfile_ondisk(self, alpharep): def test_iterdir_and_types(self, alpharep): root = zipfile.Path(alpharep) assert root.is_dir() - a, b, g, j = root.iterdir() + a, k, b, g, j = root.iterdir() assert a.is_file() assert b.is_dir() assert g.is_dir() @@ -111,7 +121,7 @@ def test_is_file_missing(self, alpharep): @pass_alpharep def test_iterdir_on_file(self, alpharep): root = zipfile.Path(alpharep) - a, b, g, j = root.iterdir() + a, k, b, g, j = root.iterdir() with self.assertRaises(ValueError): a.iterdir() @@ -126,7 +136,7 @@ def test_subdir_is_dir(self, alpharep): @pass_alpharep def test_open(self, alpharep): root = zipfile.Path(alpharep) - a, b, g, j = root.iterdir() + a, k, b, g, j = root.iterdir() with a.open(encoding="utf-8") as strm: data = strm.read() self.assertEqual(data, "content of a") @@ -230,7 +240,7 @@ def test_open_missing_directory(self, alpharep): @pass_alpharep def test_read(self, alpharep): root = zipfile.Path(alpharep) - a, b, g, j = root.iterdir() + a, k, b, g, j = root.iterdir() assert a.read_text(encoding="utf-8") == "content of a" # Also check positional encoding arg (gh-101144). assert a.read_text("utf-8") == "content of a" @@ -296,7 +306,7 @@ def test_mutability(self, alpharep): reflect that change. """ root = zipfile.Path(alpharep) - a, b, g, j = root.iterdir() + a, k, b, g, j = root.iterdir() alpharep.writestr('foo.txt', 'foo') alpharep.writestr('bar/baz.txt', 'baz') assert any(child.name == 'foo.txt' for child in root.iterdir()) @@ -513,12 +523,9 @@ def test_eq_hash(self, alpharep): @pass_alpharep def test_is_symlink(self, alpharep): - """ - See python/cpython#82102 for symlink support beyond this object. - """ - root = zipfile.Path(alpharep) - assert not root.is_symlink() + assert not root.joinpath('a.txt').is_symlink() + assert root.joinpath('n.txt').is_symlink() @pass_alpharep def test_relative_to(self, alpharep): diff --git a/Lib/zipfile/_path/__init__.py b/Lib/zipfile/_path/__init__.py index 79ebb777354e03..f5ea18cee61930 100644 --- a/Lib/zipfile/_path/__init__.py +++ b/Lib/zipfile/_path/__init__.py @@ -5,6 +5,7 @@ import contextlib import pathlib import re +import stat import sys from .glob import Translator @@ -390,9 +391,11 @@ def match(self, path_pattern): def is_symlink(self): """ - Return whether this path is a symlink. Always false (python/cpython#82102). + Return whether this path is a symlink. """ - return False + info = self.root.getinfo(self.at) + mode = info.external_attr >> 16 + return stat.S_ISLNK(mode) def glob(self, pattern): if not pattern: diff --git a/Misc/NEWS.d/next/Library/2024-05-26-21-28-11.gh-issue-119588.wlLBK5.rst b/Misc/NEWS.d/next/Library/2024-05-26-21-28-11.gh-issue-119588.wlLBK5.rst new file mode 100644 index 00000000000000..01321d8bfe2ad5 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-26-21-28-11.gh-issue-119588.wlLBK5.rst @@ -0,0 +1 @@ +``zipfile.Path.is_symlink`` now assesses if the given path is a symlink. From 6acb32fac3511c1d5500cac66f1d6397dcdab835 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Langa?= Date: Mon, 3 Jun 2024 11:32:40 -0400 Subject: [PATCH 37/72] Use Cirrus M1 macOS runners for CI (GH-119979) Co-authored-by: Ee Durbin --- .github/workflows/build.yml | 8 ++++---- Lib/test/test_pyrepl/test_unix_console.py | 3 ++- Lib/test/test_pyrepl/test_windows_console.py | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7e63737b90b72a..cde93c77a0b82e 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -199,8 +199,8 @@ jobs: uses: ./.github/workflows/reusable-macos.yml with: config_hash: ${{ needs.check_source.outputs.config_hash }} - # macos-14 is M1, macos-13 is Intel - os-matrix: '["macos-14", "macos-13"]' + # Cirrus is M1, macos-13 is default GHA Intel + os-matrix: '["ghcr.io/cirruslabs/macos-runner:sonoma", "macos-13"]' build_macos_free_threading: name: 'macOS (free-threading)' @@ -210,8 +210,8 @@ jobs: with: config_hash: ${{ needs.check_source.outputs.config_hash }} free-threading: true - # macos-14-large is Intel with 12 cores (most parallelism) - os-matrix: '["macos-14"]' + # Cirrus is M1 + os-matrix: '["ghcr.io/cirruslabs/macos-runner:sonoma"]' build_ubuntu: name: 'Ubuntu' diff --git a/Lib/test/test_pyrepl/test_unix_console.py b/Lib/test/test_pyrepl/test_unix_console.py index d0b98f17ade094..e3bbabcb0089fb 100644 --- a/Lib/test/test_pyrepl/test_unix_console.py +++ b/Lib/test/test_pyrepl/test_unix_console.py @@ -6,12 +6,14 @@ from unittest.mock import MagicMock, call, patch, ANY from .support import handle_all_events, code_to_events + try: from _pyrepl.console import Event from _pyrepl.unix_console import UnixConsole except ImportError: pass + def unix_console(events, **kwargs): console = UnixConsole() console.get_event = MagicMock(side_effect=events) @@ -138,7 +140,6 @@ def test_wrap(self, _os_write): _os_write.assert_any_call(ANY, b"4") con.restore() - def test_cursor_left(self, _os_write): code = "1" events = itertools.chain( diff --git a/Lib/test/test_pyrepl/test_windows_console.py b/Lib/test/test_pyrepl/test_windows_console.py index e52a54d31fb5d8..4a3b2baf64a944 100644 --- a/Lib/test/test_pyrepl/test_windows_console.py +++ b/Lib/test/test_pyrepl/test_windows_console.py @@ -1,7 +1,7 @@ import sys import unittest -if sys.platform != 'win32': +if sys.platform != "win32": raise unittest.SkipTest("test only relevant on win32") From 153b118b78588209850cc2a4cbc977f193a3ab6e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 3 Jun 2024 17:48:02 +0200 Subject: [PATCH 38/72] gh-119981: Use do while(0) in some symtable.c multi-line macros (#119982) --- Python/symtable.c | 97 +++++++++++++++++++++++++---------------------- 1 file changed, 52 insertions(+), 45 deletions(-) diff --git a/Python/symtable.c b/Python/symtable.c index d8240cdd11f7ea..0ee8ca36cf8df0 100644 --- a/Python/symtable.c +++ b/Python/symtable.c @@ -601,16 +601,17 @@ error_at_directive(PySTEntryObject *ste, PyObject *name) global: set of all symbol names explicitly declared as global */ -#define SET_SCOPE(DICT, NAME, I) { \ - PyObject *o = PyLong_FromLong(I); \ - if (!o) \ - return 0; \ - if (PyDict_SetItem((DICT), (NAME), o) < 0) { \ +#define SET_SCOPE(DICT, NAME, I) \ + do { \ + PyObject *o = PyLong_FromLong(I); \ + if (!o) \ + return 0; \ + if (PyDict_SetItem((DICT), (NAME), o) < 0) { \ + Py_DECREF(o); \ + return 0; \ + } \ Py_DECREF(o); \ - return 0; \ - } \ - Py_DECREF(o); \ -} + } while(0) /* Decide on scope of name, given flags. @@ -1562,39 +1563,45 @@ symtable_enter_type_param_block(struct symtable *st, identifier name, return --(ST)->recursion_depth,(X) #define VISIT(ST, TYPE, V) \ - if (!symtable_visit_ ## TYPE((ST), (V))) \ - VISIT_QUIT((ST), 0); - -#define VISIT_SEQ(ST, TYPE, SEQ) { \ - int i; \ - asdl_ ## TYPE ## _seq *seq = (SEQ); /* avoid variable capture */ \ - for (i = 0; i < asdl_seq_LEN(seq); i++) { \ - TYPE ## _ty elt = (TYPE ## _ty)asdl_seq_GET(seq, i); \ - if (!symtable_visit_ ## TYPE((ST), elt)) \ - VISIT_QUIT((ST), 0); \ - } \ -} - -#define VISIT_SEQ_TAIL(ST, TYPE, SEQ, START) { \ - int i; \ - asdl_ ## TYPE ## _seq *seq = (SEQ); /* avoid variable capture */ \ - for (i = (START); i < asdl_seq_LEN(seq); i++) { \ - TYPE ## _ty elt = (TYPE ## _ty)asdl_seq_GET(seq, i); \ - if (!symtable_visit_ ## TYPE((ST), elt)) \ - VISIT_QUIT((ST), 0); \ - } \ -} - -#define VISIT_SEQ_WITH_NULL(ST, TYPE, SEQ) { \ - int i = 0; \ - asdl_ ## TYPE ## _seq *seq = (SEQ); /* avoid variable capture */ \ - for (i = 0; i < asdl_seq_LEN(seq); i++) { \ - TYPE ## _ty elt = (TYPE ## _ty)asdl_seq_GET(seq, i); \ - if (!elt) continue; /* can be NULL */ \ - if (!symtable_visit_ ## TYPE((ST), elt)) \ - VISIT_QUIT((ST), 0); \ - } \ -} + do { \ + if (!symtable_visit_ ## TYPE((ST), (V))) { \ + VISIT_QUIT((ST), 0); \ + } \ + } while(0) + +#define VISIT_SEQ(ST, TYPE, SEQ) \ + do { \ + int i; \ + asdl_ ## TYPE ## _seq *seq = (SEQ); /* avoid variable capture */ \ + for (i = 0; i < asdl_seq_LEN(seq); i++) { \ + TYPE ## _ty elt = (TYPE ## _ty)asdl_seq_GET(seq, i); \ + if (!symtable_visit_ ## TYPE((ST), elt)) \ + VISIT_QUIT((ST), 0); \ + } \ + } while(0) + +#define VISIT_SEQ_TAIL(ST, TYPE, SEQ, START) \ + do { \ + int i; \ + asdl_ ## TYPE ## _seq *seq = (SEQ); /* avoid variable capture */ \ + for (i = (START); i < asdl_seq_LEN(seq); i++) { \ + TYPE ## _ty elt = (TYPE ## _ty)asdl_seq_GET(seq, i); \ + if (!symtable_visit_ ## TYPE((ST), elt)) \ + VISIT_QUIT((ST), 0); \ + } \ + } while(0) + +#define VISIT_SEQ_WITH_NULL(ST, TYPE, SEQ) \ + do { \ + int i = 0; \ + asdl_ ## TYPE ## _seq *seq = (SEQ); /* avoid variable capture */ \ + for (i = 0; i < asdl_seq_LEN(seq); i++) { \ + TYPE ## _ty elt = (TYPE ## _ty)asdl_seq_GET(seq, i); \ + if (!elt) continue; /* can be NULL */ \ + if (!symtable_visit_ ## TYPE((ST), elt)) \ + VISIT_QUIT((ST), 0); \ + } \ + } while(0) static int symtable_record_directive(struct symtable *st, identifier name, int lineno, @@ -2261,11 +2268,11 @@ symtable_visit_expr(struct symtable *st, expr_ty e) break; case Slice_kind: if (e->v.Slice.lower) - VISIT(st, expr, e->v.Slice.lower) + VISIT(st, expr, e->v.Slice.lower); if (e->v.Slice.upper) - VISIT(st, expr, e->v.Slice.upper) + VISIT(st, expr, e->v.Slice.upper); if (e->v.Slice.step) - VISIT(st, expr, e->v.Slice.step) + VISIT(st, expr, e->v.Slice.step); break; case Name_kind: if (!symtable_add_def(st, e->v.Name.id, From 1d4c2e4a877a48cdc8bcc9808d799b91c82b3757 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Mon, 3 Jun 2024 19:03:56 +0300 Subject: [PATCH 39/72] gh-119057: Use better error messages for zero division (#119066) --- Doc/howto/logging-cookbook.rst | 2 +- Lib/_pylong.py | 2 +- Lib/test/mathdata/ieee754.txt | 2 +- Lib/test/test_builtin.py | 10 ++++++++++ Lib/test/test_doctest/test_doctest.py | 8 ++++---- Lib/test/test_generators.py | 2 +- Lib/test/test_genexps.py | 2 +- .../2024-05-15-12-15-58.gh-issue-119057.P3G9G2.rst | 4 ++++ Objects/complexobject.c | 4 ++-- Objects/floatobject.c | 11 +++++------ Objects/longobject.c | 5 ++--- 11 files changed, 32 insertions(+), 20 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-05-15-12-15-58.gh-issue-119057.P3G9G2.rst diff --git a/Doc/howto/logging-cookbook.rst b/Doc/howto/logging-cookbook.rst index 60d88204b795f6..3ed2dd6251afe9 100644 --- a/Doc/howto/logging-cookbook.rst +++ b/Doc/howto/logging-cookbook.rst @@ -2950,7 +2950,7 @@ When run, this produces a file with exactly two lines: .. code-block:: none 28/01/2015 07:21:23|INFO|Sample message| - 28/01/2015 07:21:23|ERROR|ZeroDivisionError: integer division or modulo by zero|'Traceback (most recent call last):\n File "logtest7.py", line 30, in main\n x = 1 / 0\nZeroDivisionError: integer division or modulo by zero'| + 28/01/2015 07:21:23|ERROR|ZeroDivisionError: division by zero|'Traceback (most recent call last):\n File "logtest7.py", line 30, in main\n x = 1 / 0\nZeroDivisionError: division by zero'| While the above treatment is simplistic, it points the way to how exception information can be formatted to your liking. The :mod:`traceback` module may be diff --git a/Lib/_pylong.py b/Lib/_pylong.py index f7aabde1434725..a8bf5cd3e638a4 100644 --- a/Lib/_pylong.py +++ b/Lib/_pylong.py @@ -530,7 +530,7 @@ def int_divmod(a, b): Its time complexity is O(n**1.58), where n = #bits(a) + #bits(b). """ if b == 0: - raise ZeroDivisionError + raise ZeroDivisionError('division by zero') elif b < 0: q, r = int_divmod(-a, -b) return q, -r diff --git a/Lib/test/mathdata/ieee754.txt b/Lib/test/mathdata/ieee754.txt index a8b8a0a2148f00..0bc45603b8b18a 100644 --- a/Lib/test/mathdata/ieee754.txt +++ b/Lib/test/mathdata/ieee754.txt @@ -116,7 +116,7 @@ inf >>> 0 ** -1 Traceback (most recent call last): ... -ZeroDivisionError: 0.0 cannot be raised to a negative power +ZeroDivisionError: zero to a negative power >>> pow(0, NAN) nan diff --git a/Lib/test/test_builtin.py b/Lib/test/test_builtin.py index d7ba58847a2992..9ff0f488dc4fa9 100644 --- a/Lib/test/test_builtin.py +++ b/Lib/test/test_builtin.py @@ -662,6 +662,16 @@ def test_divmod(self): self.assertAlmostEqual(result[1], exp_result[1]) self.assertRaises(TypeError, divmod) + self.assertRaisesRegex( + ZeroDivisionError, + "division by zero", + divmod, 1, 0, + ) + self.assertRaisesRegex( + ZeroDivisionError, + "division by zero", + divmod, 0.0, 0, + ) def test_eval(self): self.assertEqual(eval('1+1'), 2) diff --git a/Lib/test/test_doctest/test_doctest.py b/Lib/test/test_doctest/test_doctest.py index 286c3ecfbc9239..b25d57ceeae6aa 100644 --- a/Lib/test/test_doctest/test_doctest.py +++ b/Lib/test/test_doctest/test_doctest.py @@ -1035,7 +1035,7 @@ def exceptions(): r""" ... >>> x = 12 ... >>> print(x//0) ... Traceback (most recent call last): - ... ZeroDivisionError: integer division or modulo by zero + ... ZeroDivisionError: division by zero ... ''' >>> test = doctest.DocTestFinder().find(f)[0] >>> doctest.DocTestRunner(verbose=False).run(test) @@ -1052,7 +1052,7 @@ def exceptions(): r""" ... >>> print('pre-exception output', x//0) ... pre-exception output ... Traceback (most recent call last): - ... ZeroDivisionError: integer division or modulo by zero + ... ZeroDivisionError: division by zero ... ''' >>> test = doctest.DocTestFinder().find(f)[0] >>> doctest.DocTestRunner(verbose=False).run(test) @@ -1063,7 +1063,7 @@ def exceptions(): r""" print('pre-exception output', x//0) Exception raised: ... - ZeroDivisionError: integer division or modulo by zero + ZeroDivisionError: division by zero TestResults(failed=1, attempted=2) Exception messages may contain newlines: @@ -1258,7 +1258,7 @@ def exceptions(): r""" Exception raised: Traceback (most recent call last): ... - ZeroDivisionError: integer division or modulo by zero + ZeroDivisionError: division by zero TestResults(failed=1, attempted=1) >>> _colorize.COLORIZE = save_colorize diff --git a/Lib/test/test_generators.py b/Lib/test/test_generators.py index 6d36df2c7413e0..4598e62122b09c 100644 --- a/Lib/test/test_generators.py +++ b/Lib/test/test_generators.py @@ -907,7 +907,7 @@ def b(): File "", line 1, in ? File "", line 2, in g File "", line 2, in f - ZeroDivisionError: integer division or modulo by zero + ZeroDivisionError: division by zero >>> next(k) # and the generator cannot be resumed Traceback (most recent call last): File "", line 1, in ? diff --git a/Lib/test/test_genexps.py b/Lib/test/test_genexps.py index 4f2d3cdcc7943e..7fb58a67368576 100644 --- a/Lib/test/test_genexps.py +++ b/Lib/test/test_genexps.py @@ -223,7 +223,7 @@ next(g) File "", line 1, in g = (10 // i for i in (5, 0, 2)) - ZeroDivisionError: integer division or modulo by zero + ZeroDivisionError: division by zero >>> next(g) Traceback (most recent call last): File "", line 1, in -toplevel- diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-05-15-12-15-58.gh-issue-119057.P3G9G2.rst b/Misc/NEWS.d/next/Core and Builtins/2024-05-15-12-15-58.gh-issue-119057.P3G9G2.rst new file mode 100644 index 00000000000000..d252888906c348 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-05-15-12-15-58.gh-issue-119057.P3G9G2.rst @@ -0,0 +1,4 @@ +Improve :exc:`ZeroDivisionError` error message. +Now, all error messages are harmonized: all ``/``, ``//``, and ``%`` +operations just use "division by zero" message. +And ``0 ** -1`` operation uses "zero to a negative power". diff --git a/Objects/complexobject.c b/Objects/complexobject.c index 59c84f1359b966..a8be266970afd0 100644 --- a/Objects/complexobject.c +++ b/Objects/complexobject.c @@ -523,7 +523,7 @@ complex_div(PyObject *v, PyObject *w) errno = 0; quot = _Py_c_quot(a, b); if (errno == EDOM) { - PyErr_SetString(PyExc_ZeroDivisionError, "complex division by zero"); + PyErr_SetString(PyExc_ZeroDivisionError, "division by zero"); return NULL; } return PyComplex_FromCComplex(quot); @@ -554,7 +554,7 @@ complex_pow(PyObject *v, PyObject *w, PyObject *z) _Py_ADJUST_ERANGE2(p.real, p.imag); if (errno == EDOM) { PyErr_SetString(PyExc_ZeroDivisionError, - "0.0 to a negative or complex power"); + "zero to a negative or complex power"); return NULL; } else if (errno == ERANGE) { diff --git a/Objects/floatobject.c b/Objects/floatobject.c index a5bf356cc9c7f0..2627ba80eed8ca 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -623,7 +623,7 @@ float_div(PyObject *v, PyObject *w) CONVERT_TO_DOUBLE(w, b); if (b == 0.0) { PyErr_SetString(PyExc_ZeroDivisionError, - "float division by zero"); + "division by zero"); return NULL; } a = a / b; @@ -639,7 +639,7 @@ float_rem(PyObject *v, PyObject *w) CONVERT_TO_DOUBLE(w, wx); if (wx == 0.0) { PyErr_SetString(PyExc_ZeroDivisionError, - "float modulo by zero"); + "division by zero"); return NULL; } mod = fmod(vx, wx); @@ -704,7 +704,7 @@ float_divmod(PyObject *v, PyObject *w) CONVERT_TO_DOUBLE(v, vx); CONVERT_TO_DOUBLE(w, wx); if (wx == 0.0) { - PyErr_SetString(PyExc_ZeroDivisionError, "float divmod()"); + PyErr_SetString(PyExc_ZeroDivisionError, "division by zero"); return NULL; } _float_div_mod(vx, wx, &floordiv, &mod); @@ -719,7 +719,7 @@ float_floor_div(PyObject *v, PyObject *w) CONVERT_TO_DOUBLE(v, vx); CONVERT_TO_DOUBLE(w, wx); if (wx == 0.0) { - PyErr_SetString(PyExc_ZeroDivisionError, "float floor division by zero"); + PyErr_SetString(PyExc_ZeroDivisionError, "division by zero"); return NULL; } _float_div_mod(vx, wx, &floordiv, &mod); @@ -788,8 +788,7 @@ float_pow(PyObject *v, PyObject *w, PyObject *z) int iw_is_odd = DOUBLE_IS_ODD_INTEGER(iw); if (iw < 0.0) { PyErr_SetString(PyExc_ZeroDivisionError, - "0.0 cannot be raised to a " - "negative power"); + "zero to a negative power"); return NULL; } /* use correct sign if iw is odd */ diff --git a/Objects/longobject.c b/Objects/longobject.c index 054689471e7aa9..ee0b2a038a2aab 100644 --- a/Objects/longobject.c +++ b/Objects/longobject.c @@ -3121,8 +3121,7 @@ long_divrem(PyLongObject *a, PyLongObject *b, PyLongObject *z; if (size_b == 0) { - PyErr_SetString(PyExc_ZeroDivisionError, - "integer division or modulo by zero"); + PyErr_SetString(PyExc_ZeroDivisionError, "division by zero"); return -1; } if (size_a < size_b || @@ -3185,7 +3184,7 @@ long_rem(PyLongObject *a, PyLongObject *b, PyLongObject **prem) if (size_b == 0) { PyErr_SetString(PyExc_ZeroDivisionError, - "integer modulo by zero"); + "division by zero"); return -1; } if (size_a < size_b || From 4e8aa32245e2d72bf558b711ccdbcee594347615 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 3 Jun 2024 18:34:36 +0200 Subject: [PATCH 40/72] gh-119727: Add --single-process option to regrtest (#119728) --- Lib/test/libregrtest/cmdline.py | 11 ++++++++ Lib/test/libregrtest/main.py | 26 ++++++++++++------- Lib/test/libregrtest/worker.py | 6 ++--- Lib/test/test_regrtest.py | 13 ++++++++++ ...-05-29-15-28-08.gh-issue-119727.dVkaZM.rst | 2 ++ 5 files changed, 46 insertions(+), 12 deletions(-) create mode 100644 Misc/NEWS.d/next/Tests/2024-05-29-15-28-08.gh-issue-119727.dVkaZM.rst diff --git a/Lib/test/libregrtest/cmdline.py b/Lib/test/libregrtest/cmdline.py index d4dac77b250ad6..2ff4715e82a41b 100644 --- a/Lib/test/libregrtest/cmdline.py +++ b/Lib/test/libregrtest/cmdline.py @@ -174,6 +174,7 @@ def __init__(self, **kwargs) -> None: self.tempdir = None self._add_python_opts = True self.xmlpath = None + self.single_process = False super().__init__(**kwargs) @@ -307,6 +308,12 @@ def _create_parser(): group.add_argument('-j', '--multiprocess', metavar='PROCESSES', dest='use_mp', type=int, help='run PROCESSES processes at once') + group.add_argument('--single-process', action='store_true', + dest='single_process', + help='always run all tests sequentially in ' + 'a single process, ignore -jN option, ' + 'and failed tests are also rerun sequentially ' + 'in the same process') group.add_argument('-T', '--coverage', action='store_true', dest='trace', help='turn on code coverage tracing using the trace ' @@ -435,6 +442,10 @@ def _parse_args(args, **kwargs): else: ns._add_python_opts = False + # --singleprocess overrides -jN option + if ns.single_process: + ns.use_mp = None + # When both --slow-ci and --fast-ci options are present, # --slow-ci has the priority if ns.slow_ci: diff --git a/Lib/test/libregrtest/main.py b/Lib/test/libregrtest/main.py index 9e7a7d60880091..5148d3070513e8 100644 --- a/Lib/test/libregrtest/main.py +++ b/Lib/test/libregrtest/main.py @@ -89,12 +89,13 @@ def __init__(self, ns: Namespace, _add_python_opts: bool = False): self.cmdline_args: TestList = ns.args # Workers - if ns.use_mp is None: - num_workers = 0 # run sequentially + self.single_process: bool = ns.single_process + if self.single_process or ns.use_mp is None: + num_workers = 0 # run sequentially in a single process elif ns.use_mp <= 0: - num_workers = -1 # use the number of CPUs + num_workers = -1 # run in parallel, use the number of CPUs else: - num_workers = ns.use_mp + num_workers = ns.use_mp # run in parallel self.num_workers: int = num_workers self.worker_json: StrJSON | None = ns.worker_json @@ -236,7 +237,7 @@ def list_tests(tests: TestTuple): def _rerun_failed_tests(self, runtests: RunTests): # Configure the runner to re-run tests - if self.num_workers == 0: + if self.num_workers == 0 and not self.single_process: # Always run tests in fresh processes to have more deterministic # initial state. Don't re-run tests in parallel but limit to a # single worker process to have side effects (on the system load @@ -246,7 +247,6 @@ def _rerun_failed_tests(self, runtests: RunTests): tests, match_tests_dict = self.results.prepare_rerun() # Re-run failed tests - self.log(f"Re-running {len(tests)} failed tests in verbose mode in subprocesses") runtests = runtests.copy( tests=tests, rerun=True, @@ -256,7 +256,15 @@ def _rerun_failed_tests(self, runtests: RunTests): match_tests_dict=match_tests_dict, output_on_failure=False) self.logger.set_tests(runtests) - self._run_tests_mp(runtests, self.num_workers) + + msg = f"Re-running {len(tests)} failed tests in verbose mode" + if not self.single_process: + msg = f"{msg} in subprocesses" + self.log(msg) + self._run_tests_mp(runtests, self.num_workers) + else: + self.log(msg) + self.run_tests_sequentially(runtests) return runtests def rerun_failed_tests(self, runtests: RunTests): @@ -371,7 +379,7 @@ def run_tests_sequentially(self, runtests) -> None: tests = count(jobs, 'test') else: tests = 'tests' - msg = f"Run {tests} sequentially" + msg = f"Run {tests} sequentially in a single process" if runtests.timeout: msg += " (timeout: %s)" % format_duration(runtests.timeout) self.log(msg) @@ -599,7 +607,7 @@ def _add_cross_compile_opts(self, regrtest_opts): keep_environ = True if cross_compile and hostrunner: - if self.num_workers == 0: + if self.num_workers == 0 and not self.single_process: # For now use only two cores for cross-compiled builds; # hostrunner can be expensive. regrtest_opts.extend(['-j', '2']) diff --git a/Lib/test/libregrtest/worker.py b/Lib/test/libregrtest/worker.py index 15d32b5baa04d0..86cc30835fdbda 100644 --- a/Lib/test/libregrtest/worker.py +++ b/Lib/test/libregrtest/worker.py @@ -14,9 +14,9 @@ USE_PROCESS_GROUP = (hasattr(os, "setsid") and hasattr(os, "killpg")) -NEED_TTY = set(''' - test_ioctl -'''.split()) +NEED_TTY = { + 'test_ioctl', +} def create_worker_process(runtests: WorkerRunTests, output_fd: int, diff --git a/Lib/test/test_regrtest.py b/Lib/test/test_regrtest.py index 17eff617a56aa4..97ce797f0f6acb 100644 --- a/Lib/test/test_regrtest.py +++ b/Lib/test/test_regrtest.py @@ -473,6 +473,19 @@ def test_verbose3_huntrleaks(self): self.assertEqual(regrtest.hunt_refleak.runs, 10) self.assertFalse(regrtest.output_on_failure) + def test_single_process(self): + args = ['-j2', '--single-process'] + with support.captured_stderr(): + regrtest = self.create_regrtest(args) + self.assertEqual(regrtest.num_workers, 0) + self.assertTrue(regrtest.single_process) + + args = ['--fast-ci', '--single-process'] + with support.captured_stderr(): + regrtest = self.create_regrtest(args) + self.assertEqual(regrtest.num_workers, 0) + self.assertTrue(regrtest.single_process) + @dataclasses.dataclass(slots=True) class Rerun: diff --git a/Misc/NEWS.d/next/Tests/2024-05-29-15-28-08.gh-issue-119727.dVkaZM.rst b/Misc/NEWS.d/next/Tests/2024-05-29-15-28-08.gh-issue-119727.dVkaZM.rst new file mode 100644 index 00000000000000..bf28d8bb77b8a2 --- /dev/null +++ b/Misc/NEWS.d/next/Tests/2024-05-29-15-28-08.gh-issue-119727.dVkaZM.rst @@ -0,0 +1,2 @@ +Add ``--single-process`` command line option to Python test runner (regrtest). +Patch by Victor Stinner. From 2e0aa731aebb8ef3d89ada82f5d39b1bbac65d1f Mon Sep 17 00:00:00 2001 From: Daniel Hollas Date: Mon, 3 Jun 2024 18:07:06 +0100 Subject: [PATCH 41/72] gh-118835: pyrepl: Fix prompt length computation for custom prompts containing ANSI escape codes (#119942) --- Lib/_pyrepl/reader.py | 10 ++++-- Lib/test/test_pyrepl/test_reader.py | 32 +++++++++++++++++++ ...-06-02-15-09-17.gh-issue-118835.KUAuz6.rst | 1 + 3 files changed, 41 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-06-02-15-09-17.gh-issue-118835.KUAuz6.rst diff --git a/Lib/_pyrepl/reader.py b/Lib/_pyrepl/reader.py index 5401ae7b0ae32d..f2e68ef6f3ee66 100644 --- a/Lib/_pyrepl/reader.py +++ b/Lib/_pyrepl/reader.py @@ -28,7 +28,7 @@ from . import commands, console, input -from .utils import ANSI_ESCAPE_SEQUENCE, wlen +from .utils import ANSI_ESCAPE_SEQUENCE, wlen, str_width from .trace import trace @@ -339,7 +339,8 @@ def calc_complete_screen(self) -> list[str]: screeninfo.append((0, [])) return screen - def process_prompt(self, prompt: str) -> tuple[str, int]: + @staticmethod + def process_prompt(prompt: str) -> tuple[str, int]: """Process the prompt. This means calculate the length of the prompt. The character \x01 @@ -351,6 +352,11 @@ def process_prompt(self, prompt: str) -> tuple[str, int]: # sequences if they were not explicitly within \x01...\x02. # They are CSI (or ANSI) sequences ( ESC [ ... LETTER ) + # wlen from utils already excludes ANSI_ESCAPE_SEQUENCE chars, + # which breaks the logic below so we redefine it here. + def wlen(s: str) -> int: + return sum(str_width(i) for i in s) + out_prompt = "" l = wlen(prompt) pos = 0 diff --git a/Lib/test/test_pyrepl/test_reader.py b/Lib/test/test_pyrepl/test_reader.py index c9b03d5e711539..9fb956b655594f 100644 --- a/Lib/test/test_pyrepl/test_reader.py +++ b/Lib/test/test_pyrepl/test_reader.py @@ -4,6 +4,7 @@ from .support import handle_all_events, handle_events_narrow_console, code_to_events, prepare_reader from _pyrepl.console import Event +from _pyrepl.reader import Reader class TestReader(TestCase): @@ -176,3 +177,34 @@ def test_newline_within_block_trailing_whitespace(self): ) self.assert_screen_equals(reader, expected) self.assertTrue(reader.finished) + + def test_prompt_length(self): + # Handles simple ASCII prompt + ps1 = ">>> " + prompt, l = Reader.process_prompt(ps1) + self.assertEqual(prompt, ps1) + self.assertEqual(l, 4) + + # Handles ANSI escape sequences + ps1 = "\033[0;32m>>> \033[0m" + prompt, l = Reader.process_prompt(ps1) + self.assertEqual(prompt, "\033[0;32m>>> \033[0m") + self.assertEqual(l, 4) + + # Handles ANSI escape sequences bracketed in \001 .. \002 + ps1 = "\001\033[0;32m\002>>> \001\033[0m\002" + prompt, l = Reader.process_prompt(ps1) + self.assertEqual(prompt, "\033[0;32m>>> \033[0m") + self.assertEqual(l, 4) + + # Handles wide characters in prompt + ps1 = "樂>> " + prompt, l = Reader.process_prompt(ps1) + self.assertEqual(prompt, ps1) + self.assertEqual(l, 5) + + # Handles wide characters AND ANSI sequences together + ps1 = "\001\033[0;32m\002樂>\001\033[0m\002> " + prompt, l = Reader.process_prompt(ps1) + self.assertEqual(prompt, "\033[0;32m樂>\033[0m> ") + self.assertEqual(l, 5) diff --git a/Misc/NEWS.d/next/Library/2024-06-02-15-09-17.gh-issue-118835.KUAuz6.rst b/Misc/NEWS.d/next/Library/2024-06-02-15-09-17.gh-issue-118835.KUAuz6.rst new file mode 100644 index 00000000000000..ec9ca20a487d76 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-02-15-09-17.gh-issue-118835.KUAuz6.rst @@ -0,0 +1 @@ +Fix _pyrepl crash when using custom prompt with ANSI escape codes. From 41c1cefbae71d687d1a935233b086473df65e15c Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Mon, 3 Jun 2024 13:42:27 -0400 Subject: [PATCH 42/72] gh-117657: Avoid `sem_clockwait` in TSAN (#119915) The `sem_clockwait` function is not currently instrumented, which leads to false positives. --- Python/parking_lot.c | 2 +- Tools/tsan/suppressions_free_threading.txt | 9 --------- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/Python/parking_lot.c b/Python/parking_lot.c index e2def9e249cd56..841b1d71ea16cb 100644 --- a/Python/parking_lot.c +++ b/Python/parking_lot.c @@ -119,7 +119,7 @@ _PySemaphore_PlatformWait(_PySemaphore *sema, PyTime_t timeout) if (timeout >= 0) { struct timespec ts; -#if defined(CLOCK_MONOTONIC) && defined(HAVE_SEM_CLOCKWAIT) +#if defined(CLOCK_MONOTONIC) && defined(HAVE_SEM_CLOCKWAIT) && !defined(_Py_THREAD_SANITIZER) PyTime_t now; // silently ignore error: cannot report error to the caller (void)PyTime_MonotonicRaw(&now); diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index ff9c8036e92fe7..39cc5b080c8703 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -15,14 +15,10 @@ race:set_allocator_unlocked # These entries are for warnings that trigger in a library function, as called # by a CPython function. -# https://gist.github.com/swtaarrs/9d41251e603fa6dedd604191a6da820d -race:park_detached_threads # https://gist.github.com/swtaarrs/8e0e365e1d9cecece3269a2fb2f2b8b8 race:sock_recv_impl # https://gist.github.com/swtaarrs/08dfe7883b4c975c31ecb39388987a67 race:free_threadstate -# https://gist.github.com/swtaarrs/cd6aec2006e0c1b561b68d65e9f1a872 -race:_PyParkingLot_Park # These warnings trigger directly in a CPython function. @@ -33,8 +29,6 @@ race_top:_mi_heap_delayed_free_partial race_top:_PyEval_EvalFrameDefault race_top:_PyImport_AcquireLock race_top:_PyImport_ReleaseLock -# https://gist.github.com/mpage/0a24eb2dd458441ededb498e9b0e5de8 -race_top:_PyParkingLot_Park race_top:_PyType_HasFeature race_top:assign_version_tag race_top:insertdict @@ -47,8 +41,6 @@ race_top:set_contains_key # https://gist.github.com/colesbury/d13d033f413b4ad07929d044bed86c35 race_top:set_discard_entry race_top:set_inheritable -race_top:start_the_world -race_top:tstate_set_detached race_top:Py_SET_TYPE race_top:_PyDict_CheckConsistency race_top:_PyImport_AcquireLock @@ -64,7 +56,6 @@ race_top:list_get_item_ref race_top:make_pending_calls race_top:set_add_entry race_top:should_intern_string -race_top:llist_insert_tail race_top:_Py_slot_tp_getattr_hook race_top:add_threadstate race_top:dump_traceback From b8fde5db86334690da23343f5f4326adcd8160fb Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Mon, 3 Jun 2024 21:44:36 +0100 Subject: [PATCH 43/72] update CODEOWNERS (#120003) --- .github/CODEOWNERS | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index e08d6cc5719737..ca5c8aaa3a0bef 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -34,6 +34,7 @@ Python/ceval*.h @markshannon Python/compile.c @markshannon @iritkatriel Python/assemble.c @markshannon @iritkatriel Python/flowgraph.c @markshannon @iritkatriel +Python/instruction_sequence.c @iritkatriel Python/ast_opt.c @isidentical Python/bytecodes.c @markshannon Python/optimizer*.c @markshannon @@ -74,11 +75,8 @@ Programs/python.c @ericsnowcurrently Tools/build/generate_global_objects.py @ericsnowcurrently # Exceptions -Lib/traceback.py @iritkatriel Lib/test/test_except*.py @iritkatriel -Lib/test/test_traceback.py @iritkatriel Objects/exceptions.c @iritkatriel -Python/traceback.c @iritkatriel # Hashing **/*hashlib* @gpshead @tiran From 47fb4327b5c405da6df066dcaa01b7c1aefab313 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Mon, 3 Jun 2024 16:58:41 -0400 Subject: [PATCH 44/72] gh-117657: Fix race involving immortalizing objects (#119927) The free-threaded build currently immortalizes objects that use deferred reference counting (see gh-117783). This typically happens once the first non-main thread is created, but the behavior can be suppressed for tests, in subinterpreters, or during a compile() call. This fixes a race condition involving the tracking of whether the behavior is suppressed. --- Include/internal/pycore_gc.h | 14 +++++-------- Lib/test/support/__init__.py | 4 ++-- Modules/_testinternalcapi.c | 24 ++++++++-------------- Objects/codeobject.c | 4 ++-- Objects/object.c | 2 +- Python/bltinmodule.c | 6 +++--- Python/gc_free_threading.c | 14 ++++++------- Python/pystate.c | 4 +--- Tools/tsan/suppressions_free_threading.txt | 2 -- 9 files changed, 30 insertions(+), 44 deletions(-) diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index 60582521db5bd7..ba8b8e1903f307 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -347,15 +347,11 @@ struct _gc_runtime_state { /* gh-117783: Deferred reference counting is not fully implemented yet, so as a temporary measure we treat objects using deferred referenence - counting as immortal. */ - struct { - /* Immortalize objects instead of marking them as using deferred - reference counting. */ - int enabled; - - /* Set enabled=1 when the first background thread is created. */ - int enable_on_thread_created; - } immortalize; + counting as immortal. The value may be zero, one, or a negative number: + 0: immortalize deferred RC objects once the first thread is created + 1: immortalize all deferred RC objects immediately + <0: suppressed; don't immortalize objects */ + int immortalize; #endif }; diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 5825efadffcb29..4b320b494bb8dd 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -529,11 +529,11 @@ def suppress_immortalization(suppress=True): yield return - old_values = _testinternalcapi.set_immortalize_deferred(False) + _testinternalcapi.suppress_immortalization(True) try: yield finally: - _testinternalcapi.set_immortalize_deferred(*old_values) + _testinternalcapi.suppress_immortalization(False) def skip_if_suppress_immortalization(): try: diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index d9b9c999603d5a..6d4a00c06ca9de 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -1966,24 +1966,18 @@ get_py_thread_id(PyObject *self, PyObject *Py_UNUSED(ignored)) #endif static PyObject * -set_immortalize_deferred(PyObject *self, PyObject *value) +suppress_immortalization(PyObject *self, PyObject *value) { #ifdef Py_GIL_DISABLED - PyInterpreterState *interp = PyInterpreterState_Get(); - int old_enabled = interp->gc.immortalize.enabled; - int old_enabled_on_thread = interp->gc.immortalize.enable_on_thread_created; - int enabled_on_thread = 0; - if (!PyArg_ParseTuple(value, "i|i", - &interp->gc.immortalize.enabled, - &enabled_on_thread)) - { + int suppress = PyObject_IsTrue(value); + if (suppress < 0) { return NULL; } - interp->gc.immortalize.enable_on_thread_created = enabled_on_thread; - return Py_BuildValue("ii", old_enabled, old_enabled_on_thread); -#else - return Py_BuildValue("OO", Py_False, Py_False); + PyInterpreterState *interp = PyInterpreterState_Get(); + // Subtract two to suppress immortalization (so that 1 -> -1) + _Py_atomic_add_int(&interp->gc.immortalize, suppress ? -2 : 2); #endif + Py_RETURN_NONE; } static PyObject * @@ -1991,7 +1985,7 @@ get_immortalize_deferred(PyObject *self, PyObject *Py_UNUSED(ignored)) { #ifdef Py_GIL_DISABLED PyInterpreterState *interp = PyInterpreterState_Get(); - return PyBool_FromLong(interp->gc.immortalize.enable_on_thread_created); + return PyBool_FromLong(_Py_atomic_load_int(&interp->gc.immortalize) >= 0); #else Py_RETURN_FALSE; #endif @@ -2111,7 +2105,7 @@ static PyMethodDef module_functions[] = { #ifdef Py_GIL_DISABLED {"py_thread_id", get_py_thread_id, METH_NOARGS}, #endif - {"set_immortalize_deferred", set_immortalize_deferred, METH_VARARGS}, + {"suppress_immortalization", suppress_immortalization, METH_O}, {"get_immortalize_deferred", get_immortalize_deferred, METH_NOARGS}, #ifdef _Py_TIER2 {"uop_symbols_test", _Py_uop_symbols_test, METH_NOARGS}, diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 2ffda8dee07c90..e3e306bfe810c4 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -110,7 +110,7 @@ should_intern_string(PyObject *o) // unless we've disabled immortalizing objects that use deferred reference // counting. PyInterpreterState *interp = _PyInterpreterState_GET(); - if (interp->gc.immortalize.enable_on_thread_created) { + if (_Py_atomic_load_int(&interp->gc.immortalize) < 0) { return 1; } #endif @@ -240,7 +240,7 @@ intern_constants(PyObject *tuple, int *modified) PyThreadState *tstate = PyThreadState_GET(); if (!_Py_IsImmortal(v) && !PyCode_Check(v) && !PyUnicode_CheckExact(v) && - tstate->interp->gc.immortalize.enable_on_thread_created) + _Py_atomic_load_int(&tstate->interp->gc.immortalize) >= 0) { PyObject *interned = intern_one_constant(v); if (interned == NULL) { diff --git a/Objects/object.c b/Objects/object.c index d4fe14c5b3d1aa..5d53e9e5eaba4e 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2429,7 +2429,7 @@ _PyObject_SetDeferredRefcount(PyObject *op) assert(op->ob_ref_shared == 0); _PyObject_SET_GC_BITS(op, _PyGC_BITS_DEFERRED); PyInterpreterState *interp = _PyInterpreterState_GET(); - if (interp->gc.immortalize.enabled) { + if (_Py_atomic_load_int_relaxed(&interp->gc.immortalize) == 1) { // gh-117696: immortalize objects instead of using deferred reference // counting for now. _Py_SetImmortal(op); diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 2a02d8161591c6..c4d3ecbeeff0e6 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -870,15 +870,15 @@ builtin_compile_impl(PyObject *module, PyObject *source, PyObject *filename, // gh-118527: Disable immortalization of code constants for explicit // compile() calls to get consistent frozen outputs between the default // and free-threaded builds. + // Subtract two to suppress immortalization (so that 1 -> -1) PyInterpreterState *interp = _PyInterpreterState_GET(); - int old_value = interp->gc.immortalize.enable_on_thread_created; - interp->gc.immortalize.enable_on_thread_created = 0; + _Py_atomic_add_int(&interp->gc.immortalize, -2); #endif result = Py_CompileStringObject(str, filename, start[compile_mode], &cf, optimize); #ifdef Py_GIL_DISABLED - interp->gc.immortalize.enable_on_thread_created = old_value; + _Py_atomic_add_int(&interp->gc.immortalize, 2); #endif Py_XDECREF(source_copy); diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index e6bd012c40ee82..d005b79ff40dbf 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -703,11 +703,9 @@ _PyGC_Init(PyInterpreterState *interp) { GCState *gcstate = &interp->gc; - if (_Py_IsMainInterpreter(interp)) { - // gh-117783: immortalize objects that would use deferred refcounting - // once the first non-main thread is created. - gcstate->immortalize.enable_on_thread_created = 1; - } + // gh-117783: immortalize objects that would use deferred refcounting + // once the first non-main thread is created (but not in subinterpreters). + gcstate->immortalize = _Py_IsMainInterpreter(interp) ? 0 : -1; gcstate->garbage = PyList_New(0); if (gcstate->garbage == NULL) { @@ -1808,8 +1806,10 @@ _PyGC_ImmortalizeDeferredObjects(PyInterpreterState *interp) { struct visitor_args args; _PyEval_StopTheWorld(interp); - gc_visit_heaps(interp, &immortalize_visitor, &args); - interp->gc.immortalize.enabled = 1; + if (interp->gc.immortalize == 0) { + gc_visit_heaps(interp, &immortalize_visitor, &args); + interp->gc.immortalize = 1; + } _PyEval_StartTheWorld(interp); } diff --git a/Python/pystate.c b/Python/pystate.c index 36e4206b4a282e..d0293915db7689 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1583,9 +1583,7 @@ new_threadstate(PyInterpreterState *interp, int whence) } else { #ifdef Py_GIL_DISABLED - if (interp->gc.immortalize.enable_on_thread_created && - !interp->gc.immortalize.enabled) - { + if (_Py_atomic_load_int(&interp->gc.immortalize) == 0) { // Immortalize objects marked as using deferred reference counting // the first time a non-main thread is created. _PyGC_ImmortalizeDeferredObjects(interp); diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index 39cc5b080c8703..d5fcac61f0db04 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -47,7 +47,6 @@ race_top:_PyImport_AcquireLock race_top:_Py_dict_lookup_threadsafe race_top:_imp_release_lock race_top:_multiprocessing_SemLock_acquire_impl -race_top:builtin_compile_impl race_top:dictiter_new race_top:dictresize race_top:insert_to_emptydict @@ -55,7 +54,6 @@ race_top:insertdict race_top:list_get_item_ref race_top:make_pending_calls race_top:set_add_entry -race_top:should_intern_string race_top:_Py_slot_tp_getattr_hook race_top:add_threadstate race_top:dump_traceback From d82a7ba041321e7b58a5a9bbc394670be6ceeb7c Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 3 Jun 2024 17:56:00 -0400 Subject: [PATCH 45/72] gh-117398: Add datetime Module State (gh-119810) I was able to make use of the existing datetime_state struct, but there was one tricky thing I had to sort out. We mostly aren't converting to heap types, so we can't use things like PyType_GetModuleByDef() to look up the module state. The solution I came up with is somewhat novel, but I consider it straightforward. Also, it shouldn't have much impact on performance. In summary, this main changes here are: * I've added some macros to help hide how various objects relate to module state * as a solution to the module state lookup problem, I've stored the last loaded module on the current interpreter's internal dict (actually a weakref) * if the static type method is used after the module has been deleted, it is reloaded * to avoid extra work when loading the module, we directly copy the objects (new refs only) from the old module state into the new state if the old module hasn't been deleted yet * during module init we set various objects on the static types' __dict__s; to simplify things, we only do that the first time; once those static types have a separate __dict__ per interpreter, we'll do it every time * we now clear the module state when the module is destroyed (before, we were leaking everything in _datetime_global_state) --- .../pycore_global_objects_fini_generated.h | 1 + Include/internal/pycore_global_strings.h | 1 + .../internal/pycore_runtime_init_generated.h | 1 + .../internal/pycore_unicodeobject_generated.h | 3 + Modules/_datetimemodule.c | 532 ++++++++++++------ 5 files changed, 376 insertions(+), 162 deletions(-) diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index b9fae11dfaa85c..b186408931c92e 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -829,6 +829,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(c_call)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(c_exception)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(c_return)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cached_datetime_module)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cached_statements)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cadata)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cafile)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index aa66b20859a472..e1808c85acfb2d 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -318,6 +318,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(c_call) STRUCT_FOR_ID(c_exception) STRUCT_FOR_ID(c_return) + STRUCT_FOR_ID(cached_datetime_module) STRUCT_FOR_ID(cached_statements) STRUCT_FOR_ID(cadata) STRUCT_FOR_ID(cafile) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index b27720e9ff6ecf..2dde6febc2cae4 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -827,6 +827,7 @@ extern "C" { INIT_ID(c_call), \ INIT_ID(c_exception), \ INIT_ID(c_return), \ + INIT_ID(cached_datetime_module), \ INIT_ID(cached_statements), \ INIT_ID(cadata), \ INIT_ID(cafile), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index c61c556b758769..b00119a1bad7ff 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -795,6 +795,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(c_return); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(cached_datetime_module); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(cached_statements); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 466382b5148509..16bb4c6980aa08 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -25,17 +25,18 @@ # include /* struct timeval */ #endif + +/* forward declarations */ +static PyTypeObject PyDateTime_DateType; +static PyTypeObject PyDateTime_DateTimeType; +static PyTypeObject PyDateTime_TimeType; +static PyTypeObject PyDateTime_DeltaType; +static PyTypeObject PyDateTime_TZInfoType; +static PyTypeObject PyDateTime_TimeZoneType; + + typedef struct { - /* Static types exposed by the datetime C-API. */ - PyTypeObject *date_type; - PyTypeObject *datetime_type; - PyTypeObject *delta_type; - PyTypeObject *time_type; - PyTypeObject *tzinfo_type; - /* Exposed indirectly via TimeZone_UTC. */ - PyTypeObject *timezone_type; - - /* Other module classes. */ + /* Module heap types. */ PyTypeObject *isocalendar_date_type; /* Conversion factors. */ @@ -47,39 +48,182 @@ typedef struct { PyObject *us_per_week; // 1e6 * 3600 * 24 * 7 as Python int PyObject *seconds_per_day; // 3600 * 24 as Python int - /* The interned UTC timezone instance */ - PyObject *utc; - /* The interned Unix epoch datetime instance */ PyObject *epoch; - - /* While we use a global state, we ensure it's only initialized once */ - int initialized; } datetime_state; -static datetime_state _datetime_global_state; +/* The module has a fixed number of static objects, due to being exposed + * through the datetime C-API. There are five types exposed directly, + * one type exposed indirectly, and one singleton constant (UTC). + * + * Each of these objects is hidden behind a macro in the same way as + * the per-module objects stored in module state. The macros for the + * static objects don't need to be passed a state, but the consistency + * of doing so is more clear. We use a dedicated noop macro, NO_STATE, + * to make the special case obvious. */ + +#define NO_STATE NULL + +#define DATE_TYPE(st) &PyDateTime_DateType +#define DATETIME_TYPE(st) &PyDateTime_DateTimeType +#define TIME_TYPE(st) &PyDateTime_TimeType +#define DELTA_TYPE(st) &PyDateTime_DeltaType +#define TZINFO_TYPE(st) &PyDateTime_TZInfoType +#define TIMEZONE_TYPE(st) &PyDateTime_TimeZoneType +#define ISOCALENDAR_DATE_TYPE(st) st->isocalendar_date_type + +#define PyDate_Check(op) PyObject_TypeCheck(op, DATE_TYPE(NO_STATE)) +#define PyDate_CheckExact(op) Py_IS_TYPE(op, DATE_TYPE(NO_STATE)) + +#define PyDateTime_Check(op) PyObject_TypeCheck(op, DATETIME_TYPE(NO_STATE)) +#define PyDateTime_CheckExact(op) Py_IS_TYPE(op, DATETIME_TYPE(NO_STATE)) + +#define PyTime_Check(op) PyObject_TypeCheck(op, TIME_TYPE(NO_STATE)) +#define PyTime_CheckExact(op) Py_IS_TYPE(op, TIME_TYPE(NO_STATE)) + +#define PyDelta_Check(op) PyObject_TypeCheck(op, DELTA_TYPE(NO_STATE)) +#define PyDelta_CheckExact(op) Py_IS_TYPE(op, DELTA_TYPE(NO_STATE)) + +#define PyTZInfo_Check(op) PyObject_TypeCheck(op, TZINFO_TYPE(NO_STATE)) +#define PyTZInfo_CheckExact(op) Py_IS_TYPE(op, TZINFO_TYPE(NO_STATE)) + +#define PyTimezone_Check(op) PyObject_TypeCheck(op, TIMEZONE_TYPE(NO_STATE)) + +#define CONST_US_PER_MS(st) st->us_per_ms +#define CONST_US_PER_SECOND(st) st->us_per_second +#define CONST_US_PER_MINUTE(st) st->us_per_minute +#define CONST_US_PER_HOUR(st) st->us_per_hour +#define CONST_US_PER_DAY(st) st->us_per_day +#define CONST_US_PER_WEEK(st) st->us_per_week +#define CONST_SEC_PER_DAY(st) st->seconds_per_day +#define CONST_EPOCH(st) st->epoch +#define CONST_UTC(st) ((PyObject *)&utc_timezone) + +static datetime_state * +get_module_state(PyObject *module) +{ + void *state = _PyModule_GetState(module); + assert(state != NULL); + return (datetime_state *)state; +} + + +#define INTERP_KEY ((PyObject *)&_Py_ID(cached_datetime_module)) + +static PyObject * +get_current_module(PyInterpreterState *interp) +{ + PyObject *dict = PyInterpreterState_GetDict(interp); + if (dict == NULL) { + return NULL; + } + PyObject *ref = NULL; + if (PyDict_GetItemRef(dict, INTERP_KEY, &ref) < 0) { + return NULL; + } + if (ref == NULL) { + return NULL; + } + PyObject *mod = NULL; + (void)PyWeakref_GetRef(ref, &mod); + if (mod == Py_None) { + Py_CLEAR(mod); + } + Py_DECREF(ref); + return mod; +} + +static PyModuleDef datetimemodule; + +static datetime_state * +_get_current_state(PyObject **p_mod) +{ + PyInterpreterState *interp = PyInterpreterState_Get(); + PyObject *mod = get_current_module(interp); + if (mod == NULL) { + assert(!PyErr_Occurred()); + if (PyErr_Occurred()) { + return NULL; + } + /* The static types can outlive the module, + * so we must re-import the module. */ + mod = PyImport_ImportModule("_datetime"); + if (mod == NULL) { + return NULL; + } + } + datetime_state *st = get_module_state(mod); + *p_mod = mod; + return st; +} + +#define GET_CURRENT_STATE(MOD_VAR) \ + _get_current_state(&MOD_VAR) +#define RELEASE_CURRENT_STATE(ST_VAR, MOD_VAR) \ + Py_DECREF(MOD_VAR) -static inline datetime_state* get_datetime_state(void) +static int +set_current_module(PyInterpreterState *interp, PyObject *mod) { - return &_datetime_global_state; + assert(mod != NULL); + PyObject *dict = PyInterpreterState_GetDict(interp); + if (dict == NULL) { + return -1; + } + PyObject *ref = PyWeakref_NewRef(mod, NULL); + if (ref == NULL) { + return -1; + } + int rc = PyDict_SetItem(dict, INTERP_KEY, ref); + Py_DECREF(ref); + return rc; } -#define PyDate_Check(op) PyObject_TypeCheck(op, get_datetime_state()->date_type) -#define PyDate_CheckExact(op) Py_IS_TYPE(op, get_datetime_state()->date_type) +static void +clear_current_module(PyInterpreterState *interp, PyObject *expected) +{ + PyObject *exc = PyErr_GetRaisedException(); + + PyObject *current = NULL; + + PyObject *dict = PyInterpreterState_GetDict(interp); + if (dict == NULL) { + goto error; + } + + if (expected != NULL) { + PyObject *ref = NULL; + if (PyDict_GetItemRef(dict, INTERP_KEY, &ref) < 0) { + goto error; + } + if (ref != NULL) { + int rc = PyWeakref_GetRef(ref, ¤t); + Py_DECREF(ref); + if (rc < 0) { + goto error; + } + if (current != expected) { + goto finally; + } + } + } -#define PyDateTime_Check(op) PyObject_TypeCheck(op, get_datetime_state()->datetime_type) -#define PyDateTime_CheckExact(op) Py_IS_TYPE(op, get_datetime_state()->datetime_type) + if (PyDict_DelItem(dict, INTERP_KEY) < 0) { + if (!PyErr_ExceptionMatches(PyExc_KeyError)) { + goto error; + } + } -#define PyTime_Check(op) PyObject_TypeCheck(op, get_datetime_state()->time_type) -#define PyTime_CheckExact(op) Py_IS_TYPE(op, get_datetime_state()->time_type) + goto finally; -#define PyDelta_Check(op) PyObject_TypeCheck(op, get_datetime_state()->delta_type) -#define PyDelta_CheckExact(op) Py_IS_TYPE(op, get_datetime_state()->delta_type) +error: + PyErr_Print(); -#define PyTZInfo_Check(op) PyObject_TypeCheck(op, get_datetime_state()->tzinfo_type) -#define PyTZInfo_CheckExact(op) Py_IS_TYPE(op, get_datetime_state()->tzinfo_type) +finally: + Py_XDECREF(current); + PyErr_SetRaisedException(exc); +} -#define PyTimezone_Check(op) PyObject_TypeCheck(op, get_datetime_state()->timezone_type) /* We require that C int be at least 32 bits, and use int virtually * everywhere. In just a few cases we use a temp long, where a Python @@ -988,7 +1132,7 @@ new_date_ex(int year, int month, int day, PyTypeObject *type) } #define new_date(year, month, day) \ - new_date_ex(year, month, day, get_datetime_state()->date_type) + new_date_ex(year, month, day, DATE_TYPE(NO_STATE)) // Forward declaration static PyObject * @@ -998,13 +1142,12 @@ new_datetime_ex(int, int, int, int, int, int, int, PyObject *, PyTypeObject *); static PyObject * new_date_subclass_ex(int year, int month, int day, PyObject *cls) { - datetime_state *st = get_datetime_state(); PyObject *result; // We have "fast path" constructors for two subclasses: date and datetime - if ((PyTypeObject *)cls == st->date_type) { + if ((PyTypeObject *)cls == DATE_TYPE(NO_STATE)) { result = new_date_ex(year, month, day, (PyTypeObject *)cls); } - else if ((PyTypeObject *)cls == st->datetime_type) { + else if ((PyTypeObject *)cls == DATETIME_TYPE(NO_STATE)) { result = new_datetime_ex(year, month, day, 0, 0, 0, 0, Py_None, (PyTypeObject *)cls); } @@ -1058,8 +1201,7 @@ new_datetime_ex(int year, int month, int day, int hour, int minute, } #define new_datetime(y, m, d, hh, mm, ss, us, tzinfo, fold) \ - new_datetime_ex2(y, m, d, hh, mm, ss, us, tzinfo, fold, \ - get_datetime_state()->datetime_type) + new_datetime_ex2(y, m, d, hh, mm, ss, us, tzinfo, fold, DATETIME_TYPE(NO_STATE)) static PyObject * call_subclass_fold(PyObject *cls, int fold, const char *format, ...) @@ -1100,9 +1242,8 @@ new_datetime_subclass_fold_ex(int year, int month, int day, int hour, int minute int second, int usecond, PyObject *tzinfo, int fold, PyObject *cls) { - datetime_state *st = get_datetime_state(); PyObject* dt; - if ((PyTypeObject*)cls == st->datetime_type) { + if ((PyTypeObject*)cls == DATETIME_TYPE(NO_STATE)) { // Use the fast path constructor dt = new_datetime(year, month, day, hour, minute, second, usecond, tzinfo, fold); @@ -1163,16 +1304,15 @@ new_time_ex(int hour, int minute, int second, int usecond, return new_time_ex2(hour, minute, second, usecond, tzinfo, 0, type); } -#define new_time(hh, mm, ss, us, tzinfo, fold) \ - new_time_ex2(hh, mm, ss, us, tzinfo, fold, get_datetime_state()->time_type) +#define new_time(hh, mm, ss, us, tzinfo, fold) \ + new_time_ex2(hh, mm, ss, us, tzinfo, fold, TIME_TYPE(NO_STATE)) static PyObject * new_time_subclass_fold_ex(int hour, int minute, int second, int usecond, PyObject *tzinfo, int fold, PyObject *cls) { PyObject *t; - datetime_state *st = get_datetime_state(); - if ((PyTypeObject*)cls == st->time_type) { + if ((PyTypeObject*)cls == TIME_TYPE(NO_STATE)) { // Use the fast path constructor t = new_time(hour, minute, second, usecond, tzinfo, fold); } @@ -1224,7 +1364,7 @@ new_delta_ex(int days, int seconds, int microseconds, int normalize, } #define new_delta(d, s, us, normalize) \ - new_delta_ex(d, s, us, normalize, get_datetime_state()->delta_type) + new_delta_ex(d, s, us, normalize, DELTA_TYPE(NO_STATE)) typedef struct @@ -1244,8 +1384,7 @@ static PyObject * create_timezone(PyObject *offset, PyObject *name) { PyDateTime_TimeZone *self; - datetime_state *st = get_datetime_state(); - PyTypeObject *type = st->timezone_type; + PyTypeObject *type = TIMEZONE_TYPE(NO_STATE); assert(offset != NULL); assert(PyDelta_Check(offset)); @@ -1267,6 +1406,7 @@ create_timezone(PyObject *offset, PyObject *name) } static int delta_bool(PyDateTime_Delta *self); +static PyDateTime_TimeZone utc_timezone; static PyObject * new_timezone(PyObject *offset, PyObject *name) @@ -1276,8 +1416,7 @@ new_timezone(PyObject *offset, PyObject *name) assert(name == NULL || PyUnicode_Check(name)); if (name == NULL && delta_bool((PyDateTime_Delta *)offset) == 0) { - datetime_state *st = get_datetime_state(); - return Py_NewRef(st->utc); + return Py_NewRef(CONST_UTC(NO_STATE)); } if ((GET_TD_DAYS(offset) == -1 && GET_TD_SECONDS(offset) == 0 && @@ -1490,8 +1629,7 @@ tzinfo_from_isoformat_results(int rv, int tzoffset, int tz_useconds) if (rv == 1) { // Create a timezone from offset in seconds (0 returns UTC) if (tzoffset == 0) { - datetime_state *st = get_datetime_state(); - return Py_NewRef(st->utc); + return Py_NewRef(CONST_UTC(NO_STATE)); } PyObject *delta = new_delta(0, tzoffset, tz_useconds, 1); @@ -1920,11 +2058,13 @@ delta_to_microseconds(PyDateTime_Delta *self) PyObject *x3 = NULL; PyObject *result = NULL; + PyObject *current_mod = NULL; + datetime_state *st = GET_CURRENT_STATE(current_mod); + x1 = PyLong_FromLong(GET_TD_DAYS(self)); if (x1 == NULL) goto Done; - datetime_state *st = get_datetime_state(); - x2 = PyNumber_Multiply(x1, st->seconds_per_day); /* days in seconds */ + x2 = PyNumber_Multiply(x1, CONST_SEC_PER_DAY(st)); /* days in seconds */ if (x2 == NULL) goto Done; Py_SETREF(x1, NULL); @@ -1941,7 +2081,7 @@ delta_to_microseconds(PyDateTime_Delta *self) /* x1 = */ x2 = NULL; /* x3 has days+seconds in seconds */ - x1 = PyNumber_Multiply(x3, st->us_per_second); /* us */ + x1 = PyNumber_Multiply(x3, CONST_US_PER_SECOND(st)); /* us */ if (x1 == NULL) goto Done; Py_SETREF(x3, NULL); @@ -1957,6 +2097,7 @@ delta_to_microseconds(PyDateTime_Delta *self) Py_XDECREF(x1); Py_XDECREF(x2); Py_XDECREF(x3); + RELEASE_CURRENT_STATE(st, current_mod); return result; } @@ -1996,8 +2137,10 @@ microseconds_to_delta_ex(PyObject *pyus, PyTypeObject *type) PyObject *num = NULL; PyObject *result = NULL; - datetime_state *st = get_datetime_state(); - tuple = checked_divmod(pyus, st->us_per_second); + PyObject *current_mod = NULL; + datetime_state *st = GET_CURRENT_STATE(current_mod); + + tuple = checked_divmod(pyus, CONST_US_PER_SECOND(st)); if (tuple == NULL) { goto Done; } @@ -2015,7 +2158,7 @@ microseconds_to_delta_ex(PyObject *pyus, PyTypeObject *type) num = Py_NewRef(PyTuple_GET_ITEM(tuple, 0)); /* leftover seconds */ Py_DECREF(tuple); - tuple = checked_divmod(num, st->seconds_per_day); + tuple = checked_divmod(num, CONST_SEC_PER_DAY(st)); if (tuple == NULL) goto Done; Py_DECREF(num); @@ -2040,6 +2183,7 @@ microseconds_to_delta_ex(PyObject *pyus, PyTypeObject *type) Done: Py_XDECREF(tuple); Py_XDECREF(num); + RELEASE_CURRENT_STATE(st, current_mod); return result; BadDivmod: @@ -2049,7 +2193,7 @@ microseconds_to_delta_ex(PyObject *pyus, PyTypeObject *type) } #define microseconds_to_delta(pymicros) \ - microseconds_to_delta_ex(pymicros, get_datetime_state()->delta_type) + microseconds_to_delta_ex(pymicros, DELTA_TYPE(NO_STATE)) static PyObject * multiply_int_timedelta(PyObject *intobj, PyDateTime_Delta *delta) @@ -2577,6 +2721,9 @@ delta_new(PyTypeObject *type, PyObject *args, PyObject *kw) { PyObject *self = NULL; + PyObject *current_mod = NULL; + datetime_state *st = GET_CURRENT_STATE(current_mod); + /* Argument objects. */ PyObject *day = NULL; PyObject *second = NULL; @@ -2615,29 +2762,28 @@ delta_new(PyTypeObject *type, PyObject *args, PyObject *kw) y = accum("microseconds", x, us, _PyLong_GetOne(), &leftover_us); CLEANUP; } - datetime_state *st = get_datetime_state(); if (ms) { - y = accum("milliseconds", x, ms, st->us_per_ms, &leftover_us); + y = accum("milliseconds", x, ms, CONST_US_PER_MS(st), &leftover_us); CLEANUP; } if (second) { - y = accum("seconds", x, second, st->us_per_second, &leftover_us); + y = accum("seconds", x, second, CONST_US_PER_SECOND(st), &leftover_us); CLEANUP; } if (minute) { - y = accum("minutes", x, minute, st->us_per_minute, &leftover_us); + y = accum("minutes", x, minute, CONST_US_PER_MINUTE(st), &leftover_us); CLEANUP; } if (hour) { - y = accum("hours", x, hour, st->us_per_hour, &leftover_us); + y = accum("hours", x, hour, CONST_US_PER_HOUR(st), &leftover_us); CLEANUP; } if (day) { - y = accum("days", x, day, st->us_per_day, &leftover_us); + y = accum("days", x, day, CONST_US_PER_DAY(st), &leftover_us); CLEANUP; } if (week) { - y = accum("weeks", x, week, st->us_per_week, &leftover_us); + y = accum("weeks", x, week, CONST_US_PER_WEEK(st), &leftover_us); CLEANUP; } if (leftover_us) { @@ -2679,7 +2825,9 @@ delta_new(PyTypeObject *type, PyObject *args, PyObject *kw) self = microseconds_to_delta_ex(x, type); Py_DECREF(x); + Done: + RELEASE_CURRENT_STATE(st, current_mod); return self; #undef CLEANUP @@ -2792,9 +2940,12 @@ delta_total_seconds(PyObject *self, PyObject *Py_UNUSED(ignored)) if (total_microseconds == NULL) return NULL; - datetime_state *st = get_datetime_state(); - total_seconds = PyNumber_TrueDivide(total_microseconds, st->us_per_second); + PyObject *current_mod = NULL; + datetime_state *st = GET_CURRENT_STATE(current_mod); + + total_seconds = PyNumber_TrueDivide(total_microseconds, CONST_US_PER_SECOND(st)); + RELEASE_CURRENT_STATE(st, current_mod); Py_DECREF(total_microseconds); return total_seconds; } @@ -3547,9 +3698,12 @@ date_isocalendar(PyDateTime_Date *self, PyObject *Py_UNUSED(ignored)) week = 0; } - datetime_state *st = get_datetime_state(); - PyObject *v = iso_calendar_date_new_impl(st->isocalendar_date_type, + PyObject *current_mod = NULL; + datetime_state *st = GET_CURRENT_STATE(current_mod); + + PyObject *v = iso_calendar_date_new_impl(ISOCALENDAR_DATE_TYPE(st), year, week + 1, day + 1); + RELEASE_CURRENT_STATE(st, current_mod); if (v == NULL) { return NULL; } @@ -4018,9 +4172,8 @@ timezone_new(PyTypeObject *type, PyObject *args, PyObject *kw) { PyObject *offset; PyObject *name = NULL; - datetime_state *st = get_datetime_state(); if (PyArg_ParseTupleAndKeywords(args, kw, "O!|U:timezone", timezone_kws, - st->delta_type, &offset, &name)) + DELTA_TYPE(NO_STATE), &offset, &name)) return new_timezone(offset, name); return NULL; @@ -4073,8 +4226,7 @@ timezone_repr(PyDateTime_TimeZone *self) to use Py_TYPE(self)->tp_name here. */ const char *type_name = Py_TYPE(self)->tp_name; - datetime_state *st = get_datetime_state(); - if (((PyObject *)self) == st->utc) { + if ((PyObject *)self == CONST_UTC(NO_STATE)) { return PyUnicode_FromFormat("%s.utc", type_name); } @@ -4096,8 +4248,7 @@ timezone_str(PyDateTime_TimeZone *self) if (self->name != NULL) { return Py_NewRef(self->name); } - datetime_state *st = get_datetime_state(); - if ((PyObject *)self == st->utc || + if ((PyObject *)self == CONST_UTC(NO_STATE) || (GET_TD_DAYS(self->offset) == 0 && GET_TD_SECONDS(self->offset) == 0 && GET_TD_MICROSECONDS(self->offset) == 0)) @@ -4260,7 +4411,7 @@ static PyDateTime_TimeZone * look_up_timezone(PyObject *offset, PyObject *name) { if (offset == utc_timezone.offset && name == NULL) { - return &utc_timezone; + return (PyDateTime_TimeZone *)CONST_UTC(NO_STATE); } return NULL; } @@ -4777,8 +4928,7 @@ time_fromisoformat(PyObject *cls, PyObject *tstr) { } PyObject *t; - datetime_state *st = get_datetime_state(); - if ( (PyTypeObject *)cls == st->time_type) { + if ( (PyTypeObject *)cls == TIME_TYPE(NO_STATE)) { t = new_time(hour, minute, second, microsecond, tzinfo, 0); } else { t = PyObject_CallFunction(cls, "iiiiO", @@ -5376,10 +5526,9 @@ datetime_combine(PyObject *cls, PyObject *args, PyObject *kw) PyObject *tzinfo = NULL; PyObject *result = NULL; - datetime_state *st = get_datetime_state(); if (PyArg_ParseTupleAndKeywords(args, kw, "O!O!|O:combine", keywords, - st->date_type, &date, - st->time_type, &time, &tzinfo)) { + DATE_TYPE(NO_STATE), &date, + TIME_TYPE(NO_STATE), &time, &tzinfo)) { if (tzinfo == NULL) { if (HASTZINFO(time)) tzinfo = ((PyDateTime_Time *)time)->tzinfo; @@ -6209,7 +6358,6 @@ local_timezone_from_timestamp(time_t timestamp) delta = new_delta(0, local_time_tm.tm_gmtoff, 0, 1); #else /* HAVE_STRUCT_TM_TM_ZONE */ { - datetime_state *st = get_datetime_state(); PyObject *local_time, *utc_time; struct tm utc_time_tm; char buf[100]; @@ -6264,8 +6412,11 @@ local_timezone(PyDateTime_DateTime *utc_time) PyObject *one_second; PyObject *seconds; - datetime_state *st = get_datetime_state(); - delta = datetime_subtract((PyObject *)utc_time, st->epoch); + PyObject *current_mod = NULL; + datetime_state *st = GET_CURRENT_STATE(current_mod); + + delta = datetime_subtract((PyObject *)utc_time, CONST_EPOCH(st)); + RELEASE_CURRENT_STATE(st, current_mod); if (delta == NULL) return NULL; @@ -6378,7 +6529,6 @@ datetime_astimezone(PyDateTime_DateTime *self, PyObject *args, PyObject *kw) if (result == NULL) return NULL; - datetime_state *st = get_datetime_state(); /* Make sure result is aware and UTC. */ if (!HASTZINFO(result)) { temp = (PyObject *)result; @@ -6390,7 +6540,7 @@ datetime_astimezone(PyDateTime_DateTime *self, PyObject *args, PyObject *kw) DATE_GET_MINUTE(result), DATE_GET_SECOND(result), DATE_GET_MICROSECOND(result), - st->utc, + CONST_UTC(NO_STATE), DATE_GET_FOLD(result), Py_TYPE(result)); Py_DECREF(temp); @@ -6399,7 +6549,7 @@ datetime_astimezone(PyDateTime_DateTime *self, PyObject *args, PyObject *kw) } else { /* Result is already aware - just replace tzinfo. */ - Py_SETREF(result->tzinfo, Py_NewRef(st->utc)); + Py_SETREF(result->tzinfo, Py_NewRef(CONST_UTC(NO_STATE))); } /* Attach new tzinfo and let fromutc() do the rest. */ @@ -6503,9 +6653,12 @@ datetime_timestamp(PyDateTime_DateTime *self, PyObject *Py_UNUSED(ignored)) PyObject *result; if (HASTZINFO(self) && self->tzinfo != Py_None) { - datetime_state *st = get_datetime_state(); + PyObject *current_mod = NULL; + datetime_state *st = GET_CURRENT_STATE(current_mod); + PyObject *delta; - delta = datetime_subtract((PyObject *)self, st->epoch); + delta = datetime_subtract((PyObject *)self, CONST_EPOCH(st)); + RELEASE_CURRENT_STATE(st, current_mod); if (delta == NULL) return NULL; result = delta_total_seconds(delta, NULL); @@ -6839,23 +6992,6 @@ get_datetime_capi(void) return &capi; } -static int -datetime_clear(PyObject *module) -{ - datetime_state *st = get_datetime_state(); - - Py_CLEAR(st->us_per_ms); - Py_CLEAR(st->us_per_second); - Py_CLEAR(st->us_per_minute); - Py_CLEAR(st->us_per_hour); - Py_CLEAR(st->us_per_day); - Py_CLEAR(st->us_per_week); - Py_CLEAR(st->seconds_per_day); - Py_CLEAR(st->utc); - Py_CLEAR(st->epoch); - return 0; -} - static PyObject * create_timezone_from_delta(int days, int sec, int ms, int normalize) { @@ -6869,25 +7005,39 @@ create_timezone_from_delta(int days, int sec, int ms, int normalize) } static int -init_state(datetime_state *st, PyTypeObject *PyDateTime_IsoCalendarDateType) -{ - // While datetime uses global module "state", we unly initialize it once. - // The PyLong objects created here (once per process) are not decref'd. - if (st->initialized) { +init_state(datetime_state *st, PyObject *module, PyObject *old_module) +{ + /* Each module gets its own heap types. */ +#define ADD_TYPE(FIELD, SPEC, BASE) \ + do { \ + PyObject *cls = PyType_FromModuleAndSpec( \ + module, SPEC, (PyObject *)BASE); \ + if (cls == NULL) { \ + return -1; \ + } \ + st->FIELD = (PyTypeObject *)cls; \ + } while (0) + + ADD_TYPE(isocalendar_date_type, &isocal_spec, &PyTuple_Type); +#undef ADD_TYPE + + if (old_module != NULL) { + assert(old_module != module); + datetime_state *st_old = get_module_state(old_module); + *st = (datetime_state){ + .isocalendar_date_type = st->isocalendar_date_type, + .us_per_ms = Py_NewRef(st_old->us_per_ms), + .us_per_second = Py_NewRef(st_old->us_per_second), + .us_per_minute = Py_NewRef(st_old->us_per_minute), + .us_per_hour = Py_NewRef(st_old->us_per_hour), + .us_per_day = Py_NewRef(st_old->us_per_day), + .us_per_week = Py_NewRef(st_old->us_per_week), + .seconds_per_day = Py_NewRef(st_old->seconds_per_day), + .epoch = Py_NewRef(st_old->epoch), + }; return 0; } - /* Static types exposed by the C-API. */ - st->date_type = &PyDateTime_DateType; - st->datetime_type = &PyDateTime_DateTimeType; - st->delta_type = &PyDateTime_DeltaType; - st->time_type = &PyDateTime_TimeType; - st->tzinfo_type = &PyDateTime_TZInfoType; - st->timezone_type = &PyDateTime_TimeZoneType; - - /* Per-module heap types. */ - st->isocalendar_date_type = PyDateTime_IsoCalendarDateType; - st->us_per_ms = PyLong_FromLong(1000); if (st->us_per_ms == NULL) { return -1; @@ -6921,26 +7071,54 @@ init_state(datetime_state *st, PyTypeObject *PyDateTime_IsoCalendarDateType) return -1; } - /* Init UTC timezone */ - st->utc = create_timezone_from_delta(0, 0, 0, 0); - if (st->utc == NULL) { - return -1; - } - /* Init Unix epoch */ - st->epoch = new_datetime(1970, 1, 1, 0, 0, 0, 0, st->utc, 0); + st->epoch = new_datetime( + 1970, 1, 1, 0, 0, 0, 0, (PyObject *)&utc_timezone, 0); if (st->epoch == NULL) { return -1; } - st->initialized = 1; + return 0; +} + +static int +traverse_state(datetime_state *st, visitproc visit, void *arg) +{ + /* heap types */ + Py_VISIT(st->isocalendar_date_type); return 0; } +static int +clear_state(datetime_state *st) +{ + Py_CLEAR(st->isocalendar_date_type); + Py_CLEAR(st->us_per_ms); + Py_CLEAR(st->us_per_second); + Py_CLEAR(st->us_per_minute); + Py_CLEAR(st->us_per_hour); + Py_CLEAR(st->us_per_day); + Py_CLEAR(st->us_per_week); + Py_CLEAR(st->seconds_per_day); + Py_CLEAR(st->epoch); + return 0; +} + static int _datetime_exec(PyObject *module) { + int rc = -1; + datetime_state *st = get_module_state(module); + + PyInterpreterState *interp = PyInterpreterState_Get(); + PyObject *old_module = get_current_module(interp); + if (PyErr_Occurred()) { + assert(old_module == NULL); + goto error; + } + /* We actually set the "current" module right before a successful return. */ + // `&...` is not a constant expression according to a strict reading // of C standards. Fill tp_base at run-time rather than statically. // See https://bugs.python.org/issue40777 @@ -6953,6 +7131,7 @@ _datetime_exec(PyObject *module) &PyDateTime_TimeType, &PyDateTime_DeltaType, &PyDateTime_TZInfoType, + /* Indirectly, via the utc object. */ &PyDateTime_TimeZoneType, }; @@ -6962,29 +7141,16 @@ _datetime_exec(PyObject *module) } } -#define CREATE_TYPE(VAR, SPEC, BASE) \ - do { \ - VAR = (PyTypeObject *)PyType_FromModuleAndSpec( \ - module, SPEC, (PyObject *)BASE); \ - if (VAR == NULL) { \ - goto error; \ - } \ - } while (0) - - PyTypeObject *PyDateTime_IsoCalendarDateType = NULL; - datetime_state *st = get_datetime_state(); - - if (!st->initialized) { - CREATE_TYPE(PyDateTime_IsoCalendarDateType, &isocal_spec, &PyTuple_Type); - } -#undef CREATE_TYPE - - if (init_state(st, PyDateTime_IsoCalendarDateType) < 0) { + if (init_state(st, module, old_module) < 0) { goto error; } + /* For now we only set the objects on the static types once. + * We will relax that once each types __dict__ is per-interpreter. */ #define DATETIME_ADD_MACRO(dict, c, value_expr) \ do { \ + if (PyDict_GetItemString(dict, c) == NULL) { \ + assert(!PyErr_Occurred()); \ PyObject *value = (value_expr); \ if (value == NULL) { \ goto error; \ @@ -6994,29 +7160,30 @@ _datetime_exec(PyObject *module) goto error; \ } \ Py_DECREF(value); \ + } \ } while(0) /* timedelta values */ - PyObject *d = st->delta_type->tp_dict; + PyObject *d = PyDateTime_DeltaType.tp_dict; DATETIME_ADD_MACRO(d, "resolution", new_delta(0, 0, 1, 0)); DATETIME_ADD_MACRO(d, "min", new_delta(-MAX_DELTA_DAYS, 0, 0, 0)); DATETIME_ADD_MACRO(d, "max", new_delta(MAX_DELTA_DAYS, 24*3600-1, 1000000-1, 0)); /* date values */ - d = st->date_type->tp_dict; + d = PyDateTime_DateType.tp_dict; DATETIME_ADD_MACRO(d, "min", new_date(1, 1, 1)); DATETIME_ADD_MACRO(d, "max", new_date(MAXYEAR, 12, 31)); DATETIME_ADD_MACRO(d, "resolution", new_delta(1, 0, 0, 0)); /* time values */ - d = st->time_type->tp_dict; + d = PyDateTime_TimeType.tp_dict; DATETIME_ADD_MACRO(d, "min", new_time(0, 0, 0, 0, Py_None, 0)); DATETIME_ADD_MACRO(d, "max", new_time(23, 59, 59, 999999, Py_None, 0)); DATETIME_ADD_MACRO(d, "resolution", new_delta(0, 0, 1, 0)); /* datetime values */ - d = st->datetime_type->tp_dict; + d = PyDateTime_DateTimeType.tp_dict; DATETIME_ADD_MACRO(d, "min", new_datetime(1, 1, 1, 0, 0, 0, 0, Py_None, 0)); DATETIME_ADD_MACRO(d, "max", new_datetime(MAXYEAR, 12, 31, 23, 59, 59, @@ -7024,8 +7191,8 @@ _datetime_exec(PyObject *module) DATETIME_ADD_MACRO(d, "resolution", new_delta(0, 0, 1, 0)); /* timezone values */ - d = st->timezone_type->tp_dict; - if (PyDict_SetItemString(d, "utc", st->utc) < 0) { + d = PyDateTime_TimeZoneType.tp_dict; + if (PyDict_SetItemString(d, "utc", (PyObject *)&utc_timezone) < 0) { goto error; } @@ -7034,12 +7201,13 @@ _datetime_exec(PyObject *module) * values. This may change in the future.*/ /* -23:59 */ - PyObject *min = create_timezone_from_delta(-1, 60, 0, 1); - DATETIME_ADD_MACRO(d, "min", min); + DATETIME_ADD_MACRO(d, "min", create_timezone_from_delta(-1, 60, 0, 1)); /* +23:59 */ - PyObject *max = create_timezone_from_delta(0, (23 * 60 + 59) * 60, 0, 0); - DATETIME_ADD_MACRO(d, "max", max); + DATETIME_ADD_MACRO( + d, "max", create_timezone_from_delta(0, (23 * 60 + 59) * 60, 0, 0)); + +#undef DATETIME_ADD_MACRO /* Add module level attributes */ if (PyModule_AddIntMacro(module, MINYEAR) < 0) { @@ -7048,7 +7216,7 @@ _datetime_exec(PyObject *module) if (PyModule_AddIntMacro(module, MAXYEAR) < 0) { goto error; } - if (PyModule_AddObjectRef(module, "UTC", st->utc) < 0) { + if (PyModule_AddObjectRef(module, "UTC", (PyObject *)&utc_timezone) < 0) { goto error; } @@ -7081,13 +7249,20 @@ _datetime_exec(PyObject *module) static_assert(DI100Y == 25 * DI4Y - 1, "DI100Y"); assert(DI100Y == days_before_year(100+1)); - return 0; + if (set_current_module(interp, module) < 0) { + goto error; + } + + rc = 0; + goto finally; error: - datetime_clear(module); - return -1; + clear_state(st); + +finally: + Py_XDECREF(old_module); + return rc; } -#undef DATETIME_ADD_MACRO static PyModuleDef_Slot module_slots[] = { {Py_mod_exec, _datetime_exec}, @@ -7096,13 +7271,46 @@ static PyModuleDef_Slot module_slots[] = { {0, NULL}, }; +static int +module_traverse(PyObject *mod, visitproc visit, void *arg) +{ + datetime_state *st = get_module_state(mod); + traverse_state(st, visit, arg); + return 0; +} + +static int +module_clear(PyObject *mod) +{ + datetime_state *st = get_module_state(mod); + clear_state(st); + + PyInterpreterState *interp = PyInterpreterState_Get(); + clear_current_module(interp, mod); + + return 0; +} + +static void +module_free(void *mod) +{ + datetime_state *st = get_module_state((PyObject *)mod); + clear_state(st); + + PyInterpreterState *interp = PyInterpreterState_Get(); + clear_current_module(interp, (PyObject *)mod); +} + static PyModuleDef datetimemodule = { .m_base = PyModuleDef_HEAD_INIT, .m_name = "_datetime", .m_doc = "Fast implementation of the datetime type.", - .m_size = 0, + .m_size = sizeof(datetime_state), .m_methods = module_methods, .m_slots = module_slots, + .m_traverse = module_traverse, + .m_clear = module_clear, + .m_free = module_free, }; PyMODINIT_FUNC From dba7a167dbbd50e83e58df351f3414b7a08e0188 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 3 Jun 2024 18:42:48 -0400 Subject: [PATCH 46/72] gh-117142: Support Importing ctypes in Isolated Interpreters (gh-119991) This makes the support official. Co-authored-by: Kirill Podoprigora --- .../next/Library/2024-06-03-11-18-16.gh-issue-117142.kWTXQo.rst | 2 ++ Modules/_ctypes/_ctypes.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-06-03-11-18-16.gh-issue-117142.kWTXQo.rst diff --git a/Misc/NEWS.d/next/Library/2024-06-03-11-18-16.gh-issue-117142.kWTXQo.rst b/Misc/NEWS.d/next/Library/2024-06-03-11-18-16.gh-issue-117142.kWTXQo.rst new file mode 100644 index 00000000000000..80734ef3946300 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-03-11-18-16.gh-issue-117142.kWTXQo.rst @@ -0,0 +1,2 @@ +The :mod:`ctypes` module may now be imported in all subinterpreters, including +those that have their own GIL. diff --git a/Modules/_ctypes/_ctypes.c b/Modules/_ctypes/_ctypes.c index 6c1e5f58b95657..1d9534671a4ee8 100644 --- a/Modules/_ctypes/_ctypes.c +++ b/Modules/_ctypes/_ctypes.c @@ -5939,7 +5939,7 @@ module_free(void *module) static PyModuleDef_Slot module_slots[] = { {Py_mod_exec, _ctypes_mod_exec}, - {Py_mod_multiple_interpreters, Py_MOD_MULTIPLE_INTERPRETERS_SUPPORTED}, + {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, {Py_mod_gil, Py_MOD_GIL_NOT_USED}, {0, NULL} }; From 105f22ea46ac16866e6df18ebae2a8ba422b7f45 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Mon, 3 Jun 2024 19:09:18 -0400 Subject: [PATCH 47/72] gh-117398: Use Per-Interpreter State for the _datetime Static Types (gh-119929) We make use of the same mechanism that we use for the static builtin types. This required a few tweaks. The relevant code could use some cleanup but I opted to avoid the significant churn in this change. I'll tackle that separately. This change is the final piece needed to make _datetime support multiple interpreters. I've updated the module slot accordingly. --- Include/internal/pycore_object.h | 2 +- Include/internal/pycore_typeobject.h | 48 ++- ...-06-01-16-58-43.gh-issue-117398.kR0RW7.rst | 2 + Modules/_datetimemodule.c | 197 +++++++++---- Objects/exceptions.c | 2 +- Objects/object.c | 2 +- Objects/structseq.c | 2 +- Objects/typeobject.c | 278 ++++++++++++------ Objects/unicodeobject.c | 6 +- Objects/weakrefobject.c | 2 +- Python/crossinterp_exceptions.h | 4 +- Tools/c-analyzer/cpython/globals-to-fix.tsv | 1 + Tools/c-analyzer/cpython/ignored.tsv | 1 + 13 files changed, 381 insertions(+), 166 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-06-01-16-58-43.gh-issue-117398.kR0RW7.rst diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index f63e1da6fba025..6f133014ce06e2 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -589,7 +589,7 @@ _PyObject_GET_WEAKREFS_LISTPTR(PyObject *op) if (PyType_Check(op) && ((PyTypeObject *)op)->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { PyInterpreterState *interp = _PyInterpreterState_GET(); - static_builtin_state *state = _PyStaticType_GetState( + managed_static_type_state *state = _PyStaticType_GetState( interp, (PyTypeObject *)op); return _PyStaticType_GET_WEAKREFS_LISTPTR(state); } diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h index 7e533bd138469b..8664ae0e44533f 100644 --- a/Include/internal/pycore_typeobject.h +++ b/Include/internal/pycore_typeobject.h @@ -44,10 +44,12 @@ struct type_cache { /* For now we hard-code this to a value for which we are confident all the static builtin types will fit (for all builds). */ -#define _Py_MAX_STATIC_BUILTIN_TYPES 200 +#define _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES 200 +#define _Py_MAX_MANAGED_STATIC_EXT_TYPES 10 typedef struct { PyTypeObject *type; + int isbuiltin; int readying; int ready; // XXX tp_dict can probably be statically allocated, @@ -59,7 +61,7 @@ typedef struct { are also some diagnostic uses for the list of weakrefs, so we still keep it. */ PyObject *tp_weaklist; -} static_builtin_state; +} managed_static_type_state; struct types_state { /* Used to set PyTypeObject.tp_version_tag. @@ -105,8 +107,16 @@ struct types_state { num_builtins_initialized is incremented once for each static builtin type. Once initialization is over for a subinterpreter, the value will be the same as for all other interpreters. */ - size_t num_builtins_initialized; - static_builtin_state builtins[_Py_MAX_STATIC_BUILTIN_TYPES]; + struct { + size_t num_initialized; + managed_static_type_state initialized[_Py_MAX_MANAGED_STATIC_BUILTIN_TYPES]; + } builtins; + /* We apply a similar strategy for managed extension modules. */ + struct { + size_t num_initialized; + size_t next_index; + managed_static_type_state initialized[_Py_MAX_MANAGED_STATIC_EXT_TYPES]; + } for_extensions; PyMutex mutex; }; @@ -130,12 +140,35 @@ typedef struct wrapperbase pytype_slotdef; static inline PyObject ** -_PyStaticType_GET_WEAKREFS_LISTPTR(static_builtin_state *state) +_PyStaticType_GET_WEAKREFS_LISTPTR(managed_static_type_state *state) { assert(state != NULL); return &state->tp_weaklist; } +extern int _PyStaticType_InitBuiltin( + PyInterpreterState *interp, + PyTypeObject *type); +extern void _PyStaticType_FiniBuiltin( + PyInterpreterState *interp, + PyTypeObject *type); +extern void _PyStaticType_ClearWeakRefs( + PyInterpreterState *interp, + PyTypeObject *type); +extern managed_static_type_state * _PyStaticType_GetState( + PyInterpreterState *interp, + PyTypeObject *type); + +// Export for '_datetime' shared extension. +PyAPI_FUNC(int) _PyStaticType_InitForExtension( + PyInterpreterState *interp, + PyTypeObject *self); +PyAPI_FUNC(void) _PyStaticType_FiniForExtension( + PyInterpreterState *interp, + PyTypeObject *self, + int final); + + /* Like PyType_GetModuleState, but skips verification * that type is a heap type with an associated module */ static inline void * @@ -151,11 +184,6 @@ _PyType_GetModuleState(PyTypeObject *type) } -extern int _PyStaticType_InitBuiltin(PyInterpreterState *, PyTypeObject *type); -extern static_builtin_state * _PyStaticType_GetState(PyInterpreterState *, PyTypeObject *); -extern void _PyStaticType_ClearWeakRefs(PyInterpreterState *, PyTypeObject *type); -extern void _PyStaticType_Dealloc(PyInterpreterState *, PyTypeObject *); - // Export for 'math' shared extension, used via _PyType_IsReady() static inline // function PyAPI_FUNC(PyObject *) _PyType_GetDict(PyTypeObject *); diff --git a/Misc/NEWS.d/next/Library/2024-06-01-16-58-43.gh-issue-117398.kR0RW7.rst b/Misc/NEWS.d/next/Library/2024-06-01-16-58-43.gh-issue-117398.kR0RW7.rst new file mode 100644 index 00000000000000..b0fe06663248f6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-01-16-58-43.gh-issue-117398.kR0RW7.rst @@ -0,0 +1,2 @@ +The ``_datetime`` module (C implementation for :mod:`datetime`) now supports +being imported in multiple interpreters. diff --git a/Modules/_datetimemodule.c b/Modules/_datetimemodule.c index 16bb4c6980aa08..d6fa273c75e15e 100644 --- a/Modules/_datetimemodule.c +++ b/Modules/_datetimemodule.c @@ -111,26 +111,37 @@ get_module_state(PyObject *module) #define INTERP_KEY ((PyObject *)&_Py_ID(cached_datetime_module)) static PyObject * -get_current_module(PyInterpreterState *interp) +get_current_module(PyInterpreterState *interp, int *p_reloading) { + PyObject *mod = NULL; + int reloading = 0; + PyObject *dict = PyInterpreterState_GetDict(interp); if (dict == NULL) { - return NULL; + goto error; } PyObject *ref = NULL; if (PyDict_GetItemRef(dict, INTERP_KEY, &ref) < 0) { - return NULL; + goto error; } - if (ref == NULL) { - return NULL; + if (ref != NULL) { + reloading = 1; + if (ref != Py_None) { + (void)PyWeakref_GetRef(ref, &mod); + if (mod == Py_None) { + Py_CLEAR(mod); + } + Py_DECREF(ref); + } } - PyObject *mod = NULL; - (void)PyWeakref_GetRef(ref, &mod); - if (mod == Py_None) { - Py_CLEAR(mod); + if (p_reloading != NULL) { + *p_reloading = reloading; } - Py_DECREF(ref); return mod; + +error: + assert(PyErr_Occurred()); + return NULL; } static PyModuleDef datetimemodule; @@ -139,7 +150,7 @@ static datetime_state * _get_current_state(PyObject **p_mod) { PyInterpreterState *interp = PyInterpreterState_Get(); - PyObject *mod = get_current_module(interp); + PyObject *mod = get_current_module(interp, NULL); if (mod == NULL) { assert(!PyErr_Occurred()); if (PyErr_Occurred()) { @@ -184,8 +195,6 @@ clear_current_module(PyInterpreterState *interp, PyObject *expected) { PyObject *exc = PyErr_GetRaisedException(); - PyObject *current = NULL; - PyObject *dict = PyInterpreterState_GetDict(interp); if (dict == NULL) { goto error; @@ -197,7 +206,10 @@ clear_current_module(PyInterpreterState *interp, PyObject *expected) goto error; } if (ref != NULL) { + PyObject *current = NULL; int rc = PyWeakref_GetRef(ref, ¤t); + /* We only need "current" for pointer comparison. */ + Py_XDECREF(current); Py_DECREF(ref); if (rc < 0) { goto error; @@ -208,19 +220,17 @@ clear_current_module(PyInterpreterState *interp, PyObject *expected) } } - if (PyDict_DelItem(dict, INTERP_KEY) < 0) { - if (!PyErr_ExceptionMatches(PyExc_KeyError)) { - goto error; - } + /* We use None to identify that the module was previously loaded. */ + if (PyDict_SetItem(dict, INTERP_KEY, Py_None) < 0) { + goto error; } goto finally; error: - PyErr_Print(); + PyErr_WriteUnraisable(NULL); finally: - Py_XDECREF(current); PyErr_SetRaisedException(exc); } @@ -6947,14 +6957,19 @@ static PyTypeObject PyDateTime_DateTimeType = { }; /* --------------------------------------------------------------------------- - * Module methods and initialization. + * datetime C-API. */ -static PyMethodDef module_methods[] = { - {NULL, NULL} +static PyTypeObject * const capi_types[] = { + &PyDateTime_DateType, + &PyDateTime_DateTimeType, + &PyDateTime_TimeType, + &PyDateTime_DeltaType, + &PyDateTime_TZInfoType, + /* Indirectly, via the utc object. */ + &PyDateTime_TimeZoneType, }; - /* The C-API is process-global. This violates interpreter isolation * due to the objects stored here. Thus each of those objects must * be managed carefully. */ @@ -7004,6 +7019,11 @@ create_timezone_from_delta(int days, int sec, int ms, int normalize) return tz; } + +/* --------------------------------------------------------------------------- + * Module state lifecycle. + */ + static int init_state(datetime_state *st, PyObject *module, PyObject *old_module) { @@ -7105,38 +7125,110 @@ clear_state(datetime_state *st) return 0; } + +/* --------------------------------------------------------------------------- + * Global module state. + */ + +// If we make _PyStaticType_*ForExtension() public +// then all this should be managed by the runtime. + +static struct { + PyMutex mutex; + int64_t interp_count; +} _globals = {0}; + +static void +callback_for_interp_exit(void *Py_UNUSED(data)) +{ + PyInterpreterState *interp = PyInterpreterState_Get(); + + assert(_globals.interp_count > 0); + PyMutex_Lock(&_globals.mutex); + _globals.interp_count -= 1; + int final = !_globals.interp_count; + PyMutex_Unlock(&_globals.mutex); + + /* They must be done in reverse order so subclasses are finalized + * before base classes. */ + for (size_t i = Py_ARRAY_LENGTH(capi_types); i > 0; i--) { + PyTypeObject *type = capi_types[i-1]; + _PyStaticType_FiniForExtension(interp, type, final); + } +} + +static int +init_static_types(PyInterpreterState *interp, int reloading) +{ + if (reloading) { + return 0; + } + + // `&...` is not a constant expression according to a strict reading + // of C standards. Fill tp_base at run-time rather than statically. + // See https://bugs.python.org/issue40777 + PyDateTime_TimeZoneType.tp_base = &PyDateTime_TZInfoType; + PyDateTime_DateTimeType.tp_base = &PyDateTime_DateType; + + /* Bases classes must be initialized before subclasses, + * so capi_types must have the types in the appropriate order. */ + for (size_t i = 0; i < Py_ARRAY_LENGTH(capi_types); i++) { + PyTypeObject *type = capi_types[i]; + if (_PyStaticType_InitForExtension(interp, type) < 0) { + return -1; + } + } + + PyMutex_Lock(&_globals.mutex); + assert(_globals.interp_count >= 0); + _globals.interp_count += 1; + PyMutex_Unlock(&_globals.mutex); + + /* It could make sense to add a separate callback + * for each of the types. However, for now we can take the simpler + * approach of a single callback. */ + if (PyUnstable_AtExit(interp, callback_for_interp_exit, NULL) < 0) { + callback_for_interp_exit(NULL); + return -1; + } + + return 0; +} + + +/* --------------------------------------------------------------------------- + * Module methods and initialization. + */ + +static PyMethodDef module_methods[] = { + {NULL, NULL} +}; + + static int _datetime_exec(PyObject *module) { int rc = -1; datetime_state *st = get_module_state(module); + int reloading = 0; PyInterpreterState *interp = PyInterpreterState_Get(); - PyObject *old_module = get_current_module(interp); + PyObject *old_module = get_current_module(interp, &reloading); if (PyErr_Occurred()) { assert(old_module == NULL); goto error; } /* We actually set the "current" module right before a successful return. */ - // `&...` is not a constant expression according to a strict reading - // of C standards. Fill tp_base at run-time rather than statically. - // See https://bugs.python.org/issue40777 - PyDateTime_TimeZoneType.tp_base = &PyDateTime_TZInfoType; - PyDateTime_DateTimeType.tp_base = &PyDateTime_DateType; - - PyTypeObject *capi_types[] = { - &PyDateTime_DateType, - &PyDateTime_DateTimeType, - &PyDateTime_TimeType, - &PyDateTime_DeltaType, - &PyDateTime_TZInfoType, - /* Indirectly, via the utc object. */ - &PyDateTime_TimeZoneType, - }; + if (init_static_types(interp, reloading) < 0) { + goto error; + } for (size_t i = 0; i < Py_ARRAY_LENGTH(capi_types); i++) { - if (PyModule_AddType(module, capi_types[i]) < 0) { + PyTypeObject *type = capi_types[i]; + const char *name = _PyType_Name(type); + assert(name != NULL); + if (PyModule_AddObjectRef(module, name, (PyObject *)type) < 0) { goto error; } } @@ -7145,11 +7237,8 @@ _datetime_exec(PyObject *module) goto error; } - /* For now we only set the objects on the static types once. - * We will relax that once each types __dict__ is per-interpreter. */ #define DATETIME_ADD_MACRO(dict, c, value_expr) \ do { \ - if (PyDict_GetItemString(dict, c) == NULL) { \ assert(!PyErr_Occurred()); \ PyObject *value = (value_expr); \ if (value == NULL) { \ @@ -7160,30 +7249,29 @@ _datetime_exec(PyObject *module) goto error; \ } \ Py_DECREF(value); \ - } \ } while(0) /* timedelta values */ - PyObject *d = PyDateTime_DeltaType.tp_dict; + PyObject *d = _PyType_GetDict(&PyDateTime_DeltaType); DATETIME_ADD_MACRO(d, "resolution", new_delta(0, 0, 1, 0)); DATETIME_ADD_MACRO(d, "min", new_delta(-MAX_DELTA_DAYS, 0, 0, 0)); DATETIME_ADD_MACRO(d, "max", new_delta(MAX_DELTA_DAYS, 24*3600-1, 1000000-1, 0)); /* date values */ - d = PyDateTime_DateType.tp_dict; + d = _PyType_GetDict(&PyDateTime_DateType); DATETIME_ADD_MACRO(d, "min", new_date(1, 1, 1)); DATETIME_ADD_MACRO(d, "max", new_date(MAXYEAR, 12, 31)); DATETIME_ADD_MACRO(d, "resolution", new_delta(1, 0, 0, 0)); /* time values */ - d = PyDateTime_TimeType.tp_dict; + d = _PyType_GetDict(&PyDateTime_TimeType); DATETIME_ADD_MACRO(d, "min", new_time(0, 0, 0, 0, Py_None, 0)); DATETIME_ADD_MACRO(d, "max", new_time(23, 59, 59, 999999, Py_None, 0)); DATETIME_ADD_MACRO(d, "resolution", new_delta(0, 0, 1, 0)); /* datetime values */ - d = PyDateTime_DateTimeType.tp_dict; + d = _PyType_GetDict(&PyDateTime_DateTimeType); DATETIME_ADD_MACRO(d, "min", new_datetime(1, 1, 1, 0, 0, 0, 0, Py_None, 0)); DATETIME_ADD_MACRO(d, "max", new_datetime(MAXYEAR, 12, 31, 23, 59, 59, @@ -7191,7 +7279,7 @@ _datetime_exec(PyObject *module) DATETIME_ADD_MACRO(d, "resolution", new_delta(0, 0, 1, 0)); /* timezone values */ - d = PyDateTime_TimeZoneType.tp_dict; + d = _PyType_GetDict(&PyDateTime_TimeZoneType); if (PyDict_SetItemString(d, "utc", (PyObject *)&utc_timezone) < 0) { goto error; } @@ -7266,7 +7354,7 @@ _datetime_exec(PyObject *module) static PyModuleDef_Slot module_slots[] = { {Py_mod_exec, _datetime_exec}, - {Py_mod_multiple_interpreters, Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED}, + {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, {Py_mod_gil, Py_MOD_GIL_NOT_USED}, {0, NULL}, }; @@ -7288,17 +7376,16 @@ module_clear(PyObject *mod) PyInterpreterState *interp = PyInterpreterState_Get(); clear_current_module(interp, mod); + // We take care of the static types via an interpreter atexit hook. + // See callback_for_interp_exit() above. + return 0; } static void module_free(void *mod) { - datetime_state *st = get_module_state((PyObject *)mod); - clear_state(st); - - PyInterpreterState *interp = PyInterpreterState_Get(); - clear_current_module(interp, (PyObject *)mod); + (void)module_clear((PyObject *)mod); } static PyModuleDef datetimemodule = { diff --git a/Objects/exceptions.c b/Objects/exceptions.c index f9cd577c1c16be..3a72cce1dff0c7 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -3725,7 +3725,7 @@ _PyExc_FiniTypes(PyInterpreterState *interp) { for (Py_ssize_t i=Py_ARRAY_LENGTH(static_exceptions) - 1; i >= 0; i--) { PyTypeObject *exc = static_exceptions[i].exc; - _PyStaticType_Dealloc(interp, exc); + _PyStaticType_FiniBuiltin(interp, exc); } } diff --git a/Objects/object.c b/Objects/object.c index 5d53e9e5eaba4e..2e9962f4651e1c 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -2355,7 +2355,7 @@ _PyTypes_FiniTypes(PyInterpreterState *interp) // their base classes. for (Py_ssize_t i=Py_ARRAY_LENGTH(static_types)-1; i>=0; i--) { PyTypeObject *type = static_types[i]; - _PyStaticType_Dealloc(interp, type); + _PyStaticType_FiniBuiltin(interp, type); } } diff --git a/Objects/structseq.c b/Objects/structseq.c index ec5c5ab45ba813..d8289f2638db0f 100644 --- a/Objects/structseq.c +++ b/Objects/structseq.c @@ -718,7 +718,7 @@ _PyStructSequence_FiniBuiltin(PyInterpreterState *interp, PyTypeObject *type) return; } - _PyStaticType_Dealloc(interp, type); + _PyStaticType_FiniBuiltin(interp, type); if (_Py_IsMainInterpreter(interp)) { // Undo _PyStructSequence_InitBuiltinWithFlags(). diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 0095a79a2cafec..880ac6b9c009fe 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -131,93 +131,159 @@ type_from_ref(PyObject *ref) #ifndef NDEBUG static inline int -static_builtin_index_is_set(PyTypeObject *self) +managed_static_type_index_is_set(PyTypeObject *self) { return self->tp_subclasses != NULL; } #endif static inline size_t -static_builtin_index_get(PyTypeObject *self) +managed_static_type_index_get(PyTypeObject *self) { - assert(static_builtin_index_is_set(self)); + assert(managed_static_type_index_is_set(self)); /* We store a 1-based index so 0 can mean "not initialized". */ return (size_t)self->tp_subclasses - 1; } static inline void -static_builtin_index_set(PyTypeObject *self, size_t index) +managed_static_type_index_set(PyTypeObject *self, size_t index) { - assert(index < _Py_MAX_STATIC_BUILTIN_TYPES); + assert(index < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES); /* We store a 1-based index so 0 can mean "not initialized". */ self->tp_subclasses = (PyObject *)(index + 1); } static inline void -static_builtin_index_clear(PyTypeObject *self) +managed_static_type_index_clear(PyTypeObject *self) { self->tp_subclasses = NULL; } -static inline static_builtin_state * +static inline managed_static_type_state * static_builtin_state_get(PyInterpreterState *interp, PyTypeObject *self) { - return &(interp->types.builtins[static_builtin_index_get(self)]); + return &(interp->types.builtins.initialized[ + managed_static_type_index_get(self)]); +} + +static inline managed_static_type_state * +static_ext_type_state_get(PyInterpreterState *interp, PyTypeObject *self) +{ + return &(interp->types.for_extensions.initialized[ + managed_static_type_index_get(self)]); +} + +static managed_static_type_state * +managed_static_type_state_get(PyInterpreterState *interp, PyTypeObject *self) +{ + // It's probably a builtin type. + size_t index = managed_static_type_index_get(self); + managed_static_type_state *state = + &(interp->types.builtins.initialized[index]); + if (state->type == self) { + return state; + } + if (index > _Py_MAX_MANAGED_STATIC_EXT_TYPES) { + return state; + } + return &(interp->types.for_extensions.initialized[index]); } /* For static types we store some state in an array on each interpreter. */ -static_builtin_state * +managed_static_type_state * _PyStaticType_GetState(PyInterpreterState *interp, PyTypeObject *self) { assert(self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN); - return static_builtin_state_get(interp, self); + return managed_static_type_state_get(interp, self); } /* Set the type's per-interpreter state. */ static void -static_builtin_state_init(PyInterpreterState *interp, PyTypeObject *self) +managed_static_type_state_init(PyInterpreterState *interp, PyTypeObject *self, + int isbuiltin, int initial) { - if (_Py_IsMainInterpreter(interp)) { - assert(!static_builtin_index_is_set(self)); - static_builtin_index_set(self, interp->types.num_builtins_initialized); + size_t index; + if (initial) { + assert(!managed_static_type_index_is_set(self)); + if (isbuiltin) { + index = interp->types.builtins.num_initialized; + assert(index < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES); + } + else { + PyMutex_Lock(&interp->types.mutex); + index = interp->types.for_extensions.next_index; + interp->types.for_extensions.next_index++; + PyMutex_Unlock(&interp->types.mutex); + assert(index < _Py_MAX_MANAGED_STATIC_EXT_TYPES); + } + managed_static_type_index_set(self, index); } else { - assert(static_builtin_index_get(self) == - interp->types.num_builtins_initialized); + index = managed_static_type_index_get(self); + if (isbuiltin) { + assert(index == interp->types.builtins.num_initialized); + assert(index < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES); + } + else { + assert(index < _Py_MAX_MANAGED_STATIC_EXT_TYPES); + } } - static_builtin_state *state = static_builtin_state_get(interp, self); - /* It should only be called once for each builtin type. */ + managed_static_type_state *state = isbuiltin + ? &(interp->types.builtins.initialized[index]) + : &(interp->types.for_extensions.initialized[index]); + + /* It should only be called once for each builtin type per interpreter. */ assert(state->type == NULL); state->type = self; + state->isbuiltin = isbuiltin; /* state->tp_subclasses is left NULL until init_subclasses() sets it. */ /* state->tp_weaklist is left NULL until insert_head() or insert_after() (in weakrefobject.c) sets it. */ - interp->types.num_builtins_initialized++; + if (isbuiltin) { + interp->types.builtins.num_initialized++; + } + else { + interp->types.for_extensions.num_initialized++; + } } /* Reset the type's per-interpreter state. - This basically undoes what static_builtin_state_init() did. */ + This basically undoes what managed_static_type_state_init() did. */ static void -static_builtin_state_clear(PyInterpreterState *interp, PyTypeObject *self) +managed_static_type_state_clear(PyInterpreterState *interp, PyTypeObject *self, + int isbuiltin, int final) { - static_builtin_state *state = static_builtin_state_get(interp, self); + managed_static_type_state *state = isbuiltin + ? static_builtin_state_get(interp, self) + : static_ext_type_state_get(interp, self); assert(state->type != NULL); state->type = NULL; assert(state->tp_weaklist == NULL); // It was already cleared out. - if (_Py_IsMainInterpreter(interp)) { - static_builtin_index_clear(self); + if (final) { + managed_static_type_index_clear(self); } - assert(interp->types.num_builtins_initialized > 0); - interp->types.num_builtins_initialized--; + if (isbuiltin) { + assert(interp->types.builtins.num_initialized > 0); + interp->types.builtins.num_initialized--; + } + else { + PyMutex_Lock(&interp->types.mutex); + assert(interp->types.for_extensions.num_initialized > 0); + interp->types.for_extensions.num_initialized--; + if (interp->types.for_extensions.num_initialized == 0) { + interp->types.for_extensions.next_index = 0; + } + PyMutex_Unlock(&interp->types.mutex); + } } -// Also see _PyStaticType_InitBuiltin() and _PyStaticType_Dealloc(). +// Also see _PyStaticType_InitBuiltin() and _PyStaticType_FiniBuiltin(). /* end static builtin helpers */ @@ -227,7 +293,7 @@ start_readying(PyTypeObject *type) { if (type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { PyInterpreterState *interp = _PyInterpreterState_GET(); - static_builtin_state *state = static_builtin_state_get(interp, type); + managed_static_type_state *state = managed_static_type_state_get(interp, type); assert(state != NULL); assert(!state->readying); state->readying = 1; @@ -242,7 +308,7 @@ stop_readying(PyTypeObject *type) { if (type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { PyInterpreterState *interp = _PyInterpreterState_GET(); - static_builtin_state *state = static_builtin_state_get(interp, type); + managed_static_type_state *state = managed_static_type_state_get(interp, type); assert(state != NULL); assert(state->readying); state->readying = 0; @@ -257,7 +323,7 @@ is_readying(PyTypeObject *type) { if (type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { PyInterpreterState *interp = _PyInterpreterState_GET(); - static_builtin_state *state = static_builtin_state_get(interp, type); + managed_static_type_state *state = managed_static_type_state_get(interp, type); assert(state != NULL); return state->readying; } @@ -272,7 +338,7 @@ lookup_tp_dict(PyTypeObject *self) { if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { PyInterpreterState *interp = _PyInterpreterState_GET(); - static_builtin_state *state = _PyStaticType_GetState(interp, self); + managed_static_type_state *state = _PyStaticType_GetState(interp, self); assert(state != NULL); return state->tp_dict; } @@ -298,7 +364,7 @@ set_tp_dict(PyTypeObject *self, PyObject *dict) { if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { PyInterpreterState *interp = _PyInterpreterState_GET(); - static_builtin_state *state = _PyStaticType_GetState(interp, self); + managed_static_type_state *state = _PyStaticType_GetState(interp, self); assert(state != NULL); state->tp_dict = dict; return; @@ -311,7 +377,7 @@ clear_tp_dict(PyTypeObject *self) { if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { PyInterpreterState *interp = _PyInterpreterState_GET(); - static_builtin_state *state = _PyStaticType_GetState(interp, self); + managed_static_type_state *state = _PyStaticType_GetState(interp, self); assert(state != NULL); Py_CLEAR(state->tp_dict); return; @@ -340,13 +406,13 @@ _PyType_GetBases(PyTypeObject *self) } static inline void -set_tp_bases(PyTypeObject *self, PyObject *bases) +set_tp_bases(PyTypeObject *self, PyObject *bases, int initial) { assert(PyTuple_CheckExact(bases)); if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { // XXX tp_bases can probably be statically allocated for each // static builtin type. - assert(_Py_IsMainInterpreter(_PyInterpreterState_GET())); + assert(initial); assert(self->tp_bases == NULL); if (PyTuple_GET_SIZE(bases) == 0) { assert(self->tp_base == NULL); @@ -363,10 +429,10 @@ set_tp_bases(PyTypeObject *self, PyObject *bases) } static inline void -clear_tp_bases(PyTypeObject *self) +clear_tp_bases(PyTypeObject *self, int final) { if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { - if (_Py_IsMainInterpreter(_PyInterpreterState_GET())) { + if (final) { if (self->tp_bases != NULL) { if (PyTuple_GET_SIZE(self->tp_bases) == 0) { Py_CLEAR(self->tp_bases); @@ -413,13 +479,13 @@ _PyType_GetMRO(PyTypeObject *self) } static inline void -set_tp_mro(PyTypeObject *self, PyObject *mro) +set_tp_mro(PyTypeObject *self, PyObject *mro, int initial) { assert(PyTuple_CheckExact(mro)); if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { // XXX tp_mro can probably be statically allocated for each // static builtin type. - assert(_Py_IsMainInterpreter(_PyInterpreterState_GET())); + assert(initial); assert(self->tp_mro == NULL); /* Other checks are done via set_tp_bases. */ _Py_SetImmortal(mro); @@ -428,10 +494,10 @@ set_tp_mro(PyTypeObject *self, PyObject *mro) } static inline void -clear_tp_mro(PyTypeObject *self) +clear_tp_mro(PyTypeObject *self, int final) { if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { - if (_Py_IsMainInterpreter(_PyInterpreterState_GET())) { + if (final) { if (self->tp_mro != NULL) { if (PyTuple_GET_SIZE(self->tp_mro) == 0) { Py_CLEAR(self->tp_mro); @@ -457,7 +523,7 @@ init_tp_subclasses(PyTypeObject *self) } if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { PyInterpreterState *interp = _PyInterpreterState_GET(); - static_builtin_state *state = _PyStaticType_GetState(interp, self); + managed_static_type_state *state = _PyStaticType_GetState(interp, self); state->tp_subclasses = subclasses; return subclasses; } @@ -473,7 +539,7 @@ clear_tp_subclasses(PyTypeObject *self) has no subclass. */ if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { PyInterpreterState *interp = _PyInterpreterState_GET(); - static_builtin_state *state = _PyStaticType_GetState(interp, self); + managed_static_type_state *state = _PyStaticType_GetState(interp, self); Py_CLEAR(state->tp_subclasses); return; } @@ -485,7 +551,7 @@ lookup_tp_subclasses(PyTypeObject *self) { if (self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { PyInterpreterState *interp = _PyInterpreterState_GET(); - static_builtin_state *state = _PyStaticType_GetState(interp, self); + managed_static_type_state *state = _PyStaticType_GetState(interp, self); assert(state != NULL); return state->tp_subclasses; } @@ -774,10 +840,10 @@ _PyTypes_Fini(PyInterpreterState *interp) struct type_cache *cache = &interp->types.type_cache; type_cache_clear(cache, NULL); - assert(interp->types.num_builtins_initialized == 0); + assert(interp->types.builtins.num_initialized == 0); // All the static builtin types should have been finalized already. - for (size_t i = 0; i < _Py_MAX_STATIC_BUILTIN_TYPES; i++) { - assert(interp->types.builtins[i].type == NULL); + for (size_t i = 0; i < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; i++) { + assert(interp->types.builtins.initialized[i].type == NULL); } } @@ -1444,7 +1510,7 @@ mro_hierarchy(PyTypeObject *type, PyObject *temp) Py_XDECREF(tuple); if (res < 0) { - set_tp_mro(type, old_mro); + set_tp_mro(type, old_mro, 0); Py_DECREF(new_mro); return -1; } @@ -1545,7 +1611,7 @@ type_set_bases_unlocked(PyTypeObject *type, PyObject *new_bases, void *context) assert(old_bases != NULL); PyTypeObject *old_base = type->tp_base; - set_tp_bases(type, Py_NewRef(new_bases)); + set_tp_bases(type, Py_NewRef(new_bases), 0); type->tp_base = (PyTypeObject *)Py_NewRef(new_base); PyObject *temp = PyList_New(0); @@ -1593,7 +1659,7 @@ type_set_bases_unlocked(PyTypeObject *type, PyObject *new_bases, void *context) "", 2, 3, &cls, &new_mro, &old_mro); /* Do not rollback if cls has a newer version of MRO. */ if (lookup_tp_mro(cls) == new_mro) { - set_tp_mro(cls, Py_XNewRef(old_mro)); + set_tp_mro(cls, Py_XNewRef(old_mro), 0); Py_DECREF(new_mro); } } @@ -1603,7 +1669,7 @@ type_set_bases_unlocked(PyTypeObject *type, PyObject *new_bases, void *context) if (lookup_tp_bases(type) == new_bases) { assert(type->tp_base == new_base); - set_tp_bases(type, old_bases); + set_tp_bases(type, old_bases, 0); type->tp_base = old_base; Py_DECREF(new_bases); @@ -3084,7 +3150,7 @@ mro_invoke(PyTypeObject *type) - Returns -1 in case of an error. */ static int -mro_internal_unlocked(PyTypeObject *type, PyObject **p_old_mro) +mro_internal_unlocked(PyTypeObject *type, int initial, PyObject **p_old_mro) { ASSERT_TYPE_LOCK_HELD(); @@ -3107,7 +3173,7 @@ mro_internal_unlocked(PyTypeObject *type, PyObject **p_old_mro) return 0; } - set_tp_mro(type, new_mro); + set_tp_mro(type, new_mro, initial); type_mro_modified(type, new_mro); /* corner case: the super class might have been hidden @@ -3137,7 +3203,7 @@ mro_internal(PyTypeObject *type, PyObject **p_old_mro) { int res; BEGIN_TYPE_LOCK() - res = mro_internal_unlocked(type, p_old_mro); + res = mro_internal_unlocked(type, 0, p_old_mro); END_TYPE_LOCK() return res; } @@ -3732,7 +3798,7 @@ type_new_alloc(type_new_ctx *ctx) type->tp_as_mapping = &et->as_mapping; type->tp_as_buffer = &et->as_buffer; - set_tp_bases(type, Py_NewRef(ctx->bases)); + set_tp_bases(type, Py_NewRef(ctx->bases), 1); type->tp_base = (PyTypeObject *)Py_NewRef(ctx->base); type->tp_dealloc = subtype_dealloc; @@ -4722,7 +4788,7 @@ _PyType_FromMetaclass_impl( /* Set slots we have prepared */ type->tp_base = (PyTypeObject *)Py_NewRef(base); - set_tp_bases(type, bases); + set_tp_bases(type, bases, 1); bases = NULL; // We give our reference to bases to the type type->tp_doc = tp_doc; @@ -5627,7 +5693,7 @@ type_dealloc_common(PyTypeObject *type) static void -clear_static_tp_subclasses(PyTypeObject *type) +clear_static_tp_subclasses(PyTypeObject *type, int isbuiltin) { PyObject *subclasses = lookup_tp_subclasses(type); if (subclasses == NULL) { @@ -5664,47 +5730,64 @@ clear_static_tp_subclasses(PyTypeObject *type) continue; } // All static builtin subtypes should have been finalized already. - assert(!(subclass->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN)); + assert(!isbuiltin || !(subclass->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN)); Py_DECREF(subclass); } +#else + (void)isbuiltin; #endif clear_tp_subclasses(type); } static void -clear_static_type_objects(PyInterpreterState *interp, PyTypeObject *type) +clear_static_type_objects(PyInterpreterState *interp, PyTypeObject *type, + int isbuiltin, int final) { - if (_Py_IsMainInterpreter(interp)) { + if (final) { Py_CLEAR(type->tp_cache); } clear_tp_dict(type); - clear_tp_bases(type); - clear_tp_mro(type); - clear_static_tp_subclasses(type); + clear_tp_bases(type, final); + clear_tp_mro(type, final); + clear_static_tp_subclasses(type, isbuiltin); } -void -_PyStaticType_Dealloc(PyInterpreterState *interp, PyTypeObject *type) + +static void +fini_static_type(PyInterpreterState *interp, PyTypeObject *type, + int isbuiltin, int final) { assert(type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN); assert(_Py_IsImmortal((PyObject *)type)); type_dealloc_common(type); - clear_static_type_objects(interp, type); + clear_static_type_objects(interp, type, isbuiltin, final); - if (_Py_IsMainInterpreter(interp)) { + if (final) { type->tp_flags &= ~Py_TPFLAGS_READY; type->tp_flags &= ~Py_TPFLAGS_VALID_VERSION_TAG; type->tp_version_tag = 0; } _PyStaticType_ClearWeakRefs(interp, type); - static_builtin_state_clear(interp, type); + managed_static_type_state_clear(interp, type, isbuiltin, final); /* We leave _Py_TPFLAGS_STATIC_BUILTIN set on tp_flags. */ } +void +_PyStaticType_FiniForExtension(PyInterpreterState *interp, PyTypeObject *type, int final) +{ + fini_static_type(interp, type, 0, final); +} + +void +_PyStaticType_FiniBuiltin(PyInterpreterState *interp, PyTypeObject *type) +{ + fini_static_type(interp, type, 1, _Py_IsMainInterpreter(interp)); +} + static void type_dealloc(PyObject *self) @@ -7758,10 +7841,10 @@ type_ready_set_type(PyTypeObject *type) } static int -type_ready_set_bases(PyTypeObject *type) +type_ready_set_bases(PyTypeObject *type, int initial) { if (type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { - if (!_Py_IsMainInterpreter(_PyInterpreterState_GET())) { + if (!initial) { assert(lookup_tp_bases(type) != NULL); return 0; } @@ -7780,7 +7863,7 @@ type_ready_set_bases(PyTypeObject *type) if (bases == NULL) { return -1; } - set_tp_bases(type, bases); + set_tp_bases(type, bases, 1); } return 0; } @@ -7890,12 +7973,12 @@ type_ready_preheader(PyTypeObject *type) } static int -type_ready_mro(PyTypeObject *type) +type_ready_mro(PyTypeObject *type, int initial) { ASSERT_TYPE_LOCK_HELD(); if (type->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { - if (!_Py_IsMainInterpreter(_PyInterpreterState_GET())) { + if (!initial) { assert(lookup_tp_mro(type) != NULL); return 0; } @@ -7903,7 +7986,7 @@ type_ready_mro(PyTypeObject *type) } /* Calculate method resolution order */ - if (mro_internal_unlocked(type, NULL) < 0) { + if (mro_internal_unlocked(type, initial, NULL) < 0) { return -1; } PyObject *mro = lookup_tp_mro(type); @@ -8058,7 +8141,7 @@ type_ready_add_subclasses(PyTypeObject *type) // Set tp_new and the "__new__" key in the type dictionary. // Use the Py_TPFLAGS_DISALLOW_INSTANTIATION flag. static int -type_ready_set_new(PyTypeObject *type, int rerunbuiltin) +type_ready_set_new(PyTypeObject *type, int initial) { PyTypeObject *base = type->tp_base; /* The condition below could use some explanation. @@ -8080,7 +8163,7 @@ type_ready_set_new(PyTypeObject *type, int rerunbuiltin) if (!(type->tp_flags & Py_TPFLAGS_DISALLOW_INSTANTIATION)) { if (type->tp_new != NULL) { - if (!rerunbuiltin || base == NULL || type->tp_new != base->tp_new) { + if (initial || base == NULL || type->tp_new != base->tp_new) { // If "__new__" key does not exists in the type dictionary, // set it to tp_new_wrapper(). if (add_tp_new_wrapper(type) < 0) { @@ -8162,7 +8245,7 @@ type_ready_post_checks(PyTypeObject *type) static int -type_ready(PyTypeObject *type, int rerunbuiltin) +type_ready(PyTypeObject *type, int initial) { ASSERT_TYPE_LOCK_HELD(); @@ -8192,19 +8275,19 @@ type_ready(PyTypeObject *type, int rerunbuiltin) if (type_ready_set_type(type) < 0) { goto error; } - if (type_ready_set_bases(type) < 0) { + if (type_ready_set_bases(type, initial) < 0) { goto error; } - if (type_ready_mro(type) < 0) { + if (type_ready_mro(type, initial) < 0) { goto error; } - if (type_ready_set_new(type, rerunbuiltin) < 0) { + if (type_ready_set_new(type, initial) < 0) { goto error; } if (type_ready_fill_dict(type) < 0) { goto error; } - if (!rerunbuiltin) { + if (initial) { if (type_ready_inherit(type) < 0) { goto error; } @@ -8218,7 +8301,7 @@ type_ready(PyTypeObject *type, int rerunbuiltin) if (type_ready_add_subclasses(type) < 0) { goto error; } - if (!rerunbuiltin) { + if (initial) { if (type_ready_managed_dict(type) < 0) { goto error; } @@ -8258,7 +8341,7 @@ PyType_Ready(PyTypeObject *type) int res; BEGIN_TYPE_LOCK() if (!(type->tp_flags & Py_TPFLAGS_READY)) { - res = type_ready(type, 0); + res = type_ready(type, 1); } else { res = 0; assert(_PyType_CheckConsistency(type)); @@ -8267,17 +8350,18 @@ PyType_Ready(PyTypeObject *type) return res; } -int -_PyStaticType_InitBuiltin(PyInterpreterState *interp, PyTypeObject *self) + +static int +init_static_type(PyInterpreterState *interp, PyTypeObject *self, + int isbuiltin, int initial) { assert(_Py_IsImmortal((PyObject *)self)); assert(!(self->tp_flags & Py_TPFLAGS_HEAPTYPE)); assert(!(self->tp_flags & Py_TPFLAGS_MANAGED_DICT)); assert(!(self->tp_flags & Py_TPFLAGS_MANAGED_WEAKREF)); - int ismain = _Py_IsMainInterpreter(interp); if ((self->tp_flags & Py_TPFLAGS_READY) == 0) { - assert(ismain); + assert(initial); self->tp_flags |= _Py_TPFLAGS_STATIC_BUILTIN; self->tp_flags |= Py_TPFLAGS_IMMUTABLETYPE; @@ -8287,24 +8371,36 @@ _PyStaticType_InitBuiltin(PyInterpreterState *interp, PyTypeObject *self) self->tp_flags |= Py_TPFLAGS_VALID_VERSION_TAG; } else { - assert(!ismain); + assert(!initial); assert(self->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN); assert(self->tp_flags & Py_TPFLAGS_VALID_VERSION_TAG); } - static_builtin_state_init(interp, self); + managed_static_type_state_init(interp, self, isbuiltin, initial); int res; BEGIN_TYPE_LOCK(); - res = type_ready(self, !ismain); + res = type_ready(self, initial); END_TYPE_LOCK() if (res < 0) { _PyStaticType_ClearWeakRefs(interp, self); - static_builtin_state_clear(interp, self); + managed_static_type_state_clear(interp, self, isbuiltin, initial); } return res; } +int +_PyStaticType_InitForExtension(PyInterpreterState *interp, PyTypeObject *self) +{ + return init_static_type(interp, self, 0, ((self->tp_flags & Py_TPFLAGS_READY) == 0)); +} + +int +_PyStaticType_InitBuiltin(PyInterpreterState *interp, PyTypeObject *self) +{ + return init_static_type(interp, self, 1, _Py_IsMainInterpreter(interp)); +} + static int add_subclass(PyTypeObject *base, PyTypeObject *type) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 53160f1799f2cc..3b0b4173408724 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -15522,9 +15522,9 @@ unicode_is_finalizing(void) void _PyUnicode_FiniTypes(PyInterpreterState *interp) { - _PyStaticType_Dealloc(interp, &EncodingMapType); - _PyStaticType_Dealloc(interp, &PyFieldNameIter_Type); - _PyStaticType_Dealloc(interp, &PyFormatterIter_Type); + _PyStaticType_FiniBuiltin(interp, &EncodingMapType); + _PyStaticType_FiniBuiltin(interp, &PyFieldNameIter_Type); + _PyStaticType_FiniBuiltin(interp, &PyFormatterIter_Type); } diff --git a/Objects/weakrefobject.c b/Objects/weakrefobject.c index 88afaec86827ed..3b027e1b518ba6 100644 --- a/Objects/weakrefobject.c +++ b/Objects/weakrefobject.c @@ -1066,7 +1066,7 @@ PyObject_ClearWeakRefs(PyObject *object) void _PyStaticType_ClearWeakRefs(PyInterpreterState *interp, PyTypeObject *type) { - static_builtin_state *state = _PyStaticType_GetState(interp, type); + managed_static_type_state *state = _PyStaticType_GetState(interp, type); PyObject **list = _PyStaticType_GET_WEAKREFS_LISTPTR(state); // This is safe to do without holding the lock in free-threaded builds; // there is only one thread running and no new threads can be created. diff --git a/Python/crossinterp_exceptions.h b/Python/crossinterp_exceptions.h index 6ecc10c7955fd8..278511da615c75 100644 --- a/Python/crossinterp_exceptions.h +++ b/Python/crossinterp_exceptions.h @@ -90,6 +90,6 @@ static void fini_exceptions(PyInterpreterState *interp) { // Likewise with _fini_not_shareable_error_type(). - _PyStaticType_Dealloc(interp, &_PyExc_InterpreterNotFoundError); - _PyStaticType_Dealloc(interp, &_PyExc_InterpreterError); + _PyStaticType_FiniBuiltin(interp, &_PyExc_InterpreterNotFoundError); + _PyStaticType_FiniBuiltin(interp, &_PyExc_InterpreterError); } diff --git a/Tools/c-analyzer/cpython/globals-to-fix.tsv b/Tools/c-analyzer/cpython/globals-to-fix.tsv index 711ae343a8d876..4586a59f6ac2ef 100644 --- a/Tools/c-analyzer/cpython/globals-to-fix.tsv +++ b/Tools/c-analyzer/cpython/globals-to-fix.tsv @@ -307,6 +307,7 @@ Python/crossinterp_exceptions.h - PyExc_InterpreterNotFoundError - Modules/_datetimemodule.c - zero_delta - Modules/_datetimemodule.c - utc_timezone - Modules/_datetimemodule.c - capi - +Modules/_datetimemodule.c - _globals - Objects/boolobject.c - _Py_FalseStruct - Objects/boolobject.c - _Py_TrueStruct - Objects/dictobject.c - empty_keys_struct - diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index a3bdf0396fd3e1..466f25daa14dc6 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -217,6 +217,7 @@ Modules/_datetimemodule.c - max_fold_seconds - Modules/_datetimemodule.c datetime_isoformat specs - Modules/_datetimemodule.c parse_hh_mm_ss_ff correction - Modules/_datetimemodule.c time_isoformat specs - +Modules/_datetimemodule.c - capi_types - Modules/_decimal/_decimal.c - cond_map_template - Modules/_decimal/_decimal.c - dec_signal_string - Modules/_decimal/_decimal.c - dflt_ctx - From 31a4fb3c74a0284436343858803b54471e2dc9c7 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Tue, 4 Jun 2024 03:10:15 +0200 Subject: [PATCH 48/72] gh-119724: Revert "bpo-45759: Better error messages for non-matching 'elif'/'else' statements (#29513)" (#119974) This reverts commit 1c8f912ebdfdb146cd7dd2d7a3a67d2c5045ddb0. --- Grammar/python.gram | 5 - Lib/test/test_syntax.py | 61 +- ...-06-03-13-25-04.gh-issue-119724.EH1dkA.rst | 3 + Parser/parser.c | 998 ++++++++---------- 4 files changed, 455 insertions(+), 612 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-06-03-13-25-04.gh-issue-119724.EH1dkA.rst diff --git a/Grammar/python.gram b/Grammar/python.gram index 1734479276dd5b..b14e5dd096cdf4 100644 --- a/Grammar/python.gram +++ b/Grammar/python.gram @@ -127,7 +127,6 @@ simple_stmt[stmt_ty] (memo): | &'nonlocal' nonlocal_stmt compound_stmt[stmt_ty]: - | invalid_compound_stmt | &('def' | '@' | 'async') function_def | &'if' if_stmt | &('class' | '@') class_def @@ -1323,10 +1322,6 @@ invalid_import_from_targets: | token=NEWLINE { RAISE_SYNTAX_ERROR_STARTING_FROM(token, "Expected one or more names after 'import'") } -invalid_compound_stmt: - | a='elif' named_expression ':' { RAISE_SYNTAX_ERROR_STARTING_FROM(a, "'elif' must match an if-statement here") } - | a='else' ':' { RAISE_SYNTAX_ERROR_STARTING_FROM(a, "'else' must match a valid statement here") } - invalid_with_stmt: | ['async'] 'with' ','.(expression ['as' star_target])+ NEWLINE { RAISE_SYNTAX_ERROR("expected ':'") } | ['async'] 'with' '(' ','.(expressions ['as' star_target])+ ','? ')' NEWLINE { RAISE_SYNTAX_ERROR("expected ':'") } diff --git a/Lib/test/test_syntax.py b/Lib/test/test_syntax.py index b978838ea7003f..491f7fd7908e97 100644 --- a/Lib/test/test_syntax.py +++ b/Lib/test/test_syntax.py @@ -1853,28 +1853,6 @@ Traceback (most recent call last): SyntaxError: positional patterns follow keyword patterns -Non-matching 'elif'/'else' statements: - - >>> if a == b: - ... ... - ... elif a == c: - Traceback (most recent call last): - SyntaxError: 'elif' must match an if-statement here - - >>> if x == y: - ... ... - ... else: - Traceback (most recent call last): - SyntaxError: 'else' must match a valid statement here - - >>> elif m == n: - Traceback (most recent call last): - SyntaxError: 'elif' must match an if-statement here - - >>> else: - Traceback (most recent call last): - SyntaxError: 'else' must match a valid statement here - Uses of the star operator which should fail: A[:*b] @@ -2167,8 +2145,8 @@ def _check_error(self, code, errtext, lineno=None, offset=None, end_lineno=None, end_offset=None): """Check that compiling code raises SyntaxError with errtext. - errtext is a regular expression that must be present in the - test of the exception raised. If subclass is specified, it + errtest is a regular expression that must be present in the + test of the exception raised. If subclass is specified it is the expected subclass of SyntaxError (e.g. IndentationError). """ try: @@ -2192,22 +2170,6 @@ def _check_error(self, code, errtext, else: self.fail("compile() did not raise SyntaxError") - def _check_noerror(self, code, - errtext="compile() raised unexpected SyntaxError", - filename="", mode="exec", subclass=None): - """Check that compiling code does not raise a SyntaxError. - - errtext is the message passed to self.fail if there is - a SyntaxError. If the subclass parameter is specified, - it is the subclass of SyntaxError (e.g. IndentationError) - that the raised error is checked against. - """ - try: - compile(code, filename, mode) - except SyntaxError as err: - if (not subclass) or isinstance(err, subclass): - self.fail(errtext) - def test_expression_with_assignment(self): self._check_error( "print(end1 + end2 = ' ')", @@ -2609,25 +2571,6 @@ def test_syntax_error_on_deeply_nested_blocks(self): """ self._check_error(source, "too many statically nested blocks") - def test_syntax_error_non_matching_elif_else_statements(self): - # Check bpo-45759: 'elif' statements that doesn't match an - # if-statement or 'else' statements that doesn't match any - # valid else-able statement (e.g. 'while') - self._check_error( - "elif m == n:\n ...", - "'elif' must match an if-statement here") - self._check_error( - "else:\n ...", - "'else' must match a valid statement here") - self._check_noerror("if a == b:\n ...\nelif a == c:\n ...") - self._check_noerror("if x == y:\n ...\nelse:\n ...") - self._check_error( - "else = 123", - "invalid syntax") - self._check_error( - "elif 55 = 123", - "cannot assign to literal here") - @support.cpython_only def test_error_on_parser_stack_overflow(self): source = "-" * 100000 + "4" diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-03-13-25-04.gh-issue-119724.EH1dkA.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-03-13-25-04.gh-issue-119724.EH1dkA.rst new file mode 100644 index 00000000000000..78dc48da934cf6 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-03-13-25-04.gh-issue-119724.EH1dkA.rst @@ -0,0 +1,3 @@ +Reverted improvements to error messages for ``elif``/``else`` statements not +matching any valid statements, which made in hard to locate the syntax +errors inside those ``elif``/``else`` blocks. diff --git a/Parser/parser.c b/Parser/parser.c index fec3353d30cb4d..05cd93c2c92f29 100644 --- a/Parser/parser.c +++ b/Parser/parser.c @@ -21,28 +21,28 @@ static KeywordToken *reserved_keywords[] = { (KeywordToken[]) {{NULL, -1}}, (KeywordToken[]) {{NULL, -1}}, (KeywordToken[]) { - {"if", 662}, - {"as", 660}, - {"in", 673}, + {"if", 660}, + {"as", 658}, + {"in", 671}, {"or", 581}, {"is", 589}, {NULL, -1}, }, (KeywordToken[]) { {"del", 613}, - {"def", 677}, - {"for", 672}, - {"try", 644}, + {"def", 675}, + {"for", 670}, + {"try", 642}, {"and", 582}, - {"not", 681}, + {"not", 679}, {NULL, -1}, }, (KeywordToken[]) { {"from", 621}, {"pass", 504}, - {"with", 635}, - {"elif", 664}, - {"else", 665}, + {"with", 633}, + {"elif", 662}, + {"else", 663}, {"None", 611}, {"True", 610}, {NULL, -1}, @@ -51,9 +51,9 @@ static KeywordToken *reserved_keywords[] = { {"raise", 525}, {"yield", 580}, {"break", 508}, - {"async", 676}, - {"class", 679}, - {"while", 667}, + {"async", 674}, + {"class", 677}, + {"while", 665}, {"False", 612}, {"await", 590}, {NULL, -1}, @@ -63,12 +63,12 @@ static KeywordToken *reserved_keywords[] = { {"import", 622}, {"assert", 529}, {"global", 526}, - {"except", 657}, + {"except", 655}, {"lambda", 609}, {NULL, -1}, }, (KeywordToken[]) { - {"finally", 653}, + {"finally", 651}, {NULL, -1}, }, (KeywordToken[]) { @@ -308,316 +308,315 @@ static char *soft_keywords[] = { #define invalid_group_type 1221 #define invalid_import_type 1222 #define invalid_import_from_targets_type 1223 -#define invalid_compound_stmt_type 1224 -#define invalid_with_stmt_type 1225 -#define invalid_with_stmt_indent_type 1226 -#define invalid_try_stmt_type 1227 -#define invalid_except_stmt_type 1228 -#define invalid_finally_stmt_type 1229 -#define invalid_except_stmt_indent_type 1230 -#define invalid_except_star_stmt_indent_type 1231 -#define invalid_match_stmt_type 1232 -#define invalid_case_block_type 1233 -#define invalid_as_pattern_type 1234 -#define invalid_class_pattern_type 1235 -#define invalid_class_argument_pattern_type 1236 -#define invalid_if_stmt_type 1237 -#define invalid_elif_stmt_type 1238 -#define invalid_else_stmt_type 1239 -#define invalid_while_stmt_type 1240 -#define invalid_for_stmt_type 1241 -#define invalid_def_raw_type 1242 -#define invalid_class_def_raw_type 1243 -#define invalid_double_starred_kvpairs_type 1244 -#define invalid_kvpair_type 1245 -#define invalid_starred_expression_unpacking_type 1246 -#define invalid_starred_expression_type 1247 -#define invalid_replacement_field_type 1248 -#define invalid_conversion_character_type 1249 -#define invalid_arithmetic_type 1250 -#define invalid_factor_type 1251 -#define invalid_type_params_type 1252 -#define _loop0_1_type 1253 -#define _loop0_2_type 1254 -#define _loop1_3_type 1255 -#define _loop0_5_type 1256 -#define _gather_4_type 1257 -#define _tmp_6_type 1258 -#define _tmp_7_type 1259 -#define _tmp_8_type 1260 -#define _tmp_9_type 1261 -#define _tmp_10_type 1262 -#define _tmp_11_type 1263 -#define _tmp_12_type 1264 -#define _tmp_13_type 1265 -#define _loop1_14_type 1266 -#define _tmp_15_type 1267 -#define _tmp_16_type 1268 -#define _tmp_17_type 1269 -#define _loop0_19_type 1270 -#define _gather_18_type 1271 -#define _loop0_21_type 1272 -#define _gather_20_type 1273 -#define _tmp_22_type 1274 -#define _tmp_23_type 1275 -#define _loop0_24_type 1276 -#define _loop1_25_type 1277 -#define _loop0_27_type 1278 -#define _gather_26_type 1279 -#define _tmp_28_type 1280 -#define _loop0_30_type 1281 -#define _gather_29_type 1282 -#define _tmp_31_type 1283 -#define _loop1_32_type 1284 -#define _tmp_33_type 1285 -#define _tmp_34_type 1286 -#define _tmp_35_type 1287 -#define _loop0_36_type 1288 -#define _loop0_37_type 1289 -#define _loop0_38_type 1290 -#define _loop1_39_type 1291 -#define _loop0_40_type 1292 -#define _loop1_41_type 1293 -#define _loop1_42_type 1294 -#define _loop1_43_type 1295 -#define _loop0_44_type 1296 -#define _loop1_45_type 1297 -#define _loop0_46_type 1298 -#define _loop1_47_type 1299 -#define _loop0_48_type 1300 -#define _loop0_49_type 1301 -#define _loop1_50_type 1302 -#define _loop0_52_type 1303 -#define _gather_51_type 1304 -#define _loop0_54_type 1305 -#define _gather_53_type 1306 -#define _loop0_56_type 1307 -#define _gather_55_type 1308 -#define _loop0_58_type 1309 -#define _gather_57_type 1310 -#define _tmp_59_type 1311 -#define _loop1_60_type 1312 -#define _loop1_61_type 1313 -#define _tmp_62_type 1314 -#define _tmp_63_type 1315 -#define _loop1_64_type 1316 -#define _loop0_66_type 1317 -#define _gather_65_type 1318 -#define _tmp_67_type 1319 -#define _tmp_68_type 1320 -#define _tmp_69_type 1321 -#define _tmp_70_type 1322 -#define _loop0_72_type 1323 -#define _gather_71_type 1324 -#define _loop0_74_type 1325 -#define _gather_73_type 1326 -#define _tmp_75_type 1327 -#define _loop0_77_type 1328 -#define _gather_76_type 1329 -#define _loop0_79_type 1330 -#define _gather_78_type 1331 -#define _loop0_81_type 1332 -#define _gather_80_type 1333 -#define _loop1_82_type 1334 -#define _loop1_83_type 1335 -#define _loop0_85_type 1336 -#define _gather_84_type 1337 -#define _loop1_86_type 1338 -#define _loop1_87_type 1339 -#define _loop1_88_type 1340 -#define _tmp_89_type 1341 -#define _loop0_91_type 1342 -#define _gather_90_type 1343 -#define _tmp_92_type 1344 -#define _tmp_93_type 1345 -#define _tmp_94_type 1346 -#define _tmp_95_type 1347 -#define _tmp_96_type 1348 -#define _tmp_97_type 1349 -#define _loop0_98_type 1350 -#define _loop0_99_type 1351 -#define _loop0_100_type 1352 -#define _loop1_101_type 1353 -#define _loop0_102_type 1354 -#define _loop1_103_type 1355 -#define _loop1_104_type 1356 -#define _loop1_105_type 1357 -#define _loop0_106_type 1358 -#define _loop1_107_type 1359 -#define _loop0_108_type 1360 -#define _loop1_109_type 1361 -#define _loop0_110_type 1362 -#define _loop1_111_type 1363 -#define _loop0_112_type 1364 -#define _loop0_113_type 1365 -#define _loop1_114_type 1366 -#define _tmp_115_type 1367 -#define _loop0_117_type 1368 -#define _gather_116_type 1369 -#define _loop1_118_type 1370 -#define _loop0_119_type 1371 -#define _loop0_120_type 1372 -#define _tmp_121_type 1373 -#define _loop0_123_type 1374 -#define _gather_122_type 1375 -#define _tmp_124_type 1376 -#define _loop0_126_type 1377 -#define _gather_125_type 1378 -#define _loop0_128_type 1379 -#define _gather_127_type 1380 -#define _loop0_130_type 1381 -#define _gather_129_type 1382 -#define _loop0_132_type 1383 -#define _gather_131_type 1384 -#define _loop0_133_type 1385 -#define _loop0_135_type 1386 -#define _gather_134_type 1387 -#define _loop1_136_type 1388 -#define _tmp_137_type 1389 -#define _loop0_139_type 1390 -#define _gather_138_type 1391 -#define _loop0_141_type 1392 -#define _gather_140_type 1393 -#define _loop0_143_type 1394 -#define _gather_142_type 1395 -#define _loop0_145_type 1396 -#define _gather_144_type 1397 -#define _loop0_147_type 1398 -#define _gather_146_type 1399 -#define _tmp_148_type 1400 -#define _tmp_149_type 1401 -#define _loop0_151_type 1402 -#define _gather_150_type 1403 -#define _tmp_152_type 1404 -#define _tmp_153_type 1405 -#define _tmp_154_type 1406 -#define _tmp_155_type 1407 -#define _tmp_156_type 1408 -#define _tmp_157_type 1409 -#define _tmp_158_type 1410 -#define _tmp_159_type 1411 -#define _tmp_160_type 1412 -#define _tmp_161_type 1413 -#define _loop0_162_type 1414 -#define _loop0_163_type 1415 -#define _loop0_164_type 1416 -#define _tmp_165_type 1417 -#define _tmp_166_type 1418 -#define _tmp_167_type 1419 -#define _tmp_168_type 1420 -#define _loop0_169_type 1421 -#define _loop0_170_type 1422 -#define _loop0_171_type 1423 -#define _loop1_172_type 1424 -#define _tmp_173_type 1425 -#define _loop0_174_type 1426 -#define _tmp_175_type 1427 -#define _loop0_176_type 1428 -#define _loop1_177_type 1429 -#define _tmp_178_type 1430 -#define _tmp_179_type 1431 -#define _tmp_180_type 1432 -#define _loop0_181_type 1433 -#define _tmp_182_type 1434 -#define _tmp_183_type 1435 -#define _loop1_184_type 1436 -#define _tmp_185_type 1437 -#define _loop0_186_type 1438 -#define _loop0_187_type 1439 -#define _loop0_188_type 1440 -#define _loop0_190_type 1441 -#define _gather_189_type 1442 -#define _tmp_191_type 1443 -#define _loop0_192_type 1444 -#define _tmp_193_type 1445 -#define _loop0_194_type 1446 -#define _loop1_195_type 1447 -#define _loop1_196_type 1448 -#define _tmp_197_type 1449 -#define _tmp_198_type 1450 -#define _loop0_199_type 1451 -#define _tmp_200_type 1452 -#define _tmp_201_type 1453 -#define _tmp_202_type 1454 -#define _tmp_203_type 1455 -#define _loop0_205_type 1456 -#define _gather_204_type 1457 -#define _loop0_207_type 1458 -#define _gather_206_type 1459 -#define _loop0_209_type 1460 -#define _gather_208_type 1461 -#define _loop0_211_type 1462 -#define _gather_210_type 1463 -#define _loop0_213_type 1464 -#define _gather_212_type 1465 -#define _tmp_214_type 1466 -#define _loop0_215_type 1467 -#define _loop1_216_type 1468 -#define _tmp_217_type 1469 -#define _loop0_218_type 1470 -#define _loop1_219_type 1471 -#define _tmp_220_type 1472 -#define _tmp_221_type 1473 -#define _tmp_222_type 1474 -#define _tmp_223_type 1475 -#define _tmp_224_type 1476 -#define _tmp_225_type 1477 -#define _tmp_226_type 1478 -#define _tmp_227_type 1479 -#define _tmp_228_type 1480 -#define _tmp_229_type 1481 -#define _tmp_230_type 1482 -#define _loop0_232_type 1483 -#define _gather_231_type 1484 -#define _tmp_233_type 1485 -#define _tmp_234_type 1486 -#define _tmp_235_type 1487 -#define _tmp_236_type 1488 -#define _tmp_237_type 1489 -#define _tmp_238_type 1490 -#define _tmp_239_type 1491 -#define _loop0_240_type 1492 -#define _tmp_241_type 1493 -#define _tmp_242_type 1494 -#define _tmp_243_type 1495 -#define _tmp_244_type 1496 -#define _tmp_245_type 1497 -#define _tmp_246_type 1498 -#define _tmp_247_type 1499 -#define _tmp_248_type 1500 -#define _tmp_249_type 1501 -#define _tmp_250_type 1502 -#define _tmp_251_type 1503 -#define _tmp_252_type 1504 -#define _tmp_253_type 1505 -#define _tmp_254_type 1506 -#define _tmp_255_type 1507 -#define _tmp_256_type 1508 -#define _tmp_257_type 1509 -#define _tmp_258_type 1510 -#define _tmp_259_type 1511 -#define _tmp_260_type 1512 -#define _tmp_261_type 1513 -#define _tmp_262_type 1514 -#define _tmp_263_type 1515 -#define _tmp_264_type 1516 -#define _tmp_265_type 1517 -#define _loop0_266_type 1518 -#define _tmp_267_type 1519 -#define _tmp_268_type 1520 -#define _tmp_269_type 1521 -#define _tmp_270_type 1522 -#define _tmp_271_type 1523 -#define _tmp_272_type 1524 -#define _loop0_274_type 1525 -#define _gather_273_type 1526 -#define _tmp_275_type 1527 -#define _tmp_276_type 1528 -#define _tmp_277_type 1529 -#define _tmp_278_type 1530 -#define _tmp_279_type 1531 -#define _tmp_280_type 1532 -#define _tmp_281_type 1533 +#define invalid_with_stmt_type 1224 +#define invalid_with_stmt_indent_type 1225 +#define invalid_try_stmt_type 1226 +#define invalid_except_stmt_type 1227 +#define invalid_finally_stmt_type 1228 +#define invalid_except_stmt_indent_type 1229 +#define invalid_except_star_stmt_indent_type 1230 +#define invalid_match_stmt_type 1231 +#define invalid_case_block_type 1232 +#define invalid_as_pattern_type 1233 +#define invalid_class_pattern_type 1234 +#define invalid_class_argument_pattern_type 1235 +#define invalid_if_stmt_type 1236 +#define invalid_elif_stmt_type 1237 +#define invalid_else_stmt_type 1238 +#define invalid_while_stmt_type 1239 +#define invalid_for_stmt_type 1240 +#define invalid_def_raw_type 1241 +#define invalid_class_def_raw_type 1242 +#define invalid_double_starred_kvpairs_type 1243 +#define invalid_kvpair_type 1244 +#define invalid_starred_expression_unpacking_type 1245 +#define invalid_starred_expression_type 1246 +#define invalid_replacement_field_type 1247 +#define invalid_conversion_character_type 1248 +#define invalid_arithmetic_type 1249 +#define invalid_factor_type 1250 +#define invalid_type_params_type 1251 +#define _loop0_1_type 1252 +#define _loop0_2_type 1253 +#define _loop1_3_type 1254 +#define _loop0_5_type 1255 +#define _gather_4_type 1256 +#define _tmp_6_type 1257 +#define _tmp_7_type 1258 +#define _tmp_8_type 1259 +#define _tmp_9_type 1260 +#define _tmp_10_type 1261 +#define _tmp_11_type 1262 +#define _tmp_12_type 1263 +#define _tmp_13_type 1264 +#define _loop1_14_type 1265 +#define _tmp_15_type 1266 +#define _tmp_16_type 1267 +#define _tmp_17_type 1268 +#define _loop0_19_type 1269 +#define _gather_18_type 1270 +#define _loop0_21_type 1271 +#define _gather_20_type 1272 +#define _tmp_22_type 1273 +#define _tmp_23_type 1274 +#define _loop0_24_type 1275 +#define _loop1_25_type 1276 +#define _loop0_27_type 1277 +#define _gather_26_type 1278 +#define _tmp_28_type 1279 +#define _loop0_30_type 1280 +#define _gather_29_type 1281 +#define _tmp_31_type 1282 +#define _loop1_32_type 1283 +#define _tmp_33_type 1284 +#define _tmp_34_type 1285 +#define _tmp_35_type 1286 +#define _loop0_36_type 1287 +#define _loop0_37_type 1288 +#define _loop0_38_type 1289 +#define _loop1_39_type 1290 +#define _loop0_40_type 1291 +#define _loop1_41_type 1292 +#define _loop1_42_type 1293 +#define _loop1_43_type 1294 +#define _loop0_44_type 1295 +#define _loop1_45_type 1296 +#define _loop0_46_type 1297 +#define _loop1_47_type 1298 +#define _loop0_48_type 1299 +#define _loop0_49_type 1300 +#define _loop1_50_type 1301 +#define _loop0_52_type 1302 +#define _gather_51_type 1303 +#define _loop0_54_type 1304 +#define _gather_53_type 1305 +#define _loop0_56_type 1306 +#define _gather_55_type 1307 +#define _loop0_58_type 1308 +#define _gather_57_type 1309 +#define _tmp_59_type 1310 +#define _loop1_60_type 1311 +#define _loop1_61_type 1312 +#define _tmp_62_type 1313 +#define _tmp_63_type 1314 +#define _loop1_64_type 1315 +#define _loop0_66_type 1316 +#define _gather_65_type 1317 +#define _tmp_67_type 1318 +#define _tmp_68_type 1319 +#define _tmp_69_type 1320 +#define _tmp_70_type 1321 +#define _loop0_72_type 1322 +#define _gather_71_type 1323 +#define _loop0_74_type 1324 +#define _gather_73_type 1325 +#define _tmp_75_type 1326 +#define _loop0_77_type 1327 +#define _gather_76_type 1328 +#define _loop0_79_type 1329 +#define _gather_78_type 1330 +#define _loop0_81_type 1331 +#define _gather_80_type 1332 +#define _loop1_82_type 1333 +#define _loop1_83_type 1334 +#define _loop0_85_type 1335 +#define _gather_84_type 1336 +#define _loop1_86_type 1337 +#define _loop1_87_type 1338 +#define _loop1_88_type 1339 +#define _tmp_89_type 1340 +#define _loop0_91_type 1341 +#define _gather_90_type 1342 +#define _tmp_92_type 1343 +#define _tmp_93_type 1344 +#define _tmp_94_type 1345 +#define _tmp_95_type 1346 +#define _tmp_96_type 1347 +#define _tmp_97_type 1348 +#define _loop0_98_type 1349 +#define _loop0_99_type 1350 +#define _loop0_100_type 1351 +#define _loop1_101_type 1352 +#define _loop0_102_type 1353 +#define _loop1_103_type 1354 +#define _loop1_104_type 1355 +#define _loop1_105_type 1356 +#define _loop0_106_type 1357 +#define _loop1_107_type 1358 +#define _loop0_108_type 1359 +#define _loop1_109_type 1360 +#define _loop0_110_type 1361 +#define _loop1_111_type 1362 +#define _loop0_112_type 1363 +#define _loop0_113_type 1364 +#define _loop1_114_type 1365 +#define _tmp_115_type 1366 +#define _loop0_117_type 1367 +#define _gather_116_type 1368 +#define _loop1_118_type 1369 +#define _loop0_119_type 1370 +#define _loop0_120_type 1371 +#define _tmp_121_type 1372 +#define _loop0_123_type 1373 +#define _gather_122_type 1374 +#define _tmp_124_type 1375 +#define _loop0_126_type 1376 +#define _gather_125_type 1377 +#define _loop0_128_type 1378 +#define _gather_127_type 1379 +#define _loop0_130_type 1380 +#define _gather_129_type 1381 +#define _loop0_132_type 1382 +#define _gather_131_type 1383 +#define _loop0_133_type 1384 +#define _loop0_135_type 1385 +#define _gather_134_type 1386 +#define _loop1_136_type 1387 +#define _tmp_137_type 1388 +#define _loop0_139_type 1389 +#define _gather_138_type 1390 +#define _loop0_141_type 1391 +#define _gather_140_type 1392 +#define _loop0_143_type 1393 +#define _gather_142_type 1394 +#define _loop0_145_type 1395 +#define _gather_144_type 1396 +#define _loop0_147_type 1397 +#define _gather_146_type 1398 +#define _tmp_148_type 1399 +#define _tmp_149_type 1400 +#define _loop0_151_type 1401 +#define _gather_150_type 1402 +#define _tmp_152_type 1403 +#define _tmp_153_type 1404 +#define _tmp_154_type 1405 +#define _tmp_155_type 1406 +#define _tmp_156_type 1407 +#define _tmp_157_type 1408 +#define _tmp_158_type 1409 +#define _tmp_159_type 1410 +#define _tmp_160_type 1411 +#define _tmp_161_type 1412 +#define _loop0_162_type 1413 +#define _loop0_163_type 1414 +#define _loop0_164_type 1415 +#define _tmp_165_type 1416 +#define _tmp_166_type 1417 +#define _tmp_167_type 1418 +#define _tmp_168_type 1419 +#define _loop0_169_type 1420 +#define _loop0_170_type 1421 +#define _loop0_171_type 1422 +#define _loop1_172_type 1423 +#define _tmp_173_type 1424 +#define _loop0_174_type 1425 +#define _tmp_175_type 1426 +#define _loop0_176_type 1427 +#define _loop1_177_type 1428 +#define _tmp_178_type 1429 +#define _tmp_179_type 1430 +#define _tmp_180_type 1431 +#define _loop0_181_type 1432 +#define _tmp_182_type 1433 +#define _tmp_183_type 1434 +#define _loop1_184_type 1435 +#define _tmp_185_type 1436 +#define _loop0_186_type 1437 +#define _loop0_187_type 1438 +#define _loop0_188_type 1439 +#define _loop0_190_type 1440 +#define _gather_189_type 1441 +#define _tmp_191_type 1442 +#define _loop0_192_type 1443 +#define _tmp_193_type 1444 +#define _loop0_194_type 1445 +#define _loop1_195_type 1446 +#define _loop1_196_type 1447 +#define _tmp_197_type 1448 +#define _tmp_198_type 1449 +#define _loop0_199_type 1450 +#define _tmp_200_type 1451 +#define _tmp_201_type 1452 +#define _tmp_202_type 1453 +#define _tmp_203_type 1454 +#define _loop0_205_type 1455 +#define _gather_204_type 1456 +#define _loop0_207_type 1457 +#define _gather_206_type 1458 +#define _loop0_209_type 1459 +#define _gather_208_type 1460 +#define _loop0_211_type 1461 +#define _gather_210_type 1462 +#define _loop0_213_type 1463 +#define _gather_212_type 1464 +#define _tmp_214_type 1465 +#define _loop0_215_type 1466 +#define _loop1_216_type 1467 +#define _tmp_217_type 1468 +#define _loop0_218_type 1469 +#define _loop1_219_type 1470 +#define _tmp_220_type 1471 +#define _tmp_221_type 1472 +#define _tmp_222_type 1473 +#define _tmp_223_type 1474 +#define _tmp_224_type 1475 +#define _tmp_225_type 1476 +#define _tmp_226_type 1477 +#define _tmp_227_type 1478 +#define _tmp_228_type 1479 +#define _tmp_229_type 1480 +#define _tmp_230_type 1481 +#define _loop0_232_type 1482 +#define _gather_231_type 1483 +#define _tmp_233_type 1484 +#define _tmp_234_type 1485 +#define _tmp_235_type 1486 +#define _tmp_236_type 1487 +#define _tmp_237_type 1488 +#define _tmp_238_type 1489 +#define _tmp_239_type 1490 +#define _loop0_240_type 1491 +#define _tmp_241_type 1492 +#define _tmp_242_type 1493 +#define _tmp_243_type 1494 +#define _tmp_244_type 1495 +#define _tmp_245_type 1496 +#define _tmp_246_type 1497 +#define _tmp_247_type 1498 +#define _tmp_248_type 1499 +#define _tmp_249_type 1500 +#define _tmp_250_type 1501 +#define _tmp_251_type 1502 +#define _tmp_252_type 1503 +#define _tmp_253_type 1504 +#define _tmp_254_type 1505 +#define _tmp_255_type 1506 +#define _tmp_256_type 1507 +#define _tmp_257_type 1508 +#define _tmp_258_type 1509 +#define _tmp_259_type 1510 +#define _tmp_260_type 1511 +#define _tmp_261_type 1512 +#define _tmp_262_type 1513 +#define _tmp_263_type 1514 +#define _tmp_264_type 1515 +#define _tmp_265_type 1516 +#define _loop0_266_type 1517 +#define _tmp_267_type 1518 +#define _tmp_268_type 1519 +#define _tmp_269_type 1520 +#define _tmp_270_type 1521 +#define _tmp_271_type 1522 +#define _tmp_272_type 1523 +#define _loop0_274_type 1524 +#define _gather_273_type 1525 +#define _tmp_275_type 1526 +#define _tmp_276_type 1527 +#define _tmp_277_type 1528 +#define _tmp_278_type 1529 +#define _tmp_279_type 1530 +#define _tmp_280_type 1531 +#define _tmp_281_type 1532 static mod_ty file_rule(Parser *p); static mod_ty interactive_rule(Parser *p); @@ -843,7 +842,6 @@ static void *invalid_for_target_rule(Parser *p); static void *invalid_group_rule(Parser *p); static void *invalid_import_rule(Parser *p); static void *invalid_import_from_targets_rule(Parser *p); -static void *invalid_compound_stmt_rule(Parser *p); static void *invalid_with_stmt_rule(Parser *p); static void *invalid_with_stmt_indent_rule(Parser *p); static void *invalid_try_stmt_rule(Parser *p); @@ -2062,7 +2060,6 @@ simple_stmt_rule(Parser *p) } // compound_stmt: -// | invalid_compound_stmt // | &('def' | '@' | 'async') function_def // | &'if' if_stmt // | &('class' | '@') class_def @@ -2083,25 +2080,6 @@ compound_stmt_rule(Parser *p) } stmt_ty _res = NULL; int _mark = p->mark; - if (p->call_invalid_rules) { // invalid_compound_stmt - if (p->error_indicator) { - p->level--; - return NULL; - } - D(fprintf(stderr, "%*c> compound_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "invalid_compound_stmt")); - void *invalid_compound_stmt_var; - if ( - (invalid_compound_stmt_var = invalid_compound_stmt_rule(p)) // invalid_compound_stmt - ) - { - D(fprintf(stderr, "%*c+ compound_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "invalid_compound_stmt")); - _res = invalid_compound_stmt_var; - goto done; - } - p->mark = _mark; - D(fprintf(stderr, "%*c%s compound_stmt[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "invalid_compound_stmt")); - } { // &('def' | '@' | 'async') function_def if (p->error_indicator) { p->level--; @@ -2131,7 +2109,7 @@ compound_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> compound_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&'if' if_stmt")); stmt_ty if_stmt_var; if ( - _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 662) // token='if' + _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 660) // token='if' && (if_stmt_var = if_stmt_rule(p)) // if_stmt ) @@ -2215,7 +2193,7 @@ compound_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> compound_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&'try' try_stmt")); stmt_ty try_stmt_var; if ( - _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 644) // token='try' + _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 642) // token='try' && (try_stmt_var = try_stmt_rule(p)) // try_stmt ) @@ -2236,7 +2214,7 @@ compound_stmt_rule(Parser *p) D(fprintf(stderr, "%*c> compound_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "&'while' while_stmt")); stmt_ty while_stmt_var; if ( - _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 667) // token='while' + _PyPegen_lookahead_with_int(1, _PyPegen_expect_token, p, 665) // token='while' && (while_stmt_var = while_stmt_rule(p)) // while_stmt ) @@ -4376,7 +4354,7 @@ class_def_raw_rule(Parser *p) asdl_stmt_seq* c; void *t; if ( - (_keyword = _PyPegen_expect_token(p, 679)) // token='class' + (_keyword = _PyPegen_expect_token(p, 677)) // token='class' && (a = _PyPegen_name_token(p)) // NAME && @@ -4543,7 +4521,7 @@ function_def_raw_rule(Parser *p) void *t; void *tc; if ( - (_keyword = _PyPegen_expect_token(p, 677)) // token='def' + (_keyword = _PyPegen_expect_token(p, 675)) // token='def' && (n = _PyPegen_name_token(p)) // NAME && @@ -4604,9 +4582,9 @@ function_def_raw_rule(Parser *p) void *t; void *tc; if ( - (_keyword = _PyPegen_expect_token(p, 676)) // token='async' + (_keyword = _PyPegen_expect_token(p, 674)) // token='async' && - (_keyword_1 = _PyPegen_expect_token(p, 677)) // token='def' + (_keyword_1 = _PyPegen_expect_token(p, 675)) // token='def' && (n = _PyPegen_name_token(p)) // NAME && @@ -5944,7 +5922,7 @@ if_stmt_rule(Parser *p) asdl_stmt_seq* b; stmt_ty c; if ( - (_keyword = _PyPegen_expect_token(p, 662)) // token='if' + (_keyword = _PyPegen_expect_token(p, 660)) // token='if' && (a = named_expression_rule(p)) // named_expression && @@ -5989,7 +5967,7 @@ if_stmt_rule(Parser *p) asdl_stmt_seq* b; void *c; if ( - (_keyword = _PyPegen_expect_token(p, 662)) // token='if' + (_keyword = _PyPegen_expect_token(p, 660)) // token='if' && (a = named_expression_rule(p)) // named_expression && @@ -6084,7 +6062,7 @@ elif_stmt_rule(Parser *p) asdl_stmt_seq* b; stmt_ty c; if ( - (_keyword = _PyPegen_expect_token(p, 664)) // token='elif' + (_keyword = _PyPegen_expect_token(p, 662)) // token='elif' && (a = named_expression_rule(p)) // named_expression && @@ -6129,7 +6107,7 @@ elif_stmt_rule(Parser *p) asdl_stmt_seq* b; void *c; if ( - (_keyword = _PyPegen_expect_token(p, 664)) // token='elif' + (_keyword = _PyPegen_expect_token(p, 662)) // token='elif' && (a = named_expression_rule(p)) // named_expression && @@ -6210,7 +6188,7 @@ else_block_rule(Parser *p) Token * _literal; asdl_stmt_seq* b; if ( - (_keyword = _PyPegen_expect_token(p, 665)) // token='else' + (_keyword = _PyPegen_expect_token(p, 663)) // token='else' && (_literal = _PyPegen_expect_forced_token(p, 11, ":")) // forced_token=':' && @@ -6289,7 +6267,7 @@ while_stmt_rule(Parser *p) asdl_stmt_seq* b; void *c; if ( - (_keyword = _PyPegen_expect_token(p, 667)) // token='while' + (_keyword = _PyPegen_expect_token(p, 665)) // token='while' && (a = named_expression_rule(p)) // named_expression && @@ -6389,11 +6367,11 @@ for_stmt_rule(Parser *p) expr_ty t; void *tc; if ( - (_keyword = _PyPegen_expect_token(p, 672)) // token='for' + (_keyword = _PyPegen_expect_token(p, 670)) // token='for' && (t = star_targets_rule(p)) // star_targets && - (_keyword_1 = _PyPegen_expect_token(p, 673)) // token='in' + (_keyword_1 = _PyPegen_expect_token(p, 671)) // token='in' && (_cut_var = 1) && @@ -6451,13 +6429,13 @@ for_stmt_rule(Parser *p) expr_ty t; void *tc; if ( - (_keyword = _PyPegen_expect_token(p, 676)) // token='async' + (_keyword = _PyPegen_expect_token(p, 674)) // token='async' && - (_keyword_1 = _PyPegen_expect_token(p, 672)) // token='for' + (_keyword_1 = _PyPegen_expect_token(p, 670)) // token='for' && (t = star_targets_rule(p)) // star_targets && - (_keyword_2 = _PyPegen_expect_token(p, 673)) // token='in' + (_keyword_2 = _PyPegen_expect_token(p, 671)) // token='in' && (_cut_var = 1) && @@ -6586,7 +6564,7 @@ with_stmt_rule(Parser *p) asdl_stmt_seq* b; void *tc; if ( - (_keyword = _PyPegen_expect_token(p, 635)) // token='with' + (_keyword = _PyPegen_expect_token(p, 633)) // token='with' && (_literal = _PyPegen_expect_token(p, 7)) // token='(' && @@ -6637,7 +6615,7 @@ with_stmt_rule(Parser *p) asdl_stmt_seq* b; void *tc; if ( - (_keyword = _PyPegen_expect_token(p, 635)) // token='with' + (_keyword = _PyPegen_expect_token(p, 633)) // token='with' && (a = (asdl_withitem_seq*)_gather_53_rule(p)) // ','.with_item+ && @@ -6686,9 +6664,9 @@ with_stmt_rule(Parser *p) asdl_withitem_seq* a; asdl_stmt_seq* b; if ( - (_keyword = _PyPegen_expect_token(p, 676)) // token='async' + (_keyword = _PyPegen_expect_token(p, 674)) // token='async' && - (_keyword_1 = _PyPegen_expect_token(p, 635)) // token='with' + (_keyword_1 = _PyPegen_expect_token(p, 633)) // token='with' && (_literal = _PyPegen_expect_token(p, 7)) // token='(' && @@ -6738,9 +6716,9 @@ with_stmt_rule(Parser *p) asdl_stmt_seq* b; void *tc; if ( - (_keyword = _PyPegen_expect_token(p, 676)) // token='async' + (_keyword = _PyPegen_expect_token(p, 674)) // token='async' && - (_keyword_1 = _PyPegen_expect_token(p, 635)) // token='with' + (_keyword_1 = _PyPegen_expect_token(p, 633)) // token='with' && (a = (asdl_withitem_seq*)_gather_57_rule(p)) // ','.with_item+ && @@ -6826,7 +6804,7 @@ with_item_rule(Parser *p) if ( (e = expression_rule(p)) // expression && - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (t = star_target_rule(p)) // star_target && @@ -6951,7 +6929,7 @@ try_stmt_rule(Parser *p) asdl_stmt_seq* b; asdl_stmt_seq* f; if ( - (_keyword = _PyPegen_expect_token(p, 644)) // token='try' + (_keyword = _PyPegen_expect_token(p, 642)) // token='try' && (_literal = _PyPegen_expect_forced_token(p, 11, ":")) // forced_token=':' && @@ -6995,7 +6973,7 @@ try_stmt_rule(Parser *p) asdl_excepthandler_seq* ex; void *f; if ( - (_keyword = _PyPegen_expect_token(p, 644)) // token='try' + (_keyword = _PyPegen_expect_token(p, 642)) // token='try' && (_literal = _PyPegen_expect_forced_token(p, 11, ":")) // forced_token=':' && @@ -7043,7 +7021,7 @@ try_stmt_rule(Parser *p) asdl_excepthandler_seq* ex; void *f; if ( - (_keyword = _PyPegen_expect_token(p, 644)) // token='try' + (_keyword = _PyPegen_expect_token(p, 642)) // token='try' && (_literal = _PyPegen_expect_forced_token(p, 11, ":")) // forced_token=':' && @@ -7141,7 +7119,7 @@ except_block_rule(Parser *p) expr_ty e; void *t; if ( - (_keyword = _PyPegen_expect_token(p, 657)) // token='except' + (_keyword = _PyPegen_expect_token(p, 655)) // token='except' && (e = expression_rule(p)) // expression && @@ -7184,7 +7162,7 @@ except_block_rule(Parser *p) Token * _literal; asdl_stmt_seq* b; if ( - (_keyword = _PyPegen_expect_token(p, 657)) // token='except' + (_keyword = _PyPegen_expect_token(p, 655)) // token='except' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -7295,7 +7273,7 @@ except_star_block_rule(Parser *p) expr_ty e; void *t; if ( - (_keyword = _PyPegen_expect_token(p, 657)) // token='except' + (_keyword = _PyPegen_expect_token(p, 655)) // token='except' && (_literal = _PyPegen_expect_token(p, 16)) // token='*' && @@ -7397,7 +7375,7 @@ finally_block_rule(Parser *p) Token * _literal; asdl_stmt_seq* a; if ( - (_keyword = _PyPegen_expect_token(p, 653)) // token='finally' + (_keyword = _PyPegen_expect_token(p, 651)) // token='finally' && (_literal = _PyPegen_expect_forced_token(p, 11, ":")) // forced_token=':' && @@ -7705,7 +7683,7 @@ guard_rule(Parser *p) Token * _keyword; expr_ty guard; if ( - (_keyword = _PyPegen_expect_token(p, 662)) // token='if' + (_keyword = _PyPegen_expect_token(p, 660)) // token='if' && (guard = named_expression_rule(p)) // named_expression ) @@ -7900,7 +7878,7 @@ as_pattern_rule(Parser *p) if ( (pattern = or_pattern_rule(p)) // or_pattern && - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (target = pattern_capture_target_rule(p)) // pattern_capture_target ) @@ -11195,11 +11173,11 @@ expression_rule(Parser *p) if ( (a = disjunction_rule(p)) // disjunction && - (_keyword = _PyPegen_expect_token(p, 662)) // token='if' + (_keyword = _PyPegen_expect_token(p, 660)) // token='if' && (b = disjunction_rule(p)) // disjunction && - (_keyword_1 = _PyPegen_expect_token(p, 665)) // token='else' + (_keyword_1 = _PyPegen_expect_token(p, 663)) // token='else' && (c = expression_rule(p)) // expression ) @@ -12081,7 +12059,7 @@ inversion_rule(Parser *p) Token * _keyword; expr_ty a; if ( - (_keyword = _PyPegen_expect_token(p, 681)) // token='not' + (_keyword = _PyPegen_expect_token(p, 679)) // token='not' && (a = inversion_rule(p)) // inversion ) @@ -12735,9 +12713,9 @@ notin_bitwise_or_rule(Parser *p) Token * _keyword_1; expr_ty a; if ( - (_keyword = _PyPegen_expect_token(p, 681)) // token='not' + (_keyword = _PyPegen_expect_token(p, 679)) // token='not' && - (_keyword_1 = _PyPegen_expect_token(p, 673)) // token='in' + (_keyword_1 = _PyPegen_expect_token(p, 671)) // token='in' && (a = bitwise_or_rule(p)) // bitwise_or ) @@ -12783,7 +12761,7 @@ in_bitwise_or_rule(Parser *p) Token * _keyword; expr_ty a; if ( - (_keyword = _PyPegen_expect_token(p, 673)) // token='in' + (_keyword = _PyPegen_expect_token(p, 671)) // token='in' && (a = bitwise_or_rule(p)) // bitwise_or ) @@ -12832,7 +12810,7 @@ isnot_bitwise_or_rule(Parser *p) if ( (_keyword = _PyPegen_expect_token(p, 589)) // token='is' && - (_keyword_1 = _PyPegen_expect_token(p, 681)) // token='not' + (_keyword_1 = _PyPegen_expect_token(p, 679)) // token='not' && (a = bitwise_or_rule(p)) // bitwise_or ) @@ -17003,13 +16981,13 @@ for_if_clause_rule(Parser *p) expr_ty b; asdl_expr_seq* c; if ( - (_keyword = _PyPegen_expect_token(p, 676)) // token='async' + (_keyword = _PyPegen_expect_token(p, 674)) // token='async' && - (_keyword_1 = _PyPegen_expect_token(p, 672)) // token='for' + (_keyword_1 = _PyPegen_expect_token(p, 670)) // token='for' && (a = star_targets_rule(p)) // star_targets && - (_keyword_2 = _PyPegen_expect_token(p, 673)) // token='in' + (_keyword_2 = _PyPegen_expect_token(p, 671)) // token='in' && (_cut_var = 1) && @@ -17048,11 +17026,11 @@ for_if_clause_rule(Parser *p) expr_ty b; asdl_expr_seq* c; if ( - (_keyword = _PyPegen_expect_token(p, 672)) // token='for' + (_keyword = _PyPegen_expect_token(p, 670)) // token='for' && (a = star_targets_rule(p)) // star_targets && - (_keyword_1 = _PyPegen_expect_token(p, 673)) // token='in' + (_keyword_1 = _PyPegen_expect_token(p, 671)) // token='in' && (_cut_var = 1) && @@ -20353,11 +20331,11 @@ expression_without_invalid_rule(Parser *p) if ( (a = disjunction_rule(p)) // disjunction && - (_keyword = _PyPegen_expect_token(p, 662)) // token='if' + (_keyword = _PyPegen_expect_token(p, 660)) // token='if' && (b = disjunction_rule(p)) // disjunction && - (_keyword_1 = _PyPegen_expect_token(p, 665)) // token='else' + (_keyword_1 = _PyPegen_expect_token(p, 663)) // token='else' && (c = expression_rule(p)) // expression ) @@ -20623,7 +20601,7 @@ invalid_expression_rule(Parser *p) if ( (a = disjunction_rule(p)) // disjunction && - (_keyword = _PyPegen_expect_token(p, 662)) // token='if' + (_keyword = _PyPegen_expect_token(p, 660)) // token='if' && (b = disjunction_rule(p)) // disjunction && @@ -22561,7 +22539,7 @@ invalid_with_item_rule(Parser *p) if ( (expression_var = expression_rule(p)) // expression && - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (a = expression_rule(p)) // expression && @@ -22611,13 +22589,13 @@ invalid_for_if_clause_rule(Parser *p) UNUSED(_opt_var); // Silence compiler warnings void *_tmp_203_var; if ( - (_opt_var = _PyPegen_expect_token(p, 676), !p->error_indicator) // 'async'? + (_opt_var = _PyPegen_expect_token(p, 674), !p->error_indicator) // 'async'? && - (_keyword = _PyPegen_expect_token(p, 672)) // token='for' + (_keyword = _PyPegen_expect_token(p, 670)) // token='for' && (_tmp_203_var = _tmp_203_rule(p)) // bitwise_or ((',' bitwise_or))* ','? && - _PyPegen_lookahead_with_int(0, _PyPegen_expect_token, p, 673) // token='in' + _PyPegen_lookahead_with_int(0, _PyPegen_expect_token, p, 671) // token='in' ) { D(fprintf(stderr, "%*c+ invalid_for_if_clause[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'async'? 'for' (bitwise_or ((',' bitwise_or))* ','?) !'in'")); @@ -22663,9 +22641,9 @@ invalid_for_target_rule(Parser *p) UNUSED(_opt_var); // Silence compiler warnings expr_ty a; if ( - (_opt_var = _PyPegen_expect_token(p, 676), !p->error_indicator) // 'async'? + (_opt_var = _PyPegen_expect_token(p, 674), !p->error_indicator) // 'async'? && - (_keyword = _PyPegen_expect_token(p, 672)) // token='for' + (_keyword = _PyPegen_expect_token(p, 670)) // token='for' && (a = star_expressions_rule(p)) // star_expressions ) @@ -22923,82 +22901,6 @@ invalid_import_from_targets_rule(Parser *p) return _res; } -// invalid_compound_stmt: 'elif' named_expression ':' | 'else' ':' -static void * -invalid_compound_stmt_rule(Parser *p) -{ - if (p->level++ == MAXSTACK) { - _Pypegen_stack_overflow(p); - } - if (p->error_indicator) { - p->level--; - return NULL; - } - void * _res = NULL; - int _mark = p->mark; - { // 'elif' named_expression ':' - if (p->error_indicator) { - p->level--; - return NULL; - } - D(fprintf(stderr, "%*c> invalid_compound_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'elif' named_expression ':'")); - Token * _literal; - Token * a; - expr_ty named_expression_var; - if ( - (a = _PyPegen_expect_token(p, 664)) // token='elif' - && - (named_expression_var = named_expression_rule(p)) // named_expression - && - (_literal = _PyPegen_expect_token(p, 11)) // token=':' - ) - { - D(fprintf(stderr, "%*c+ invalid_compound_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'elif' named_expression ':'")); - _res = RAISE_SYNTAX_ERROR_STARTING_FROM ( a , "'elif' must match an if-statement here" ); - if (_res == NULL && PyErr_Occurred()) { - p->error_indicator = 1; - p->level--; - return NULL; - } - goto done; - } - p->mark = _mark; - D(fprintf(stderr, "%*c%s invalid_compound_stmt[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'elif' named_expression ':'")); - } - { // 'else' ':' - if (p->error_indicator) { - p->level--; - return NULL; - } - D(fprintf(stderr, "%*c> invalid_compound_stmt[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'else' ':'")); - Token * _literal; - Token * a; - if ( - (a = _PyPegen_expect_token(p, 665)) // token='else' - && - (_literal = _PyPegen_expect_token(p, 11)) // token=':' - ) - { - D(fprintf(stderr, "%*c+ invalid_compound_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'else' ':'")); - _res = RAISE_SYNTAX_ERROR_STARTING_FROM ( a , "'else' must match a valid statement here" ); - if (_res == NULL && PyErr_Occurred()) { - p->error_indicator = 1; - p->level--; - return NULL; - } - goto done; - } - p->mark = _mark; - D(fprintf(stderr, "%*c%s invalid_compound_stmt[%d-%d]: %s failed!\n", p->level, ' ', - p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "'else' ':'")); - } - _res = NULL; - done: - p->level--; - return _res; -} - // invalid_with_stmt: // | 'async'? 'with' ','.(expression ['as' star_target])+ NEWLINE // | 'async'? 'with' '(' ','.(expressions ['as' star_target])+ ','? ')' NEWLINE @@ -23026,9 +22928,9 @@ invalid_with_stmt_rule(Parser *p) UNUSED(_opt_var); // Silence compiler warnings Token * newline_var; if ( - (_opt_var = _PyPegen_expect_token(p, 676), !p->error_indicator) // 'async'? + (_opt_var = _PyPegen_expect_token(p, 674), !p->error_indicator) // 'async'? && - (_keyword = _PyPegen_expect_token(p, 635)) // token='with' + (_keyword = _PyPegen_expect_token(p, 633)) // token='with' && (_gather_206_var = _gather_206_rule(p)) // ','.(expression ['as' star_target])+ && @@ -23064,9 +22966,9 @@ invalid_with_stmt_rule(Parser *p) UNUSED(_opt_var_1); // Silence compiler warnings Token * newline_var; if ( - (_opt_var = _PyPegen_expect_token(p, 676), !p->error_indicator) // 'async'? + (_opt_var = _PyPegen_expect_token(p, 674), !p->error_indicator) // 'async'? && - (_keyword = _PyPegen_expect_token(p, 635)) // token='with' + (_keyword = _PyPegen_expect_token(p, 633)) // token='with' && (_literal = _PyPegen_expect_token(p, 7)) // token='(' && @@ -23126,9 +23028,9 @@ invalid_with_stmt_indent_rule(Parser *p) Token * a; Token * newline_var; if ( - (_opt_var = _PyPegen_expect_token(p, 676), !p->error_indicator) // 'async'? + (_opt_var = _PyPegen_expect_token(p, 674), !p->error_indicator) // 'async'? && - (a = _PyPegen_expect_token(p, 635)) // token='with' + (a = _PyPegen_expect_token(p, 633)) // token='with' && (_gather_210_var = _gather_210_rule(p)) // ','.(expression ['as' star_target])+ && @@ -23169,9 +23071,9 @@ invalid_with_stmt_indent_rule(Parser *p) Token * a; Token * newline_var; if ( - (_opt_var = _PyPegen_expect_token(p, 676), !p->error_indicator) // 'async'? + (_opt_var = _PyPegen_expect_token(p, 674), !p->error_indicator) // 'async'? && - (a = _PyPegen_expect_token(p, 635)) // token='with' + (a = _PyPegen_expect_token(p, 633)) // token='with' && (_literal = _PyPegen_expect_token(p, 7)) // token='(' && @@ -23234,7 +23136,7 @@ invalid_try_stmt_rule(Parser *p) Token * a; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 644)) // token='try' + (a = _PyPegen_expect_token(p, 642)) // token='try' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -23266,7 +23168,7 @@ invalid_try_stmt_rule(Parser *p) Token * _literal; asdl_stmt_seq* block_var; if ( - (_keyword = _PyPegen_expect_token(p, 644)) // token='try' + (_keyword = _PyPegen_expect_token(p, 642)) // token='try' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -23305,7 +23207,7 @@ invalid_try_stmt_rule(Parser *p) Token * b; expr_ty expression_var; if ( - (_keyword = _PyPegen_expect_token(p, 644)) // token='try' + (_keyword = _PyPegen_expect_token(p, 642)) // token='try' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -23313,7 +23215,7 @@ invalid_try_stmt_rule(Parser *p) && (_loop1_216_var = _loop1_216_rule(p)) // except_block+ && - (a = _PyPegen_expect_token(p, 657)) // token='except' + (a = _PyPegen_expect_token(p, 655)) // token='except' && (b = _PyPegen_expect_token(p, 16)) // token='*' && @@ -23352,7 +23254,7 @@ invalid_try_stmt_rule(Parser *p) UNUSED(_opt_var); // Silence compiler warnings Token * a; if ( - (_keyword = _PyPegen_expect_token(p, 644)) // token='try' + (_keyword = _PyPegen_expect_token(p, 642)) // token='try' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -23360,7 +23262,7 @@ invalid_try_stmt_rule(Parser *p) && (_loop1_219_var = _loop1_219_rule(p)) // except_star_block+ && - (a = _PyPegen_expect_token(p, 657)) // token='except' + (a = _PyPegen_expect_token(p, 655)) // token='except' && (_opt_var = _tmp_220_rule(p), !p->error_indicator) // [expression ['as' NAME]] && @@ -23419,7 +23321,7 @@ invalid_except_stmt_rule(Parser *p) expr_ty a; expr_ty expressions_var; if ( - (_keyword = _PyPegen_expect_token(p, 657)) // token='except' + (_keyword = _PyPegen_expect_token(p, 655)) // token='except' && (_opt_var = _PyPegen_expect_token(p, 16), !p->error_indicator) // '*'? && @@ -23461,7 +23363,7 @@ invalid_except_stmt_rule(Parser *p) expr_ty expression_var; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 657)) // token='except' + (a = _PyPegen_expect_token(p, 655)) // token='except' && (_opt_var = _PyPegen_expect_token(p, 16), !p->error_indicator) // '*'? && @@ -23494,7 +23396,7 @@ invalid_except_stmt_rule(Parser *p) Token * a; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 657)) // token='except' + (a = _PyPegen_expect_token(p, 655)) // token='except' && (newline_var = _PyPegen_expect_token(p, NEWLINE)) // token='NEWLINE' ) @@ -23522,7 +23424,7 @@ invalid_except_stmt_rule(Parser *p) void *_tmp_223_var; Token * a; if ( - (a = _PyPegen_expect_token(p, 657)) // token='except' + (a = _PyPegen_expect_token(p, 655)) // token='except' && (_literal = _PyPegen_expect_token(p, 16)) // token='*' && @@ -23571,7 +23473,7 @@ invalid_finally_stmt_rule(Parser *p) Token * a; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 653)) // token='finally' + (a = _PyPegen_expect_token(p, 651)) // token='finally' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -23627,7 +23529,7 @@ invalid_except_stmt_indent_rule(Parser *p) expr_ty expression_var; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 657)) // token='except' + (a = _PyPegen_expect_token(p, 655)) // token='except' && (expression_var = expression_rule(p)) // expression && @@ -23663,7 +23565,7 @@ invalid_except_stmt_indent_rule(Parser *p) Token * a; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 657)) // token='except' + (a = _PyPegen_expect_token(p, 655)) // token='except' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -23719,7 +23621,7 @@ invalid_except_star_stmt_indent_rule(Parser *p) expr_ty expression_var; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 657)) // token='except' + (a = _PyPegen_expect_token(p, 655)) // token='except' && (_literal = _PyPegen_expect_token(p, 16)) // token='*' && @@ -23958,7 +23860,7 @@ invalid_as_pattern_rule(Parser *p) if ( (or_pattern_var = or_pattern_rule(p)) // or_pattern && - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (a = _PyPegen_expect_soft_keyword(p, "_")) // soft_keyword='"_"' ) @@ -23988,7 +23890,7 @@ invalid_as_pattern_rule(Parser *p) if ( (or_pattern_var = or_pattern_rule(p)) // or_pattern && - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && _PyPegen_lookahead_with_name(0, _PyPegen_name_token, p) && @@ -24142,7 +24044,7 @@ invalid_if_stmt_rule(Parser *p) expr_ty named_expression_var; Token * newline_var; if ( - (_keyword = _PyPegen_expect_token(p, 662)) // token='if' + (_keyword = _PyPegen_expect_token(p, 660)) // token='if' && (named_expression_var = named_expression_rule(p)) // named_expression && @@ -24173,7 +24075,7 @@ invalid_if_stmt_rule(Parser *p) expr_ty a_1; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 662)) // token='if' + (a = _PyPegen_expect_token(p, 660)) // token='if' && (a_1 = named_expression_rule(p)) // named_expression && @@ -24228,7 +24130,7 @@ invalid_elif_stmt_rule(Parser *p) expr_ty named_expression_var; Token * newline_var; if ( - (_keyword = _PyPegen_expect_token(p, 664)) // token='elif' + (_keyword = _PyPegen_expect_token(p, 662)) // token='elif' && (named_expression_var = named_expression_rule(p)) // named_expression && @@ -24259,7 +24161,7 @@ invalid_elif_stmt_rule(Parser *p) expr_ty named_expression_var; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 664)) // token='elif' + (a = _PyPegen_expect_token(p, 662)) // token='elif' && (named_expression_var = named_expression_rule(p)) // named_expression && @@ -24312,7 +24214,7 @@ invalid_else_stmt_rule(Parser *p) Token * a; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 665)) // token='else' + (a = _PyPegen_expect_token(p, 663)) // token='else' && (_literal = _PyPegen_expect_token(p, 11)) // token=':' && @@ -24365,7 +24267,7 @@ invalid_while_stmt_rule(Parser *p) expr_ty named_expression_var; Token * newline_var; if ( - (_keyword = _PyPegen_expect_token(p, 667)) // token='while' + (_keyword = _PyPegen_expect_token(p, 665)) // token='while' && (named_expression_var = named_expression_rule(p)) // named_expression && @@ -24396,7 +24298,7 @@ invalid_while_stmt_rule(Parser *p) expr_ty named_expression_var; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 667)) // token='while' + (a = _PyPegen_expect_token(p, 665)) // token='while' && (named_expression_var = named_expression_rule(p)) // named_expression && @@ -24455,13 +24357,13 @@ invalid_for_stmt_rule(Parser *p) expr_ty star_expressions_var; expr_ty star_targets_var; if ( - (_opt_var = _PyPegen_expect_token(p, 676), !p->error_indicator) // 'async'? + (_opt_var = _PyPegen_expect_token(p, 674), !p->error_indicator) // 'async'? && - (_keyword = _PyPegen_expect_token(p, 672)) // token='for' + (_keyword = _PyPegen_expect_token(p, 670)) // token='for' && (star_targets_var = star_targets_rule(p)) // star_targets && - (_keyword_1 = _PyPegen_expect_token(p, 673)) // token='in' + (_keyword_1 = _PyPegen_expect_token(p, 671)) // token='in' && (star_expressions_var = star_expressions_rule(p)) // star_expressions && @@ -24496,13 +24398,13 @@ invalid_for_stmt_rule(Parser *p) expr_ty star_expressions_var; expr_ty star_targets_var; if ( - (_opt_var = _PyPegen_expect_token(p, 676), !p->error_indicator) // 'async'? + (_opt_var = _PyPegen_expect_token(p, 674), !p->error_indicator) // 'async'? && - (a = _PyPegen_expect_token(p, 672)) // token='for' + (a = _PyPegen_expect_token(p, 670)) // token='for' && (star_targets_var = star_targets_rule(p)) // star_targets && - (_keyword = _PyPegen_expect_token(p, 673)) // token='in' + (_keyword = _PyPegen_expect_token(p, 671)) // token='in' && (star_expressions_var = star_expressions_rule(p)) // star_expressions && @@ -24568,9 +24470,9 @@ invalid_def_raw_rule(Parser *p) expr_ty name_var; Token * newline_var; if ( - (_opt_var = _PyPegen_expect_token(p, 676), !p->error_indicator) // 'async'? + (_opt_var = _PyPegen_expect_token(p, 674), !p->error_indicator) // 'async'? && - (a = _PyPegen_expect_token(p, 677)) // token='def' + (a = _PyPegen_expect_token(p, 675)) // token='def' && (name_var = _PyPegen_name_token(p)) // NAME && @@ -24627,9 +24529,9 @@ invalid_def_raw_rule(Parser *p) asdl_stmt_seq* block_var; expr_ty name_var; if ( - (_opt_var = _PyPegen_expect_token(p, 676), !p->error_indicator) // 'async'? + (_opt_var = _PyPegen_expect_token(p, 674), !p->error_indicator) // 'async'? && - (_keyword = _PyPegen_expect_token(p, 677)) // token='def' + (_keyword = _PyPegen_expect_token(p, 675)) // token='def' && (name_var = _PyPegen_name_token(p)) // NAME && @@ -24693,7 +24595,7 @@ invalid_class_def_raw_rule(Parser *p) expr_ty name_var; Token * newline_var; if ( - (_keyword = _PyPegen_expect_token(p, 679)) // token='class' + (_keyword = _PyPegen_expect_token(p, 677)) // token='class' && (name_var = _PyPegen_name_token(p)) // NAME && @@ -24732,7 +24634,7 @@ invalid_class_def_raw_rule(Parser *p) expr_ty name_var; Token * newline_var; if ( - (a = _PyPegen_expect_token(p, 679)) // token='class' + (a = _PyPegen_expect_token(p, 677)) // token='class' && (name_var = _PyPegen_name_token(p)) // NAME && @@ -25550,7 +25452,7 @@ invalid_arithmetic_rule(Parser *p) && (_tmp_243_var = _tmp_243_rule(p)) // '+' | '-' | '*' | '/' | '%' | '//' | '@' && - (a = _PyPegen_expect_token(p, 681)) // token='not' + (a = _PyPegen_expect_token(p, 679)) // token='not' && (b = inversion_rule(p)) // inversion ) @@ -25599,7 +25501,7 @@ invalid_factor_rule(Parser *p) if ( (_tmp_244_var = _tmp_244_rule(p)) // '+' | '-' | '~' && - (a = _PyPegen_expect_token(p, 681)) // token='not' + (a = _PyPegen_expect_token(p, 679)) // token='not' && (b = factor_rule(p)) // factor ) @@ -26070,7 +25972,7 @@ _tmp_7_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_7[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'def'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 677)) // token='def' + (_keyword = _PyPegen_expect_token(p, 675)) // token='def' ) { D(fprintf(stderr, "%*c+ _tmp_7[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'def'")); @@ -26108,7 +26010,7 @@ _tmp_7_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_7[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'async'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 676)) // token='async' + (_keyword = _PyPegen_expect_token(p, 674)) // token='async' ) { D(fprintf(stderr, "%*c+ _tmp_7[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'async'")); @@ -26146,7 +26048,7 @@ _tmp_8_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_8[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'class'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 679)) // token='class' + (_keyword = _PyPegen_expect_token(p, 677)) // token='class' ) { D(fprintf(stderr, "%*c+ _tmp_8[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'class'")); @@ -26203,7 +26105,7 @@ _tmp_9_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_9[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'with'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 635)) // token='with' + (_keyword = _PyPegen_expect_token(p, 633)) // token='with' ) { D(fprintf(stderr, "%*c+ _tmp_9[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'with'")); @@ -26222,7 +26124,7 @@ _tmp_9_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_9[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'async'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 676)) // token='async' + (_keyword = _PyPegen_expect_token(p, 674)) // token='async' ) { D(fprintf(stderr, "%*c+ _tmp_9[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'async'")); @@ -26260,7 +26162,7 @@ _tmp_10_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_10[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'for'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 672)) // token='for' + (_keyword = _PyPegen_expect_token(p, 670)) // token='for' ) { D(fprintf(stderr, "%*c+ _tmp_10[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'for'")); @@ -26279,7 +26181,7 @@ _tmp_10_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_10[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'async'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 676)) // token='async' + (_keyword = _PyPegen_expect_token(p, 674)) // token='async' ) { D(fprintf(stderr, "%*c+ _tmp_10[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'async'")); @@ -27303,7 +27205,7 @@ _tmp_28_rule(Parser *p) Token * _keyword; expr_ty z; if ( - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (z = _PyPegen_name_token(p)) // NAME ) @@ -27466,7 +27368,7 @@ _tmp_31_rule(Parser *p) Token * _keyword; expr_ty z; if ( - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (z = _PyPegen_name_token(p)) // NAME ) @@ -29453,7 +29355,7 @@ _tmp_62_rule(Parser *p) Token * _keyword; expr_ty z; if ( - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (z = _PyPegen_name_token(p)) // NAME ) @@ -29499,7 +29401,7 @@ _tmp_63_rule(Parser *p) Token * _keyword; expr_ty z; if ( - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (z = _PyPegen_name_token(p)) // NAME ) @@ -35341,7 +35243,7 @@ _tmp_158_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_158[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'else'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 665)) // token='else' + (_keyword = _PyPegen_expect_token(p, 663)) // token='else' ) { D(fprintf(stderr, "%*c+ _tmp_158[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'else'")); @@ -38922,7 +38824,7 @@ _tmp_214_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_214[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'except'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 657)) // token='except' + (_keyword = _PyPegen_expect_token(p, 655)) // token='except' ) { D(fprintf(stderr, "%*c+ _tmp_214[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'except'")); @@ -38941,7 +38843,7 @@ _tmp_214_rule(Parser *p) D(fprintf(stderr, "%*c> _tmp_214[%d-%d]: %s\n", p->level, ' ', _mark, p->mark, "'finally'")); Token * _keyword; if ( - (_keyword = _PyPegen_expect_token(p, 653)) // token='finally' + (_keyword = _PyPegen_expect_token(p, 651)) // token='finally' ) { D(fprintf(stderr, "%*c+ _tmp_214[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'finally'")); @@ -39119,7 +39021,7 @@ _tmp_217_rule(Parser *p) Token * _keyword; expr_ty name_var; if ( - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (name_var = _PyPegen_name_token(p)) // NAME ) @@ -39341,7 +39243,7 @@ _tmp_221_rule(Parser *p) Token * _keyword; expr_ty name_var; if ( - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (name_var = _PyPegen_name_token(p)) // NAME ) @@ -39382,7 +39284,7 @@ _tmp_222_rule(Parser *p) Token * _keyword; expr_ty name_var; if ( - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (name_var = _PyPegen_name_token(p)) // NAME ) @@ -39480,7 +39382,7 @@ _tmp_224_rule(Parser *p) Token * _keyword; expr_ty name_var; if ( - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (name_var = _PyPegen_name_token(p)) // NAME ) @@ -39521,7 +39423,7 @@ _tmp_225_rule(Parser *p) Token * _keyword; expr_ty name_var; if ( - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (name_var = _PyPegen_name_token(p)) // NAME ) @@ -41216,7 +41118,7 @@ _tmp_255_rule(Parser *p) Token * _keyword; expr_ty z; if ( - (_keyword = _PyPegen_expect_token(p, 662)) // token='if' + (_keyword = _PyPegen_expect_token(p, 660)) // token='if' && (z = disjunction_rule(p)) // disjunction ) @@ -41262,7 +41164,7 @@ _tmp_256_rule(Parser *p) Token * _keyword; expr_ty z; if ( - (_keyword = _PyPegen_expect_token(p, 662)) // token='if' + (_keyword = _PyPegen_expect_token(p, 660)) // token='if' && (z = disjunction_rule(p)) // disjunction ) @@ -41975,7 +41877,7 @@ _tmp_271_rule(Parser *p) Token * _keyword; expr_ty name_var; if ( - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (name_var = _PyPegen_name_token(p)) // NAME ) @@ -42234,7 +42136,7 @@ _tmp_276_rule(Parser *p) Token * _keyword; expr_ty star_target_var; if ( - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (star_target_var = star_target_rule(p)) // star_target ) @@ -42275,7 +42177,7 @@ _tmp_277_rule(Parser *p) Token * _keyword; expr_ty star_target_var; if ( - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (star_target_var = star_target_rule(p)) // star_target ) @@ -42316,7 +42218,7 @@ _tmp_278_rule(Parser *p) Token * _keyword; expr_ty star_target_var; if ( - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (star_target_var = star_target_rule(p)) // star_target ) @@ -42357,7 +42259,7 @@ _tmp_279_rule(Parser *p) Token * _keyword; expr_ty star_target_var; if ( - (_keyword = _PyPegen_expect_token(p, 660)) // token='as' + (_keyword = _PyPegen_expect_token(p, 658)) // token='as' && (star_target_var = star_target_rule(p)) // star_target ) From 8d63c8d47b9edd8ac2f0b395b2fa0ae5f571252d Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Tue, 4 Jun 2024 02:36:28 -0400 Subject: [PATCH 49/72] gh-106531: Apply changes from importlib_resources 6.3.2 (#117054) Apply changes from importlib_resources 6.3.2. --- Lib/importlib/resources/_common.py | 2 + Lib/importlib/resources/readers.py | 54 ++++++- .../resources/data01/subdirectory/binary.file | Bin 4 -> 4 bytes .../namespacedata01/subdirectory/binary.file | 1 + .../test_importlib/resources/test_contents.py | 2 +- .../test_importlib/resources/test_custom.py | 6 +- .../test_importlib/resources/test_files.py | 14 +- .../test_importlib/resources/test_open.py | 6 +- .../test_importlib/resources/test_path.py | 12 +- .../test_importlib/resources/test_read.py | 29 +++- .../test_importlib/resources/test_reader.py | 29 ++-- .../test_importlib/resources/test_resource.py | 134 ++++++++---------- Lib/test/test_importlib/resources/util.py | 51 +++---- Lib/test/test_importlib/resources/zip.py | 30 ++++ Makefile.pre.in | 1 + ...-03-19-21-41-31.gh-issue-106531.Mgd--6.rst | 6 + 16 files changed, 231 insertions(+), 146 deletions(-) create mode 100644 Lib/test/test_importlib/resources/namespacedata01/subdirectory/binary.file create mode 100755 Lib/test/test_importlib/resources/zip.py create mode 100644 Misc/NEWS.d/next/Library/2024-03-19-21-41-31.gh-issue-106531.Mgd--6.rst diff --git a/Lib/importlib/resources/_common.py b/Lib/importlib/resources/_common.py index e18082fb3d26a0..ca5b06743b46a6 100644 --- a/Lib/importlib/resources/_common.py +++ b/Lib/importlib/resources/_common.py @@ -25,6 +25,8 @@ def package_to_anchor(func): >>> files('a', 'b') Traceback (most recent call last): TypeError: files() takes from 0 to 1 positional arguments but 2 were given + + Remove this compatibility in Python 3.14. """ undefined = object() diff --git a/Lib/importlib/resources/readers.py b/Lib/importlib/resources/readers.py index c3cdf769cbecb0..b86cdeff57c4c2 100644 --- a/Lib/importlib/resources/readers.py +++ b/Lib/importlib/resources/readers.py @@ -1,7 +1,10 @@ import collections +import contextlib import itertools import pathlib import operator +import re +import warnings import zipfile from . import abc @@ -62,7 +65,7 @@ class MultiplexedPath(abc.Traversable): """ def __init__(self, *paths): - self._paths = list(map(pathlib.Path, remove_duplicates(paths))) + self._paths = list(map(_ensure_traversable, remove_duplicates(paths))) if not self._paths: message = 'MultiplexedPath must contain at least one path' raise FileNotFoundError(message) @@ -130,7 +133,36 @@ class NamespaceReader(abc.TraversableResources): def __init__(self, namespace_path): if 'NamespacePath' not in str(namespace_path): raise ValueError('Invalid path') - self.path = MultiplexedPath(*list(namespace_path)) + self.path = MultiplexedPath(*map(self._resolve, namespace_path)) + + @classmethod + def _resolve(cls, path_str) -> abc.Traversable: + r""" + Given an item from a namespace path, resolve it to a Traversable. + + path_str might be a directory on the filesystem or a path to a + zipfile plus the path within the zipfile, e.g. ``/foo/bar`` or + ``/foo/baz.zip/inner_dir`` or ``foo\baz.zip\inner_dir\sub``. + """ + (dir,) = (cand for cand in cls._candidate_paths(path_str) if cand.is_dir()) + return dir + + @classmethod + def _candidate_paths(cls, path_str): + yield pathlib.Path(path_str) + yield from cls._resolve_zip_path(path_str) + + @staticmethod + def _resolve_zip_path(path_str): + for match in reversed(list(re.finditer(r'[\\/]', path_str))): + with contextlib.suppress( + FileNotFoundError, + IsADirectoryError, + NotADirectoryError, + PermissionError, + ): + inner = path_str[match.end() :].replace('\\', '/') + '/' + yield zipfile.Path(path_str[: match.start()], inner.lstrip('/')) def resource_path(self, resource): """ @@ -142,3 +174,21 @@ def resource_path(self, resource): def files(self): return self.path + + +def _ensure_traversable(path): + """ + Convert deprecated string arguments to traversables (pathlib.Path). + + Remove with Python 3.15. + """ + if not isinstance(path, str): + return path + + warnings.warn( + "String arguments are deprecated. Pass a Traversable instead.", + DeprecationWarning, + stacklevel=3, + ) + + return pathlib.Path(path) diff --git a/Lib/test/test_importlib/resources/data01/subdirectory/binary.file b/Lib/test/test_importlib/resources/data01/subdirectory/binary.file index eaf36c1daccfdf325514461cd1a2ffbc139b5464..5bd8bb897b13225c93a1d26baa88c96b7bd5d817 100644 GIT binary patch literal 4 LcmZQ!Wn%{b05$*@ literal 4 LcmZQzWMT#Y01f~L diff --git a/Lib/test/test_importlib/resources/namespacedata01/subdirectory/binary.file b/Lib/test/test_importlib/resources/namespacedata01/subdirectory/binary.file new file mode 100644 index 00000000000000..100f50643d8d21 --- /dev/null +++ b/Lib/test/test_importlib/resources/namespacedata01/subdirectory/binary.file @@ -0,0 +1 @@ +  \ No newline at end of file diff --git a/Lib/test/test_importlib/resources/test_contents.py b/Lib/test/test_importlib/resources/test_contents.py index 1a13f043a86f03..beab67ccc21680 100644 --- a/Lib/test/test_importlib/resources/test_contents.py +++ b/Lib/test/test_importlib/resources/test_contents.py @@ -31,8 +31,8 @@ class ContentsZipTests(ContentsTests, util.ZipSetup, unittest.TestCase): class ContentsNamespaceTests(ContentsTests, unittest.TestCase): expected = { # no __init__ because of namespace design - # no subdirectory as incidental difference in fixture 'binary.file', + 'subdirectory', 'utf-16.file', 'utf-8.file', } diff --git a/Lib/test/test_importlib/resources/test_custom.py b/Lib/test/test_importlib/resources/test_custom.py index 73127209a2761b..640f90fc0dd91a 100644 --- a/Lib/test/test_importlib/resources/test_custom.py +++ b/Lib/test/test_importlib/resources/test_custom.py @@ -5,6 +5,7 @@ from test.support import os_helper from importlib import resources +from importlib.resources import abc from importlib.resources.abc import TraversableResources, ResourceReader from . import util @@ -39,8 +40,9 @@ def setUp(self): self.addCleanup(self.fixtures.close) def test_custom_loader(self): - temp_dir = self.fixtures.enter_context(os_helper.temp_dir()) + temp_dir = pathlib.Path(self.fixtures.enter_context(os_helper.temp_dir())) loader = SimpleLoader(MagicResources(temp_dir)) pkg = util.create_package_from_loader(loader) files = resources.files(pkg) - assert files is temp_dir + assert isinstance(files, abc.Traversable) + assert list(files.iterdir()) == [] diff --git a/Lib/test/test_importlib/resources/test_files.py b/Lib/test/test_importlib/resources/test_files.py index 26c8b04e44c3b9..7df6d03ead7480 100644 --- a/Lib/test/test_importlib/resources/test_files.py +++ b/Lib/test/test_importlib/resources/test_files.py @@ -1,4 +1,3 @@ -import typing import textwrap import unittest import warnings @@ -32,13 +31,14 @@ def test_read_text(self): actual = files.joinpath('utf-8.file').read_text(encoding='utf-8') assert actual == 'Hello, UTF-8 world!\n' - @unittest.skipUnless( - hasattr(typing, 'runtime_checkable'), - "Only suitable when typing supports runtime_checkable", - ) def test_traversable(self): assert isinstance(resources.files(self.data), Traversable) + def test_joinpath_with_multiple_args(self): + files = resources.files(self.data) + binfile = files.joinpath('subdirectory', 'binary.file') + self.assertTrue(binfile.is_file()) + def test_old_parameter(self): """ Files used to take a 'package' parameter. Make sure anyone @@ -64,6 +64,10 @@ def setUp(self): self.data = namespacedata01 +class OpenNamespaceZipTests(FilesTests, util.ZipSetup, unittest.TestCase): + ZIP_MODULE = 'namespacedata01' + + class SiteDir: def setUp(self): self.fixtures = contextlib.ExitStack() diff --git a/Lib/test/test_importlib/resources/test_open.py b/Lib/test/test_importlib/resources/test_open.py index 86becb4bfaad37..3b6b2142ef47b1 100644 --- a/Lib/test/test_importlib/resources/test_open.py +++ b/Lib/test/test_importlib/resources/test_open.py @@ -24,7 +24,7 @@ def test_open_binary(self): target = resources.files(self.data) / 'binary.file' with target.open('rb') as fp: result = fp.read() - self.assertEqual(result, b'\x00\x01\x02\x03') + self.assertEqual(result, bytes(range(4))) def test_open_text_default_encoding(self): target = resources.files(self.data) / 'utf-8.file' @@ -81,5 +81,9 @@ class OpenZipTests(OpenTests, util.ZipSetup, unittest.TestCase): pass +class OpenNamespaceZipTests(OpenTests, util.ZipSetup, unittest.TestCase): + ZIP_MODULE = 'namespacedata01' + + if __name__ == '__main__': unittest.main() diff --git a/Lib/test/test_importlib/resources/test_path.py b/Lib/test/test_importlib/resources/test_path.py index 34a6bdd2d58b91..90b22905ab8692 100644 --- a/Lib/test/test_importlib/resources/test_path.py +++ b/Lib/test/test_importlib/resources/test_path.py @@ -1,4 +1,5 @@ import io +import pathlib import unittest from importlib import resources @@ -15,18 +16,13 @@ def execute(self, package, path): class PathTests: def test_reading(self): """ - Path should be readable. - - Test also implicitly verifies the returned object is a pathlib.Path - instance. + Path should be readable and a pathlib.Path instance. """ target = resources.files(self.data) / 'utf-8.file' with resources.as_file(target) as path: + self.assertIsInstance(path, pathlib.Path) self.assertTrue(path.name.endswith("utf-8.file"), repr(path)) - # pathlib.Path.read_text() was introduced in Python 3.5. - with path.open('r', encoding='utf-8') as file: - text = file.read() - self.assertEqual('Hello, UTF-8 world!\n', text) + self.assertEqual('Hello, UTF-8 world!\n', path.read_text(encoding='utf-8')) class PathDiskTests(PathTests, unittest.TestCase): diff --git a/Lib/test/test_importlib/resources/test_read.py b/Lib/test/test_importlib/resources/test_read.py index 088982681e8b0c..984feecbb9ed69 100644 --- a/Lib/test/test_importlib/resources/test_read.py +++ b/Lib/test/test_importlib/resources/test_read.py @@ -18,7 +18,7 @@ def execute(self, package, path): class ReadTests: def test_read_bytes(self): result = resources.files(self.data).joinpath('binary.file').read_bytes() - self.assertEqual(result, b'\0\1\2\3') + self.assertEqual(result, bytes(range(4))) def test_read_text_default_encoding(self): result = ( @@ -57,17 +57,15 @@ class ReadDiskTests(ReadTests, unittest.TestCase): class ReadZipTests(ReadTests, util.ZipSetup, unittest.TestCase): def test_read_submodule_resource(self): - submodule = import_module('ziptestdata.subdirectory') + submodule = import_module('data01.subdirectory') result = resources.files(submodule).joinpath('binary.file').read_bytes() - self.assertEqual(result, b'\0\1\2\3') + self.assertEqual(result, bytes(range(4, 8))) def test_read_submodule_resource_by_name(self): result = ( - resources.files('ziptestdata.subdirectory') - .joinpath('binary.file') - .read_bytes() + resources.files('data01.subdirectory').joinpath('binary.file').read_bytes() ) - self.assertEqual(result, b'\0\1\2\3') + self.assertEqual(result, bytes(range(4, 8))) class ReadNamespaceTests(ReadTests, unittest.TestCase): @@ -77,5 +75,22 @@ def setUp(self): self.data = namespacedata01 +class ReadNamespaceZipTests(ReadTests, util.ZipSetup, unittest.TestCase): + ZIP_MODULE = 'namespacedata01' + + def test_read_submodule_resource(self): + submodule = import_module('namespacedata01.subdirectory') + result = resources.files(submodule).joinpath('binary.file').read_bytes() + self.assertEqual(result, bytes(range(12, 16))) + + def test_read_submodule_resource_by_name(self): + result = ( + resources.files('namespacedata01.subdirectory') + .joinpath('binary.file') + .read_bytes() + ) + self.assertEqual(result, bytes(range(12, 16))) + + if __name__ == '__main__': unittest.main() diff --git a/Lib/test/test_importlib/resources/test_reader.py b/Lib/test/test_importlib/resources/test_reader.py index 8670f72a334585..dac9c2a892ffd2 100644 --- a/Lib/test/test_importlib/resources/test_reader.py +++ b/Lib/test/test_importlib/resources/test_reader.py @@ -10,8 +10,7 @@ class MultiplexedPathTest(unittest.TestCase): @classmethod def setUpClass(cls): - path = pathlib.Path(__file__).parent / 'namespacedata01' - cls.folder = str(path) + cls.folder = pathlib.Path(__file__).parent / 'namespacedata01' def test_init_no_paths(self): with self.assertRaises(FileNotFoundError): @@ -19,7 +18,7 @@ def test_init_no_paths(self): def test_init_file(self): with self.assertRaises(NotADirectoryError): - MultiplexedPath(os.path.join(self.folder, 'binary.file')) + MultiplexedPath(self.folder / 'binary.file') def test_iterdir(self): contents = {path.name for path in MultiplexedPath(self.folder).iterdir()} @@ -27,10 +26,12 @@ def test_iterdir(self): contents.remove('__pycache__') except (KeyError, ValueError): pass - self.assertEqual(contents, {'binary.file', 'utf-16.file', 'utf-8.file'}) + self.assertEqual( + contents, {'subdirectory', 'binary.file', 'utf-16.file', 'utf-8.file'} + ) def test_iterdir_duplicate(self): - data01 = os.path.abspath(os.path.join(__file__, '..', 'data01')) + data01 = pathlib.Path(__file__).parent.joinpath('data01') contents = { path.name for path in MultiplexedPath(self.folder, data01).iterdir() } @@ -60,17 +61,17 @@ def test_open_file(self): path.open() def test_join_path(self): - prefix = os.path.abspath(os.path.join(__file__, '..')) - data01 = os.path.join(prefix, 'data01') + data01 = pathlib.Path(__file__).parent.joinpath('data01') + prefix = str(data01.parent) path = MultiplexedPath(self.folder, data01) self.assertEqual( str(path.joinpath('binary.file'))[len(prefix) + 1 :], os.path.join('namespacedata01', 'binary.file'), ) - self.assertEqual( - str(path.joinpath('subdirectory'))[len(prefix) + 1 :], - os.path.join('data01', 'subdirectory'), - ) + sub = path.joinpath('subdirectory') + assert isinstance(sub, MultiplexedPath) + assert 'namespacedata01' in str(sub) + assert 'data01' in str(sub) self.assertEqual( str(path.joinpath('imaginary'))[len(prefix) + 1 :], os.path.join('namespacedata01', 'imaginary'), @@ -82,9 +83,9 @@ def test_join_path_compound(self): assert not path.joinpath('imaginary/foo.py').exists() def test_join_path_common_subdir(self): - prefix = os.path.abspath(os.path.join(__file__, '..')) - data01 = os.path.join(prefix, 'data01') - data02 = os.path.join(prefix, 'data02') + data01 = pathlib.Path(__file__).parent.joinpath('data01') + data02 = pathlib.Path(__file__).parent.joinpath('data02') + prefix = str(data01.parent) path = MultiplexedPath(data01, data02) self.assertIsInstance(path.joinpath('subdirectory'), MultiplexedPath) self.assertEqual( diff --git a/Lib/test/test_importlib/resources/test_resource.py b/Lib/test/test_importlib/resources/test_resource.py index 6f75cf57f03d02..d1d45d9b4617f3 100644 --- a/Lib/test/test_importlib/resources/test_resource.py +++ b/Lib/test/test_importlib/resources/test_resource.py @@ -1,15 +1,10 @@ -import contextlib import sys import unittest -import uuid import pathlib from . import data01 -from . import zipdata01, zipdata02 from . import util from importlib import resources, import_module -from test.support import import_helper, os_helper -from test.support.os_helper import unlink class ResourceTests: @@ -89,34 +84,32 @@ def test_package_has_no_reader_fallback(self): class ResourceFromZipsTest01(util.ZipSetupBase, unittest.TestCase): - ZIP_MODULE = zipdata01 # type: ignore + ZIP_MODULE = 'data01' def test_is_submodule_resource(self): - submodule = import_module('ziptestdata.subdirectory') + submodule = import_module('data01.subdirectory') self.assertTrue(resources.files(submodule).joinpath('binary.file').is_file()) def test_read_submodule_resource_by_name(self): self.assertTrue( - resources.files('ziptestdata.subdirectory') - .joinpath('binary.file') - .is_file() + resources.files('data01.subdirectory').joinpath('binary.file').is_file() ) def test_submodule_contents(self): - submodule = import_module('ziptestdata.subdirectory') + submodule = import_module('data01.subdirectory') self.assertEqual( names(resources.files(submodule)), {'__init__.py', 'binary.file'} ) def test_submodule_contents_by_name(self): self.assertEqual( - names(resources.files('ziptestdata.subdirectory')), + names(resources.files('data01.subdirectory')), {'__init__.py', 'binary.file'}, ) def test_as_file_directory(self): - with resources.as_file(resources.files('ziptestdata')) as data: - assert data.name == 'ziptestdata' + with resources.as_file(resources.files('data01')) as data: + assert data.name == 'data01' assert data.is_dir() assert data.joinpath('subdirectory').is_dir() assert len(list(data.iterdir())) @@ -124,7 +117,7 @@ def test_as_file_directory(self): class ResourceFromZipsTest02(util.ZipSetupBase, unittest.TestCase): - ZIP_MODULE = zipdata02 # type: ignore + ZIP_MODULE = 'data02' def test_unrelated_contents(self): """ @@ -132,93 +125,48 @@ def test_unrelated_contents(self): distinct resources. Ref python/importlib_resources#44. """ self.assertEqual( - names(resources.files('ziptestdata.one')), + names(resources.files('data02.one')), {'__init__.py', 'resource1.txt'}, ) self.assertEqual( - names(resources.files('ziptestdata.two')), + names(resources.files('data02.two')), {'__init__.py', 'resource2.txt'}, ) -@contextlib.contextmanager -def zip_on_path(dir): - data_path = pathlib.Path(zipdata01.__file__) - source_zip_path = data_path.parent.joinpath('ziptestdata.zip') - zip_path = pathlib.Path(dir) / f'{uuid.uuid4()}.zip' - zip_path.write_bytes(source_zip_path.read_bytes()) - sys.path.append(str(zip_path)) - import_module('ziptestdata') - - try: - yield - finally: - with contextlib.suppress(ValueError): - sys.path.remove(str(zip_path)) - - with contextlib.suppress(KeyError): - del sys.path_importer_cache[str(zip_path)] - del sys.modules['ziptestdata'] - - with contextlib.suppress(OSError): - unlink(zip_path) - - -class DeletingZipsTest(unittest.TestCase): +class DeletingZipsTest(util.ZipSetupBase, unittest.TestCase): """Having accessed resources in a zip file should not keep an open reference to the zip. """ - def setUp(self): - self.fixtures = contextlib.ExitStack() - self.addCleanup(self.fixtures.close) - - modules = import_helper.modules_setup() - self.addCleanup(import_helper.modules_cleanup, *modules) - - temp_dir = self.fixtures.enter_context(os_helper.temp_dir()) - self.fixtures.enter_context(zip_on_path(temp_dir)) - def test_iterdir_does_not_keep_open(self): - [item.name for item in resources.files('ziptestdata').iterdir()] + [item.name for item in resources.files('data01').iterdir()] def test_is_file_does_not_keep_open(self): - resources.files('ziptestdata').joinpath('binary.file').is_file() + resources.files('data01').joinpath('binary.file').is_file() def test_is_file_failure_does_not_keep_open(self): - resources.files('ziptestdata').joinpath('not-present').is_file() + resources.files('data01').joinpath('not-present').is_file() @unittest.skip("Desired but not supported.") def test_as_file_does_not_keep_open(self): # pragma: no cover - resources.as_file(resources.files('ziptestdata') / 'binary.file') + resources.as_file(resources.files('data01') / 'binary.file') def test_entered_path_does_not_keep_open(self): """ Mimic what certifi does on import to make its bundle available for the process duration. """ - resources.as_file(resources.files('ziptestdata') / 'binary.file').__enter__() + resources.as_file(resources.files('data01') / 'binary.file').__enter__() def test_read_binary_does_not_keep_open(self): - resources.files('ziptestdata').joinpath('binary.file').read_bytes() + resources.files('data01').joinpath('binary.file').read_bytes() def test_read_text_does_not_keep_open(self): - resources.files('ziptestdata').joinpath('utf-8.file').read_text( - encoding='utf-8' - ) + resources.files('data01').joinpath('utf-8.file').read_text(encoding='utf-8') -class ResourceFromNamespaceTest01(unittest.TestCase): - site_dir = str(pathlib.Path(__file__).parent) - - @classmethod - def setUpClass(cls): - sys.path.append(cls.site_dir) - - @classmethod - def tearDownClass(cls): - sys.path.remove(cls.site_dir) - +class ResourceFromNamespaceTests: def test_is_submodule_resource(self): self.assertTrue( resources.files(import_module('namespacedata01')) @@ -237,7 +185,9 @@ def test_submodule_contents(self): contents.remove('__pycache__') except KeyError: pass - self.assertEqual(contents, {'binary.file', 'utf-8.file', 'utf-16.file'}) + self.assertEqual( + contents, {'subdirectory', 'binary.file', 'utf-8.file', 'utf-16.file'} + ) def test_submodule_contents_by_name(self): contents = names(resources.files('namespacedata01')) @@ -245,7 +195,45 @@ def test_submodule_contents_by_name(self): contents.remove('__pycache__') except KeyError: pass - self.assertEqual(contents, {'binary.file', 'utf-8.file', 'utf-16.file'}) + self.assertEqual( + contents, {'subdirectory', 'binary.file', 'utf-8.file', 'utf-16.file'} + ) + + def test_submodule_sub_contents(self): + contents = names(resources.files(import_module('namespacedata01.subdirectory'))) + try: + contents.remove('__pycache__') + except KeyError: + pass + self.assertEqual(contents, {'binary.file'}) + + def test_submodule_sub_contents_by_name(self): + contents = names(resources.files('namespacedata01.subdirectory')) + try: + contents.remove('__pycache__') + except KeyError: + pass + self.assertEqual(contents, {'binary.file'}) + + +class ResourceFromNamespaceDiskTests(ResourceFromNamespaceTests, unittest.TestCase): + site_dir = str(pathlib.Path(__file__).parent) + + @classmethod + def setUpClass(cls): + sys.path.append(cls.site_dir) + + @classmethod + def tearDownClass(cls): + sys.path.remove(cls.site_dir) + + +class ResourceFromNamespaceZipTests( + util.ZipSetupBase, + ResourceFromNamespaceTests, + unittest.TestCase, +): + ZIP_MODULE = 'namespacedata01' if __name__ == '__main__': diff --git a/Lib/test/test_importlib/resources/util.py b/Lib/test/test_importlib/resources/util.py index dbe6ee81476699..d4bf3e6cc5dfdc 100644 --- a/Lib/test/test_importlib/resources/util.py +++ b/Lib/test/test_importlib/resources/util.py @@ -4,11 +4,12 @@ import sys import types import pathlib +import contextlib from . import data01 -from . import zipdata01 from importlib.resources.abc import ResourceReader -from test.support import import_helper +from test.support import import_helper, os_helper +from . import zip as zip_ from importlib.machinery import ModuleSpec @@ -141,39 +142,23 @@ def test_useless_loader(self): class ZipSetupBase: - ZIP_MODULE = None - - @classmethod - def setUpClass(cls): - data_path = pathlib.Path(cls.ZIP_MODULE.__file__) - data_dir = data_path.parent - cls._zip_path = str(data_dir / 'ziptestdata.zip') - sys.path.append(cls._zip_path) - cls.data = importlib.import_module('ziptestdata') - - @classmethod - def tearDownClass(cls): - try: - sys.path.remove(cls._zip_path) - except ValueError: - pass - - try: - del sys.path_importer_cache[cls._zip_path] - del sys.modules[cls.data.__name__] - except KeyError: - pass - - try: - del cls.data - del cls._zip_path - except AttributeError: - pass + ZIP_MODULE = 'data01' def setUp(self): - modules = import_helper.modules_setup() - self.addCleanup(import_helper.modules_cleanup, *modules) + self.fixtures = contextlib.ExitStack() + self.addCleanup(self.fixtures.close) + + self.fixtures.enter_context(import_helper.isolated_modules()) + + temp_dir = self.fixtures.enter_context(os_helper.temp_dir()) + modules = pathlib.Path(temp_dir) / 'zipped modules.zip' + src_path = pathlib.Path(__file__).parent.joinpath(self.ZIP_MODULE) + self.fixtures.enter_context( + import_helper.DirsOnSysPath(str(zip_.make_zip_file(src_path, modules))) + ) + + self.data = importlib.import_module(self.ZIP_MODULE) class ZipSetup(ZipSetupBase): - ZIP_MODULE = zipdata01 # type: ignore + pass diff --git a/Lib/test/test_importlib/resources/zip.py b/Lib/test/test_importlib/resources/zip.py new file mode 100755 index 00000000000000..4dcf6facc770cb --- /dev/null +++ b/Lib/test/test_importlib/resources/zip.py @@ -0,0 +1,30 @@ +""" +Generate zip test data files. +""" + +import contextlib +import os +import pathlib +import zipfile + + +def make_zip_file(src, dst): + """ + Zip the files in src into a new zipfile at dst. + """ + with zipfile.ZipFile(dst, 'w') as zf: + for src_path, rel in walk(src): + dst_name = src.name / pathlib.PurePosixPath(rel.as_posix()) + zf.write(src_path, dst_name) + zipfile._path.CompleteDirs.inject(zf) + return dst + + +def walk(datapath): + for dirpath, dirnames, filenames in os.walk(datapath): + with contextlib.suppress(ValueError): + dirnames.remove('__pycache__') + for filename in filenames: + res = pathlib.Path(dirpath) / filename + rel = res.relative_to(datapath) + yield res, rel diff --git a/Makefile.pre.in b/Makefile.pre.in index 9a2fc34f030662..22dba279faa935 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -2438,6 +2438,7 @@ TESTSUBDIRS= idlelib/idle_test \ test/test_importlib/resources/data03/namespace/portion1 \ test/test_importlib/resources/data03/namespace/portion2 \ test/test_importlib/resources/namespacedata01 \ + test/test_importlib/resources/namespacedata01/subdirectory \ test/test_importlib/resources/zipdata01 \ test/test_importlib/resources/zipdata02 \ test/test_importlib/source \ diff --git a/Misc/NEWS.d/next/Library/2024-03-19-21-41-31.gh-issue-106531.Mgd--6.rst b/Misc/NEWS.d/next/Library/2024-03-19-21-41-31.gh-issue-106531.Mgd--6.rst new file mode 100644 index 00000000000000..6a5783c5ad9846 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-03-19-21-41-31.gh-issue-106531.Mgd--6.rst @@ -0,0 +1,6 @@ +In :mod:`importlib.resources`, sync with `importlib_resources 6.3.2 +`_, +including: ``MultiplexedPath`` now expects ``Traversable`` paths, +deprecating string arguments to ``MultiplexedPath``; Enabled support for +resources in namespace packages in zip files; Fixed ``NotADirectoryError`` +when calling files on a subdirectory of a namespace package. From a8f1152b70d707340b394689cd09aa0831da3601 Mon Sep 17 00:00:00 2001 From: "d.grigonis" Date: Tue, 4 Jun 2024 10:44:49 +0300 Subject: [PATCH 50/72] gh-119879: str.find(): Utilize last character gap for two-way periodic needles (#119880) --- ...-06-02-06-12-35.gh-issue-119879.Jit951.rst | 1 + Objects/stringlib/fastsearch.h | 63 ++++++++++--------- 2 files changed, 36 insertions(+), 28 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-06-02-06-12-35.gh-issue-119879.Jit951.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-02-06-12-35.gh-issue-119879.Jit951.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-02-06-12-35.gh-issue-119879.Jit951.rst new file mode 100644 index 00000000000000..89de6b0299a35a --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-02-06-12-35.gh-issue-119879.Jit951.rst @@ -0,0 +1 @@ +String search is now slightly faster for certain cases. It now utilizes last character gap (good suffix rule) for two-way periodic needles. diff --git a/Objects/stringlib/fastsearch.h b/Objects/stringlib/fastsearch.h index 257b7bd6788ad2..309ed1554f4699 100644 --- a/Objects/stringlib/fastsearch.h +++ b/Objects/stringlib/fastsearch.h @@ -256,7 +256,7 @@ STRINGLIB(_factorize)(const STRINGLIB_CHAR *needle, The local period of the cut is the minimal length of a string w such that (left endswith w or w endswith left) - and (right startswith w or w startswith left). + and (right startswith w or w startswith right). The Critical Factorization Theorem says that this maximal local period is the global period of the string. @@ -337,21 +337,20 @@ STRINGLIB(_preprocess)(const STRINGLIB_CHAR *needle, Py_ssize_t len_needle, if (p->is_periodic) { assert(p->cut <= len_needle/2); assert(p->cut < p->period); - p->gap = 0; // unused } else { // A lower bound on the period p->period = Py_MAX(p->cut, len_needle - p->cut) + 1; - // The gap between the last character and the previous - // occurrence of an equivalent character (modulo TABLE_SIZE) - p->gap = len_needle; - STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK; - for (Py_ssize_t i = len_needle - 2; i >= 0; i--) { - STRINGLIB_CHAR x = needle[i] & TABLE_MASK; - if (x == last) { - p->gap = len_needle - 1 - i; - break; - } + } + // The gap between the last character and the previous + // occurrence of an equivalent character (modulo TABLE_SIZE) + p->gap = len_needle; + STRINGLIB_CHAR last = needle[len_needle - 1] & TABLE_MASK; + for (Py_ssize_t i = len_needle - 2; i >= 0; i--) { + STRINGLIB_CHAR x = needle[i] & TABLE_MASK; + if (x == last) { + p->gap = len_needle - 1 - i; + break; } } // Fill up a compressed Boyer-Moore "Bad Character" table @@ -383,6 +382,8 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack, const STRINGLIB_CHAR *window; LOG("===== Two-way: \"%s\" in \"%s\". =====\n", needle, haystack); + Py_ssize_t gap = p->gap; + Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap); if (p->is_periodic) { LOG("Needle is periodic.\n"); Py_ssize_t memory = 0; @@ -408,8 +409,16 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack, Py_ssize_t i = Py_MAX(cut, memory); for (; i < len_needle; i++) { if (needle[i] != window[i]) { - LOG("Right half does not match.\n"); - window_last += i - cut + 1; + if (i < gap_jump_end) { + LOG("Early right half mismatch: jump by gap.\n"); + assert(gap >= i - cut + 1); + window_last += gap; + } + else { + LOG("Late right half mismatch: jump by n (>gap)\n"); + assert(i - cut + 1 > gap); + window_last += i - cut + 1; + } memory = 0; goto periodicwindowloop; } @@ -442,10 +451,8 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack, } } else { - Py_ssize_t gap = p->gap; period = Py_MAX(gap, period); LOG("Needle is not periodic.\n"); - Py_ssize_t gap_jump_end = Py_MIN(len_needle, cut + gap); windowloop: while (window_last < haystack_end) { for (;;) { @@ -463,19 +470,19 @@ STRINGLIB(_two_way)(const STRINGLIB_CHAR *haystack, Py_ssize_t len_haystack, window = window_last - len_needle + 1; assert((window[len_needle - 1] & TABLE_MASK) == (needle[len_needle - 1] & TABLE_MASK)); - for (Py_ssize_t i = cut; i < gap_jump_end; i++) { - if (needle[i] != window[i]) { - LOG("Early right half mismatch: jump by gap.\n"); - assert(gap >= i - cut + 1); - window_last += gap; - goto windowloop; - } - } - for (Py_ssize_t i = gap_jump_end; i < len_needle; i++) { + Py_ssize_t i = cut; + for (; i < len_needle; i++) { if (needle[i] != window[i]) { - LOG("Late right half mismatch.\n"); - assert(i - cut + 1 > gap); - window_last += i - cut + 1; + if (i < gap_jump_end) { + LOG("Early right half mismatch: jump by gap.\n"); + assert(gap >= i - cut + 1); + window_last += gap; + } + else { + LOG("Late right half mismatch: jump by n (>gap)\n"); + assert(i - cut + 1 > gap); + window_last += i - cut + 1; + } goto windowloop; } } From 5c48eb0cc6c3e84aafda0a734a05ecec14fc0ccf Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Tue, 4 Jun 2024 09:17:45 +0100 Subject: [PATCH 51/72] gh-119070: Update test_shebang_executable_extension to always use non-installed version (GH-119846) --- Lib/test/test_launcher.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_launcher.py b/Lib/test/test_launcher.py index 6d358ac6f16a27..58baae25df3df7 100644 --- a/Lib/test/test_launcher.py +++ b/Lib/test/test_launcher.py @@ -766,9 +766,9 @@ def test_shebang_command_in_venv(self): self.assertEqual(data["stdout"].strip(), f"{quote(exe)} arg1 {quote(script)}") def test_shebang_executable_extension(self): - with self.script('#! /usr/bin/env python3.12') as script: - data = self.run_py([script]) - expect = "# Search PATH for python3.12.exe" + with self.script('#! /usr/bin/env python3.99') as script: + data = self.run_py([script], expect_returncode=103) + expect = "# Search PATH for python3.99.exe" actual = [line.strip() for line in data["stderr"].splitlines() if line.startswith("# Search PATH")] self.assertEqual([expect], actual) From 26e5c6e8351adb1a77a88920ff33fc8ebee9a99e Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 4 Jun 2024 11:23:55 +0200 Subject: [PATCH 52/72] gh-119613: Soft deprecate the Py_MEMCPY() macro (#120020) Use directly memcpy() instead. --- Include/pyport.h | 1 + .../next/C API/2024-06-04-10-58-20.gh-issue-119613.qOr9GF.rst | 2 ++ 2 files changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/C API/2024-06-04-10-58-20.gh-issue-119613.qOr9GF.rst diff --git a/Include/pyport.h b/Include/pyport.h index 2ba81a4be42822..1f7a9b41e0ae2b 100644 --- a/Include/pyport.h +++ b/Include/pyport.h @@ -180,6 +180,7 @@ typedef Py_ssize_t Py_ssize_clean_t; # define Py_LOCAL_INLINE(type) static inline type #endif +// Soft deprecated since Python 3.14, use memcpy() instead. #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 # define Py_MEMCPY memcpy #endif diff --git a/Misc/NEWS.d/next/C API/2024-06-04-10-58-20.gh-issue-119613.qOr9GF.rst b/Misc/NEWS.d/next/C API/2024-06-04-10-58-20.gh-issue-119613.qOr9GF.rst new file mode 100644 index 00000000000000..11f075b79e6f67 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2024-06-04-10-58-20.gh-issue-119613.qOr9GF.rst @@ -0,0 +1,2 @@ +Soft deprecate the :c:macro:`!Py_MEMCPY` macro: use directly ``memcpy()`` +instead. Patch by Victor Stinner. From 5a1205b641df133932ed4c65b9a4ff5724e89963 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 4 Jun 2024 11:39:07 +0200 Subject: [PATCH 53/72] gh-111499: Fix PYTHONMALLOCSTATS at Python exit (#120021) Call _PyObject_DebugMallocStats() earlier in Py_FinalizeEx(), before the interpreter is deleted. --- Python/pylifecycle.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 67bbbd01ca0c48..cbdf5c1b771fff 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -2119,6 +2119,12 @@ Py_FinalizeEx(void) } #endif /* Py_TRACE_REFS */ +#ifdef WITH_PYMALLOC + if (malloc_stats) { + _PyObject_DebugMallocStats(stderr); + } +#endif + finalize_interp_delete(tstate->interp); #ifdef Py_REF_DEBUG @@ -2129,12 +2135,6 @@ Py_FinalizeEx(void) #endif _Py_FinalizeAllocatedBlocks(runtime); -#ifdef WITH_PYMALLOC - if (malloc_stats) { - _PyObject_DebugMallocStats(stderr); - } -#endif - call_ll_exitfuncs(runtime); _PyRuntime_Finalize(); From 9e052619a6d32051394444c24d3185db1735a893 Mon Sep 17 00:00:00 2001 From: Xie Yanbo Date: Tue, 4 Jun 2024 18:22:22 +0800 Subject: [PATCH 54/72] Fix typos in documentation and comments (#119763) --- Python/brc.c | 4 ++-- Python/ceval.c | 2 +- Python/flowgraph.c | 2 +- Python/gc.c | 4 ++-- Python/import.c | 2 +- Python/optimizer.c | 2 +- Python/vm-state.md | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Python/brc.c b/Python/brc.c index 8f87bc33007bcf..d27687052aec19 100644 --- a/Python/brc.c +++ b/Python/brc.c @@ -14,7 +14,7 @@ // thread states within each bucket. // // The queueing thread uses the eval breaker mechanism to notify the owning -// thread that it has objects to merge. Additionaly, all queued objects are +// thread that it has objects to merge. Additionally, all queued objects are // merged during GC. #include "Python.h" #include "pycore_object.h" // _Py_ExplicitMergeRefcount @@ -197,7 +197,7 @@ _Py_brc_after_fork(PyInterpreterState *interp) { // Unlock all bucket mutexes. Some of the buckets may be locked because // locks can be handed off to a parked thread (see lock.c). We don't have - // to worry about consistency here, becuase no thread can be actively + // to worry about consistency here, because no thread can be actively // modifying a bucket, but it might be paused (not yet woken up) on a // PyMutex_Lock while holding that lock. for (Py_ssize_t i = 0; i < _Py_BRC_NUM_BUCKETS; i++) { diff --git a/Python/ceval.c b/Python/ceval.c index 324d062fe9bb43..e3968b07486463 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -1478,7 +1478,7 @@ initialize_locals(PyThreadState *tstate, PyFunctionObject *func, localsplus[total_args] = u; } else if (argcount > n) { - /* Too many postional args. Error is reported later */ + /* Too many positional args. Error is reported later */ for (j = n; j < argcount; j++) { Py_DECREF(args[j]); } diff --git a/Python/flowgraph.c b/Python/flowgraph.c index b0c8004130fb07..17617e119fdaa4 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -2861,7 +2861,7 @@ _PyCfg_OptimizedCfgToInstructionSequence(cfg_builder *g, } /* This is used by _PyCompile_Assemble to fill in the jump and exception - * targets in a synthetic CFG (which is not the ouptut of the builtin compiler). + * targets in a synthetic CFG (which is not the output of the builtin compiler). */ int _PyCfg_JumpLabelsToTargets(cfg_builder *g) diff --git a/Python/gc.c b/Python/gc.c index aa8b216124c36a..b87697e1e5ecfd 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1,5 +1,5 @@ // This implements the reference cycle garbage collector. -// The Python module inteface to the collector is in gcmodule.c. +// The Python module interface to the collector is in gcmodule.c. // See https://devguide.python.org/internals/garbage-collector/ #include "Python.h" @@ -1260,7 +1260,7 @@ gc_list_set_space(PyGC_Head *list, int space) * the incremental collector must progress through the old * space faster than objects are added to the old space. * - * Each young or incremental collection adds a numebr of + * Each young or incremental collection adds a number of * objects, S (for survivors) to the old space, and * incremental collectors scan I objects from the old space. * I > S must be true. We also want I > S * N to be where diff --git a/Python/import.c b/Python/import.c index 6fe6df4db4f55e..351d463dcab465 100644 --- a/Python/import.c +++ b/Python/import.c @@ -1961,7 +1961,7 @@ import_run_extension(PyThreadState *tstate, PyModInitFunction p0, * * However, for single-phase init the module's init function will * create the module, create other objects (and allocate other - * memory), populate it and its module state, and initialze static + * memory), populate it and its module state, and initialize static * types. Some modules store other objects and data in global C * variables and register callbacks with the runtime/stdlib or * even external libraries (which is part of why we can't just diff --git a/Python/optimizer.c b/Python/optimizer.c index 5b4a6ff8cb3dad..4dc3438b6c23a4 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -1455,7 +1455,7 @@ PyUnstable_Optimizer_NewCounter(void) /* We use a bloomfilter with k = 6, m = 256 * The choice of k and the following constants - * could do with a more rigourous analysis, + * could do with a more rigorous analysis, * but here is a simple analysis: * * We want to keep the false positive rate low. diff --git a/Python/vm-state.md b/Python/vm-state.md index 4c68ba3b575cc8..b3246557dbeea3 100644 --- a/Python/vm-state.md +++ b/Python/vm-state.md @@ -87,4 +87,4 @@ Tier 2 IR entries are all the same size; there is no equivalent to `EXTENDED_ARG - **opcode**: Sometimes the same as a Tier 1 opcode, sometimes a separate micro opcode. Tier 2 opcodes are 9 bits (as opposed to Tier 1 opcodes, which fit in 8 bits). By convention, Tier 2 opcode names start with `_`. - **oparg**: The argument. Usually the same as the Tier 1 oparg after expansion of `EXTENDED_ARG` prefixes. Up to 32 bits. -- **operand**: An aditional argument, Typically the value of *one* cache item from the Tier 1 inline cache, up to 64 bits. +- **operand**: An additional argument, Typically the value of *one* cache item from the Tier 1 inline cache, up to 64 bits. From dce14bb2dce7887df40ae5c13b0d13e0dafceff7 Mon Sep 17 00:00:00 2001 From: Kaundur Date: Tue, 4 Jun 2024 12:48:05 +0100 Subject: [PATCH 55/72] gh-118868: logging QueueHandler fix passing of kwargs (GH-118869) Co-authored-by: Nice Zombies Co-authored-by: Vinay Sajip --- Lib/logging/config.py | 16 +++++----- Lib/test/test_logging.py | 29 +++++++++++++++++++ ...-05-09-21-36-11.gh-issue-118868.uckxxP.rst | 2 ++ 3 files changed, 39 insertions(+), 8 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-05-09-21-36-11.gh-issue-118868.uckxxP.rst diff --git a/Lib/logging/config.py b/Lib/logging/config.py index 860e4751207470..ac45d6809c805c 100644 --- a/Lib/logging/config.py +++ b/Lib/logging/config.py @@ -725,16 +725,16 @@ def add_filters(self, filterer, filters): def _configure_queue_handler(self, klass, **kwargs): if 'queue' in kwargs: - q = kwargs['queue'] + q = kwargs.pop('queue') else: q = queue.Queue() # unbounded - rhl = kwargs.get('respect_handler_level', False) - if 'listener' in kwargs: - lklass = kwargs['listener'] - else: - lklass = logging.handlers.QueueListener - listener = lklass(q, *kwargs.get('handlers', []), respect_handler_level=rhl) - handler = klass(q) + + rhl = kwargs.pop('respect_handler_level', False) + lklass = kwargs.pop('listener', logging.handlers.QueueListener) + handlers = kwargs.pop('handlers', []) + + listener = lklass(q, *handlers, respect_handler_level=rhl) + handler = klass(q, **kwargs) handler.listener = listener return handler diff --git a/Lib/test/test_logging.py b/Lib/test/test_logging.py index 97d7c9fb167ec1..9ebd3457a18d68 100644 --- a/Lib/test/test_logging.py +++ b/Lib/test/test_logging.py @@ -3976,6 +3976,35 @@ def test_111615(self): } logging.config.dictConfig(config) + # gh-118868: check if kwargs are passed to logging QueueHandler + def test_kwargs_passing(self): + class CustomQueueHandler(logging.handlers.QueueHandler): + def __init__(self, *args, **kwargs): + super().__init__(queue.Queue()) + self.custom_kwargs = kwargs + + custom_kwargs = {'foo': 'bar'} + + config = { + 'version': 1, + 'handlers': { + 'custom': { + 'class': CustomQueueHandler, + **custom_kwargs + }, + }, + 'root': { + 'level': 'DEBUG', + 'handlers': ['custom'] + } + } + + logging.config.dictConfig(config) + + handler = logging.getHandlerByName('custom') + self.assertEqual(handler.custom_kwargs, custom_kwargs) + + class ManagerTest(BaseTest): def test_manager_loggerclass(self): logged = [] diff --git a/Misc/NEWS.d/next/Library/2024-05-09-21-36-11.gh-issue-118868.uckxxP.rst b/Misc/NEWS.d/next/Library/2024-05-09-21-36-11.gh-issue-118868.uckxxP.rst new file mode 100644 index 00000000000000..372a809d9594b0 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-09-21-36-11.gh-issue-118868.uckxxP.rst @@ -0,0 +1,2 @@ +Fixed issue where kwargs were no longer passed to the logging handler +QueueHandler From 99d945c0c006e3246ac00338e37c443c6e08fc5c Mon Sep 17 00:00:00 2001 From: Vinay Sajip Date: Tue, 4 Jun 2024 13:20:50 +0100 Subject: [PATCH 56/72] =?UTF-8?q?gh-119819:=20Fix=20regression=20to=20allo?= =?UTF-8?q?w=20logging=20configuration=20with=20multipr=E2=80=A6=20(GH-120?= =?UTF-8?q?030)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Lib/logging/config.py | 4 ++- Lib/test/test_logging.py | 26 +++++++++++++++++++ ...-06-04-12-23-01.gh-issue-119819.WKKrYh.rst | 2 ++ 3 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-06-04-12-23-01.gh-issue-119819.WKKrYh.rst diff --git a/Lib/logging/config.py b/Lib/logging/config.py index ac45d6809c805c..0b10bf82b60a36 100644 --- a/Lib/logging/config.py +++ b/Lib/logging/config.py @@ -781,8 +781,10 @@ def configure_handler(self, config): # raise ValueError('No handlers specified for a QueueHandler') if 'queue' in config: from multiprocessing.queues import Queue as MPQueue + from multiprocessing import Manager as MM + proxy_queue = MM().Queue() qspec = config['queue'] - if not isinstance(qspec, (queue.Queue, MPQueue)): + if not isinstance(qspec, (queue.Queue, MPQueue, type(proxy_queue))): if isinstance(qspec, str): q = self.resolve(qspec) if not callable(q): diff --git a/Lib/test/test_logging.py b/Lib/test/test_logging.py index 9ebd3457a18d68..d3e5ac2be2e21e 100644 --- a/Lib/test/test_logging.py +++ b/Lib/test/test_logging.py @@ -3926,6 +3926,32 @@ def test_config_queue_handler(self): msg = str(ctx.exception) self.assertEqual(msg, "Unable to configure handler 'ah'") + @unittest.skipIf(support.is_wasi, "WASI does not have multiprocessing.") + def test_multiprocessing_queues(self): + # See gh-119819 + cd = copy.deepcopy(self.config_queue_handler) + from multiprocessing import Queue as MQ, Manager as MM + q1 = MQ() # this can't be pickled + q2 = MM().Queue() # a proxy queue for use when pickling is needed + for qspec in (q1, q2): + fn = make_temp_file('.log', 'test_logging-cmpqh-') + cd['handlers']['h1']['filename'] = fn + cd['handlers']['ah']['queue'] = qspec + qh = None + try: + self.apply_config(cd) + qh = logging.getHandlerByName('ah') + self.assertEqual(sorted(logging.getHandlerNames()), ['ah', 'h1']) + self.assertIsNotNone(qh.listener) + self.assertIs(qh.queue, qspec) + self.assertIs(qh.listener.queue, qspec) + finally: + h = logging.getHandlerByName('h1') + if h: + self.addCleanup(closeFileHandler, h, fn) + else: + self.addCleanup(os.remove, fn) + def test_90195(self): # See gh-90195 config = { diff --git a/Misc/NEWS.d/next/Library/2024-06-04-12-23-01.gh-issue-119819.WKKrYh.rst b/Misc/NEWS.d/next/Library/2024-06-04-12-23-01.gh-issue-119819.WKKrYh.rst new file mode 100644 index 00000000000000..f9e49c00f671f2 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-04-12-23-01.gh-issue-119819.WKKrYh.rst @@ -0,0 +1,2 @@ +Fix regression to allow logging configuration with multiprocessing queue +types. From bd8c1f97e1709b5e8b07c31b1bc7b73acc76169d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mark=20Jason=20Dominus=20=28=E9=99=B6=E6=95=8F=E4=BF=AE=29?= Date: Tue, 4 Jun 2024 08:59:56 -0400 Subject: [PATCH 57/72] gh-94808: Reorganize _make_posargs and mark unused code (GH-119227) * Reorganize four-way if-elsif-elsif-elsif as nested if-elses * Mark unused branch in _make_posargs `names_with_default` is never `NULL`, even if there are no names with defaults. In that case it points to a structure with `size` zero. Rather than eliminating the branch, we leave it behind with an `assert(0)` in case a future change to the grammar exercises the branch. --- Parser/action_helpers.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c index 3f6c282ffa7a68..91b7e2f1058423 100644 --- a/Parser/action_helpers.c +++ b/Parser/action_helpers.c @@ -543,22 +543,30 @@ _make_posargs(Parser *p, asdl_arg_seq *plain_names, asdl_seq *names_with_default, asdl_arg_seq **posargs) { - if (plain_names != NULL && names_with_default != NULL) { - asdl_arg_seq *names_with_default_names = _get_names(p, names_with_default); - if (!names_with_default_names) { - return -1; + + if (names_with_default != NULL) { + if (plain_names != NULL) { + asdl_arg_seq *names_with_default_names = _get_names(p, names_with_default); + if (!names_with_default_names) { + return -1; + } + *posargs = (asdl_arg_seq*)_PyPegen_join_sequences( + p,(asdl_seq*)plain_names, (asdl_seq*)names_with_default_names); + } + else { + *posargs = _get_names(p, names_with_default); } - *posargs = (asdl_arg_seq*)_PyPegen_join_sequences( - p,(asdl_seq*)plain_names, (asdl_seq*)names_with_default_names); - } - else if (plain_names == NULL && names_with_default != NULL) { - *posargs = _get_names(p, names_with_default); - } - else if (plain_names != NULL && names_with_default == NULL) { - *posargs = plain_names; } else { - *posargs = _Py_asdl_arg_seq_new(0, p->arena); + if (plain_names != NULL) { + // With the current grammar, we never get here. + // If that has changed, remove the assert, and test thoroughly. + assert(0); + *posargs = plain_names; + } + else { + *posargs = _Py_asdl_arg_seq_new(0, p->arena); + } } return *posargs == NULL ? -1 : 0; } From e69d068ad0bd6a25434ea476a647b635da4d82bb Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 4 Jun 2024 09:42:13 -0400 Subject: [PATCH 58/72] gh-117657: Fix race involving GC and heap initialization (#119923) The `_PyThreadState_Bind()` function is called before the first `PyEval_AcquireThread()` so it's not synchronized with the stop the world GC. We had a race where `gc_visit_heaps()` might visit a thread's heap while it's being initialized. Use a simple atomic int to avoid visiting heaps for threads that are not yet fully initialized (i.e., before `tstate_mimalloc_bind()` is called). The race was reproducible by running: `python Lib/test/test_importlib/partial/pool_in_threads.py`. --- Include/internal/pycore_mimalloc.h | 1 + Python/gc_free_threading.c | 4 ++++ Python/pystate.c | 2 ++ Tools/tsan/suppressions_free_threading.txt | 3 --- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/Include/internal/pycore_mimalloc.h b/Include/internal/pycore_mimalloc.h index 100f78d53021ee..d10b01d5b49b19 100644 --- a/Include/internal/pycore_mimalloc.h +++ b/Include/internal/pycore_mimalloc.h @@ -52,6 +52,7 @@ struct _mimalloc_thread_state { mi_heap_t *current_object_heap; mi_heap_t heaps[_Py_MIMALLOC_HEAP_COUNT]; mi_tld_t tld; + int initialized; struct llist_node page_list; }; #endif diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index d005b79ff40dbf..f19362c9573812 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -252,6 +252,10 @@ gc_visit_heaps_lock_held(PyInterpreterState *interp, mi_block_visit_fun *visitor // visit each thread's heaps for GC objects for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { struct _mimalloc_thread_state *m = &((_PyThreadStateImpl *)p)->mimalloc; + if (!_Py_atomic_load_int(&m->initialized)) { + // The thread may not have called tstate_mimalloc_bind() yet. + continue; + } arg->offset = offset_base; if (!mi_heap_visit_blocks(&m->heaps[_Py_MIMALLOC_HEAP_GC], true, diff --git a/Python/pystate.c b/Python/pystate.c index d0293915db7689..e1a95907b57d20 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -3074,6 +3074,8 @@ tstate_mimalloc_bind(PyThreadState *tstate) // _PyObject_GC_New() and similar functions temporarily override this to // use one of the GC heaps. mts->current_object_heap = &mts->heaps[_Py_MIMALLOC_HEAP_OBJECT]; + + _Py_atomic_store_int(&mts->initialized, 1); #endif } diff --git a/Tools/tsan/suppressions_free_threading.txt b/Tools/tsan/suppressions_free_threading.txt index d5fcac61f0db04..8b64d1ff321858 100644 --- a/Tools/tsan/suppressions_free_threading.txt +++ b/Tools/tsan/suppressions_free_threading.txt @@ -25,7 +25,6 @@ race:free_threadstate race_top:_add_to_weak_set race_top:_in_weak_set -race_top:_mi_heap_delayed_free_partial race_top:_PyEval_EvalFrameDefault race_top:_PyImport_AcquireLock race_top:_PyImport_ReleaseLock @@ -33,7 +32,6 @@ race_top:_PyType_HasFeature race_top:assign_version_tag race_top:insertdict race_top:lookup_tp_dict -race_top:mi_heap_visit_pages race_top:PyMember_GetOne race_top:PyMember_SetOne race_top:new_reference @@ -58,7 +56,6 @@ race_top:_Py_slot_tp_getattr_hook race_top:add_threadstate race_top:dump_traceback race_top:fatal_error -race_top:mi_page_decode_padding race_top:_multiprocessing_SemLock_release_impl race_top:_PyFrame_GetCode race_top:_PyFrame_Initialize From ff1857d6ed52fab8ef1507c289d89ee545ca6478 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Tue, 4 Jun 2024 16:24:22 +0200 Subject: [PATCH 59/72] gh-120029: export `DEF_TYPE_PARAM` compiler flag (#120028) --- Doc/library/symtable.rst | 4 ++++ Lib/symtable.py | 17 +++++++++++++---- Lib/test/test_symtable.py | 2 ++ ...24-06-04-14-54-46.gh-issue-120029._1YdTf.rst | 2 ++ Modules/symtablemodule.c | 4 +++- 5 files changed, 24 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-06-04-14-54-46.gh-issue-120029._1YdTf.rst diff --git a/Doc/library/symtable.rst b/Doc/library/symtable.rst index 0480502158433a..e17a33f7feb1ab 100644 --- a/Doc/library/symtable.rst +++ b/Doc/library/symtable.rst @@ -151,6 +151,10 @@ Examining Symbol Tables Return ``True`` if the symbol is a parameter. + .. method:: is_type_parameter() + + Return ``True`` if the symbol is a type parameter. + .. method:: is_global() Return ``True`` if the symbol is global. diff --git a/Lib/symtable.py b/Lib/symtable.py index 17f820abd56660..ba2f0dafcd0063 100644 --- a/Lib/symtable.py +++ b/Lib/symtable.py @@ -1,9 +1,13 @@ """Interface to the compiler's internal symbol tables""" import _symtable -from _symtable import (USE, DEF_GLOBAL, DEF_NONLOCAL, DEF_LOCAL, DEF_PARAM, - DEF_IMPORT, DEF_BOUND, DEF_ANNOT, SCOPE_OFF, SCOPE_MASK, FREE, - LOCAL, GLOBAL_IMPLICIT, GLOBAL_EXPLICIT, CELL) +from _symtable import ( + USE, + DEF_GLOBAL, DEF_NONLOCAL, DEF_LOCAL, + DEF_PARAM, DEF_TYPE_PARAM, DEF_IMPORT, DEF_BOUND, DEF_ANNOT, + SCOPE_OFF, SCOPE_MASK, + FREE, LOCAL, GLOBAL_IMPLICIT, GLOBAL_EXPLICIT, CELL +) import weakref @@ -253,13 +257,18 @@ def is_referenced(self): """Return *True* if the symbol is used in its block. """ - return bool(self.__flags & _symtable.USE) + return bool(self.__flags & USE) def is_parameter(self): """Return *True* if the symbol is a parameter. """ return bool(self.__flags & DEF_PARAM) + def is_type_parameter(self): + """Return *True* if the symbol is a type parameter. + """ + return bool(self.__flags & DEF_TYPE_PARAM) + def is_global(self): """Return *True* if the symbol is global. """ diff --git a/Lib/test/test_symtable.py b/Lib/test/test_symtable.py index 92b78a8086a83d..ef2a228b15ed4e 100644 --- a/Lib/test/test_symtable.py +++ b/Lib/test/test_symtable.py @@ -299,6 +299,8 @@ def test_symbol_repr(self): "") self.assertEqual(repr(self.other_internal.lookup("some_var")), "") + self.assertEqual(repr(self.GenericMine.lookup("T")), + "") def test_symtable_entry_repr(self): expected = f"" diff --git a/Misc/NEWS.d/next/Library/2024-06-04-14-54-46.gh-issue-120029._1YdTf.rst b/Misc/NEWS.d/next/Library/2024-06-04-14-54-46.gh-issue-120029._1YdTf.rst new file mode 100644 index 00000000000000..e8ea1077139f71 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-04-14-54-46.gh-issue-120029._1YdTf.rst @@ -0,0 +1,2 @@ +Expose :meth:`symtable.Symbol.is_type_parameter` in the :mod:`symtable` +module. Patch by Bénédikt Tran. diff --git a/Modules/symtablemodule.c b/Modules/symtablemodule.c index b4dbb54c3b47b0..63c4dd4225298d 100644 --- a/Modules/symtablemodule.c +++ b/Modules/symtablemodule.c @@ -75,6 +75,7 @@ symtable_init_constants(PyObject *m) if (PyModule_AddIntMacro(m, DEF_NONLOCAL) < 0) return -1; if (PyModule_AddIntMacro(m, DEF_LOCAL) < 0) return -1; if (PyModule_AddIntMacro(m, DEF_PARAM) < 0) return -1; + if (PyModule_AddIntMacro(m, DEF_TYPE_PARAM) < 0) return -1; if (PyModule_AddIntMacro(m, DEF_FREE) < 0) return -1; if (PyModule_AddIntMacro(m, DEF_FREE_CLASS) < 0) return -1; if (PyModule_AddIntMacro(m, DEF_IMPORT) < 0) return -1; @@ -83,7 +84,8 @@ symtable_init_constants(PyObject *m) if (PyModule_AddIntConstant(m, "TYPE_FUNCTION", FunctionBlock) < 0) return -1; - if (PyModule_AddIntConstant(m, "TYPE_CLASS", ClassBlock) < 0) return -1; + if (PyModule_AddIntConstant(m, "TYPE_CLASS", ClassBlock) < 0) + return -1; if (PyModule_AddIntConstant(m, "TYPE_MODULE", ModuleBlock) < 0) return -1; if (PyModule_AddIntConstant(m, "TYPE_ANNOTATION", AnnotationBlock) < 0) From 4dcd91ceafce91ec37bb1a9d544e41fc65578994 Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Tue, 4 Jun 2024 11:20:01 -0400 Subject: [PATCH 60/72] gh-119588: Update docs to reflect decision to include the change with Python 3.13 and not 3.12. (#120043) --- Doc/library/zipfile.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst index a4d9a1852f8f0d..5583c6b24be5c6 100644 --- a/Doc/library/zipfile.rst +++ b/Doc/library/zipfile.rst @@ -591,8 +591,8 @@ Path objects are traversable using the ``/`` operator or ``joinpath``. .. versionadded:: 3.12 - .. versionchanged:: 3.12.4 - Prior to 3.12.4, ``is_symlink`` would unconditionally return ``False``. + .. versionchanged:: 3.13 + Previously, ``is_symlink`` would unconditionally return ``False``. .. method:: Path.exists() From 8fc7653766b106bdbc4ff6154e0020aea4ab15e6 Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Tue, 4 Jun 2024 18:09:31 +0200 Subject: [PATCH 61/72] gh-120041: Do not use append_to_screen when completions are visible (GH-120042) --- Lib/_pyrepl/commands.py | 7 +++++- Lib/_pyrepl/completing_reader.py | 20 +++++++++------- Lib/test/test_pyrepl/support.py | 2 +- Lib/test/test_pyrepl/test_reader.py | 37 ++++++++++++++++++++++++++++- 4 files changed, 55 insertions(+), 11 deletions(-) diff --git a/Lib/_pyrepl/commands.py b/Lib/_pyrepl/commands.py index 2ef5dada9d9e58..b967f5206614f8 100644 --- a/Lib/_pyrepl/commands.py +++ b/Lib/_pyrepl/commands.py @@ -365,7 +365,12 @@ def do(self) -> None: r = self.reader text = self.event * r.get_arg() r.insert(text) - if len(text) == 1 and r.pos == len(r.buffer): + if ( + len(text) == 1 and + r.pos == len(r.buffer) and + not r.cmpltn_menu_visible and # type: ignore[attr-defined] + not r.cmpltn_message_visible # type: ignore[attr-defined] + ): r.calc_screen = r.append_to_screen diff --git a/Lib/_pyrepl/completing_reader.py b/Lib/_pyrepl/completing_reader.py index c11d2dabdd2792..215ad8753c9f8b 100644 --- a/Lib/_pyrepl/completing_reader.py +++ b/Lib/_pyrepl/completing_reader.py @@ -187,18 +187,20 @@ def do(self) -> None: if p: r.insert(p) if last_is_completer: - if not r.cmpltn_menu_visible: - r.cmpltn_menu_visible = True + r.cmpltn_menu_visible = True + r.cmpltn_message_visible = False r.cmpltn_menu, r.cmpltn_menu_end = build_menu( r.console, completions, r.cmpltn_menu_end, r.use_brackets, r.sort_in_column) r.dirty = True - elif stem + p in completions: - r.msg = "[ complete but not unique ]" - r.dirty = True - else: - r.msg = "[ not unique ]" - r.dirty = True + elif not r.cmpltn_menu_visible: + r.cmpltn_message_visible = True + if stem + p in completions: + r.msg = "[ complete but not unique ]" + r.dirty = True + else: + r.msg = "[ not unique ]" + r.dirty = True class self_insert(commands.self_insert): @@ -236,6 +238,7 @@ class CompletingReader(Reader): ### Instance variables cmpltn_menu: list[str] = field(init=False) cmpltn_menu_visible: bool = field(init=False) + cmpltn_message_visible: bool = field(init=False) cmpltn_menu_end: int = field(init=False) cmpltn_menu_choices: list[str] = field(init=False) @@ -271,6 +274,7 @@ def finish(self) -> None: def cmpltn_reset(self) -> None: self.cmpltn_menu = [] self.cmpltn_menu_visible = False + self.cmpltn_message_visible = False self.cmpltn_menu_end = 0 self.cmpltn_menu_choices = [] diff --git a/Lib/test/test_pyrepl/support.py b/Lib/test/test_pyrepl/support.py index d2f5429aea7a11..e807b5f3404550 100644 --- a/Lib/test/test_pyrepl/support.py +++ b/Lib/test/test_pyrepl/support.py @@ -39,7 +39,7 @@ def code_to_events(code: str): def prepare_reader(console: Console, **kwargs): - config = ReadlineConfig(readline_completer=None) + config = ReadlineConfig(readline_completer=kwargs.pop("readline_completer", None)) reader = ReadlineAlikeReader(console=console, config=config) reader.more_lines = partial(more_lines, namespace=None) reader.paste_mode = True # Avoid extra indents diff --git a/Lib/test/test_pyrepl/test_reader.py b/Lib/test/test_pyrepl/test_reader.py index 9fb956b655594f..d02815bfa11d74 100644 --- a/Lib/test/test_pyrepl/test_reader.py +++ b/Lib/test/test_pyrepl/test_reader.py @@ -1,5 +1,6 @@ import itertools import functools +import rlcompleter from unittest import TestCase from .support import handle_all_events, handle_events_narrow_console, code_to_events, prepare_reader @@ -9,7 +10,7 @@ class TestReader(TestCase): def assert_screen_equals(self, reader, expected): - actual = reader.calc_screen() + actual = reader.screen expected = expected.split("\n") self.assertListEqual(actual, expected) @@ -208,3 +209,37 @@ def test_prompt_length(self): prompt, l = Reader.process_prompt(ps1) self.assertEqual(prompt, "\033[0;32m樂>\033[0m> ") self.assertEqual(l, 5) + + def test_completions_updated_on_key_press(self): + namespace = {"itertools": itertools} + code = "itertools." + events = itertools.chain(code_to_events(code), [ + Event(evt='key', data='\t', raw=bytearray(b'\t')), # Two tabs for completion + Event(evt='key', data='\t', raw=bytearray(b'\t')), + ], code_to_events("a")) + + completing_reader = functools.partial( + prepare_reader, + readline_completer=rlcompleter.Completer(namespace).complete + ) + reader, _ = handle_all_events(events, prepare_reader=completing_reader) + + actual = reader.screen + self.assertEqual(len(actual), 2) + self.assertEqual(actual[0].rstrip(), "itertools.accumulate(") + self.assertEqual(actual[1], f"{code}a") + + def test_key_press_on_tab_press_once(self): + namespace = {"itertools": itertools} + code = "itertools." + events = itertools.chain(code_to_events(code), [ + Event(evt='key', data='\t', raw=bytearray(b'\t')), + ], code_to_events("a")) + + completing_reader = functools.partial( + prepare_reader, + readline_completer=rlcompleter.Completer(namespace).complete + ) + reader, _ = handle_all_events(events, prepare_reader=completing_reader) + + self.assert_screen_equals(reader, f"{code}a") From 5f03f0913413ecc4942367cf62ce3a5a5b5d84a5 Mon Sep 17 00:00:00 2001 From: Trey Hunner Date: Tue, 4 Jun 2024 09:28:08 -0700 Subject: [PATCH 62/72] Fix incorrect pull GitHub link in What's New (#120045) Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Doc/whatsnew/3.13.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 903de3c04b4a07..dfbeadce0eea27 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1006,7 +1006,7 @@ random ------ * Add a :ref:`command-line interface `. - (Contributed by Hugo van Kemenade in :gh:`54321`.) + (Contributed by Hugo van Kemenade in :gh:`118131`.) re -- From 7111d9605f9db7aa0b095bb8ece7ccc0b8115c3f Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 4 Jun 2024 19:36:37 +0300 Subject: [PATCH 63/72] gh-89928: Fix integer conversion of device numbers (GH-31794) Fix os.major(), os.minor() and os.makedev(). Support device numbers larger than 2**63-1. Support non-existent device number (NODEV). --- Lib/test/test_posix.py | 15 +++- .../2022-03-10-16-47-57.bpo-45767.ywmyo1.rst | 3 + Modules/clinic/posixmodule.c.h | 32 +++---- Modules/posixmodule.c | 88 +++++++++++++++---- 4 files changed, 99 insertions(+), 39 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-03-10-16-47-57.bpo-45767.ywmyo1.rst diff --git a/Lib/test/test_posix.py b/Lib/test/test_posix.py index 7e5f04c22bd6d3..908354cb8574d1 100644 --- a/Lib/test/test_posix.py +++ b/Lib/test/test_posix.py @@ -704,7 +704,8 @@ def test_makedev(self): self.assertEqual(posix.major(dev), major) self.assertRaises(TypeError, posix.major, float(dev)) self.assertRaises(TypeError, posix.major) - self.assertRaises((ValueError, OverflowError), posix.major, -1) + for x in -2, 2**64, -2**63-1: + self.assertRaises((ValueError, OverflowError), posix.major, x) minor = posix.minor(dev) self.assertIsInstance(minor, int) @@ -712,13 +713,23 @@ def test_makedev(self): self.assertEqual(posix.minor(dev), minor) self.assertRaises(TypeError, posix.minor, float(dev)) self.assertRaises(TypeError, posix.minor) - self.assertRaises((ValueError, OverflowError), posix.minor, -1) + for x in -2, 2**64, -2**63-1: + self.assertRaises((ValueError, OverflowError), posix.minor, x) self.assertEqual(posix.makedev(major, minor), dev) self.assertRaises(TypeError, posix.makedev, float(major), minor) self.assertRaises(TypeError, posix.makedev, major, float(minor)) self.assertRaises(TypeError, posix.makedev, major) self.assertRaises(TypeError, posix.makedev) + for x in -2, 2**32, 2**64, -2**63-1: + self.assertRaises((ValueError, OverflowError), posix.makedev, x, minor) + self.assertRaises((ValueError, OverflowError), posix.makedev, major, x) + + if sys.platform == 'linux': + NODEV = -1 + self.assertEqual(posix.major(NODEV), NODEV) + self.assertEqual(posix.minor(NODEV), NODEV) + self.assertEqual(posix.makedev(NODEV, NODEV), NODEV) def _test_all_chown_common(self, chown_func, first_param, stat_func): """Common code for chown, fchown and lchown tests.""" diff --git a/Misc/NEWS.d/next/Library/2022-03-10-16-47-57.bpo-45767.ywmyo1.rst b/Misc/NEWS.d/next/Library/2022-03-10-16-47-57.bpo-45767.ywmyo1.rst new file mode 100644 index 00000000000000..0cdf1e84157777 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-03-10-16-47-57.bpo-45767.ywmyo1.rst @@ -0,0 +1,3 @@ +Fix integer conversion in :func:`os.major`, :func:`os.minor`, and +:func:`os.makedev`. Support device numbers larger than ``2**63-1``. Support +non-existent device number (``NODEV``). diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index c7a447b455c594..83dcc7a60c2110 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -8685,7 +8685,7 @@ PyDoc_STRVAR(os_major__doc__, #define OS_MAJOR_METHODDEF \ {"major", (PyCFunction)os_major, METH_O, os_major__doc__}, -static unsigned int +static PyObject * os_major_impl(PyObject *module, dev_t device); static PyObject * @@ -8693,16 +8693,11 @@ os_major(PyObject *module, PyObject *arg) { PyObject *return_value = NULL; dev_t device; - unsigned int _return_value; if (!_Py_Dev_Converter(arg, &device)) { goto exit; } - _return_value = os_major_impl(module, device); - if ((_return_value == (unsigned int)-1) && PyErr_Occurred()) { - goto exit; - } - return_value = PyLong_FromUnsignedLong((unsigned long)_return_value); + return_value = os_major_impl(module, device); exit: return return_value; @@ -8721,7 +8716,7 @@ PyDoc_STRVAR(os_minor__doc__, #define OS_MINOR_METHODDEF \ {"minor", (PyCFunction)os_minor, METH_O, os_minor__doc__}, -static unsigned int +static PyObject * os_minor_impl(PyObject *module, dev_t device); static PyObject * @@ -8729,16 +8724,11 @@ os_minor(PyObject *module, PyObject *arg) { PyObject *return_value = NULL; dev_t device; - unsigned int _return_value; if (!_Py_Dev_Converter(arg, &device)) { goto exit; } - _return_value = os_minor_impl(module, device); - if ((_return_value == (unsigned int)-1) && PyErr_Occurred()) { - goto exit; - } - return_value = PyLong_FromUnsignedLong((unsigned long)_return_value); + return_value = os_minor_impl(module, device); exit: return return_value; @@ -8758,25 +8748,23 @@ PyDoc_STRVAR(os_makedev__doc__, {"makedev", _PyCFunction_CAST(os_makedev), METH_FASTCALL, os_makedev__doc__}, static dev_t -os_makedev_impl(PyObject *module, int major, int minor); +os_makedev_impl(PyObject *module, dev_t major, dev_t minor); static PyObject * os_makedev(PyObject *module, PyObject *const *args, Py_ssize_t nargs) { PyObject *return_value = NULL; - int major; - int minor; + dev_t major; + dev_t minor; dev_t _return_value; if (!_PyArg_CheckPositional("makedev", nargs, 2, 2)) { goto exit; } - major = PyLong_AsInt(args[0]); - if (major == -1 && PyErr_Occurred()) { + if (!_Py_Dev_Converter(args[0], &major)) { goto exit; } - minor = PyLong_AsInt(args[1]); - if (minor == -1 && PyErr_Occurred()) { + if (!_Py_Dev_Converter(args[1], &minor)) { goto exit; } _return_value = os_makedev_impl(module, major, minor); @@ -12795,4 +12783,4 @@ os__supports_virtual_terminal(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #define OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #endif /* !defined(OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF) */ -/*[clinic end generated code: output=300bd1c54dc43765 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=49c2d7a65f7a9f3b input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index bb35cfd9cdb138..1251ea63348946 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -18,6 +18,7 @@ #include "pycore_fileutils.h" // _Py_closerange() #include "pycore_import.h" // _PyImport_ReInitLock() #include "pycore_initconfig.h" // _PyStatus_EXCEPTION() +#include "pycore_long.h" // _PyLong_IsNegative() #include "pycore_moduleobject.h" // _PyModule_GetState() #include "pycore_object.h" // _PyObject_LookupSpecial() #include "pycore_pylifecycle.h" // _PyOS_URandom() @@ -967,16 +968,46 @@ _Py_Gid_Converter(PyObject *obj, gid_t *p) #endif /* MS_WINDOWS */ -#define _PyLong_FromDev PyLong_FromLongLong +static PyObject * +_PyLong_FromDev(dev_t dev) +{ +#ifdef NODEV + if (dev == NODEV) { + return PyLong_FromLongLong((long long)dev); + } +#endif + return PyLong_FromUnsignedLongLong((unsigned long long)dev); +} #if (defined(HAVE_MKNOD) && defined(HAVE_MAKEDEV)) || defined(HAVE_DEVICE_MACROS) static int _Py_Dev_Converter(PyObject *obj, void *p) { - *((dev_t *)p) = PyLong_AsUnsignedLongLong(obj); - if (PyErr_Occurred()) +#ifdef NODEV + if (PyLong_Check(obj) && _PyLong_IsNegative((PyLongObject *)obj)) { + int overflow; + long long result = PyLong_AsLongLongAndOverflow(obj, &overflow); + if (result == -1 && PyErr_Occurred()) { + return 0; + } + if (!overflow && result == (long long)NODEV) { + *((dev_t *)p) = NODEV; + return 1; + } + } +#endif + + unsigned long long result = PyLong_AsUnsignedLongLong(obj); + if (result == (unsigned long long)-1 && PyErr_Occurred()) { + return 0; + } + if ((unsigned long long)(dev_t)result != result) { + PyErr_SetString(PyExc_OverflowError, + "Python int too large to convert to C dev_t"); return 0; + } + *((dev_t *)p) = (dev_t)result; return 1; } #endif /* (HAVE_MKNOD && HAVE_MAKEDEV) || HAVE_DEVICE_MACROS */ @@ -12517,9 +12548,31 @@ os_mknod_impl(PyObject *module, path_t *path, int mode, dev_t device, #endif /* defined(HAVE_MKNOD) && defined(HAVE_MAKEDEV) */ +static PyObject * +major_minor_conv(unsigned int value) +{ +#ifdef NODEV + if (value == (unsigned int)NODEV) { + return PyLong_FromLong((int)NODEV); + } +#endif + return PyLong_FromUnsignedLong(value); +} + +static int +major_minor_check(dev_t value) +{ +#ifdef NODEV + if (value == NODEV) { + return 1; + } +#endif + return (dev_t)(unsigned int)value == value; +} + #ifdef HAVE_DEVICE_MACROS /*[clinic input] -os.major -> unsigned_int +os.major device: dev_t / @@ -12527,16 +12580,16 @@ os.major -> unsigned_int Extracts a device major number from a raw device number. [clinic start generated code]*/ -static unsigned int +static PyObject * os_major_impl(PyObject *module, dev_t device) -/*[clinic end generated code: output=5b3b2589bafb498e input=1e16a4d30c4d4462]*/ +/*[clinic end generated code: output=4071ffee17647891 input=b1a0a14ec9448229]*/ { - return major(device); + return major_minor_conv(major(device)); } /*[clinic input] -os.minor -> unsigned_int +os.minor device: dev_t / @@ -12544,28 +12597,33 @@ os.minor -> unsigned_int Extracts a device minor number from a raw device number. [clinic start generated code]*/ -static unsigned int +static PyObject * os_minor_impl(PyObject *module, dev_t device) -/*[clinic end generated code: output=5e1a25e630b0157d input=0842c6d23f24c65e]*/ +/*[clinic end generated code: output=306cb78e3bc5004f input=2f686e463682a9da]*/ { - return minor(device); + return major_minor_conv(minor(device)); } /*[clinic input] os.makedev -> dev_t - major: int - minor: int + major: dev_t + minor: dev_t / Composes a raw device number from the major and minor device numbers. [clinic start generated code]*/ static dev_t -os_makedev_impl(PyObject *module, int major, int minor) -/*[clinic end generated code: output=881aaa4aba6f6a52 input=4b9fd8fc73cbe48f]*/ +os_makedev_impl(PyObject *module, dev_t major, dev_t minor) +/*[clinic end generated code: output=cad6125c51f5af80 input=2146126ec02e55c1]*/ { + if (!major_minor_check(major) || !major_minor_check(minor)) { + PyErr_SetString(PyExc_OverflowError, + "Python int too large to convert to C unsigned int"); + return (dev_t)-1; + } return makedev(major, minor); } #endif /* HAVE_DEVICE_MACROS */ From e0799352823289fafb8131341abd751923ee9c08 Mon Sep 17 00:00:00 2001 From: Christopher Chavez Date: Tue, 4 Jun 2024 11:47:15 -0500 Subject: [PATCH 64/72] gh-112672: Fix builtin Tkinter with Tcl 9.0 (GH-112681) * Add declaration of Tcl_AppInit(), missing in Tcl 9.0. * Use Tcl_Size instead of int where needed. Co-authored-by: Serhiy Storchaka --- ...-06-04-19-03-25.gh-issue-112672.K2XfZH.rst | 1 + Modules/_tkinter.c | 38 ++++++++++++------- 2 files changed, 25 insertions(+), 14 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-06-04-19-03-25.gh-issue-112672.K2XfZH.rst diff --git a/Misc/NEWS.d/next/Library/2024-06-04-19-03-25.gh-issue-112672.K2XfZH.rst b/Misc/NEWS.d/next/Library/2024-06-04-19-03-25.gh-issue-112672.K2XfZH.rst new file mode 100644 index 00000000000000..46345bff117b19 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-06-04-19-03-25.gh-issue-112672.K2XfZH.rst @@ -0,0 +1 @@ +Support building :mod:`tkinter` with Tcl 9.0. diff --git a/Modules/_tkinter.c b/Modules/_tkinter.c index 0cff36dd307c39..24f87c8d34c6b2 100644 --- a/Modules/_tkinter.c +++ b/Modules/_tkinter.c @@ -69,6 +69,12 @@ Copyright (C) 1994 Steen Lumholt. #define USE_DEPRECATED_TOMMATH_API 1 #endif +// As suggested by https://core.tcl-lang.org/tcl/wiki?name=Migrating+C+extensions+to+Tcl+9 +#ifndef TCL_SIZE_MAX +typedef int Tcl_Size; +#define TCL_SIZE_MAX INT_MAX +#endif + #if !(defined(MS_WINDOWS) || defined(__CYGWIN__)) #define HAVE_CREATEFILEHANDLER #endif @@ -489,7 +495,7 @@ unicodeFromTclString(const char *s) static PyObject * unicodeFromTclObj(Tcl_Obj *value) { - int len; + Tcl_Size len; #if USE_TCL_UNICODE int byteorder = NATIVE_BYTEORDER; const Tcl_UniChar *u = Tcl_GetUnicodeFromObj(value, &len); @@ -517,6 +523,10 @@ class _tkinter.tktimertoken "TkttObject *" "&Tktt_Type_spec" /**** Tkapp Object ****/ +#if TK_MAJOR_VERSION >= 9 +int Tcl_AppInit(Tcl_Interp *); +#endif + #ifndef WITH_APPINIT int Tcl_AppInit(Tcl_Interp *interp) @@ -1142,7 +1152,7 @@ FromObj(TkappObject *tkapp, Tcl_Obj *value) } if (value->typePtr == tkapp->ByteArrayType) { - int size; + Tcl_Size size; char *data = (char*)Tcl_GetByteArrayFromObj(value, &size); return PyBytes_FromStringAndSize(data, size); } @@ -1168,8 +1178,8 @@ FromObj(TkappObject *tkapp, Tcl_Obj *value) } if (value->typePtr == tkapp->ListType) { - int size; - int i, status; + Tcl_Size i, size; + int status; PyObject *elem; Tcl_Obj *tcl_elem; @@ -1225,9 +1235,9 @@ typedef struct Tkapp_CallEvent { } Tkapp_CallEvent; static void -Tkapp_CallDeallocArgs(Tcl_Obj** objv, Tcl_Obj** objStore, int objc) +Tkapp_CallDeallocArgs(Tcl_Obj** objv, Tcl_Obj** objStore, Tcl_Size objc) { - int i; + Tcl_Size i; for (i = 0; i < objc; i++) Tcl_DecrRefCount(objv[i]); if (objv != objStore) @@ -1238,7 +1248,7 @@ Tkapp_CallDeallocArgs(Tcl_Obj** objv, Tcl_Obj** objStore, int objc) interpreter thread, which may or may not be the calling thread. */ static Tcl_Obj** -Tkapp_CallArgs(PyObject *args, Tcl_Obj** objStore, int *pobjc) +Tkapp_CallArgs(PyObject *args, Tcl_Obj** objStore, Tcl_Size *pobjc) { Tcl_Obj **objv = objStore; Py_ssize_t objc = 0, i; @@ -1286,10 +1296,10 @@ Tkapp_CallArgs(PyObject *args, Tcl_Obj** objStore, int *pobjc) Tcl_IncrRefCount(objv[i]); } } - *pobjc = (int)objc; + *pobjc = (Tcl_Size)objc; return objv; finally: - Tkapp_CallDeallocArgs(objv, objStore, (int)objc); + Tkapp_CallDeallocArgs(objv, objStore, (Tcl_Size)objc); return NULL; } @@ -1356,7 +1366,7 @@ Tkapp_CallProc(Tcl_Event *evPtr, int flags) Tkapp_CallEvent *e = (Tkapp_CallEvent *)evPtr; Tcl_Obj *objStore[ARGSZ]; Tcl_Obj **objv; - int objc; + Tcl_Size objc; int i; ENTER_PYTHON if (e->self->trace && !Tkapp_Trace(e->self, PyTuple_Pack(1, e->args))) { @@ -1412,7 +1422,7 @@ Tkapp_Call(PyObject *selfptr, PyObject *args) { Tcl_Obj *objStore[ARGSZ]; Tcl_Obj **objv = NULL; - int objc, i; + Tcl_Size objc; PyObject *res = NULL; TkappObject *self = (TkappObject*)selfptr; int flags = TCL_EVAL_DIRECT | TCL_EVAL_GLOBAL; @@ -1459,6 +1469,7 @@ Tkapp_Call(PyObject *selfptr, PyObject *args) { TRACE(self, ("(O)", args)); + int i; objv = Tkapp_CallArgs(args, objStore, &objc); if (!objv) return NULL; @@ -2193,13 +2204,12 @@ _tkinter_tkapp_splitlist(TkappObject *self, PyObject *arg) /*[clinic end generated code: output=13b51d34386d36fb input=2b2e13351e3c0b53]*/ { char *list; - int argc; + Tcl_Size argc, i; const char **argv; PyObject *v; - int i; if (PyTclObject_Check(arg)) { - int objc; + Tcl_Size objc; Tcl_Obj **objv; if (Tcl_ListObjGetElements(Tkapp_Interp(self), ((PyTclObject*)arg)->value, From bf8e5e53d0c359a1f9c285d855e7a5e9b6d91375 Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Tue, 4 Jun 2024 19:26:44 +0200 Subject: [PATCH 65/72] gh-120041: Refactor check for visible completion menu in completing_reader (#120055) --- Lib/_pyrepl/commands.py | 7 +------ Lib/_pyrepl/completing_reader.py | 3 +++ 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/Lib/_pyrepl/commands.py b/Lib/_pyrepl/commands.py index b967f5206614f8..2ef5dada9d9e58 100644 --- a/Lib/_pyrepl/commands.py +++ b/Lib/_pyrepl/commands.py @@ -365,12 +365,7 @@ def do(self) -> None: r = self.reader text = self.event * r.get_arg() r.insert(text) - if ( - len(text) == 1 and - r.pos == len(r.buffer) and - not r.cmpltn_menu_visible and # type: ignore[attr-defined] - not r.cmpltn_message_visible # type: ignore[attr-defined] - ): + if len(text) == 1 and r.pos == len(r.buffer): r.calc_screen = r.append_to_screen diff --git a/Lib/_pyrepl/completing_reader.py b/Lib/_pyrepl/completing_reader.py index 215ad8753c9f8b..8df35ccb9117b1 100644 --- a/Lib/_pyrepl/completing_reader.py +++ b/Lib/_pyrepl/completing_reader.py @@ -210,6 +210,9 @@ def do(self) -> None: commands.self_insert.do(self) + if r.cmpltn_menu_visible or r.cmpltn_message_visible: + r.calc_screen = r.calc_complete_screen + if r.cmpltn_menu_visible: stem = r.get_stem() if len(stem) < 1: From d419d468ff4aaf6bc673354d0ee41b273d09dd3f Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 4 Jun 2024 13:38:29 -0400 Subject: [PATCH 66/72] gh-120039: Reduce expected timeout in test_siginterrupt_off (#120047) The process is expected to time out. In the refleak builds, `support.SHORT_TIMEOUT` is often five minutes and we run the tests six times, so test_signal was taking >30 minutes. --- Lib/test/test_signal.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_signal.py b/Lib/test/test_signal.py index 61fb047caf6dab..591cd4177d9f41 100644 --- a/Lib/test/test_signal.py +++ b/Lib/test/test_signal.py @@ -698,7 +698,7 @@ def handler(signum, frame): @unittest.skipUnless(hasattr(os, "pipe"), "requires os.pipe()") class SiginterruptTest(unittest.TestCase): - def readpipe_interrupted(self, interrupt): + def readpipe_interrupted(self, interrupt, timeout=support.SHORT_TIMEOUT): """Perform a read during which a signal will arrive. Return True if the read is interrupted by the signal and raises an exception. Return False if it returns normally. @@ -746,7 +746,7 @@ def handler(signum, frame): # wait until the child process is loaded and has started first_line = process.stdout.readline() - stdout, stderr = process.communicate(timeout=support.SHORT_TIMEOUT) + stdout, stderr = process.communicate(timeout=timeout) except subprocess.TimeoutExpired: process.kill() return False @@ -777,7 +777,7 @@ def test_siginterrupt_off(self): # If a signal handler is installed and siginterrupt is called with # a false value for the second argument, when that signal arrives, it # does not interrupt a syscall that's in progress. - interrupted = self.readpipe_interrupted(False) + interrupted = self.readpipe_interrupted(False, timeout=2) self.assertFalse(interrupted) From 010ea93b2b888149561becefeee90826bf8a2934 Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Tue, 4 Jun 2024 19:46:33 +0200 Subject: [PATCH 67/72] gh-119553: Clear reader on Ctrl-C command (GH-119801) --- Lib/_pyrepl/commands.py | 1 + Lib/test/test_pyrepl/support.py | 2 ++ Lib/test/test_pyrepl/test_reader.py | 16 ++++++++++++++++ 3 files changed, 19 insertions(+) diff --git a/Lib/_pyrepl/commands.py b/Lib/_pyrepl/commands.py index 2ef5dada9d9e58..e94e8c25d379c1 100644 --- a/Lib/_pyrepl/commands.py +++ b/Lib/_pyrepl/commands.py @@ -221,6 +221,7 @@ def do(self) -> None: class ctrl_c(Command): def do(self) -> None: + self.reader.finish() raise KeyboardInterrupt diff --git a/Lib/test/test_pyrepl/support.py b/Lib/test/test_pyrepl/support.py index e807b5f3404550..70e12286f7d781 100644 --- a/Lib/test/test_pyrepl/support.py +++ b/Lib/test/test_pyrepl/support.py @@ -75,6 +75,8 @@ def handle_all_events( reader.handle1() except StopIteration: pass + except KeyboardInterrupt: + pass return reader, console diff --git a/Lib/test/test_pyrepl/test_reader.py b/Lib/test/test_pyrepl/test_reader.py index d02815bfa11d74..079c963d19aad5 100644 --- a/Lib/test/test_pyrepl/test_reader.py +++ b/Lib/test/test_pyrepl/test_reader.py @@ -179,6 +179,22 @@ def test_newline_within_block_trailing_whitespace(self): self.assert_screen_equals(reader, expected) self.assertTrue(reader.finished) + def test_keyboard_interrupt_clears_screen(self): + namespace = {"itertools": itertools} + code = "import itertools\nitertools." + events = itertools.chain(code_to_events(code), [ + Event(evt='key', data='\t', raw=bytearray(b'\t')), # Two tabs for completion + Event(evt='key', data='\t', raw=bytearray(b'\t')), + Event(evt='key', data='\x03', raw=bytearray(b'\x03')), # Ctrl-C + ]) + + completing_reader = functools.partial( + prepare_reader, + readline_completer=rlcompleter.Completer(namespace).complete + ) + reader, _ = handle_all_events(events, prepare_reader=completing_reader) + self.assertEqual(reader.calc_screen(), code.split("\n")) + def test_prompt_length(self): # Handles simple ASCII prompt ps1 = ">>> " From bf5e1065f4ec2077c6ca352fc1ad940a76d1f6c9 Mon Sep 17 00:00:00 2001 From: Paulo Freitas Date: Tue, 4 Jun 2024 14:55:11 -0300 Subject: [PATCH 68/72] doc: Mention the missing reflected special methods for all binary operations (GH-119931) --- Doc/reference/expressions.rst | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/Doc/reference/expressions.rst b/Doc/reference/expressions.rst index 00b57effd3e1c0..872773f4d28235 100644 --- a/Doc/reference/expressions.rst +++ b/Doc/reference/expressions.rst @@ -1211,7 +1211,8 @@ Raising ``0.0`` to a negative power results in a :exc:`ZeroDivisionError`. Raising a negative number to a fractional power results in a :class:`complex` number. (In earlier versions it raised a :exc:`ValueError`.) -This operation can be customized using the special :meth:`~object.__pow__` method. +This operation can be customized using the special :meth:`~object.__pow__` and +:meth:`~object.__rpow__` methods. .. _unary: @@ -1299,6 +1300,9 @@ This operation can be customized using the special :meth:`~object.__mul__` and The ``@`` (at) operator is intended to be used for matrix multiplication. No builtin Python types implement this operator. +This operation can be customized using the special :meth:`~object.__matmul__` and +:meth:`~object.__rmatmul__` methods. + .. versionadded:: 3.5 .. index:: @@ -1314,8 +1318,10 @@ integer; the result is that of mathematical division with the 'floor' function applied to the result. Division by zero raises the :exc:`ZeroDivisionError` exception. -This operation can be customized using the special :meth:`~object.__truediv__` and -:meth:`~object.__floordiv__` methods. +The division operation can be customized using the special :meth:`~object.__truediv__` +and :meth:`~object.__rtruediv__` methods. +The floor division operation can be customized using the special +:meth:`~object.__floordiv__` and :meth:`~object.__rfloordiv__` methods. .. index:: single: modulo @@ -1340,7 +1346,8 @@ also overloaded by string objects to perform old-style string formatting (also known as interpolation). The syntax for string formatting is described in the Python Library Reference, section :ref:`old-string-formatting`. -The *modulo* operation can be customized using the special :meth:`~object.__mod__` method. +The *modulo* operation can be customized using the special :meth:`~object.__mod__` +and :meth:`~object.__rmod__` methods. The floor division operator, the modulo operator, and the :func:`divmod` function are not defined for complex numbers. Instead, convert to a floating @@ -1367,7 +1374,8 @@ This operation can be customized using the special :meth:`~object.__add__` and The ``-`` (subtraction) operator yields the difference of its arguments. The numeric arguments are first converted to a common type. -This operation can be customized using the special :meth:`~object.__sub__` method. +This operation can be customized using the special :meth:`~object.__sub__` and +:meth:`~object.__rsub__` methods. .. _shifting: @@ -1388,8 +1396,10 @@ The shifting operations have lower priority than the arithmetic operations: These operators accept integers as arguments. They shift the first argument to the left or right by the number of bits given by the second argument. -This operation can be customized using the special :meth:`~object.__lshift__` and -:meth:`~object.__rshift__` methods. +The left shift operation can be customized using the special :meth:`~object.__lshift__` +and :meth:`~object.__rlshift__` methods. +The right shift operation can be customized using the special :meth:`~object.__rshift__` +and :meth:`~object.__rrshift__` methods. .. index:: pair: exception; ValueError From d9095194dde27eaabfc0b86a11989cdb9a2acfe1 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Tue, 4 Jun 2024 19:32:43 +0100 Subject: [PATCH 69/72] gh-119842: Honor PyOS_InputHook in the new REPL (GH-119843) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Pablo Galindo Co-authored-by: Łukasz Langa Co-authored-by: Michael Droettboom --- Lib/_pyrepl/console.py | 12 +++++- Lib/_pyrepl/reader.py | 10 ++++- Lib/_pyrepl/unix_console.py | 22 ++++++++--- Lib/_pyrepl/windows_console.py | 22 ++++++++++- Lib/test/test_pyrepl/test_reader.py | 17 +++++++++ ...-05-31-12-06-11.gh-issue-119842.tCGVsv.rst | 1 + Modules/clinic/posixmodule.c.h | 38 ++++++++++++++++++- Modules/posixmodule.c | 33 ++++++++++++++++ 8 files changed, 144 insertions(+), 11 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-05-31-12-06-11.gh-issue-119842.tCGVsv.rst diff --git a/Lib/_pyrepl/console.py b/Lib/_pyrepl/console.py index aa0bde865825c9..a8d3f520340dcf 100644 --- a/Lib/_pyrepl/console.py +++ b/Lib/_pyrepl/console.py @@ -33,6 +33,7 @@ if TYPE_CHECKING: from typing import IO + from typing import Callable @dataclass @@ -134,8 +135,15 @@ def getpending(self) -> Event: ... @abstractmethod - def wait(self) -> None: - """Wait for an event.""" + def wait(self, timeout: float | None) -> bool: + """Wait for an event. The return value is True if an event is + available, False if the timeout has been reached. If timeout is + None, wait forever. The timeout is in milliseconds.""" + ... + + @property + def input_hook(self) -> Callable[[], int] | None: + """Returns the current input hook.""" ... @abstractmethod diff --git a/Lib/_pyrepl/reader.py b/Lib/_pyrepl/reader.py index f2e68ef6f3ee66..beee7764e0eb84 100644 --- a/Lib/_pyrepl/reader.py +++ b/Lib/_pyrepl/reader.py @@ -650,7 +650,15 @@ def handle1(self, block: bool = True) -> bool: self.dirty = True while True: - event = self.console.get_event(block) + input_hook = self.console.input_hook + if input_hook: + input_hook() + # We use the same timeout as in readline.c: 100ms + while not self.console.wait(100): + input_hook() + event = self.console.get_event(block=False) + else: + event = self.console.get_event(block) if not event: # can only happen if we're not blocking return False diff --git a/Lib/_pyrepl/unix_console.py b/Lib/_pyrepl/unix_console.py index 4bdb02261982c3..2f73a59dd1fced 100644 --- a/Lib/_pyrepl/unix_console.py +++ b/Lib/_pyrepl/unix_console.py @@ -118,9 +118,12 @@ def __init__(self): def register(self, fd, flag): self.fd = fd - - def poll(self): # note: a 'timeout' argument would be *milliseconds* - r, w, e = select.select([self.fd], [], []) + # note: The 'timeout' argument is received as *milliseconds* + def poll(self, timeout: float | None = None) -> list[int]: + if timeout is None: + r, w, e = select.select([self.fd], [], []) + else: + r, w, e = select.select([self.fd], [], [], timeout/1000) return r poll = MinimalPoll # type: ignore[assignment] @@ -385,11 +388,11 @@ def get_event(self, block: bool = True) -> Event | None: break return self.event_queue.get() - def wait(self): + def wait(self, timeout: float | None = None) -> bool: """ Wait for events on the console. """ - self.pollob.poll() + return bool(self.pollob.poll(timeout)) def set_cursor_vis(self, visible): """ @@ -527,6 +530,15 @@ def clear(self): self.__posxy = 0, 0 self.screen = [] + @property + def input_hook(self): + try: + import posix + except ImportError: + return None + if posix._is_inputhook_installed(): + return posix._inputhook + def __enable_bracketed_paste(self) -> None: os.write(self.output_fd, b"\x1b[?2004h") diff --git a/Lib/_pyrepl/windows_console.py b/Lib/_pyrepl/windows_console.py index 2277865e3262fc..f691ca3fbb07b8 100644 --- a/Lib/_pyrepl/windows_console.py +++ b/Lib/_pyrepl/windows_console.py @@ -23,6 +23,8 @@ from multiprocessing import Value import os import sys +import time +import msvcrt from abc import ABC, abstractmethod from collections import deque @@ -202,6 +204,15 @@ def refresh(self, screen: list[str], c_xy: tuple[int, int]) -> None: self.screen = screen self.move_cursor(cx, cy) + @property + def input_hook(self): + try: + import nt + except ImportError: + return None + if nt._is_inputhook_installed(): + return nt._inputhook + def __write_changed_line( self, y: int, oldline: str, newline: str, px_coord: int ) -> None: @@ -460,9 +471,16 @@ def getpending(self) -> Event: processed.""" return Event("key", "", b"") - def wait(self) -> None: + def wait(self, timeout: float | None) -> bool: """Wait for an event.""" - raise NotImplementedError("No wait support") + # Poor man's Windows select loop + start_time = time.time() + while True: + if msvcrt.kbhit(): # type: ignore[attr-defined] + return True + if timeout and time.time() - start_time > timeout: + return False + time.sleep(0.01) def repaint(self) -> None: raise NotImplementedError("No repaint support") diff --git a/Lib/test/test_pyrepl/test_reader.py b/Lib/test/test_pyrepl/test_reader.py index 079c963d19aad5..78b11323d60a85 100644 --- a/Lib/test/test_pyrepl/test_reader.py +++ b/Lib/test/test_pyrepl/test_reader.py @@ -2,8 +2,10 @@ import functools import rlcompleter from unittest import TestCase +from unittest.mock import MagicMock, patch from .support import handle_all_events, handle_events_narrow_console, code_to_events, prepare_reader +from test.support import import_helper from _pyrepl.console import Event from _pyrepl.reader import Reader @@ -179,6 +181,21 @@ def test_newline_within_block_trailing_whitespace(self): self.assert_screen_equals(reader, expected) self.assertTrue(reader.finished) + def test_input_hook_is_called_if_set(self): + input_hook = MagicMock() + def _prepare_console(events): + console = MagicMock() + console.get_event.side_effect = events + console.height = 100 + console.width = 80 + console.input_hook = input_hook + return console + + events = code_to_events("a") + reader, _ = handle_all_events(events, prepare_console=_prepare_console) + + self.assertEqual(len(input_hook.mock_calls), 4) + def test_keyboard_interrupt_clears_screen(self): namespace = {"itertools": itertools} code = "import itertools\nitertools." diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-05-31-12-06-11.gh-issue-119842.tCGVsv.rst b/Misc/NEWS.d/next/Core and Builtins/2024-05-31-12-06-11.gh-issue-119842.tCGVsv.rst new file mode 100644 index 00000000000000..2fcb170f6226e5 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-05-31-12-06-11.gh-issue-119842.tCGVsv.rst @@ -0,0 +1 @@ +Honor :c:func:`PyOS_InputHook` in the new REPL. Patch by Pablo Galindo diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index 83dcc7a60c2110..69fc178331c09c 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -12116,6 +12116,42 @@ os__supports_virtual_terminal(PyObject *module, PyObject *Py_UNUSED(ignored)) #endif /* defined(MS_WINDOWS) */ +PyDoc_STRVAR(os__inputhook__doc__, +"_inputhook($module, /)\n" +"--\n" +"\n" +"Calls PyOS_CallInputHook droppong the GIL first"); + +#define OS__INPUTHOOK_METHODDEF \ + {"_inputhook", (PyCFunction)os__inputhook, METH_NOARGS, os__inputhook__doc__}, + +static PyObject * +os__inputhook_impl(PyObject *module); + +static PyObject * +os__inputhook(PyObject *module, PyObject *Py_UNUSED(ignored)) +{ + return os__inputhook_impl(module); +} + +PyDoc_STRVAR(os__is_inputhook_installed__doc__, +"_is_inputhook_installed($module, /)\n" +"--\n" +"\n" +"Checks if PyOS_CallInputHook is set"); + +#define OS__IS_INPUTHOOK_INSTALLED_METHODDEF \ + {"_is_inputhook_installed", (PyCFunction)os__is_inputhook_installed, METH_NOARGS, os__is_inputhook_installed__doc__}, + +static PyObject * +os__is_inputhook_installed_impl(PyObject *module); + +static PyObject * +os__is_inputhook_installed(PyObject *module, PyObject *Py_UNUSED(ignored)) +{ + return os__is_inputhook_installed_impl(module); +} + #ifndef OS_TTYNAME_METHODDEF #define OS_TTYNAME_METHODDEF #endif /* !defined(OS_TTYNAME_METHODDEF) */ @@ -12783,4 +12819,4 @@ os__supports_virtual_terminal(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #define OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #endif /* !defined(OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF) */ -/*[clinic end generated code: output=49c2d7a65f7a9f3b input=a9049054013a1b77]*/ +/*[clinic end generated code: output=faaa5e5ffb7b165d input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 1251ea63348946..386e942d53f539 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -16784,6 +16784,37 @@ os__supports_virtual_terminal_impl(PyObject *module) } #endif +/*[clinic input] +os._inputhook + +Calls PyOS_CallInputHook droppong the GIL first +[clinic start generated code]*/ + +static PyObject * +os__inputhook_impl(PyObject *module) +/*[clinic end generated code: output=525aca4ef3c6149f input=fc531701930d064f]*/ +{ + int result = 0; + if (PyOS_InputHook) { + Py_BEGIN_ALLOW_THREADS; + result = PyOS_InputHook(); + Py_END_ALLOW_THREADS; + } + return PyLong_FromLong(result); +} + +/*[clinic input] +os._is_inputhook_installed + +Checks if PyOS_CallInputHook is set +[clinic start generated code]*/ + +static PyObject * +os__is_inputhook_installed_impl(PyObject *module) +/*[clinic end generated code: output=3b3eab4f672c689a input=ff177c9938dd76d8]*/ +{ + return PyBool_FromLong(PyOS_InputHook != NULL); +} static PyMethodDef posix_methods[] = { @@ -16997,6 +17028,8 @@ static PyMethodDef posix_methods[] = { OS__PATH_LEXISTS_METHODDEF OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF + OS__INPUTHOOK_METHODDEF + OS__IS_INPUTHOOK_INSTALLED_METHODDEF {NULL, NULL} /* Sentinel */ }; From 710cbea6604d27c7d59ae4953bf522b997a82cc7 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 4 Jun 2024 14:59:23 -0400 Subject: [PATCH 70/72] gh-120048: Make `test_imaplib` faster (#120050) The `test_imaplib` was taking 40+ minutes in the refleak build bots because the tests waiting on a client `self._setup()` was creating a client that prevented progress until its connection timed out, which scaled with the global timeout. We should set `connect=False` for the tests that don't want `_setup()` to create a client. Co-authored-by: Serhiy Storchaka --- Lib/test/test_imaplib.py | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/Lib/test/test_imaplib.py b/Lib/test/test_imaplib.py index 79bf7dbdbb81a0..b5384b59463742 100644 --- a/Lib/test/test_imaplib.py +++ b/Lib/test/test_imaplib.py @@ -458,18 +458,14 @@ def test_simple_with_statement(self): with self.imap_class(*server.server_address): pass - @requires_resource('walltime') def test_imaplib_timeout_test(self): - _, server = self._setup(SimpleIMAPHandler) - addr = server.server_address[1] - client = self.imap_class("localhost", addr, timeout=None) - self.assertEqual(client.sock.timeout, None) - client.shutdown() - client = self.imap_class("localhost", addr, timeout=support.LOOPBACK_TIMEOUT) - self.assertEqual(client.sock.timeout, support.LOOPBACK_TIMEOUT) - client.shutdown() + _, server = self._setup(SimpleIMAPHandler, connect=False) + with self.imap_class(*server.server_address, timeout=None) as client: + self.assertEqual(client.sock.timeout, None) + with self.imap_class(*server.server_address, timeout=support.LOOPBACK_TIMEOUT) as client: + self.assertEqual(client.sock.timeout, support.LOOPBACK_TIMEOUT) with self.assertRaises(ValueError): - client = self.imap_class("localhost", addr, timeout=0) + self.imap_class(*server.server_address, timeout=0) def test_imaplib_timeout_functionality_test(self): class TimeoutHandler(SimpleIMAPHandler): @@ -552,7 +548,6 @@ class NewIMAPSSLTests(NewIMAPTestsMixin, unittest.TestCase): imap_class = IMAP4_SSL server_class = SecureTCPServer - @requires_resource('walltime') def test_ssl_raises(self): ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) self.assertEqual(ssl_context.verify_mode, ssl.CERT_REQUIRED) @@ -566,17 +561,16 @@ def test_ssl_raises(self): CERTIFICATE_VERIFY_FAILED # AWS-LC )""", re.X) with self.assertRaisesRegex(ssl.CertificateError, regex): - _, server = self._setup(SimpleIMAPHandler) + _, server = self._setup(SimpleIMAPHandler, connect=False) client = self.imap_class(*server.server_address, ssl_context=ssl_context) client.shutdown() - @requires_resource('walltime') def test_ssl_verified(self): ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) ssl_context.load_verify_locations(CAFILE) - _, server = self._setup(SimpleIMAPHandler) + _, server = self._setup(SimpleIMAPHandler, connect=False) client = self.imap_class("localhost", server.server_address[1], ssl_context=ssl_context) client.shutdown() From 109e1082ea92f89d42cd70f2cc7ca6fba6be9bab Mon Sep 17 00:00:00 2001 From: Vinay Sajip Date: Tue, 4 Jun 2024 20:16:43 +0100 Subject: [PATCH 71/72] gh-119819: Update test to skip if _multiprocessing is unavailable. (GH-120067) --- Lib/test/test_logging.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_logging.py b/Lib/test/test_logging.py index d3e5ac2be2e21e..0c9a24e58dfd8c 100644 --- a/Lib/test/test_logging.py +++ b/Lib/test/test_logging.py @@ -3926,9 +3926,9 @@ def test_config_queue_handler(self): msg = str(ctx.exception) self.assertEqual(msg, "Unable to configure handler 'ah'") - @unittest.skipIf(support.is_wasi, "WASI does not have multiprocessing.") def test_multiprocessing_queues(self): # See gh-119819 + import_helper.import_module('_multiprocessing') # will skip test if it's not available cd = copy.deepcopy(self.config_queue_handler) from multiprocessing import Queue as MQ, Manager as MM q1 = MQ() # this can't be pickled From 4055577221f5f52af329e87f31d81bb8fb02c504 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 4 Jun 2024 15:26:26 -0400 Subject: [PATCH 72/72] gh-119999: Fix potential race condition in `_Py_ExplicitMergeRefcount` (#120000) We need to write to `ob_ref_local` and `ob_tid` before `ob_ref_shared`. Once we mark `ob_ref_shared` as merged, some other thread may free the object because the caller also passes in `-1` as `extra` to give up its only reference. --- Objects/object.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/Objects/object.c b/Objects/object.c index 2e9962f4651e1c..b7730475ac3768 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -401,24 +401,27 @@ Py_ssize_t _Py_ExplicitMergeRefcount(PyObject *op, Py_ssize_t extra) { assert(!_Py_IsImmortal(op)); + +#ifdef Py_REF_DEBUG + _Py_AddRefTotal(_PyThreadState_GET(), extra); +#endif + + // gh-119999: Write to ob_ref_local and ob_tid before merging the refcount. + Py_ssize_t local = (Py_ssize_t)op->ob_ref_local; + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, 0); + _Py_atomic_store_uintptr_relaxed(&op->ob_tid, 0); + Py_ssize_t refcnt; Py_ssize_t new_shared; Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&op->ob_ref_shared); do { refcnt = Py_ARITHMETIC_RIGHT_SHIFT(Py_ssize_t, shared, _Py_REF_SHARED_SHIFT); - refcnt += (Py_ssize_t)op->ob_ref_local; + refcnt += local; refcnt += extra; new_shared = _Py_REF_SHARED(refcnt, _Py_REF_MERGED); } while (!_Py_atomic_compare_exchange_ssize(&op->ob_ref_shared, &shared, new_shared)); - -#ifdef Py_REF_DEBUG - _Py_AddRefTotal(_PyThreadState_GET(), extra); -#endif - - _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, 0); - _Py_atomic_store_uintptr_relaxed(&op->ob_tid, 0); return refcnt; } #endif /* Py_GIL_DISABLED */