From 72abb8c5d487ead9eb115fec8132ccef5ba189e5 Mon Sep 17 00:00:00 2001 From: Skip Montanaro Date: Thu, 18 Jan 2024 16:18:42 -0600 Subject: [PATCH 001/160] gh-114123: Migrate docstring from _csv to csv (#114124) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Co-authored-by: Éric --- Lib/csv.py | 76 +++++++++++++++++-- Lib/test/test_csv.py | 3 +- ...-01-17-11-40-03.gh-issue-114123.LuueXf.rst | 7 ++ Modules/_csv.c | 71 +---------------- 4 files changed, 78 insertions(+), 79 deletions(-) create mode 100644 Misc/NEWS.d/next/Documentation/2024-01-17-11-40-03.gh-issue-114123.LuueXf.rst diff --git a/Lib/csv.py b/Lib/csv.py index 77f30c8d2b1f61..a079279b8b8cbc 100644 --- a/Lib/csv.py +++ b/Lib/csv.py @@ -1,28 +1,90 @@ -""" -csv.py - read/write/investigate CSV files +r""" +CSV parsing and writing. + +This module provides classes that assist in the reading and writing +of Comma Separated Value (CSV) files, and implements the interface +described by PEP 305. Although many CSV files are simple to parse, +the format is not formally defined by a stable specification and +is subtle enough that parsing lines of a CSV file with something +like line.split(",") is bound to fail. The module supports three +basic APIs: reading, writing, and registration of dialects. + + +DIALECT REGISTRATION: + +Readers and writers support a dialect argument, which is a convenient +handle on a group of settings. When the dialect argument is a string, +it identifies one of the dialects previously registered with the module. 
+If it is a class or instance, the attributes of the argument are used as +the settings for the reader or writer: + + class excel: + delimiter = ',' + quotechar = '"' + escapechar = None + doublequote = True + skipinitialspace = False + lineterminator = '\r\n' + quoting = QUOTE_MINIMAL + +SETTINGS: + + * quotechar - specifies a one-character string to use as the + quoting character. It defaults to '"'. + * delimiter - specifies a one-character string to use as the + field separator. It defaults to ','. + * skipinitialspace - specifies how to interpret spaces which + immediately follow a delimiter. It defaults to False, which + means that spaces immediately following a delimiter is part + of the following field. + * lineterminator - specifies the character sequence which should + terminate rows. + * quoting - controls when quotes should be generated by the writer. + It can take on any of the following module constants: + + csv.QUOTE_MINIMAL means only when required, for example, when a + field contains either the quotechar or the delimiter + csv.QUOTE_ALL means that quotes are always placed around fields. + csv.QUOTE_NONNUMERIC means that quotes are always placed around + fields which do not parse as integers or floating point + numbers. + csv.QUOTE_STRINGS means that quotes are always placed around + fields which are strings. Note that the Python value None + is not a string. + csv.QUOTE_NOTNULL means that quotes are only placed around fields + that are not the Python value None. + csv.QUOTE_NONE means that quotes are never placed around fields. + * escapechar - specifies a one-character string used to escape + the delimiter when quoting is set to QUOTE_NONE. + * doublequote - controls the handling of quotes inside fields. 
When + True, two consecutive quotes are interpreted as one during read, + and when writing, each quote character embedded in the data is + written as two quotes """ import re import types -from _csv import Error, __version__, writer, reader, register_dialect, \ +from _csv import Error, writer, reader, register_dialect, \ unregister_dialect, get_dialect, list_dialects, \ field_size_limit, \ QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE, \ - QUOTE_STRINGS, QUOTE_NOTNULL, \ - __doc__ + QUOTE_STRINGS, QUOTE_NOTNULL from _csv import Dialect as _Dialect from io import StringIO __all__ = ["QUOTE_MINIMAL", "QUOTE_ALL", "QUOTE_NONNUMERIC", "QUOTE_NONE", "QUOTE_STRINGS", "QUOTE_NOTNULL", - "Error", "Dialect", "__doc__", "excel", "excel_tab", + "Error", "Dialect", "excel", "excel_tab", "field_size_limit", "reader", "writer", "register_dialect", "get_dialect", "list_dialects", "Sniffer", - "unregister_dialect", "__version__", "DictReader", "DictWriter", + "unregister_dialect", "DictReader", "DictWriter", "unix_dialect"] +__version__ = "1.0" + + class Dialect: """Describe a CSV dialect. 
diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index 282cff4344b77f..36da86e6a2c622 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -1416,8 +1416,7 @@ def test_ordered_dict_reader(self): class MiscTestCase(unittest.TestCase): def test__all__(self): - extra = {'__doc__', '__version__'} - support.check__all__(self, csv, ('csv', '_csv'), extra=extra) + support.check__all__(self, csv, ('csv', '_csv')) def test_subclassable(self): # issue 44089 diff --git a/Misc/NEWS.d/next/Documentation/2024-01-17-11-40-03.gh-issue-114123.LuueXf.rst b/Misc/NEWS.d/next/Documentation/2024-01-17-11-40-03.gh-issue-114123.LuueXf.rst new file mode 100644 index 00000000000000..1d93a422840077 --- /dev/null +++ b/Misc/NEWS.d/next/Documentation/2024-01-17-11-40-03.gh-issue-114123.LuueXf.rst @@ -0,0 +1,7 @@ +Move the :mod:`csv` module docstring to the :mod:`!csv` module +instead of reexporting it from the internal :mod:`!_csv` module, +and remove ``__doc__`` from ``csv.__all__``. + +Move :attr:`!csv.__version__` to the :mod:`!csv` module +instead of reexporting it from the internal :mod:`!_csv` module, +and remove ``__version__`` from ``csv.__all__``. diff --git a/Modules/_csv.c b/Modules/_csv.c index d45a15aa8c255a..8d941563025580 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -8,8 +8,6 @@ module instead. */ -#define MODULE_VERSION "1.0" - // clinic/_csv.c.h uses internal pycore_modsupport.h API #ifndef Py_BUILD_CORE_BUILTIN # define Py_BUILD_CORE_MODULE 1 @@ -1607,68 +1605,7 @@ PyType_Spec error_spec = { * MODULE */ -PyDoc_STRVAR(csv_module_doc, -"CSV parsing and writing.\n" -"\n" -"This module provides classes that assist in the reading and writing\n" -"of Comma Separated Value (CSV) files, and implements the interface\n" -"described by PEP 305. 
Although many CSV files are simple to parse,\n" -"the format is not formally defined by a stable specification and\n" -"is subtle enough that parsing lines of a CSV file with something\n" -"like line.split(\",\") is bound to fail. The module supports three\n" -"basic APIs: reading, writing, and registration of dialects.\n" -"\n" -"\n" -"DIALECT REGISTRATION:\n" -"\n" -"Readers and writers support a dialect argument, which is a convenient\n" -"handle on a group of settings. When the dialect argument is a string,\n" -"it identifies one of the dialects previously registered with the module.\n" -"If it is a class or instance, the attributes of the argument are used as\n" -"the settings for the reader or writer:\n" -"\n" -" class excel:\n" -" delimiter = ','\n" -" quotechar = '\"'\n" -" escapechar = None\n" -" doublequote = True\n" -" skipinitialspace = False\n" -" lineterminator = '\\r\\n'\n" -" quoting = QUOTE_MINIMAL\n" -"\n" -"SETTINGS:\n" -"\n" -" * quotechar - specifies a one-character string to use as the\n" -" quoting character. It defaults to '\"'.\n" -" * delimiter - specifies a one-character string to use as the\n" -" field separator. It defaults to ','.\n" -" * skipinitialspace - specifies how to interpret spaces which\n" -" immediately follow a delimiter. 
It defaults to False, which\n" -" means that spaces immediately following a delimiter is part\n" -" of the following field.\n" -" * lineterminator - specifies the character sequence which should\n" -" terminate rows.\n" -" * quoting - controls when quotes should be generated by the writer.\n" -" It can take on any of the following module constants:\n" -"\n" -" csv.QUOTE_MINIMAL means only when required, for example, when a\n" -" field contains either the quotechar or the delimiter\n" -" csv.QUOTE_ALL means that quotes are always placed around fields.\n" -" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n" -" fields which do not parse as integers or floating point\n" -" numbers.\n" -" csv.QUOTE_STRINGS means that quotes are always placed around\n" -" fields which are strings. Note that the Python value None\n" -" is not a string.\n" -" csv.QUOTE_NOTNULL means that quotes are only placed around fields\n" -" that are not the Python value None.\n" -" csv.QUOTE_NONE means that quotes are never placed around fields.\n" -" * escapechar - specifies a one-character string used to escape\n" -" the delimiter when quoting is set to QUOTE_NONE.\n" -" * doublequote - controls the handling of quotes inside fields. When\n" -" True, two consecutive quotes are interpreted as one during read,\n" -" and when writing, each quote character embedded in the data is\n" -" written as two quotes\n"); +PyDoc_STRVAR(csv_module_doc, "CSV parsing and writing.\n"); PyDoc_STRVAR(csv_reader_doc, " csv_reader = reader(iterable [, dialect='excel']\n" @@ -1741,12 +1678,6 @@ csv_exec(PyObject *module) { return -1; } - /* Add version to the module. 
*/ - if (PyModule_AddStringConstant(module, "__version__", - MODULE_VERSION) == -1) { - return -1; - } - /* Set the field limit */ module_state->field_limit = 128 * 1024; From d5442851a6fad16ba32b62723c09e40a1392aa38 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Fri, 19 Jan 2024 08:03:28 +0900 Subject: [PATCH 002/160] gh-112087: Remove duplicated critical_section (gh-114268) --- Objects/listobject.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Objects/listobject.c b/Objects/listobject.c index 79ef8f532fbb98..401d1026133f4e 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -816,13 +816,10 @@ static PyObject * list_insert_impl(PyListObject *self, Py_ssize_t index, PyObject *object) /*[clinic end generated code: output=7f35e32f60c8cb78 input=b1987ca998a4ae2d]*/ { - PyObject *ret = Py_None; - Py_BEGIN_CRITICAL_SECTION(self); - if (ins1(self, index, object) < 0) { - ret = NULL; + if (ins1(self, index, object) == 0) { + Py_RETURN_NONE; } - Py_END_CRITICAL_SECTION(); - return ret; + return NULL; } /*[clinic input] From a34e4db28a98904f6c9976675ed7121ed61edabe Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Fri, 19 Jan 2024 08:25:13 +0900 Subject: [PATCH 003/160] gh-111968: Fix --without-freelists build (gh-114270) --- Objects/sliceobject.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/Objects/sliceobject.c b/Objects/sliceobject.c index 9ec8ea8e1b307f..440c1da30620c3 100644 --- a/Objects/sliceobject.c +++ b/Objects/sliceobject.c @@ -105,16 +105,20 @@ PyObject _Py_EllipsisObject = _PyObject_HEAD_INIT(&PyEllipsis_Type); void _PySlice_ClearCache(_PyFreeListState *state) { +#ifdef WITH_FREELISTS PySliceObject *obj = state->slice_state.slice_cache; if (obj != NULL) { state->slice_state.slice_cache = NULL; PyObject_GC_Del(obj); } +#endif } void _PySlice_Fini(_PyFreeListState *state) { +#ifdef WITH_FREELISTS _PySlice_ClearCache(state); +#endif } /* start, stop, and step are python objects with 
None indicating no @@ -125,15 +129,17 @@ static PySliceObject * _PyBuildSlice_Consume2(PyObject *start, PyObject *stop, PyObject *step) { assert(start != NULL && stop != NULL && step != NULL); - - _PyFreeListState *state = _PyFreeListState_GET(); PySliceObject *obj; +#ifdef WITH_FREELISTS + _PyFreeListState *state = _PyFreeListState_GET(); if (state->slice_state.slice_cache != NULL) { obj = state->slice_state.slice_cache; state->slice_state.slice_cache = NULL; _Py_NewReference((PyObject *)obj); } - else { + else +#endif + { obj = PyObject_GC_New(PySliceObject, &PySlice_Type); if (obj == NULL) { goto error; @@ -358,15 +364,18 @@ Create a slice object. This is used for extended slicing (e.g. a[0:10:2])."); static void slice_dealloc(PySliceObject *r) { - _PyFreeListState *state = _PyFreeListState_GET(); _PyObject_GC_UNTRACK(r); Py_DECREF(r->step); Py_DECREF(r->start); Py_DECREF(r->stop); +#ifdef WITH_FREELISTS + _PyFreeListState *state = _PyFreeListState_GET(); if (state->slice_state.slice_cache == NULL) { state->slice_state.slice_cache = r; } - else { + else +#endif + { PyObject_GC_Del(r); } } From 05e47202a34e6ae05e699af1083455f5b8b59496 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Fri, 19 Jan 2024 13:25:05 +0300 Subject: [PATCH 004/160] gh-114286: Fix `maybe-uninitialized` warning in `Modules/_io/fileio.c` (GH-114287) --- Modules/_io/fileio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_io/fileio.c b/Modules/_io/fileio.c index af4375c3640679..9cf268ca0b26c8 100644 --- a/Modules/_io/fileio.c +++ b/Modules/_io/fileio.c @@ -157,7 +157,7 @@ _io_FileIO_close_impl(fileio *self, PyTypeObject *cls) return res; } - PyObject *exc; + PyObject *exc = NULL; if (res == NULL) { exc = PyErr_GetRaisedException(); } From 28eacf27efaa07cb8d9c0c3e9253a07b9012415f Mon Sep 17 00:00:00 2001 From: mpage Date: Fri, 19 Jan 2024 04:17:51 -0800 Subject: [PATCH 005/160] gh-113884: Refactor `queue.SimpleQueue` to use a ring buffer to store items (#114259) 
Use a ring buffer instead of a Python list in order to simplify the process of making queue.SimpleQueue thread-safe in free-threaded builds. The ring buffer implementation has no places where critical sections may be released. --- Modules/_queuemodule.c | 209 +++++++++++++++++++++++++++++++++-------- 1 file changed, 169 insertions(+), 40 deletions(-) diff --git a/Modules/_queuemodule.c b/Modules/_queuemodule.c index 81a06cdb79a4f2..8fca3cdd0deb18 100644 --- a/Modules/_queuemodule.c +++ b/Modules/_queuemodule.c @@ -7,6 +7,7 @@ #include "pycore_moduleobject.h" // _PyModule_GetState() #include "pycore_time.h" // _PyTime_t +#include <stdbool.h> #include <stddef.h> // offsetof() typedef struct { @@ -25,12 +26,167 @@ static struct PyModuleDef queuemodule; #define simplequeue_get_state_by_type(type) \ (simplequeue_get_state(PyType_GetModuleByDef(type, &queuemodule))) +static const Py_ssize_t INITIAL_RING_BUF_CAPACITY = 8; + +typedef struct { + // Where to place the next item + Py_ssize_t put_idx; + + // Where to get the next item + Py_ssize_t get_idx; + + PyObject **items; + + // Total number of items that may be stored + Py_ssize_t items_cap; + + // Number of items stored + Py_ssize_t num_items; +} RingBuf; + +static int +RingBuf_Init(RingBuf *buf) +{ + buf->put_idx = 0; + buf->get_idx = 0; + buf->items_cap = INITIAL_RING_BUF_CAPACITY; + buf->num_items = 0; + buf->items = PyMem_Calloc(buf->items_cap, sizeof(PyObject *)); + if (buf->items == NULL) { + PyErr_NoMemory(); + return -1; + } + return 0; +} + +static PyObject * +RingBuf_At(RingBuf *buf, Py_ssize_t idx) +{ + assert(idx >= 0 && idx < buf->num_items); + return buf->items[(buf->get_idx + idx) % buf->items_cap]; +} + +static void +RingBuf_Fini(RingBuf *buf) +{ + PyObject **items = buf->items; + Py_ssize_t num_items = buf->num_items; + Py_ssize_t cap = buf->items_cap; + Py_ssize_t idx = buf->get_idx; + buf->items = NULL; + buf->put_idx = 0; + buf->get_idx = 0; + buf->num_items = 0; + buf->items_cap = 0; + for (Py_ssize_t n = num_items; n > 
0; idx = (idx + 1) % cap, n--) { + Py_DECREF(items[idx]); + } + PyMem_Free(items); +} + +// Resize the underlying items array of buf to the new capacity and arrange +// the items contiguously in the new items array. +// +// Returns -1 on allocation failure or 0 on success. +static int +resize_ringbuf(RingBuf *buf, Py_ssize_t capacity) +{ + Py_ssize_t new_capacity = Py_MAX(INITIAL_RING_BUF_CAPACITY, capacity); + if (new_capacity == buf->items_cap) { + return 0; + } + assert(buf->num_items <= new_capacity); + + PyObject **new_items = PyMem_Calloc(new_capacity, sizeof(PyObject *)); + if (new_items == NULL) { + return -1; + } + + // Copy the "tail" of the old items array. This corresponds to "head" of + // the abstract ring buffer. + Py_ssize_t tail_size = + Py_MIN(buf->num_items, buf->items_cap - buf->get_idx); + if (tail_size > 0) { + memcpy(new_items, buf->items + buf->get_idx, + tail_size * sizeof(PyObject *)); + } + + // Copy the "head" of the old items array, if any. This corresponds to the + // "tail" of the abstract ring buffer. + Py_ssize_t head_size = buf->num_items - tail_size; + if (head_size > 0) { + memcpy(new_items + tail_size, buf->items, + head_size * sizeof(PyObject *)); + } + + PyMem_Free(buf->items); + buf->items = new_items; + buf->items_cap = new_capacity; + buf->get_idx = 0; + buf->put_idx = buf->num_items; + + return 0; +} + +// Returns a strong reference from the head of the buffer. +static PyObject * +RingBuf_Get(RingBuf *buf) +{ + assert(buf->num_items > 0); + + if (buf->num_items < (buf->items_cap / 4)) { + // Items is less than 25% occupied, shrink it by 50%. This allows for + // growth without immediately needing to resize the underlying items + // array. + // + // It's safe to ignore allocation failures here; shrinking is an + // optimization that isn't required for correctness. 
+ (void)resize_ringbuf(buf, buf->items_cap / 2); + } + + PyObject *item = buf->items[buf->get_idx]; + buf->items[buf->get_idx] = NULL; + buf->get_idx = (buf->get_idx + 1) % buf->items_cap; + buf->num_items--; + return item; +} + +// Returns 0 on success or -1 if the buffer failed to grow +static int +RingBuf_Put(RingBuf *buf, PyObject *item) +{ + assert(buf->num_items <= buf->items_cap); + + if (buf->num_items == buf->items_cap) { + // Buffer is full, grow it. + if (resize_ringbuf(buf, buf->items_cap * 2) < 0) { + PyErr_NoMemory(); + return -1; + } + } + buf->items[buf->put_idx] = Py_NewRef(item); + buf->put_idx = (buf->put_idx + 1) % buf->items_cap; + buf->num_items++; + return 0; +} + +static Py_ssize_t +RingBuf_Len(RingBuf *buf) +{ + return buf->num_items; +} + +static bool +RingBuf_IsEmpty(RingBuf *buf) +{ + return buf->num_items == 0; +} + typedef struct { PyObject_HEAD PyThread_type_lock lock; int locked; - PyObject *lst; - Py_ssize_t lst_pos; + RingBuf buf; PyObject *weakreflist; } simplequeueobject; @@ -43,7 +199,7 @@ class _queue.SimpleQueue "simplequeueobject *" "simplequeue_get_state_by_type(ty static int simplequeue_clear(simplequeueobject *self) { - Py_CLEAR(self->lst); + RingBuf_Fini(&self->buf); return 0; } @@ -69,7 +225,10 @@ simplequeue_dealloc(simplequeueobject *self) static int simplequeue_traverse(simplequeueobject *self, visitproc visit, void *arg) { - Py_VISIT(self->lst); + RingBuf *buf = &self->buf; + for (Py_ssize_t i = 0, num_items = buf->num_items; i < num_items; i++) { + Py_VISIT(RingBuf_At(buf, i)); + } Py_VISIT(Py_TYPE(self)); return 0; } @@ -90,15 +249,13 @@ simplequeue_new_impl(PyTypeObject *type) self = (simplequeueobject *) type->tp_alloc(type, 0); if (self != NULL) { self->weakreflist = NULL; - self->lst = PyList_New(0); self->lock = PyThread_allocate_lock(); - self->lst_pos = 0; if (self->lock == NULL) { Py_DECREF(self); PyErr_SetString(PyExc_MemoryError, "can't allocate lock"); return NULL; } - if (self->lst == NULL) { + if 
(RingBuf_Init(&self->buf) < 0) { Py_DECREF(self); return NULL; } @@ -126,7 +283,7 @@ _queue_SimpleQueue_put_impl(simplequeueobject *self, PyObject *item, /*[clinic end generated code: output=4333136e88f90d8b input=6e601fa707a782d5]*/ { /* BEGIN GIL-protected critical section */ - if (PyList_Append(self->lst, item) < 0) + if (RingBuf_Put(&self->buf, item) < 0) return NULL; if (self->locked) { /* A get() may be waiting, wake it up */ @@ -155,33 +312,6 @@ _queue_SimpleQueue_put_nowait_impl(simplequeueobject *self, PyObject *item) return _queue_SimpleQueue_put_impl(self, item, 0, Py_None); } -static PyObject * -simplequeue_pop_item(simplequeueobject *self) -{ - Py_ssize_t count, n; - PyObject *item; - - n = PyList_GET_SIZE(self->lst); - assert(self->lst_pos < n); - - item = PyList_GET_ITEM(self->lst, self->lst_pos); - Py_INCREF(Py_None); - PyList_SET_ITEM(self->lst, self->lst_pos, Py_None); - self->lst_pos += 1; - count = n - self->lst_pos; - if (self->lst_pos > count) { - /* The list is more than 50% empty, reclaim space at the beginning */ - if (PyList_SetSlice(self->lst, 0, self->lst_pos, NULL)) { - /* Undo pop */ - self->lst_pos -= 1; - PyList_SET_ITEM(self->lst, self->lst_pos, item); - return NULL; - } - self->lst_pos = 0; - } - return item; -} - /*[clinic input] _queue.SimpleQueue.get @@ -249,7 +379,7 @@ _queue_SimpleQueue_get_impl(simplequeueobject *self, PyTypeObject *cls, * So we simply try to acquire the lock in a loop, until the condition * (queue non-empty) becomes true. 
*/ - while (self->lst_pos == PyList_GET_SIZE(self->lst)) { + while (RingBuf_IsEmpty(&self->buf)) { /* First a simple non-blocking try without releasing the GIL */ r = PyThread_acquire_lock_timed(self->lock, 0, 0); if (r == PY_LOCK_FAILURE && microseconds != 0) { @@ -279,8 +409,7 @@ _queue_SimpleQueue_get_impl(simplequeueobject *self, PyTypeObject *cls, } /* BEGIN GIL-protected critical section */ - assert(self->lst_pos < PyList_GET_SIZE(self->lst)); - item = simplequeue_pop_item(self); + item = RingBuf_Get(&self->buf); if (self->locked) { PyThread_release_lock(self->lock); self->locked = 0; @@ -320,7 +449,7 @@ static int _queue_SimpleQueue_empty_impl(simplequeueobject *self) /*[clinic end generated code: output=1a02a1b87c0ef838 input=1a98431c45fd66f9]*/ { - return self->lst_pos == PyList_GET_SIZE(self->lst); + return RingBuf_IsEmpty(&self->buf); } /*[clinic input] @@ -333,7 +462,7 @@ static Py_ssize_t _queue_SimpleQueue_qsize_impl(simplequeueobject *self) /*[clinic end generated code: output=f9dcd9d0a90e121e input=7a74852b407868a1]*/ { - return PyList_GET_SIZE(self->lst) - self->lst_pos; + return RingBuf_Len(&self->buf); } static int From efb81a60f5ce7e192095230a0f7ff9684d6f835a Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Fri, 19 Jan 2024 16:00:34 +0300 Subject: [PATCH 006/160] gh-114275: Skip doctests that use `asyncio` in `test_pdb` for WASI builds (#114309) --- Lib/test/test_pdb.py | 410 ++++++++++++++++++++++--------------------- 1 file changed, 208 insertions(+), 202 deletions(-) diff --git a/Lib/test/test_pdb.py b/Lib/test/test_pdb.py index 6f982ce37df692..03487aa6ffd81f 100644 --- a/Lib/test/test_pdb.py +++ b/Lib/test/test_pdb.py @@ -19,6 +19,9 @@ from test.support.pty_helper import run_pty, FakeInput from unittest.mock import patch +# gh-114275: WASI fails to run asyncio tests, same skip as test_asyncio. 
+SKIP_ASYNCIO_TESTS = (not support.has_socket_support) + class PdbTestInput(object): """Context manager that makes testing Pdb in doctests easier.""" @@ -1693,122 +1696,123 @@ def test_pdb_next_command_for_generator(): finished """ -def test_pdb_next_command_for_coroutine(): - """Testing skip unwindng stack on yield for coroutines for "next" command - - >>> import asyncio - - >>> async def test_coro(): - ... await asyncio.sleep(0) - ... await asyncio.sleep(0) - ... await asyncio.sleep(0) - - >>> async def test_main(): - ... import pdb; pdb.Pdb(nosigint=True, readrc=False).set_trace() - ... await test_coro() - - >>> def test_function(): - ... loop = asyncio.new_event_loop() - ... loop.run_until_complete(test_main()) - ... loop.close() - ... asyncio.set_event_loop_policy(None) - ... print("finished") - - >>> with PdbTestInput(['step', - ... 'step', - ... 'next', - ... 'next', - ... 'next', - ... 'step', - ... 'continue']): - ... test_function() - > (3)test_main() - -> await test_coro() - (Pdb) step - --Call-- - > (1)test_coro() - -> async def test_coro(): - (Pdb) step - > (2)test_coro() - -> await asyncio.sleep(0) - (Pdb) next - > (3)test_coro() - -> await asyncio.sleep(0) - (Pdb) next - > (4)test_coro() - -> await asyncio.sleep(0) - (Pdb) next - Internal StopIteration - > (3)test_main() - -> await test_coro() - (Pdb) step - --Return-- - > (3)test_main()->None - -> await test_coro() - (Pdb) continue - finished - """ - -def test_pdb_next_command_for_asyncgen(): - """Testing skip unwindng stack on yield for coroutines for "next" command - - >>> import asyncio - - >>> async def agen(): - ... yield 1 - ... await asyncio.sleep(0) - ... yield 2 - - >>> async def test_coro(): - ... async for x in agen(): - ... print(x) - - >>> async def test_main(): - ... import pdb; pdb.Pdb(nosigint=True, readrc=False).set_trace() - ... await test_coro() - - >>> def test_function(): - ... loop = asyncio.new_event_loop() - ... loop.run_until_complete(test_main()) - ... loop.close() - ... 
asyncio.set_event_loop_policy(None) - ... print("finished") +if not SKIP_ASYNCIO_TESTS: + def test_pdb_next_command_for_coroutine(): + """Testing skip unwindng stack on yield for coroutines for "next" command + + >>> import asyncio + + >>> async def test_coro(): + ... await asyncio.sleep(0) + ... await asyncio.sleep(0) + ... await asyncio.sleep(0) + + >>> async def test_main(): + ... import pdb; pdb.Pdb(nosigint=True, readrc=False).set_trace() + ... await test_coro() + + >>> def test_function(): + ... loop = asyncio.new_event_loop() + ... loop.run_until_complete(test_main()) + ... loop.close() + ... asyncio.set_event_loop_policy(None) + ... print("finished") + + >>> with PdbTestInput(['step', + ... 'step', + ... 'next', + ... 'next', + ... 'next', + ... 'step', + ... 'continue']): + ... test_function() + > (3)test_main() + -> await test_coro() + (Pdb) step + --Call-- + > (1)test_coro() + -> async def test_coro(): + (Pdb) step + > (2)test_coro() + -> await asyncio.sleep(0) + (Pdb) next + > (3)test_coro() + -> await asyncio.sleep(0) + (Pdb) next + > (4)test_coro() + -> await asyncio.sleep(0) + (Pdb) next + Internal StopIteration + > (3)test_main() + -> await test_coro() + (Pdb) step + --Return-- + > (3)test_main()->None + -> await test_coro() + (Pdb) continue + finished + """ - >>> with PdbTestInput(['step', - ... 'step', - ... 'next', - ... 'next', - ... 'step', - ... 'next', - ... 'continue']): - ... 
test_function() - > (3)test_main() - -> await test_coro() - (Pdb) step - --Call-- - > (1)test_coro() - -> async def test_coro(): - (Pdb) step - > (2)test_coro() - -> async for x in agen(): - (Pdb) next - > (3)test_coro() - -> print(x) - (Pdb) next - 1 - > (2)test_coro() - -> async for x in agen(): - (Pdb) step - --Call-- - > (2)agen() - -> yield 1 - (Pdb) next - > (3)agen() - -> await asyncio.sleep(0) - (Pdb) continue - 2 - finished - """ + def test_pdb_next_command_for_asyncgen(): + """Testing skip unwindng stack on yield for coroutines for "next" command + + >>> import asyncio + + >>> async def agen(): + ... yield 1 + ... await asyncio.sleep(0) + ... yield 2 + + >>> async def test_coro(): + ... async for x in agen(): + ... print(x) + + >>> async def test_main(): + ... import pdb; pdb.Pdb(nosigint=True, readrc=False).set_trace() + ... await test_coro() + + >>> def test_function(): + ... loop = asyncio.new_event_loop() + ... loop.run_until_complete(test_main()) + ... loop.close() + ... asyncio.set_event_loop_policy(None) + ... print("finished") + + >>> with PdbTestInput(['step', + ... 'step', + ... 'next', + ... 'next', + ... 'step', + ... 'next', + ... 'continue']): + ... test_function() + > (3)test_main() + -> await test_coro() + (Pdb) step + --Call-- + > (1)test_coro() + -> async def test_coro(): + (Pdb) step + > (2)test_coro() + -> async for x in agen(): + (Pdb) next + > (3)test_coro() + -> print(x) + (Pdb) next + 1 + > (2)test_coro() + -> async for x in agen(): + (Pdb) step + --Call-- + > (2)agen() + -> yield 1 + (Pdb) next + > (3)agen() + -> await asyncio.sleep(0) + (Pdb) continue + 2 + finished + """ def test_pdb_return_command_for_generator(): """Testing no unwindng stack on yield for generators @@ -1865,47 +1869,48 @@ def test_pdb_return_command_for_generator(): finished """ -def test_pdb_return_command_for_coroutine(): - """Testing no unwindng stack on yield for coroutines for "return" command - - >>> import asyncio - - >>> async def test_coro(): - ... 
await asyncio.sleep(0) - ... await asyncio.sleep(0) - ... await asyncio.sleep(0) - - >>> async def test_main(): - ... import pdb; pdb.Pdb(nosigint=True, readrc=False).set_trace() - ... await test_coro() - - >>> def test_function(): - ... loop = asyncio.new_event_loop() - ... loop.run_until_complete(test_main()) - ... loop.close() - ... asyncio.set_event_loop_policy(None) - ... print("finished") - - >>> with PdbTestInput(['step', - ... 'step', - ... 'next', - ... 'continue']): - ... test_function() - > (3)test_main() - -> await test_coro() - (Pdb) step - --Call-- - > (1)test_coro() - -> async def test_coro(): - (Pdb) step - > (2)test_coro() - -> await asyncio.sleep(0) - (Pdb) next - > (3)test_coro() - -> await asyncio.sleep(0) - (Pdb) continue - finished - """ +if not SKIP_ASYNCIO_TESTS: + def test_pdb_return_command_for_coroutine(): + """Testing no unwindng stack on yield for coroutines for "return" command + + >>> import asyncio + + >>> async def test_coro(): + ... await asyncio.sleep(0) + ... await asyncio.sleep(0) + ... await asyncio.sleep(0) + + >>> async def test_main(): + ... import pdb; pdb.Pdb(nosigint=True, readrc=False).set_trace() + ... await test_coro() + + >>> def test_function(): + ... loop = asyncio.new_event_loop() + ... loop.run_until_complete(test_main()) + ... loop.close() + ... asyncio.set_event_loop_policy(None) + ... print("finished") + + >>> with PdbTestInput(['step', + ... 'step', + ... 'next', + ... 'continue']): + ... 
test_function() + > (3)test_main() + -> await test_coro() + (Pdb) step + --Call-- + > (1)test_coro() + -> async def test_coro(): + (Pdb) step + > (2)test_coro() + -> await asyncio.sleep(0) + (Pdb) next + > (3)test_coro() + -> await asyncio.sleep(0) + (Pdb) continue + finished + """ def test_pdb_until_command_for_generator(): """Testing no unwindng stack on yield for generators @@ -1951,52 +1956,53 @@ def test_pdb_until_command_for_generator(): finished """ -def test_pdb_until_command_for_coroutine(): - """Testing no unwindng stack for coroutines - for "until" command if target breakpoint is not reached - - >>> import asyncio - - >>> async def test_coro(): - ... print(0) - ... await asyncio.sleep(0) - ... print(1) - ... await asyncio.sleep(0) - ... print(2) - ... await asyncio.sleep(0) - ... print(3) - - >>> async def test_main(): - ... import pdb; pdb.Pdb(nosigint=True, readrc=False).set_trace() - ... await test_coro() - - >>> def test_function(): - ... loop = asyncio.new_event_loop() - ... loop.run_until_complete(test_main()) - ... loop.close() - ... asyncio.set_event_loop_policy(None) - ... print("finished") - - >>> with PdbTestInput(['step', - ... 'until 8', - ... 'continue']): - ... test_function() - > (3)test_main() - -> await test_coro() - (Pdb) step - --Call-- - > (1)test_coro() - -> async def test_coro(): - (Pdb) until 8 - 0 - 1 - 2 - > (8)test_coro() - -> print(3) - (Pdb) continue - 3 - finished - """ +if not SKIP_ASYNCIO_TESTS: + def test_pdb_until_command_for_coroutine(): + """Testing no unwindng stack for coroutines + for "until" command if target breakpoint is not reached + + >>> import asyncio + + >>> async def test_coro(): + ... print(0) + ... await asyncio.sleep(0) + ... print(1) + ... await asyncio.sleep(0) + ... print(2) + ... await asyncio.sleep(0) + ... print(3) + + >>> async def test_main(): + ... import pdb; pdb.Pdb(nosigint=True, readrc=False).set_trace() + ... await test_coro() + + >>> def test_function(): + ... 
loop = asyncio.new_event_loop() + ... loop.run_until_complete(test_main()) + ... loop.close() + ... asyncio.set_event_loop_policy(None) + ... print("finished") + + >>> with PdbTestInput(['step', + ... 'until 8', + ... 'continue']): + ... test_function() + > (3)test_main() + -> await test_coro() + (Pdb) step + --Call-- + > (1)test_coro() + -> async def test_coro(): + (Pdb) until 8 + 0 + 1 + 2 + > (8)test_coro() + -> print(3) + (Pdb) continue + 3 + finished + """ def test_pdb_next_command_in_generator_for_loop(): """The next command on returning from a generator controlled by a for loop. From 7e49f27b41d5728cde1f8790586d113ddc25f18d Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Fri, 19 Jan 2024 14:49:26 +0000 Subject: [PATCH 007/160] gh-114265: move line number propagation before cfg optimization, remove guarantee_lineno_for_exits (#114267) --- Lib/importlib/_bootstrap_external.py | 3 +- Lib/test/test_dis.py | 14 +-- ...-01-19-13-18-13.gh-issue-114265.7HAi--.rst | 1 + Python/flowgraph.c | 107 +++++++++--------- 4 files changed, 61 insertions(+), 64 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-01-19-13-18-13.gh-issue-114265.7HAi--.rst diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index 97858ee83f790f..a4d2b7e0184409 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -463,6 +463,7 @@ def _write_atomic(path, data, mode=0o666): # Python 3.13a1 3564 (Removed oparg from YIELD_VALUE, changed oparg values of RESUME) # Python 3.13a1 3565 (Oparg of YIELD_VALUE indicates whether it is in a yield-from) # Python 3.13a1 3566 (Emit JUMP_NO_INTERRUPT instead of JUMP for non-loop no-lineno cases) +# Python 3.13a1 3567 (Reimplement line number propagation by the compiler) # Python 3.14 will start with 3600 @@ -479,7 +480,7 @@ def _write_atomic(path, data, mode=0o666): # Whenever MAGIC_NUMBER is changed, the ranges in the 
magic_values array # in PC/launcher.c must also be updated. -MAGIC_NUMBER = (3566).to_bytes(2, 'little') + b'\r\n' +MAGIC_NUMBER = (3567).to_bytes(2, 'little') + b'\r\n' _RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 097f05afdf1517..3ae81b2f5d62b0 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -577,14 +577,10 @@ async def _asyncwith(c): RETURN_CONST 0 (None) %4d L12: CLEANUP_THROW - - -- L13: JUMP_BACKWARD_NO_INTERRUPT 25 (to L5) - -%4d L14: CLEANUP_THROW - - -- L15: JUMP_BACKWARD_NO_INTERRUPT 9 (to L11) - -%4d L16: PUSH_EXC_INFO + L13: JUMP_BACKWARD_NO_INTERRUPT 25 (to L5) + L14: CLEANUP_THROW + L15: JUMP_BACKWARD_NO_INTERRUPT 9 (to L11) + L16: PUSH_EXC_INFO WITH_EXCEPT_START GET_AWAITABLE 2 LOAD_CONST 0 (None) @@ -630,8 +626,6 @@ async def _asyncwith(c): _asyncwith.__code__.co_firstlineno + 1, _asyncwith.__code__.co_firstlineno + 3, _asyncwith.__code__.co_firstlineno + 1, - _asyncwith.__code__.co_firstlineno + 1, - _asyncwith.__code__.co_firstlineno + 1, _asyncwith.__code__.co_firstlineno + 3, ) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-01-19-13-18-13.gh-issue-114265.7HAi--.rst b/Misc/NEWS.d/next/Core and Builtins/2024-01-19-13-18-13.gh-issue-114265.7HAi--.rst new file mode 100644 index 00000000000000..74affbbd09ffb4 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-01-19-13-18-13.gh-issue-114265.7HAi--.rst @@ -0,0 +1 @@ +Compiler propagates line numbers before optimization, leading to more optimization opportunities and removing the need for the ``guarantee_lineno_for_exits`` hack. 
diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 4778f89e19b143..e84030c87b1b4b 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -29,6 +29,7 @@ typedef struct _PyCfgInstruction { int i_opcode; int i_oparg; _PyCompilerSrcLocation i_loc; + unsigned i_loc_propagated : 1; /* location was set by propagate_line_numbers */ struct _PyCfgBasicblock *i_target; /* target block (if jump instruction) */ struct _PyCfgBasicblock *i_except; /* target block when exception is raised */ } cfg_instr; @@ -504,6 +505,21 @@ no_redundant_jumps(cfg_builder *g) { return true; } +static bool +all_exits_have_lineno(basicblock *entryblock) { + for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + for (int i = 0; i < b->b_iused; i++) { + cfg_instr *instr = &b->b_instr[i]; + if (instr->i_opcode == RETURN_VALUE) { + if (instr->i_loc.lineno < 0) { + assert(0); + return false; + } + } + } + } + return true; +} #endif /***** CFG preprocessing (jump targets and exceptions) *****/ @@ -940,7 +956,10 @@ label_exception_targets(basicblock *entryblock) { /***** CFG optimizations *****/ static int -mark_reachable(basicblock *entryblock) { +remove_unreachable(basicblock *entryblock) { + for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + b->b_predecessors = 0; + } basicblock **stack = make_cfg_traversal_stack(entryblock); if (stack == NULL) { return ERROR; @@ -972,6 +991,14 @@ mark_reachable(basicblock *entryblock) { } } PyMem_Free(stack); + + /* Delete unreachable instructions */ + for (basicblock *b = entryblock; b != NULL; b = b->b_next) { + if (b->b_predecessors == 0) { + b->b_iused = 0; + b->b_except_handler = 0; + } + } return SUCCESS; } @@ -1149,13 +1176,15 @@ jump_thread(cfg_instr *inst, cfg_instr *target, int opcode) assert(is_jump(target)); // bpo-45773: If inst->i_target == target->i_target, then nothing actually // changes (and we fall into an infinite loop): + if (inst->i_loc.lineno == -1) assert(inst->i_loc_propagated); + if (target->i_loc.lineno == -1) 
assert(target->i_loc_propagated); if ((inst->i_loc.lineno == target->i_loc.lineno || - inst->i_loc.lineno == -1 || target->i_loc.lineno == -1) && + inst->i_loc_propagated || target->i_loc_propagated) && inst->i_target != target->i_target) { inst->i_target = target->i_target; inst->i_opcode = opcode; - if (inst->i_loc.lineno == -1) { + if (inst->i_loc_propagated && !target->i_loc_propagated) { inst->i_loc = target->i_loc; } return true; @@ -1714,6 +1743,7 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts) return ERROR; } +static int resolve_line_numbers(cfg_builder *g, int firstlineno); /* Perform optimizations on a control flow graph. The consts object should still be in list form to allow new constants @@ -1723,41 +1753,31 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts) NOPs. Later those NOPs are removed. */ static int -optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache) +optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache, int firstlineno) { assert(PyDict_CheckExact(const_cache)); RETURN_IF_ERROR(check_cfg(g)); for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { RETURN_IF_ERROR(inline_small_exit_blocks(b)); } + RETURN_IF_ERROR(remove_unreachable(g->g_entryblock)); + RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno)); for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts)); - assert(b->b_predecessors == 0); } RETURN_IF_ERROR(remove_redundant_nops_and_pairs(g->g_entryblock)); for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { RETURN_IF_ERROR(inline_small_exit_blocks(b)); } - RETURN_IF_ERROR(mark_reachable(g->g_entryblock)); - - /* Delete unreachable instructions */ - for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { - if (b->b_predecessors == 0) { - b->b_iused = 0; - b->b_except_handler = 0; - } - } - for (basicblock *b = g->g_entryblock; b != NULL; b 
= b->b_next) { - remove_redundant_nops(b); - } - RETURN_IF_ERROR(remove_redundant_jumps(g)); + RETURN_IF_ERROR(remove_unreachable(g->g_entryblock)); - for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { - remove_redundant_nops(b); + for (int n = 0; n < 2; n++) { + for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { + remove_redundant_nops(b); + } + RETURN_IF_ERROR(remove_redundant_jumps(g)); } - RETURN_IF_ERROR(remove_redundant_jumps(g)); - assert(no_redundant_jumps(g)); return SUCCESS; } @@ -2174,7 +2194,13 @@ push_cold_blocks_to_end(cfg_builder *g) { if (!IS_LABEL(b->b_next->b_label)) { b->b_next->b_label.id = next_lbl++; } - basicblock_addop(explicit_jump, JUMP_NO_INTERRUPT, b->b_next->b_label.id, NO_LOCATION); + cfg_instr *prev_instr = basicblock_last_instr(b); + // b cannot be empty because at the end of an exception handler + // there is always a POP_EXCEPT + RERAISE/RETURN + assert(prev_instr); + + basicblock_addop(explicit_jump, JUMP_NO_INTERRUPT, b->b_next->b_label.id, + prev_instr->i_loc); explicit_jump->b_cold = 1; explicit_jump->b_next = b->b_next; b->b_next = explicit_jump; @@ -2345,6 +2371,7 @@ propagate_line_numbers(basicblock *entryblock) { for (int i = 0; i < b->b_iused; i++) { if (b->b_instr[i].i_loc.lineno < 0) { b->b_instr[i].i_loc = prev_location; + b->b_instr[i].i_loc_propagated = 1; } else { prev_location = b->b_instr[i].i_loc; @@ -2354,6 +2381,7 @@ propagate_line_numbers(basicblock *entryblock) { if (b->b_next->b_iused > 0) { if (b->b_next->b_instr[0].i_loc.lineno < 0) { b->b_next->b_instr[0].i_loc = prev_location; + b->b_next->b_instr[0].i_loc_propagated = 1; } } } @@ -2362,46 +2390,18 @@ propagate_line_numbers(basicblock *entryblock) { if (target->b_predecessors == 1) { if (target->b_instr[0].i_loc.lineno < 0) { target->b_instr[0].i_loc = prev_location; + target->b_instr[0].i_loc_propagated = 1; } } } } } -/* Make sure that all returns have a line number, even if early passes - * have failed to propagate a 
correct line number. - * The resulting line number may not be correct according to PEP 626, - * but should be "good enough", and no worse than in older versions. */ -static void -guarantee_lineno_for_exits(basicblock *entryblock, int firstlineno) { - int lineno = firstlineno; - assert(lineno > 0); - for (basicblock *b = entryblock; b != NULL; b = b->b_next) { - cfg_instr *last = basicblock_last_instr(b); - if (last == NULL) { - continue; - } - if (last->i_loc.lineno < 0) { - if (last->i_opcode == RETURN_VALUE) { - for (int i = 0; i < b->b_iused; i++) { - assert(b->b_instr[i].i_loc.lineno < 0); - - b->b_instr[i].i_loc.lineno = lineno; - } - } - } - else { - lineno = last->i_loc.lineno; - } - } -} - static int resolve_line_numbers(cfg_builder *g, int firstlineno) { RETURN_IF_ERROR(duplicate_exits_without_lineno(g)); propagate_line_numbers(g->g_entryblock); - guarantee_lineno_for_exits(g->g_entryblock, firstlineno); return SUCCESS; } @@ -2417,7 +2417,7 @@ _PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache, RETURN_IF_ERROR(label_exception_targets(g->g_entryblock)); /** Optimization **/ - RETURN_IF_ERROR(optimize_cfg(g, consts, const_cache)); + RETURN_IF_ERROR(optimize_cfg(g, consts, const_cache, firstlineno)); RETURN_IF_ERROR(remove_unused_consts(g->g_entryblock, consts)); RETURN_IF_ERROR( add_checks_for_loads_of_uninitialized_variables( @@ -2425,6 +2425,7 @@ _PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache, insert_superinstructions(g); RETURN_IF_ERROR(push_cold_blocks_to_end(g)); + assert(all_exits_have_lineno(g->g_entryblock)); RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno)); return SUCCESS; } From 229ee5bea1fb8f9de1f4d29397634cd3a433fb8d Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Fri, 19 Jan 2024 20:52:42 +0200 Subject: [PATCH 008/160] Retain shorter tables of contents for Sphinx 5.2.3+ (#114318) Disable toc_object_entries, new in Sphinx 5.2.3 --- 
Doc/conf.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Doc/conf.py b/Doc/conf.py index dc09b0b51ca84c..458954370debe2 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -74,6 +74,10 @@ # Minimum version of sphinx required needs_sphinx = '4.2' +# Create table of contents entries for domain objects (e.g. functions, classes, +# attributes, etc.). Default is True. +toc_object_entries = False + # Ignore any .rst files in the includes/ directory; # they're embedded in pages but not rendered individually. # Ignore any .rst files in the venv/ directory. From 681e9e85a2c1f72576ddfbd766506e2d6db34862 Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Fri, 19 Jan 2024 11:38:52 -0800 Subject: [PATCH 009/160] Add a `clean` subcommand to `Tools/wasm/wasi.py` (GH-114274) --- Tools/wasm/wasi.py | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/Tools/wasm/wasi.py b/Tools/wasm/wasi.py index 36bc70ffd9db7b..e71b0b302a5561 100644 --- a/Tools/wasm/wasi.py +++ b/Tools/wasm/wasi.py @@ -17,11 +17,15 @@ CHECKOUT = pathlib.Path(__file__).parent.parent.parent + CROSS_BUILD_DIR = CHECKOUT / "cross-build" BUILD_DIR = CROSS_BUILD_DIR / "build" HOST_TRIPLE = "wasm32-wasi" HOST_DIR = CROSS_BUILD_DIR / HOST_TRIPLE +LOCAL_SETUP = CHECKOUT / "Modules" / "Setup.local" +LOCAL_SETUP_MARKER = "# Generated by Tools/wasm/wasi.py\n".encode("utf-8") + def updated_env(updates={}): """Create a new dict representing the environment to use. 
@@ -119,12 +123,11 @@ def build_python_path(): @subdir(BUILD_DIR, clean_ok=True) def configure_build_python(context, working_dir): """Configure the build/host Python.""" - local_setup = CHECKOUT / "Modules" / "Setup.local" - if local_setup.exists(): - print(f"👍 {local_setup} exists ...") + if LOCAL_SETUP.exists(): + print(f"👍 {LOCAL_SETUP} exists ...") else: - print(f"📝 Touching {local_setup} ...") - local_setup.touch() + print(f"📝 Touching {LOCAL_SETUP} ...") + LOCAL_SETUP.write_bytes(LOCAL_SETUP_MARKER) configure = [os.path.relpath(CHECKOUT / 'configure', working_dir)] if context.args: @@ -260,6 +263,17 @@ def build_all(context): for step in steps: step(context) +def clean_contents(context): + """Delete all files created by this script.""" + if CROSS_BUILD_DIR.exists(): + print(f"🧹 Deleting {CROSS_BUILD_DIR} ...") + shutil.rmtree(CROSS_BUILD_DIR) + + if LOCAL_SETUP.exists(): + with LOCAL_SETUP.open("rb") as file: + if file.read(len(LOCAL_SETUP_MARKER)) == LOCAL_SETUP_MARKER: + print(f"🧹 Deleting generated {LOCAL_SETUP} ...") + def main(): default_host_runner = (f"{shutil.which('wasmtime')} run " @@ -290,11 +304,13 @@ def main(): "Python)") make_host = subcommands.add_parser("make-host", help="Run `make` for the host/WASI") + clean = subcommands.add_parser("clean", help="Delete files and directories " + "created by this script") for subcommand in build, configure_build, make_build, configure_host, make_host: subcommand.add_argument("--quiet", action="store_true", default=False, dest="quiet", help="Redirect output from subprocesses to a log file") - for subcommand in build, configure_build, configure_host: + for subcommand in configure_build, configure_host: subcommand.add_argument("--clean", action="store_true", default=False, dest="clean", help="Delete any relevant directories before building") @@ -319,7 +335,8 @@ def main(): "make-build-python": make_build_python, "configure-host": configure_wasi_python, "make-host": make_wasi_python, - "build": build_all} + 
"build": build_all, + "clean": clean_contents} dispatch[context.subcommand](context) From 6313cdde58f34648a430d2830357c9d2a5b67b87 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sat, 20 Jan 2024 02:10:25 +0000 Subject: [PATCH 010/160] GH-79634: Accept path-like objects as pathlib glob patterns. (#114017) Allow `os.PathLike` objects to be passed as patterns to `pathlib.Path.glob()` and `rglob()`. (It's already possible to use them in `PurePath.match()`) While we're in the area: - Allow empty glob patterns in `PathBase` (but not `Path`) - Speed up globbing in `PathBase` by generating paths with trailing slashes only as a final step, rather than for every intermediate directory. - Simplify and speed up handling of rare patterns involving both `**` and `..` segments. --- Doc/library/pathlib.rst | 6 ++ Lib/pathlib/__init__.py | 49 ++++++---- Lib/pathlib/_abc.py | 98 +++++++++---------- Lib/test/test_pathlib/test_pathlib.py | 23 +++++ Lib/test/test_pathlib/test_pathlib_abc.py | 9 +- ...4-01-12-17-32-36.gh-issue-79634.uTSTRI.rst | 2 + 6 files changed, 115 insertions(+), 72 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-01-12-17-32-36.gh-issue-79634.uTSTRI.rst diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index be207ca222274e..b924f470e0be04 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1036,6 +1036,9 @@ call fails (for example because the path doesn't exist). future Python release, patterns with this ending will match both files and directories. Add a trailing slash to match only directories. + .. versionchanged:: 3.13 + The *pattern* parameter accepts a :term:`path-like object`. + .. method:: Path.group(*, follow_symlinks=True) Return the name of the group owning the file. :exc:`KeyError` is raised @@ -1498,6 +1501,9 @@ call fails (for example because the path doesn't exist). .. versionchanged:: 3.13 The *follow_symlinks* parameter was added. + .. 
versionchanged:: 3.13 + The *pattern* parameter accepts a :term:`path-like object`. + .. method:: Path.rmdir() Remove this directory. The directory must be empty. diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py index f14d35bb0038d0..b043aed12b3849 100644 --- a/Lib/pathlib/__init__.py +++ b/Lib/pathlib/__init__.py @@ -467,6 +467,29 @@ def as_uri(self): from urllib.parse import quote_from_bytes return prefix + quote_from_bytes(os.fsencode(path)) + @property + def _pattern_stack(self): + """Stack of path components, to be used with patterns in glob().""" + parts = self._tail.copy() + pattern = self._raw_path + if self.anchor: + raise NotImplementedError("Non-relative patterns are unsupported") + elif not parts: + raise ValueError("Unacceptable pattern: {!r}".format(pattern)) + elif pattern[-1] in (self.pathmod.sep, self.pathmod.altsep): + # GH-65238: pathlib doesn't preserve trailing slash. Add it back. + parts.append('') + elif parts[-1] == '**': + # GH-70303: '**' only matches directories. Add trailing slash. + warnings.warn( + "Pattern ending '**' will match files and directories in a " + "future Python release. Add a trailing slash to match only " + "directories and remove this warning.", + FutureWarning, 4) + parts.append('') + parts.reverse() + return parts + # Subclassing os.PathLike makes isinstance() checks slower, # which in turn makes Path construction slower. Register instead! @@ -580,7 +603,7 @@ def iterdir(self): def _scandir(self): return os.scandir(self) - def _make_child_entry(self, entry, is_dir=False): + def _make_child_entry(self, entry): # Transform an entry yielded from _scandir() into a path object. path_str = entry.name if str(self) == '.' 
else entry.path path = self.with_segments(path_str) @@ -591,6 +614,8 @@ def _make_child_entry(self, entry, is_dir=False): return path def _make_child_relpath(self, name): + if not name: + return self path_str = str(self) tail = self._tail if tail: @@ -611,14 +636,8 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None): kind, including directories) matching the given relative pattern. """ sys.audit("pathlib.Path.glob", self, pattern) - if pattern.endswith('**'): - # GH-70303: '**' only matches directories. Add trailing slash. - warnings.warn( - "Pattern ending '**' will match files and directories in a " - "future Python release. Add a trailing slash to match only " - "directories and remove this warning.", - FutureWarning, 2) - pattern = f'{pattern}/' + if not isinstance(pattern, PurePath): + pattern = self.with_segments(pattern) return _abc.PathBase.glob( self, pattern, case_sensitive=case_sensitive, follow_symlinks=follow_symlinks) @@ -628,15 +647,9 @@ def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=None): this subtree. """ sys.audit("pathlib.Path.rglob", self, pattern) - if pattern.endswith('**'): - # GH-70303: '**' only matches directories. Add trailing slash. - warnings.warn( - "Pattern ending '**' will match files and directories in a " - "future Python release. 
Add a trailing slash to match only " - "directories and remove this warning.", - FutureWarning, 2) - pattern = f'{pattern}/' - pattern = f'**/{pattern}' + if not isinstance(pattern, PurePath): + pattern = self.with_segments(pattern) + pattern = '**' / pattern return _abc.PathBase.glob( self, pattern, case_sensitive=case_sensitive, follow_symlinks=follow_symlinks) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 48a6c218309385..e5eeb4afce2ea9 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -63,6 +63,12 @@ def _compile_pattern(pat, sep, case_sensitive): return re.compile(regex, flags=flags).match +def _select_special(paths, part): + """Yield special literal children of the given paths.""" + for path in paths: + yield path._make_child_relpath(part) + + def _select_children(parent_paths, dir_only, follow_symlinks, match): """Yield direct children of given paths, filtering by name and type.""" if follow_symlinks is None: @@ -84,7 +90,7 @@ def _select_children(parent_paths, dir_only, follow_symlinks, match): except OSError: continue if match(entry.name): - yield parent_path._make_child_entry(entry, dir_only) + yield parent_path._make_child_entry(entry) def _select_recursive(parent_paths, dir_only, follow_symlinks): @@ -107,7 +113,7 @@ def _select_recursive(parent_paths, dir_only, follow_symlinks): for entry in entries: try: if entry.is_dir(follow_symlinks=follow_symlinks): - paths.append(path._make_child_entry(entry, dir_only)) + paths.append(path._make_child_entry(entry)) continue except OSError: pass @@ -427,6 +433,14 @@ def is_absolute(self): a drive).""" return self.pathmod.isabs(self._raw_path) + @property + def _pattern_stack(self): + """Stack of path components, to be used with patterns in glob().""" + anchor, parts = self._stack + if anchor: + raise NotImplementedError("Non-relative patterns are unsupported") + return parts + def match(self, path_pattern, *, case_sensitive=None): """ Return True if this path matches the given pattern. 
@@ -436,11 +450,10 @@ def match(self, path_pattern, *, case_sensitive=None): if case_sensitive is None: case_sensitive = _is_case_sensitive(self.pathmod) sep = path_pattern.pathmod.sep - pattern_str = str(path_pattern) if path_pattern.anchor: - pass + pattern_str = str(path_pattern) elif path_pattern.parts: - pattern_str = f'**{sep}{pattern_str}' + pattern_str = str('**' / path_pattern) else: raise ValueError("empty pattern") match = _compile_pattern(pattern_str, sep, case_sensitive) @@ -714,10 +727,8 @@ def _scandir(self): from contextlib import nullcontext return nullcontext(self.iterdir()) - def _make_child_entry(self, entry, is_dir=False): + def _make_child_entry(self, entry): # Transform an entry yielded from _scandir() into a path object. - if is_dir: - return entry.joinpath('') return entry def _make_child_relpath(self, name): @@ -727,57 +738,35 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None): """Iterate over this subtree and yield all existing files (of any kind, including directories) matching the given relative pattern. """ - path_pattern = self.with_segments(pattern) - if path_pattern.anchor: - raise NotImplementedError("Non-relative patterns are unsupported") - elif not path_pattern.parts: - raise ValueError("Unacceptable pattern: {!r}".format(pattern)) - - pattern_parts = list(path_pattern.parts) - if not self.pathmod.split(pattern)[1]: - # GH-65238: pathlib doesn't preserve trailing slash. Add it back. - pattern_parts.append('') - + if not isinstance(pattern, PurePathBase): + pattern = self.with_segments(pattern) if case_sensitive is None: # TODO: evaluate case-sensitivity of each directory in _select_children(). case_sensitive = _is_case_sensitive(self.pathmod) - # If symlinks are handled consistently, and the pattern does not - # contain '..' components, then we can use a 'walk-and-match' strategy - # when expanding '**' wildcards. 
When a '**' wildcard is encountered, - # all following pattern parts are immediately consumed and used to - # build a `re.Pattern` object. This pattern is used to filter the - # recursive walk. As a result, pattern parts following a '**' wildcard - # do not perform any filesystem access, which can be much faster! - filter_paths = follow_symlinks is not None and '..' not in pattern_parts + stack = pattern._pattern_stack + specials = ('', '.', '..') + filter_paths = False deduplicate_paths = False sep = self.pathmod.sep paths = iter([self.joinpath('')] if self.is_dir() else []) - part_idx = 0 - while part_idx < len(pattern_parts): - part = pattern_parts[part_idx] - part_idx += 1 - if part == '': - # Trailing slash. - pass - elif part == '..': - paths = (path._make_child_relpath('..') for path in paths) + while stack: + part = stack.pop() + if part in specials: + paths = _select_special(paths, part) elif part == '**': # Consume adjacent '**' components. - while part_idx < len(pattern_parts) and pattern_parts[part_idx] == '**': - part_idx += 1 - - if filter_paths and part_idx < len(pattern_parts) and pattern_parts[part_idx] != '': - dir_only = pattern_parts[-1] == '' - paths = _select_recursive(paths, dir_only, follow_symlinks) + while stack and stack[-1] == '**': + stack.pop() - # Filter out paths that don't match pattern. - prefix_len = len(str(self._make_child_relpath('_'))) - 1 - match = _compile_pattern(str(path_pattern), sep, case_sensitive) - paths = (path for path in paths if match(str(path), prefix_len)) - return paths + # Consume adjacent non-special components and enable post-walk + # regex filtering, provided we're treating symlinks consistently. 
+ if follow_symlinks is not None: + while stack and stack[-1] not in specials: + filter_paths = True + stack.pop() - dir_only = part_idx < len(pattern_parts) + dir_only = bool(stack) paths = _select_recursive(paths, dir_only, follow_symlinks) if deduplicate_paths: # De-duplicate if we've already seen a '**' component. @@ -786,9 +775,14 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None): elif '**' in part: raise ValueError("Invalid pattern: '**' can only be an entire path component") else: - dir_only = part_idx < len(pattern_parts) + dir_only = bool(stack) match = _compile_pattern(part, sep, case_sensitive) paths = _select_children(paths, dir_only, follow_symlinks, match) + if filter_paths: + # Filter out paths that don't match pattern. + prefix_len = len(str(self._make_child_relpath('_'))) - 1 + match = _compile_pattern(str(pattern), sep, case_sensitive) + paths = (path for path in paths if match(str(path), prefix_len)) return paths def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=None): @@ -796,8 +790,10 @@ def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=None): directories) matching the given relative pattern, anywhere in this subtree. 
""" - return self.glob( - f'**/{pattern}', case_sensitive=case_sensitive, follow_symlinks=follow_symlinks) + if not isinstance(pattern, PurePathBase): + pattern = self.with_segments(pattern) + pattern = '**' / pattern + return self.glob(pattern, case_sensitive=case_sensitive, follow_symlinks=follow_symlinks) def walk(self, top_down=True, on_error=None, follow_symlinks=False): """Walk the directory tree from this directory, similar to os.walk().""" diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 61d7939ad140b2..bdbe92369639ef 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -1818,6 +1818,13 @@ def test_walk_above_recursion_limit(self): list(base.walk()) list(base.walk(top_down=False)) + def test_glob_empty_pattern(self): + p = self.cls('') + with self.assertRaisesRegex(ValueError, 'Unacceptable pattern'): + list(p.glob('')) + with self.assertRaisesRegex(ValueError, 'Unacceptable pattern'): + list(p.glob('.')) + def test_glob_many_open_files(self): depth = 30 P = self.cls @@ -1860,6 +1867,22 @@ def test_glob_recursive_no_trailing_slash(self): with self.assertWarns(FutureWarning): p.rglob('*/**') + def test_glob_pathlike(self): + P = self.cls + p = P(self.base) + pattern = "dir*/file*" + expect = {p / "dirB/fileB", p / "dirC/fileC"} + self.assertEqual(expect, set(p.glob(P(pattern)))) + self.assertEqual(expect, set(p.glob(FakePath(pattern)))) + + def test_rglob_pathlike(self): + P = self.cls + p = P(self.base, "dirC") + pattern = "**/file*" + expect = {p / "fileC", p / "dirD/fileD"} + self.assertEqual(expect, set(p.rglob(P(pattern)))) + self.assertEqual(expect, set(p.rglob(FakePath(pattern)))) + @only_posix class PosixPathTest(PathTest, PurePosixPathTest): diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index f877c98b7678f4..199718a8a69c5a 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ 
b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1045,9 +1045,12 @@ def _check(glob, expected): _check(p.glob("*/"), ["dirA/", "dirB/", "dirC/", "dirE/", "linkB/"]) def test_glob_empty_pattern(self): - p = self.cls('') - with self.assertRaisesRegex(ValueError, 'Unacceptable pattern'): - list(p.glob('')) + def _check(glob, expected): + self.assertEqual(set(glob), { P(self.base, q) for q in expected }) + P = self.cls + p = P(self.base) + _check(p.glob(""), [""]) + _check(p.glob("."), ["."]) def test_glob_case_sensitive(self): P = self.cls diff --git a/Misc/NEWS.d/next/Library/2024-01-12-17-32-36.gh-issue-79634.uTSTRI.rst b/Misc/NEWS.d/next/Library/2024-01-12-17-32-36.gh-issue-79634.uTSTRI.rst new file mode 100644 index 00000000000000..ba19b5209e648e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-12-17-32-36.gh-issue-79634.uTSTRI.rst @@ -0,0 +1,2 @@ +Accept :term:`path-like objects <path-like object>` as patterns in +:meth:`pathlib.Path.glob` and :meth:`~pathlib.Path.rglob`. From 1e610fb05fa4ba61a759b68461f1a9aed07622fc Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sat, 20 Jan 2024 03:06:00 +0000 Subject: [PATCH 011/160] GH-113225: Speed up `pathlib.Path.walk(top_down=False)` (#113693) Use `_make_child_entry()` rather than `_make_child_relpath()` to retrieve path objects for directories to visit. This saves the allocation of one path object per directory in user subclasses of `PathBase`, and avoids a second loop. This trick does not apply when walking top-down, because users can affect the walk by modifying *dirnames* in-place. A side effect of this change is that, in bottom-up mode, subdirectories of each directory are visited in reverse order, and that this order doesn't match that of the names in *dirnames*. I suspect this is fine as the order is arbitrary anyway. 
--- Lib/pathlib/_abc.py | 9 +++++---- .../2024-01-04-20-58-17.gh-issue-113225.-nyJM4.rst | 2 ++ 2 files changed, 7 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-01-04-20-58-17.gh-issue-113225.-nyJM4.rst diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index e5eeb4afce2ea9..553e1a399061d3 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -820,6 +820,8 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False): with scandir_obj as scandir_it: dirnames = [] filenames = [] + if not top_down: + paths.append((path, dirnames, filenames)) for entry in scandir_it: try: is_dir = entry.is_dir(follow_symlinks=follow_symlinks) @@ -828,16 +830,15 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False): is_dir = False if is_dir: + if not top_down: + paths.append(path._make_child_entry(entry)) dirnames.append(entry.name) else: filenames.append(entry.name) if top_down: yield path, dirnames, filenames - else: - paths.append((path, dirnames, filenames)) - - paths += [path._make_child_relpath(d) for d in reversed(dirnames)] + paths += [path._make_child_relpath(d) for d in reversed(dirnames)] def absolute(self): """Return an absolute version of this path diff --git a/Misc/NEWS.d/next/Library/2024-01-04-20-58-17.gh-issue-113225.-nyJM4.rst b/Misc/NEWS.d/next/Library/2024-01-04-20-58-17.gh-issue-113225.-nyJM4.rst new file mode 100644 index 00000000000000..0c07f42fd065d2 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-04-20-58-17.gh-issue-113225.-nyJM4.rst @@ -0,0 +1,2 @@ +Speed up :meth:`pathlib.Path.walk` by using :attr:`os.DirEntry.path` where +possible. From 567a85e9c15a3f7848330ae7bef3de2f70fc9f97 Mon Sep 17 00:00:00 2001 From: "David H. 
Gutteridge" Date: Sat, 20 Jan 2024 06:17:41 -0500 Subject: [PATCH 012/160] gh-114332: Fix the flags reference for ``re.compile()`` (#114334) The GH-93000 change set inadvertently caused a sentence in re.compile() documentation to refer to details that no longer followed. Correct this with a link to the Flags sub-subsection. Co-authored-by: Adam Turner <9087854+aa-turner@users.noreply.github.com> --- Doc/library/re.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/re.rst b/Doc/library/re.rst index 5bb93390aa5f79..0a8c88b50cdeec 100644 --- a/Doc/library/re.rst +++ b/Doc/library/re.rst @@ -880,8 +880,8 @@ Functions below. The expression's behaviour can be modified by specifying a *flags* value. - Values can be any of the following variables, combined using bitwise OR (the - ``|`` operator). + Values can be any of the `flags`_ variables, combined using bitwise OR + (the ``|`` operator). The sequence :: From e6495159f6f2ca4fe3b5143c62fc2ad5873ef54f Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Date: Sat, 20 Jan 2024 11:20:51 +0000 Subject: [PATCH 013/160] GH-99380: Update to Sphinx 7 (#99381) --- Doc/requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Doc/requirements.txt b/Doc/requirements.txt index 04334fd5a464d4..118e6c322b4be2 100644 --- a/Doc/requirements.txt +++ b/Doc/requirements.txt @@ -6,8 +6,7 @@ # Sphinx version is pinned so that new versions that introduce new warnings # won't suddenly cause build failures. Updating the version is fine as long # as no warnings are raised by doing so. -# PR #104777: Sphinx 6.2 no longer uses imghdr, removed in Python 3.13. -sphinx==6.2.1 +sphinx~=7.2.0 blurb From b1ad5a5d446f944a45c43a3e865d1d8f47611071 Mon Sep 17 00:00:00 2001 From: "Erlend E. 
Aasland" Date: Sat, 20 Jan 2024 16:06:52 +0100 Subject: [PATCH 014/160] Docs: structure the ftplib reference (#114317) Introduce the following headings and subheadings: - Reference * FTP objects * FTP_TLS objects * Module variables --- Doc/library/ftplib.rst | 494 +++++++++++++++++++++-------------------- 1 file changed, 250 insertions(+), 244 deletions(-) diff --git a/Doc/library/ftplib.rst b/Doc/library/ftplib.rst index d1fe6414ea020c..6c44b8d65c3293 100644 --- a/Doc/library/ftplib.rst +++ b/Doc/library/ftplib.rst @@ -45,7 +45,15 @@ Here's a sample session using the :mod:`ftplib` module:: '221 Goodbye.' -The module defines the following items: +.. _ftplib-reference: + +Reference +--------- + +.. _ftp-objects: + +FTP objects +^^^^^^^^^^^ .. class:: FTP(host='', user='', passwd='', acct='', timeout=None, source_address=None, *, encoding='utf-8') @@ -85,376 +93,374 @@ The module defines the following items: The *encoding* parameter was added, and the default was changed from Latin-1 to UTF-8 to follow :rfc:`2640`. -.. class:: FTP_TLS(host='', user='', passwd='', acct='', *, context=None, - timeout=None, source_address=None, encoding='utf-8') + Several :class:`!FTP` methods are available in two flavors: + one for handling text files and another for binary files. + The methods are named for the command which is used followed by + ``lines`` for the text version or ``binary`` for the binary version. - A :class:`FTP` subclass which adds TLS support to FTP as described in - :rfc:`4217`. - Connect as usual to port 21 implicitly securing the FTP control connection - before authenticating. Securing the data connection requires the user to - explicitly ask for it by calling the :meth:`prot_p` method. *context* - is a :class:`ssl.SSLContext` object which allows bundling SSL configuration - options, certificates and private keys into a single (potentially - long-lived) structure. Please read :ref:`ssl-security` for best practices. 
+ :class:`FTP` instances have the following methods: - .. versionadded:: 3.2 + .. method:: FTP.set_debuglevel(level) - .. versionchanged:: 3.3 - *source_address* parameter was added. + Set the instance's debugging level. This controls the amount of debugging + output printed. The default, ``0``, produces no debugging output. A value of + ``1`` produces a moderate amount of debugging output, generally a single line + per request. A value of ``2`` or higher produces the maximum amount of + debugging output, logging each line sent and received on the control connection. - .. versionchanged:: 3.4 - The class now supports hostname check with - :attr:`ssl.SSLContext.check_hostname` and *Server Name Indication* (see - :const:`ssl.HAS_SNI`). - .. versionchanged:: 3.9 - If the *timeout* parameter is set to be zero, it will raise a - :class:`ValueError` to prevent the creation of a non-blocking socket. - The *encoding* parameter was added, and the default was changed from - Latin-1 to UTF-8 to follow :rfc:`2640`. + .. method:: FTP.connect(host='', port=0, timeout=None, source_address=None) - .. versionchanged:: 3.12 - The deprecated *keyfile* and *certfile* parameters have been removed. + Connect to the given host and port. The default port number is ``21``, as + specified by the FTP protocol specification. It is rarely needed to specify a + different port number. This function should be called only once for each + instance; it should not be called at all if a host was given when the instance + was created. All other methods can only be used after a connection has been + made. + The optional *timeout* parameter specifies a timeout in seconds for the + connection attempt. If no *timeout* is passed, the global default timeout + setting will be used. + *source_address* is a 2-tuple ``(host, port)`` for the socket to bind to as + its source address before connecting. - Here's a sample session using the :class:`FTP_TLS` class:: + .. 
audit-event:: ftplib.connect self,host,port ftplib.FTP.connect - >>> ftps = FTP_TLS('ftp.pureftpd.org') - >>> ftps.login() - '230 Anonymous user logged in' - >>> ftps.prot_p() - '200 Data protection level set to "private"' - >>> ftps.nlst() - ['6jack', 'OpenBSD', 'antilink', 'blogbench', 'bsdcam', 'clockspeed', 'djbdns-jedi', 'docs', 'eaccelerator-jedi', 'favicon.ico', 'francotone', 'fugu', 'ignore', 'libpuzzle', 'metalog', 'minidentd', 'misc', 'mysql-udf-global-user-variables', 'php-jenkins-hash', 'php-skein-hash', 'php-webdav', 'phpaudit', 'phpbench', 'pincaster', 'ping', 'posto', 'pub', 'public', 'public_keys', 'pure-ftpd', 'qscan', 'qtc', 'sharedance', 'skycache', 'sound', 'tmp', 'ucarp'] + .. versionchanged:: 3.3 + *source_address* parameter was added. -.. exception:: error_reply + .. method:: FTP.getwelcome() - Exception raised when an unexpected reply is received from the server. + Return the welcome message sent by the server in reply to the initial + connection. (This message sometimes contains disclaimers or help information + that may be relevant to the user.) -.. exception:: error_temp + .. method:: FTP.login(user='anonymous', passwd='', acct='') - Exception raised when an error code signifying a temporary error (response - codes in the range 400--499) is received. - - -.. exception:: error_perm - - Exception raised when an error code signifying a permanent error (response - codes in the range 500--599) is received. + Log in as the given *user*. The *passwd* and *acct* parameters are optional and + default to the empty string. If no *user* is specified, it defaults to + ``'anonymous'``. If *user* is ``'anonymous'``, the default *passwd* is + ``'anonymous@'``. This function should be called only once for each instance, + after a connection has been established; it should not be called at all if a + host and user were given when the instance was created. Most FTP commands are + only allowed after the client has logged in. 
The *acct* parameter supplies + "accounting information"; few systems implement this. -.. exception:: error_proto + .. method:: FTP.abort() - Exception raised when a reply is received from the server that does not fit - the response specifications of the File Transfer Protocol, i.e. begin with a - digit in the range 1--5. + Abort a file transfer that is in progress. Using this does not always work, but + it's worth a try. -.. data:: all_errors + .. method:: FTP.sendcmd(cmd) - The set of all exceptions (as a tuple) that methods of :class:`FTP` - instances may raise as a result of problems with the FTP connection (as - opposed to programming errors made by the caller). This set includes the - four exceptions listed above as well as :exc:`OSError` and :exc:`EOFError`. + Send a simple command string to the server and return the response string. + .. audit-event:: ftplib.sendcmd self,cmd ftplib.FTP.sendcmd -.. seealso:: - Module :mod:`netrc` - Parser for the :file:`.netrc` file format. The file :file:`.netrc` is - typically used by FTP clients to load user authentication information - before prompting the user. + .. method:: FTP.voidcmd(cmd) + Send a simple command string to the server and handle the response. Return + nothing if a response code corresponding to success (codes in the range + 200--299) is received. Raise :exc:`error_reply` otherwise. -.. _ftp-objects: + .. audit-event:: ftplib.sendcmd self,cmd ftplib.FTP.voidcmd -FTP Objects ------------ -Several methods are available in two flavors: one for handling text files and -another for binary files. These are named for the command which is used -followed by ``lines`` for the text version or ``binary`` for the binary version. + .. method:: FTP.retrbinary(cmd, callback, blocksize=8192, rest=None) -:class:`FTP` instances have the following methods: + Retrieve a file in binary transfer mode. *cmd* should be an appropriate + ``RETR`` command: ``'RETR filename'``. 
The *callback* function is called for + each block of data received, with a single bytes argument giving the data + block. The optional *blocksize* argument specifies the maximum chunk size to + read on the low-level socket object created to do the actual transfer (which + will also be the largest size of the data blocks passed to *callback*). A + reasonable default is chosen. *rest* means the same thing as in the + :meth:`transfercmd` method. -.. method:: FTP.set_debuglevel(level) + .. method:: FTP.retrlines(cmd, callback=None) - Set the instance's debugging level. This controls the amount of debugging - output printed. The default, ``0``, produces no debugging output. A value of - ``1`` produces a moderate amount of debugging output, generally a single line - per request. A value of ``2`` or higher produces the maximum amount of - debugging output, logging each line sent and received on the control connection. + Retrieve a file or directory listing in the encoding specified by the + *encoding* parameter at initialization. + *cmd* should be an appropriate ``RETR`` command (see :meth:`retrbinary`) or + a command such as ``LIST`` or ``NLST`` (usually just the string ``'LIST'``). + ``LIST`` retrieves a list of files and information about those files. + ``NLST`` retrieves a list of file names. + The *callback* function is called for each line with a string argument + containing the line with the trailing CRLF stripped. The default *callback* + prints the line to ``sys.stdout``. -.. method:: FTP.connect(host='', port=0, timeout=None, source_address=None) + .. method:: FTP.set_pasv(val) - Connect to the given host and port. The default port number is ``21``, as - specified by the FTP protocol specification. It is rarely needed to specify a - different port number. This function should be called only once for each - instance; it should not be called at all if a host was given when the instance - was created. 
All other methods can only be used after a connection has been - made. - The optional *timeout* parameter specifies a timeout in seconds for the - connection attempt. If no *timeout* is passed, the global default timeout - setting will be used. - *source_address* is a 2-tuple ``(host, port)`` for the socket to bind to as - its source address before connecting. + Enable "passive" mode if *val* is true, otherwise disable passive mode. + Passive mode is on by default. - .. audit-event:: ftplib.connect self,host,port ftplib.FTP.connect - .. versionchanged:: 3.3 - *source_address* parameter was added. + .. method:: FTP.storbinary(cmd, fp, blocksize=8192, callback=None, rest=None) + Store a file in binary transfer mode. *cmd* should be an appropriate + ``STOR`` command: ``"STOR filename"``. *fp* is a :term:`file object` + (opened in binary mode) which is read until EOF using its :meth:`~io.IOBase.read` + method in blocks of size *blocksize* to provide the data to be stored. + The *blocksize* argument defaults to 8192. *callback* is an optional single + parameter callable that is called on each block of data after it is sent. + *rest* means the same thing as in the :meth:`transfercmd` method. -.. method:: FTP.getwelcome() + .. versionchanged:: 3.2 + *rest* parameter added. - Return the welcome message sent by the server in reply to the initial - connection. (This message sometimes contains disclaimers or help information - that may be relevant to the user.) + .. method:: FTP.storlines(cmd, fp, callback=None) -.. method:: FTP.login(user='anonymous', passwd='', acct='') + Store a file in line mode. *cmd* should be an appropriate + ``STOR`` command (see :meth:`storbinary`). Lines are read until EOF from the + :term:`file object` *fp* (opened in binary mode) using its :meth:`~io.IOBase.readline` + method to provide the data to be stored. *callback* is an optional single + parameter callable that is called on each line after it is sent. - Log in as the given *user*. 
The *passwd* and *acct* parameters are optional and - default to the empty string. If no *user* is specified, it defaults to - ``'anonymous'``. If *user* is ``'anonymous'``, the default *passwd* is - ``'anonymous@'``. This function should be called only once for each instance, - after a connection has been established; it should not be called at all if a - host and user were given when the instance was created. Most FTP commands are - only allowed after the client has logged in. The *acct* parameter supplies - "accounting information"; few systems implement this. + .. method:: FTP.transfercmd(cmd, rest=None) -.. method:: FTP.abort() + Initiate a transfer over the data connection. If the transfer is active, send an + ``EPRT`` or ``PORT`` command and the transfer command specified by *cmd*, and + accept the connection. If the server is passive, send an ``EPSV`` or ``PASV`` + command, connect to it, and start the transfer command. Either way, return the + socket for the connection. - Abort a file transfer that is in progress. Using this does not always work, but - it's worth a try. + If optional *rest* is given, a ``REST`` command is sent to the server, passing + *rest* as an argument. *rest* is usually a byte offset into the requested file, + telling the server to restart sending the file's bytes at the requested offset, + skipping over the initial bytes. Note however that the :meth:`transfercmd` + method converts *rest* to a string with the *encoding* parameter specified + at initialization, but no check is performed on the string's contents. If the + server does not recognize the ``REST`` command, an :exc:`error_reply` exception + will be raised. If this happens, simply call :meth:`transfercmd` without a + *rest* argument. -.. method:: FTP.sendcmd(cmd) + .. method:: FTP.ntransfercmd(cmd, rest=None) - Send a simple command string to the server and return the response string. 
+ Like :meth:`transfercmd`, but returns a tuple of the data connection and the + expected size of the data. If the expected size could not be computed, ``None`` + will be returned as the expected size. *cmd* and *rest* mean the same thing as + in :meth:`transfercmd`. - .. audit-event:: ftplib.sendcmd self,cmd ftplib.FTP.sendcmd + .. method:: FTP.mlsd(path="", facts=[]) -.. method:: FTP.voidcmd(cmd) + List a directory in a standardized format by using ``MLSD`` command + (:rfc:`3659`). If *path* is omitted the current directory is assumed. + *facts* is a list of strings representing the type of information desired + (e.g. ``["type", "size", "perm"]``). Return a generator object yielding a + tuple of two elements for every file found in path. First element is the + file name, the second one is a dictionary containing facts about the file + name. Content of this dictionary might be limited by the *facts* argument + but server is not guaranteed to return all requested facts. - Send a simple command string to the server and handle the response. Return - nothing if a response code corresponding to success (codes in the range - 200--299) is received. Raise :exc:`error_reply` otherwise. + .. versionadded:: 3.3 - .. audit-event:: ftplib.sendcmd self,cmd ftplib.FTP.voidcmd + .. method:: FTP.nlst(argument[, ...]) -.. method:: FTP.retrbinary(cmd, callback, blocksize=8192, rest=None) + Return a list of file names as returned by the ``NLST`` command. The + optional *argument* is a directory to list (default is the current server + directory). Multiple arguments can be used to pass non-standard options to + the ``NLST`` command. - Retrieve a file in binary transfer mode. *cmd* should be an appropriate - ``RETR`` command: ``'RETR filename'``. The *callback* function is called for - each block of data received, with a single bytes argument giving the data - block. 
The optional *blocksize* argument specifies the maximum chunk size to - read on the low-level socket object created to do the actual transfer (which - will also be the largest size of the data blocks passed to *callback*). A - reasonable default is chosen. *rest* means the same thing as in the - :meth:`transfercmd` method. + .. note:: If your server supports the command, :meth:`mlsd` offers a better API. -.. method:: FTP.retrlines(cmd, callback=None) + .. method:: FTP.dir(argument[, ...]) - Retrieve a file or directory listing in the encoding specified by the - *encoding* parameter at initialization. - *cmd* should be an appropriate ``RETR`` command (see :meth:`retrbinary`) or - a command such as ``LIST`` or ``NLST`` (usually just the string ``'LIST'``). - ``LIST`` retrieves a list of files and information about those files. - ``NLST`` retrieves a list of file names. - The *callback* function is called for each line with a string argument - containing the line with the trailing CRLF stripped. The default *callback* - prints the line to ``sys.stdout``. + Produce a directory listing as returned by the ``LIST`` command, printing it to + standard output. The optional *argument* is a directory to list (default is the + current server directory). Multiple arguments can be used to pass non-standard + options to the ``LIST`` command. If the last argument is a function, it is used + as a *callback* function as for :meth:`retrlines`; the default prints to + ``sys.stdout``. This method returns ``None``. + .. note:: If your server supports the command, :meth:`mlsd` offers a better API. -.. method:: FTP.set_pasv(val) - Enable "passive" mode if *val* is true, otherwise disable passive mode. - Passive mode is on by default. + .. method:: FTP.rename(fromname, toname) + Rename file *fromname* on the server to *toname*. -.. method:: FTP.storbinary(cmd, fp, blocksize=8192, callback=None, rest=None) - Store a file in binary transfer mode. 
*cmd* should be an appropriate - ``STOR`` command: ``"STOR filename"``. *fp* is a :term:`file object` - (opened in binary mode) which is read until EOF using its :meth:`~io.IOBase.read` - method in blocks of size *blocksize* to provide the data to be stored. - The *blocksize* argument defaults to 8192. *callback* is an optional single - parameter callable that is called on each block of data after it is sent. - *rest* means the same thing as in the :meth:`transfercmd` method. + .. method:: FTP.delete(filename) - .. versionchanged:: 3.2 - *rest* parameter added. + Remove the file named *filename* from the server. If successful, returns the + text of the response, otherwise raises :exc:`error_perm` on permission errors or + :exc:`error_reply` on other errors. -.. method:: FTP.storlines(cmd, fp, callback=None) + .. method:: FTP.cwd(pathname) - Store a file in line mode. *cmd* should be an appropriate - ``STOR`` command (see :meth:`storbinary`). Lines are read until EOF from the - :term:`file object` *fp* (opened in binary mode) using its :meth:`~io.IOBase.readline` - method to provide the data to be stored. *callback* is an optional single - parameter callable that is called on each line after it is sent. + Set the current directory on the server. -.. method:: FTP.transfercmd(cmd, rest=None) + .. method:: FTP.mkd(pathname) - Initiate a transfer over the data connection. If the transfer is active, send an - ``EPRT`` or ``PORT`` command and the transfer command specified by *cmd*, and - accept the connection. If the server is passive, send an ``EPSV`` or ``PASV`` - command, connect to it, and start the transfer command. Either way, return the - socket for the connection. + Create a new directory on the server. - If optional *rest* is given, a ``REST`` command is sent to the server, passing - *rest* as an argument. 
*rest* is usually a byte offset into the requested file, - telling the server to restart sending the file's bytes at the requested offset, - skipping over the initial bytes. Note however that the :meth:`transfercmd` - method converts *rest* to a string with the *encoding* parameter specified - at initialization, but no check is performed on the string's contents. If the - server does not recognize the ``REST`` command, an :exc:`error_reply` exception - will be raised. If this happens, simply call :meth:`transfercmd` without a - *rest* argument. + .. method:: FTP.pwd() -.. method:: FTP.ntransfercmd(cmd, rest=None) + Return the pathname of the current directory on the server. - Like :meth:`transfercmd`, but returns a tuple of the data connection and the - expected size of the data. If the expected size could not be computed, ``None`` - will be returned as the expected size. *cmd* and *rest* means the same thing as - in :meth:`transfercmd`. + .. method:: FTP.rmd(dirname) -.. method:: FTP.mlsd(path="", facts=[]) + Remove the directory named *dirname* on the server. - List a directory in a standardized format by using ``MLSD`` command - (:rfc:`3659`). If *path* is omitted the current directory is assumed. - *facts* is a list of strings representing the type of information desired - (e.g. ``["type", "size", "perm"]``). Return a generator object yielding a - tuple of two elements for every file found in path. First element is the - file name, the second one is a dictionary containing facts about the file - name. Content of this dictionary might be limited by the *facts* argument - but server is not guaranteed to return all requested facts. - .. versionadded:: 3.3 + .. method:: FTP.size(filename) + Request the size of the file named *filename* on the server. On success, the + size of the file is returned as an integer, otherwise ``None`` is returned. + Note that the ``SIZE`` command is not standardized, but is supported by many + common server implementations. -.. 
method:: FTP.nlst(argument[, ...]) - Return a list of file names as returned by the ``NLST`` command. The - optional *argument* is a directory to list (default is the current server - directory). Multiple arguments can be used to pass non-standard options to - the ``NLST`` command. + .. method:: FTP.quit() - .. note:: If your server supports the command, :meth:`mlsd` offers a better API. + Send a ``QUIT`` command to the server and close the connection. This is the + "polite" way to close a connection, but it may raise an exception if the server + responds with an error to the ``QUIT`` command. This implies a call to the + :meth:`close` method which renders the :class:`FTP` instance useless for + subsequent calls (see below). -.. method:: FTP.dir(argument[, ...]) + .. method:: FTP.close() - Produce a directory listing as returned by the ``LIST`` command, printing it to - standard output. The optional *argument* is a directory to list (default is the - current server directory). Multiple arguments can be used to pass non-standard - options to the ``LIST`` command. If the last argument is a function, it is used - as a *callback* function as for :meth:`retrlines`; the default prints to - ``sys.stdout``. This method returns ``None``. + Close the connection unilaterally. This should not be applied to an already + closed connection such as after a successful call to :meth:`~FTP.quit`. + After this call the :class:`FTP` instance should not be used any more (after + a call to :meth:`close` or :meth:`~FTP.quit` you cannot reopen the + connection by issuing another :meth:`login` method). - .. note:: If your server supports the command, :meth:`mlsd` offers a better API. +FTP_TLS objects +^^^^^^^^^^^^^^^ -.. method:: FTP.rename(fromname, toname) +.. class:: FTP_TLS(host='', user='', passwd='', acct='', *, context=None, + timeout=None, source_address=None, encoding='utf-8') - Rename file *fromname* on the server to *toname*. 
+ A :class:`FTP` subclass which adds TLS support to FTP as described in + :rfc:`4217`. + Connect as usual to port 21 implicitly securing the FTP control connection + before authenticating. Securing the data connection requires the user to + explicitly ask for it by calling the :meth:`prot_p` method. *context* + is a :class:`ssl.SSLContext` object which allows bundling SSL configuration + options, certificates and private keys into a single (potentially + long-lived) structure. Please read :ref:`ssl-security` for best practices. + .. versionadded:: 3.2 -.. method:: FTP.delete(filename) + .. versionchanged:: 3.3 + *source_address* parameter was added. - Remove the file named *filename* from the server. If successful, returns the - text of the response, otherwise raises :exc:`error_perm` on permission errors or - :exc:`error_reply` on other errors. + .. versionchanged:: 3.4 + The class now supports hostname check with + :attr:`ssl.SSLContext.check_hostname` and *Server Name Indication* (see + :const:`ssl.HAS_SNI`). + .. versionchanged:: 3.9 + If the *timeout* parameter is set to be zero, it will raise a + :class:`ValueError` to prevent the creation of a non-blocking socket. + The *encoding* parameter was added, and the default was changed from + Latin-1 to UTF-8 to follow :rfc:`2640`. -.. method:: FTP.cwd(pathname) + .. versionchanged:: 3.12 + The deprecated *keyfile* and *certfile* parameters have been removed. - Set the current directory on the server. 
+ Here's a sample session using the :class:`FTP_TLS` class:: + >>> ftps = FTP_TLS('ftp.pureftpd.org') + >>> ftps.login() + '230 Anonymous user logged in' + >>> ftps.prot_p() + '200 Data protection level set to "private"' + >>> ftps.nlst() + ['6jack', 'OpenBSD', 'antilink', 'blogbench', 'bsdcam', 'clockspeed', 'djbdns-jedi', 'docs', 'eaccelerator-jedi', 'favicon.ico', 'francotone', 'fugu', 'ignore', 'libpuzzle', 'metalog', 'minidentd', 'misc', 'mysql-udf-global-user-variables', 'php-jenkins-hash', 'php-skein-hash', 'php-webdav', 'phpaudit', 'phpbench', 'pincaster', 'ping', 'posto', 'pub', 'public', 'public_keys', 'pure-ftpd', 'qscan', 'qtc', 'sharedance', 'skycache', 'sound', 'tmp', 'ucarp'] -.. method:: FTP.mkd(pathname) + :class:`!FTP_TLS` class inherits from :class:`FTP`, + defining these additional methods and attributes: - Create a new directory on the server. + .. attribute:: FTP_TLS.ssl_version + The SSL version to use (defaults to :data:`ssl.PROTOCOL_SSLv23`). -.. method:: FTP.pwd() + .. method:: FTP_TLS.auth() - Return the pathname of the current directory on the server. + Set up a secure control connection by using TLS or SSL, depending on what + is specified in the :attr:`ssl_version` attribute. + .. versionchanged:: 3.4 + The method now supports hostname check with + :attr:`ssl.SSLContext.check_hostname` and *Server Name Indication* (see + :const:`ssl.HAS_SNI`). -.. method:: FTP.rmd(dirname) + .. method:: FTP_TLS.ccc() - Remove the directory named *dirname* on the server. + Revert control channel back to plaintext. This can be useful to take + advantage of firewalls that know how to handle NAT with non-secure FTP + without opening fixed ports. + .. versionadded:: 3.3 -.. method:: FTP.size(filename) + .. method:: FTP_TLS.prot_p() - Request the size of the file named *filename* on the server. On success, the - size of the file is returned as an integer, otherwise ``None`` is returned. 
- Note that the ``SIZE`` command is not standardized, but is supported by many - common server implementations. + Set up secure data connection. + .. method:: FTP_TLS.prot_c() -.. method:: FTP.quit() + Set up clear text data connection. - Send a ``QUIT`` command to the server and close the connection. This is the - "polite" way to close a connection, but it may raise an exception if the server - responds with an error to the ``QUIT`` command. This implies a call to the - :meth:`close` method which renders the :class:`FTP` instance useless for - subsequent calls (see below). +Module variables +^^^^^^^^^^^^^^^^ -.. method:: FTP.close() +.. exception:: error_reply - Close the connection unilaterally. This should not be applied to an already - closed connection such as after a successful call to :meth:`~FTP.quit`. - After this call the :class:`FTP` instance should not be used any more (after - a call to :meth:`close` or :meth:`~FTP.quit` you cannot reopen the - connection by issuing another :meth:`login` method). + Exception raised when an unexpected reply is received from the server. -FTP_TLS Objects ---------------- +.. exception:: error_temp -:class:`FTP_TLS` class inherits from :class:`FTP`, defining these additional objects: + Exception raised when an error code signifying a temporary error (response + codes in the range 400--499) is received. -.. attribute:: FTP_TLS.ssl_version - The SSL version to use (defaults to :data:`ssl.PROTOCOL_SSLv23`). +.. exception:: error_perm -.. method:: FTP_TLS.auth() + Exception raised when an error code signifying a permanent error (response + codes in the range 500--599) is received. - Set up a secure control connection by using TLS or SSL, depending on what - is specified in the :attr:`ssl_version` attribute. - .. versionchanged:: 3.4 - The method now supports hostname check with - :attr:`ssl.SSLContext.check_hostname` and *Server Name Indication* (see - :const:`ssl.HAS_SNI`). +.. exception:: error_proto -.. 
method:: FTP_TLS.ccc() + Exception raised when a reply is received from the server that does not fit + the response specifications of the File Transfer Protocol, i.e. begin with a + digit in the range 1--5. - Revert control channel back to plaintext. This can be useful to take - advantage of firewalls that know how to handle NAT with non-secure FTP - without opening fixed ports. - .. versionadded:: 3.3 +.. data:: all_errors -.. method:: FTP_TLS.prot_p() + The set of all exceptions (as a tuple) that methods of :class:`FTP` + instances may raise as a result of problems with the FTP connection (as + opposed to programming errors made by the caller). This set includes the + four exceptions listed above as well as :exc:`OSError` and :exc:`EOFError`. - Set up secure data connection. -.. method:: FTP_TLS.prot_c() +.. seealso:: - Set up clear text data connection. + Module :mod:`netrc` + Parser for the :file:`.netrc` file format. The file :file:`.netrc` is + typically used by FTP clients to load user authentication information + before prompting the user. From 1d6d5e854c375821a64fa9c2fbb04a36fb3b9aaa Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Sat, 20 Jan 2024 11:14:45 -0500 Subject: [PATCH 015/160] gh-112529: Use GC heaps for GC allocations in free-threaded builds (gh-114157) * gh-112529: Use GC heaps for GC allocations in free-threaded builds The free-threaded build's garbage collector implementation will need to find GC objects by traversing mimalloc heaps. This hooks up the allocation calls with the correct heaps by using a thread-local "current_obj_heap" variable. 
* Refactor out setting heap based on type --- Include/internal/pycore_object_alloc.h | 71 ++++++++++++++++++++++++++ Makefile.pre.in | 1 + Objects/typeobject.c | 3 +- PCbuild/pythoncore.vcxproj | 1 + PCbuild/pythoncore.vcxproj.filters | 3 ++ Python/gc.c | 13 ++--- 6 files changed, 85 insertions(+), 7 deletions(-) create mode 100644 Include/internal/pycore_object_alloc.h diff --git a/Include/internal/pycore_object_alloc.h b/Include/internal/pycore_object_alloc.h new file mode 100644 index 00000000000000..8cc7a444bc93e7 --- /dev/null +++ b/Include/internal/pycore_object_alloc.h @@ -0,0 +1,71 @@ +#ifndef Py_INTERNAL_OBJECT_ALLOC_H +#define Py_INTERNAL_OBJECT_ALLOC_H + +#include "pycore_object.h" // _PyType_HasFeature() +#include "pycore_pystate.h" // _PyThreadState_GET() +#include "pycore_tstate.h" // _PyThreadStateImpl + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#ifdef Py_GIL_DISABLED +static inline mi_heap_t * +_PyObject_GetAllocationHeap(_PyThreadStateImpl *tstate, PyTypeObject *tp) +{ + struct _mimalloc_thread_state *m = &tstate->mimalloc; + if (_PyType_HasFeature(tp, Py_TPFLAGS_PREHEADER)) { + return &m->heaps[_Py_MIMALLOC_HEAP_GC_PRE]; + } + else if (_PyType_IS_GC(tp)) { + return &m->heaps[_Py_MIMALLOC_HEAP_GC]; + } + else { + return &m->heaps[_Py_MIMALLOC_HEAP_OBJECT]; + } +} +#endif + +// Sets the heap used for PyObject_Malloc(), PyObject_Realloc(), etc. calls in +// Py_GIL_DISABLED builds. We use different heaps depending on if the object +// supports GC and if it has a pre-header. We smuggle the choice of heap +// through the _mimalloc_thread_state. In the default build, this simply +// calls PyObject_Malloc(). 
+static inline void * +_PyObject_MallocWithType(PyTypeObject *tp, size_t size) +{ +#ifdef Py_GIL_DISABLED + _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET(); + struct _mimalloc_thread_state *m = &tstate->mimalloc; + m->current_object_heap = _PyObject_GetAllocationHeap(tstate, tp); +#endif + void *mem = PyObject_Malloc(size); +#ifdef Py_GIL_DISABLED + m->current_object_heap = &m->heaps[_Py_MIMALLOC_HEAP_OBJECT]; +#endif + return mem; +} + +static inline void * +_PyObject_ReallocWithType(PyTypeObject *tp, void *ptr, size_t size) +{ +#ifdef Py_GIL_DISABLED + _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET(); + struct _mimalloc_thread_state *m = &tstate->mimalloc; + m->current_object_heap = _PyObject_GetAllocationHeap(tstate, tp); +#endif + void *mem = PyObject_Realloc(ptr, size); +#ifdef Py_GIL_DISABLED + m->current_object_heap = &m->heaps[_Py_MIMALLOC_HEAP_OBJECT]; +#endif + return mem; +} + +#ifdef __cplusplus +} +#endif +#endif // !Py_INTERNAL_OBJECT_ALLOC_H diff --git a/Makefile.pre.in b/Makefile.pre.in index d251e7c481b52b..1107259b5ae1ca 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1852,6 +1852,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_moduleobject.h \ $(srcdir)/Include/internal/pycore_namespace.h \ $(srcdir)/Include/internal/pycore_object.h \ + $(srcdir)/Include/internal/pycore_object_alloc.h \ $(srcdir)/Include/internal/pycore_object_state.h \ $(srcdir)/Include/internal/pycore_obmalloc.h \ $(srcdir)/Include/internal/pycore_obmalloc_init.h \ diff --git a/Objects/typeobject.c b/Objects/typeobject.c index ea29a38d74ae3e..3a35a5b5975898 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -11,6 +11,7 @@ #include "pycore_modsupport.h" // _PyArg_NoKwnames() #include "pycore_moduleobject.h" // _PyModule_GetDef() #include "pycore_object.h" // _PyType_HasFeature() +#include "pycore_object_alloc.h" // _PyObject_MallocWithType() #include "pycore_pyerrors.h" // _PyErr_Occurred() #include 
"pycore_pystate.h" // _PyThreadState_GET() #include "pycore_symtable.h" // _Py_Mangle() @@ -1729,7 +1730,7 @@ _PyType_AllocNoTrack(PyTypeObject *type, Py_ssize_t nitems) const size_t size = _PyObject_VAR_SIZE(type, nitems+1); const size_t presize = _PyType_PreHeaderSize(type); - char *alloc = PyObject_Malloc(size + presize); + char *alloc = _PyObject_MallocWithType(type, size + presize); if (alloc == NULL) { return PyErr_NoMemory(); } diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 64738b1bbf235d..57275fb2039ee0 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -255,6 +255,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index b37ca2dfed55ab..51cbb079b5b550 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -693,6 +693,9 @@ Include\internal + + Include\internal + Include\internal diff --git a/Python/gc.c b/Python/gc.c index 9f9a755f6ac95e..14870505ef1308 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -9,6 +9,7 @@ #include "pycore_initconfig.h" #include "pycore_interp.h" // PyInterpreterState.gc #include "pycore_object.h" +#include "pycore_object_alloc.h" // _PyObject_MallocWithType() #include "pycore_pyerrors.h" #include "pycore_pystate.h" // _PyThreadState_GET() #include "pycore_weakref.h" // _PyWeakref_ClearRef() @@ -1795,14 +1796,14 @@ _Py_RunGC(PyThreadState *tstate) } static PyObject * -gc_alloc(size_t basicsize, size_t presize) +gc_alloc(PyTypeObject *tp, size_t basicsize, size_t presize) { PyThreadState *tstate = _PyThreadState_GET(); if (basicsize > PY_SSIZE_T_MAX - presize) { return _PyErr_NoMemory(tstate); } size_t size = presize + basicsize; - char *mem = PyObject_Malloc(size); + char *mem = _PyObject_MallocWithType(tp, size); if (mem == NULL) { return _PyErr_NoMemory(tstate); } @@ -1817,7 +1818,7 @@ PyObject * _PyObject_GC_New(PyTypeObject *tp) { size_t presize = _PyType_PreHeaderSize(tp); - PyObject *op = 
gc_alloc(_PyObject_SIZE(tp), presize); + PyObject *op = gc_alloc(tp, _PyObject_SIZE(tp), presize); if (op == NULL) { return NULL; } @@ -1836,7 +1837,7 @@ _PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems) } size_t presize = _PyType_PreHeaderSize(tp); size_t size = _PyObject_VAR_SIZE(tp, nitems); - op = (PyVarObject *)gc_alloc(size, presize); + op = (PyVarObject *)gc_alloc(tp, size, presize); if (op == NULL) { return NULL; } @@ -1848,7 +1849,7 @@ PyObject * PyUnstable_Object_GC_NewWithExtraData(PyTypeObject *tp, size_t extra_size) { size_t presize = _PyType_PreHeaderSize(tp); - PyObject *op = gc_alloc(_PyObject_SIZE(tp) + extra_size, presize); + PyObject *op = gc_alloc(tp, _PyObject_SIZE(tp) + extra_size, presize); if (op == NULL) { return NULL; } @@ -1867,7 +1868,7 @@ _PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems) return (PyVarObject *)PyErr_NoMemory(); } char *mem = (char *)op - presize; - mem = (char *)PyObject_Realloc(mem, presize + basicsize); + mem = (char *)_PyObject_ReallocWithType(Py_TYPE(op), mem, presize + basicsize); if (mem == NULL) { return (PyVarObject *)PyErr_NoMemory(); } From 0554a9594e07f46836a58795c9d9af2a97acec66 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Sat, 20 Jan 2024 19:49:48 +0300 Subject: [PATCH 016/160] gh-114281: Remove incorrect type hints from `asyncio.staggered` (#114282) Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- Lib/asyncio/staggered.py | 12 +----------- .../2024-01-19-12-05-22.gh-issue-114281.H5JQe4.rst | 3 +++ 2 files changed, 4 insertions(+), 11 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-01-19-12-05-22.gh-issue-114281.H5JQe4.rst diff --git a/Lib/asyncio/staggered.py b/Lib/asyncio/staggered.py index 451a53a16f3831..e180cde0243b15 100644 --- a/Lib/asyncio/staggered.py +++ b/Lib/asyncio/staggered.py @@ -3,7 +3,6 @@ __all__ = 'staggered_race', import contextlib -import typing from . import events from . 
import exceptions as exceptions_mod @@ -11,16 +10,7 @@ from . import tasks -async def staggered_race( - coro_fns: typing.Iterable[typing.Callable[[], typing.Awaitable]], - delay: typing.Optional[float], - *, - loop: events.AbstractEventLoop = None, -) -> typing.Tuple[ - typing.Any, - typing.Optional[int], - typing.List[typing.Optional[Exception]] -]: +async def staggered_race(coro_fns, delay, *, loop=None): """Run coroutines with staggered start times and take the first to finish. This method takes an iterable of coroutine functions. The first one is diff --git a/Misc/NEWS.d/next/Library/2024-01-19-12-05-22.gh-issue-114281.H5JQe4.rst b/Misc/NEWS.d/next/Library/2024-01-19-12-05-22.gh-issue-114281.H5JQe4.rst new file mode 100644 index 00000000000000..36c54e8faf214c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-19-12-05-22.gh-issue-114281.H5JQe4.rst @@ -0,0 +1,3 @@ +Remove type hints from ``Lib/asyncio/staggered.py``. +The annotations in the `typeshed <https://github.com/python/typeshed>`__ +project should be used instead. From 8f4f77364750d0ceec47157e8920983e3f41651f Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Sat, 20 Jan 2024 20:39:44 +0100 Subject: [PATCH 017/160] Docs: Add missing line continuation to FTP_TLS class docs (#114352) Regression introduced by b1ad5a5d4. --- Doc/library/ftplib.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/ftplib.rst b/Doc/library/ftplib.rst index 6c44b8d65c3293..648956b7b66772 100644 --- a/Doc/library/ftplib.rst +++ b/Doc/library/ftplib.rst @@ -348,7 +348,7 @@ FTP objects FTP_TLS objects ^^^^^^^^^^^^^^^ -.. class:: FTP_TLS(host='', user='', passwd='', acct='', *, context=None, +.. class:: FTP_TLS(host='', user='', passwd='', acct='', *, context=None, \ timeout=None, source_address=None, encoding='utf-8') A :class:`FTP` subclass which adds TLS support to FTP as described in From c48b8f8db8625fe543691e3306f4226435f77ec8 Mon Sep 17 00:00:00 2001 From: "Gregory P.
Smith" Date: Sat, 20 Jan 2024 12:09:50 -0800 Subject: [PATCH 018/160] Remove the non-test Lib/test/time_hashlib.py. (#114354) I believe I added this while chasing some performance of hash functions when I first created hashlib. It hasn't been used since, is frankly trivial, and not a test. --- Lib/test/time_hashlib.py | 88 ---------------------------------------- 1 file changed, 88 deletions(-) delete mode 100644 Lib/test/time_hashlib.py diff --git a/Lib/test/time_hashlib.py b/Lib/test/time_hashlib.py deleted file mode 100644 index 55ebac62912fe1..00000000000000 --- a/Lib/test/time_hashlib.py +++ /dev/null @@ -1,88 +0,0 @@ -# It's intended that this script be run by hand. It runs speed tests on -# hashlib functions; it does not test for correctness. - -import sys -import time -import hashlib - - -def creatorFunc(): - raise RuntimeError("eek, creatorFunc not overridden") - -def test_scaled_msg(scale, name): - iterations = 106201//scale * 20 - longStr = b'Z'*scale - - localCF = creatorFunc - start = time.perf_counter() - for f in range(iterations): - x = localCF(longStr).digest() - end = time.perf_counter() - - print(('%2.2f' % (end-start)), "seconds", iterations, "x", len(longStr), "bytes", name) - -def test_create(): - start = time.perf_counter() - for f in range(20000): - d = creatorFunc() - end = time.perf_counter() - - print(('%2.2f' % (end-start)), "seconds", '[20000 creations]') - -def test_zero(): - start = time.perf_counter() - for f in range(20000): - x = creatorFunc().digest() - end = time.perf_counter() - - print(('%2.2f' % (end-start)), "seconds", '[20000 "" digests]') - - - -hName = sys.argv[1] - -# -# setup our creatorFunc to test the requested hash -# -if hName in ('_md5', '_sha'): - exec('import '+hName) - exec('creatorFunc = '+hName+'.new') - print("testing speed of old", hName, "legacy interface") -elif hName == '_hashlib' and len(sys.argv) > 3: - import _hashlib - exec('creatorFunc = _hashlib.%s' % sys.argv[2]) - print("testing speed of 
_hashlib.%s" % sys.argv[2], getattr(_hashlib, sys.argv[2])) -elif hName == '_hashlib' and len(sys.argv) == 3: - import _hashlib - exec('creatorFunc = lambda x=_hashlib.new : x(%r)' % sys.argv[2]) - print("testing speed of _hashlib.new(%r)" % sys.argv[2]) -elif hasattr(hashlib, hName) and hasattr(getattr(hashlib, hName), '__call__'): - creatorFunc = getattr(hashlib, hName) - print("testing speed of hashlib."+hName, getattr(hashlib, hName)) -else: - exec("creatorFunc = lambda x=hashlib.new : x(%r)" % hName) - print("testing speed of hashlib.new(%r)" % hName) - -try: - test_create() -except ValueError: - print() - print("pass argument(s) naming the hash to run a speed test on:") - print(" '_md5' and '_sha' test the legacy builtin md5 and sha") - print(" '_hashlib' 'openssl_hName' 'fast' tests the builtin _hashlib") - print(" '_hashlib' 'hName' tests builtin _hashlib.new(shaFOO)") - print(" 'hName' tests the hashlib.hName() implementation if it exists") - print(" otherwise it uses hashlib.new(hName).") - print() - raise - -test_zero() -test_scaled_msg(scale=106201, name='[huge data]') -test_scaled_msg(scale=10620, name='[large data]') -test_scaled_msg(scale=1062, name='[medium data]') -test_scaled_msg(scale=424, name='[4*small data]') -test_scaled_msg(scale=336, name='[3*small data]') -test_scaled_msg(scale=212, name='[2*small data]') -test_scaled_msg(scale=106, name='[small data]') -test_scaled_msg(scale=creatorFunc().digest_size, name='[digest_size data]') -test_scaled_msg(scale=10, name='[tiny data]') From 52eade22237eef1f3843271b9aa8ff007e2b0176 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Sat, 20 Jan 2024 23:10:43 +0200 Subject: [PATCH 019/160] Remove deleted `time_hashlib.py` from `Lib/test/.ruff.toml` (#114355) --- Lib/test/.ruff.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/Lib/test/.ruff.toml b/Lib/test/.ruff.toml index 74ab215ee8ee28..d6c1d8745036ec 100644 --- a/Lib/test/.ruff.toml +++ 
b/Lib/test/.ruff.toml @@ -20,5 +20,4 @@ extend-exclude = [ "test_import/__init__.py", "test_pkg.py", "test_yield_from.py", - "time_hashlib.py", ] From b04c5005cc9bd04052075077f7b33beca5edf43d Mon Sep 17 00:00:00 2001 From: cdzhan Date: Sun, 21 Jan 2024 11:45:38 +0800 Subject: [PATCH 020/160] Fix the confusing "User-defined methods" reference in the datamodel (#114276) Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Co-authored-by: Sergey B Kirpichev --- Doc/reference/datamodel.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index ca29a3712dfa38..0a1c1d58558e94 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -1529,7 +1529,7 @@ Class method objects A class method object, like a static method object, is a wrapper around another object that alters the way in which that object is retrieved from classes and class instances. The behaviour of class method objects upon such retrieval is -described above, under "User-defined methods". Class method objects are created +described above, under :ref:`"instance methods" `. Class method objects are created by the built-in :func:`classmethod` constructor. From fbc28748eaf578436ef9662363575f6bdde2e2f7 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Sun, 21 Jan 2024 10:19:31 +0100 Subject: [PATCH 021/160] Docs: mark up the FTP debug levels as a list (#114360) Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Doc/library/ftplib.rst | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/Doc/library/ftplib.rst b/Doc/library/ftplib.rst index 648956b7b66772..16ad3a8b7f2d82 100644 --- a/Doc/library/ftplib.rst +++ b/Doc/library/ftplib.rst @@ -102,12 +102,15 @@ FTP objects .. method:: FTP.set_debuglevel(level) - Set the instance's debugging level. This controls the amount of debugging - output printed. 
The default, ``0``, produces no debugging output. A value of - ``1`` produces a moderate amount of debugging output, generally a single line - per request. A value of ``2`` or higher produces the maximum amount of - debugging output, logging each line sent and received on the control connection. - + Set the instance's debugging level as an :class:`int`. + This controls the amount of debugging output printed. + The debug levels are: + + * ``0`` (default): No debug output. + * ``1``: Produce a moderate amount of debug output, + generally a single line per request. + * ``2`` or higher: Produce the maximum amount of debugging output, + logging each line sent and received on the control connection. .. method:: FTP.connect(host='', port=0, timeout=None, source_address=None) From 47133d8d869c94c4d1f340b5481cc3f2cdc7d68b Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Sun, 21 Jan 2024 12:34:43 +0300 Subject: [PATCH 022/160] gh-101100: Fix sphinx warnings in `Doc/c-api/memory.rst` (#114373) --- Doc/c-api/memory.rst | 4 ++-- Doc/tools/.nitignore | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Doc/c-api/memory.rst b/Doc/c-api/memory.rst index 1f392e55078e77..c05282ffc59521 100644 --- a/Doc/c-api/memory.rst +++ b/Doc/c-api/memory.rst @@ -267,14 +267,14 @@ The following type-oriented macros are provided for convenience. Note that .. c:macro:: PyMem_New(TYPE, n) Same as :c:func:`PyMem_Malloc`, but allocates ``(n * sizeof(TYPE))`` bytes of - memory. Returns a pointer cast to :c:expr:`TYPE*`. The memory will not have + memory. Returns a pointer cast to ``TYPE*``. The memory will not have been initialized in any way. .. c:macro:: PyMem_Resize(p, TYPE, n) Same as :c:func:`PyMem_Realloc`, but the memory block is resized to ``(n * - sizeof(TYPE))`` bytes. Returns a pointer cast to :c:expr:`TYPE*`. On return, + sizeof(TYPE))`` bytes. Returns a pointer cast to ``TYPE*``. On return, *p* will be a pointer to the new memory area, or ``NULL`` in the event of failure. 
diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 5935d059877a91..4f2e4fdd43490c 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -10,7 +10,6 @@ Doc/c-api/gcsupport.rst Doc/c-api/init.rst Doc/c-api/init_config.rst Doc/c-api/intro.rst -Doc/c-api/memory.rst Doc/c-api/memoryview.rst Doc/c-api/module.rst Doc/c-api/object.rst From 5adff3f86ff92cbd27380ad88357c77b330af6e1 Mon Sep 17 00:00:00 2001 From: Ronald Oussoren Date: Sun, 21 Jan 2024 11:25:15 +0100 Subject: [PATCH 023/160] gh-80931: Skip some socket tests while hunting for refleaks on macOS (#114057) Some socket tests related to sending file descriptors cause a file descriptor leak on macOS, all of them tests that send one or more descriptors that cannot be received on the read end. This appears to be a platform bug. This PR skips those tests when doing a refleak test run to avoid hiding other problems. --- Lib/test/libregrtest/refleak.py | 8 ++++- Lib/test/support/refleak_helper.py | 8 +++++ Lib/test/test_socket.py | 49 ++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 Lib/test/support/refleak_helper.py diff --git a/Lib/test/libregrtest/refleak.py b/Lib/test/libregrtest/refleak.py index 5836a8421cb42d..7da16cf721f097 100644 --- a/Lib/test/libregrtest/refleak.py +++ b/Lib/test/libregrtest/refleak.py @@ -5,6 +5,7 @@ from test import support from test.support import os_helper +from test.support import refleak_helper from .runtests import HuntRefleak from .utils import clear_caches @@ -96,7 +97,12 @@ def get_pooled_int(value): support.gc_collect() for i in rep_range: - results = test_func() + current = refleak_helper._hunting_for_refleaks + refleak_helper._hunting_for_refleaks = True + try: + results = test_func() + finally: + refleak_helper._hunting_for_refleaks = current dash_R_cleanup(fs, ps, pic, zdc, abcs) support.gc_collect() diff --git a/Lib/test/support/refleak_helper.py b/Lib/test/support/refleak_helper.py new file mode 100644 index
00000000000000..2f86c93a1e2e58 --- /dev/null +++ b/Lib/test/support/refleak_helper.py @@ -0,0 +1,8 @@ +""" +Utilities for changing test behaviour while hunting +for refleaks +""" + +_hunting_for_refleaks = False +def hunting_for_refleaks(): + return _hunting_for_refleaks diff --git a/Lib/test/test_socket.py b/Lib/test/test_socket.py index 4eb5af99d6674c..231448c75f01db 100644 --- a/Lib/test/test_socket.py +++ b/Lib/test/test_socket.py @@ -3,6 +3,7 @@ from test.support import os_helper from test.support import socket_helper from test.support import threading_helper +from test.support import refleak_helper import _thread as thread import array @@ -52,6 +53,35 @@ except ImportError: _socket = None +def skipForRefleakHuntinIf(condition, issueref): + if not condition: + def decorator(f): + f.client_skip = lambda f: f + return f + + else: + def decorator(f): + @contextlib.wraps(f) + def wrapper(*args, **kwds): + if refleak_helper.hunting_for_refleaks(): + raise unittest.SkipTest(f"ignore while hunting for refleaks, see {issueref}") + + return f(*args, **kwds) + + def client_skip(f): + @contextlib.wraps(f) + def wrapper(*args, **kwds): + if refleak_helper.hunting_for_refleaks(): + return + + return f(*args, **kwds) + + return wrapper + wrapper.client_skip = client_skip + return wrapper + + return decorator + def get_cid(): if fcntl is None: return None @@ -3814,6 +3844,7 @@ def checkTruncatedHeader(self, result, ignoreflags=0): self.checkFlags(flags, eor=True, checkset=socket.MSG_CTRUNC, ignore=ignoreflags) + @skipForRefleakHuntinIf(sys.platform == "darwin", "#80931") def testCmsgTruncNoBufSize(self): # Check that no ancillary data is received when no buffer size # is specified. @@ -3823,26 +3854,32 @@ def testCmsgTruncNoBufSize(self): # received. 
ignoreflags=socket.MSG_CTRUNC) + @testCmsgTruncNoBufSize.client_skip def _testCmsgTruncNoBufSize(self): self.createAndSendFDs(1) + @skipForRefleakHuntinIf(sys.platform == "darwin", "#80931") def testCmsgTrunc0(self): # Check that no ancillary data is received when buffer size is 0. self.checkTruncatedHeader(self.doRecvmsg(self.serv_sock, len(MSG), 0), ignoreflags=socket.MSG_CTRUNC) + @testCmsgTrunc0.client_skip def _testCmsgTrunc0(self): self.createAndSendFDs(1) # Check that no ancillary data is returned for various non-zero # (but still too small) buffer sizes. + @skipForRefleakHuntinIf(sys.platform == "darwin", "#80931") def testCmsgTrunc1(self): self.checkTruncatedHeader(self.doRecvmsg(self.serv_sock, len(MSG), 1)) + @testCmsgTrunc1.client_skip def _testCmsgTrunc1(self): self.createAndSendFDs(1) + @skipForRefleakHuntinIf(sys.platform == "darwin", "#80931") def testCmsgTrunc2Int(self): # The cmsghdr structure has at least three members, two of # which are ints, so we still shouldn't see any ancillary @@ -3850,13 +3887,16 @@ def testCmsgTrunc2Int(self): self.checkTruncatedHeader(self.doRecvmsg(self.serv_sock, len(MSG), SIZEOF_INT * 2)) + @testCmsgTrunc2Int.client_skip def _testCmsgTrunc2Int(self): self.createAndSendFDs(1) + @skipForRefleakHuntinIf(sys.platform == "darwin", "#80931") def testCmsgTruncLen0Minus1(self): self.checkTruncatedHeader(self.doRecvmsg(self.serv_sock, len(MSG), socket.CMSG_LEN(0) - 1)) + @testCmsgTruncLen0Minus1.client_skip def _testCmsgTruncLen0Minus1(self): self.createAndSendFDs(1) @@ -3887,29 +3927,38 @@ def checkTruncatedArray(self, ancbuf, maxdata, mindata=0): len(cmsg_data) - (len(cmsg_data) % fds.itemsize)]) self.checkFDs(fds) + @skipForRefleakHuntinIf(sys.platform == "darwin", "#80931") def testCmsgTruncLen0(self): self.checkTruncatedArray(ancbuf=socket.CMSG_LEN(0), maxdata=0) + @testCmsgTruncLen0.client_skip def _testCmsgTruncLen0(self): self.createAndSendFDs(1) + @skipForRefleakHuntinIf(sys.platform == "darwin", "#80931") def 
testCmsgTruncLen0Plus1(self): self.checkTruncatedArray(ancbuf=socket.CMSG_LEN(0) + 1, maxdata=1) + @testCmsgTruncLen0Plus1.client_skip def _testCmsgTruncLen0Plus1(self): self.createAndSendFDs(2) + @skipForRefleakHuntinIf(sys.platform == "darwin", "#80931") def testCmsgTruncLen1(self): self.checkTruncatedArray(ancbuf=socket.CMSG_LEN(SIZEOF_INT), maxdata=SIZEOF_INT) + @testCmsgTruncLen1.client_skip def _testCmsgTruncLen1(self): self.createAndSendFDs(2) + + @skipForRefleakHuntinIf(sys.platform == "darwin", "#80931") def testCmsgTruncLen2Minus1(self): self.checkTruncatedArray(ancbuf=socket.CMSG_LEN(2 * SIZEOF_INT) - 1, maxdata=(2 * SIZEOF_INT) - 1) + @testCmsgTruncLen2Minus1.client_skip def _testCmsgTruncLen2Minus1(self): self.createAndSendFDs(2) From 96c15b1c8d03db5b7b5b719214d9d156b317ba9d Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Sun, 21 Jan 2024 12:27:33 +0100 Subject: [PATCH 024/160] Docs: mark up FTP() constructor with param list (#114359) Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Doc/library/ftplib.rst | 48 ++++++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 11 deletions(-) diff --git a/Doc/library/ftplib.rst b/Doc/library/ftplib.rst index 16ad3a8b7f2d82..c4c5beb7b49525 100644 --- a/Doc/library/ftplib.rst +++ b/Doc/library/ftplib.rst @@ -55,17 +55,43 @@ Reference FTP objects ^^^^^^^^^^^ -.. class:: FTP(host='', user='', passwd='', acct='', timeout=None, source_address=None, *, encoding='utf-8') - - Return a new instance of the :class:`FTP` class. When *host* is given, the - method call ``connect(host)`` is made. When *user* is given, additionally - the method call ``login(user, passwd, acct)`` is made (where *passwd* and - *acct* default to the empty string when not given). The optional *timeout* - parameter specifies a timeout in seconds for blocking operations like the - connection attempt (if is not specified, the global default timeout setting - will be used). 
*source_address* is a 2-tuple ``(host, port)`` for the socket - to bind to as its source address before connecting. The *encoding* parameter - specifies the encoding for directories and filenames. +.. class:: FTP(host='', user='', passwd='', acct='', timeout=None, \ + source_address=None, *, encoding='utf-8') + + Return a new instance of the :class:`FTP` class. + When *host* is given, the method call :meth:`connect(host) ` + is made by the constructor. + When *user* is given, additionally the method call + :meth:`login(user, passwd, acct) ` is made. + + :param str host: + The hostname to connect to. + + :param str user: + The username to log in with. + If empty string, ``"anonymous"`` is used. + + :param str passwd: + The password to use when logging in. + If not given, and if *passwd* is the empty string or ``"-"``, + a password will be automatically generated. + + :param str acct: + Account information; see the ACCT FTP command. + + :param timeout: + A timeout in seconds for blocking operations like :meth:`connect`. + If not specified, the global default timeout setting will be used. + :type timeout: int | None + + :param source_address: + *source_address* is a 2-tuple ``(host, port)`` for the socket + to bind to as its source address before connecting. + :type source_address: tuple | None + + :param str encoding: + The *encoding* parameter specifies the encoding + for directories and filenames. 
The :class:`FTP` class supports the :keyword:`with` statement, e.g.: From 38768e4cdd1c4b6e03702da8a94e1c22479d6ed3 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Sun, 21 Jan 2024 15:49:49 +0300 Subject: [PATCH 025/160] gh-114384: Align sys.set_asyncgen_hooks signature in docs to reflect implementation (#114385) --- Doc/library/sys.rst | 2 +- Python/sysmodule.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index c371663934314a..abf2c393a44928 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -1655,7 +1655,7 @@ always available. ``'opcode'`` event type added; :attr:`~frame.f_trace_lines` and :attr:`~frame.f_trace_opcodes` attributes added to frames -.. function:: set_asyncgen_hooks(firstiter, finalizer) +.. function:: set_asyncgen_hooks([firstiter] [, finalizer]) Accepts two optional keyword arguments which are callables that accept an :term:`asynchronous generator iterator` as an argument. The *firstiter* diff --git a/Python/sysmodule.c b/Python/sysmodule.c index c2de4ecdc8ce0f..f558a00a6916eb 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -1420,7 +1420,7 @@ sys_set_asyncgen_hooks(PyObject *self, PyObject *args, PyObject *kw) } PyDoc_STRVAR(set_asyncgen_hooks_doc, -"set_asyncgen_hooks(* [, firstiter] [, finalizer])\n\ +"set_asyncgen_hooks([firstiter] [, finalizer])\n\ \n\ Set a finalizer for async generators objects." ); From de17cf444a7b1e06380bb5bf8547f1fc3c03fc4a Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Sun, 21 Jan 2024 18:41:21 +0100 Subject: [PATCH 026/160] Docs: link to sys.stdout in ftplib docs (#114396) --- Doc/library/ftplib.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/ftplib.rst b/Doc/library/ftplib.rst index c4c5beb7b49525..6ff97008c3015b 100644 --- a/Doc/library/ftplib.rst +++ b/Doc/library/ftplib.rst @@ -221,7 +221,7 @@ FTP objects ``NLST`` retrieves a list of file names. 
The *callback* function is called for each line with a string argument containing the line with the trailing CRLF stripped. The default *callback* - prints the line to ``sys.stdout``. + prints the line to :data:`sys.stdout`. .. method:: FTP.set_pasv(val) @@ -311,7 +311,7 @@ FTP objects current server directory). Multiple arguments can be used to pass non-standard options to the ``LIST`` command. If the last argument is a function, it is used as a *callback* function as for :meth:`retrlines`; the default prints to - ``sys.stdout``. This method returns ``None``. + :data:`sys.stdout`. This method returns ``None``. .. note:: If your server supports the command, :meth:`mlsd` offers a better API. From 336030161a6cb8aa5b4f42a08510f4383984703f Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Sun, 21 Jan 2024 20:54:19 +0100 Subject: [PATCH 027/160] Docs: align sqlite3 docs with versionadded/versionchanged recommendations (#114400) When a parameter is added to a function or method, use the 'versionchanged' directive, not 'versionadded'. --- Doc/library/sqlite3.rst | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/Doc/library/sqlite3.rst b/Doc/library/sqlite3.rst index 6dbb34a84a4c40..c3406b166c3d89 100644 --- a/Doc/library/sqlite3.rst +++ b/Doc/library/sqlite3.rst @@ -343,17 +343,17 @@ Module functions .. audit-event:: sqlite3.connect database sqlite3.connect .. audit-event:: sqlite3.connect/handle connection_handle sqlite3.connect - .. versionadded:: 3.4 - The *uri* parameter. + .. versionchanged:: 3.4 + Added the *uri* parameter. .. versionchanged:: 3.7 *database* can now also be a :term:`path-like object`, not only a string. - .. versionadded:: 3.10 - The ``sqlite3.connect/handle`` auditing event. + .. versionchanged:: 3.10 + Added the ``sqlite3.connect/handle`` auditing event. - .. versionadded:: 3.12 - The *autocommit* parameter. + .. versionchanged:: 3.12 + Added the *autocommit* parameter. .. 
versionchanged:: 3.13 Positional use of the parameters *timeout*, *detect_types*, @@ -747,8 +747,8 @@ Connection objects `deterministic `_, which allows SQLite to perform additional optimizations. - .. versionadded:: 3.8 - The *deterministic* parameter. + .. versionchanged:: 3.8 + Added the *deterministic* parameter. Example: @@ -1132,8 +1132,8 @@ Connection objects .. versionchanged:: 3.10 Added the ``sqlite3.load_extension`` auditing event. - .. versionadded:: 3.12 - The *entrypoint* parameter. + .. versionchanged:: 3.12 + Added the *entrypoint* parameter. .. _Loading an Extension: https://www.sqlite.org/loadext.html#loading_an_extension_ @@ -1762,10 +1762,10 @@ Row objects Blob objects ^^^^^^^^^^^^ -.. versionadded:: 3.11 - .. class:: Blob + .. versionadded:: 3.11 + A :class:`Blob` instance is a :term:`file-like object` that can read and write data in an SQLite :abbr:`BLOB (Binary Large OBject)`. Call :func:`len(blob) ` to get the size (number of bytes) of the blob. From 42d72b23dd1ee0e100ee47aca64fc1e1bbe576c9 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 21 Jan 2024 22:16:45 +0200 Subject: [PATCH 028/160] gh-114241: Fix and improve the ftplib CLI (GH-114242) * Fix writing the retrieved binary file to stdout. * Add a newline after writing warnings to stderr. * Fix a TypeError if the netrc file doesn't contain a host/default entry. * Improve the usage message. --- Lib/ftplib.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/Lib/ftplib.py b/Lib/ftplib.py index a56e0c3085701b..10c5d1ea08ab11 100644 --- a/Lib/ftplib.py +++ b/Lib/ftplib.py @@ -900,11 +900,17 @@ def ftpcp(source, sourcename, target, targetname = '', type = 'I'): def test(): '''Test program. - Usage: ftp [-d] [-r[file]] host [-l[dir]] [-d[dir]] [-p] [file] ... + Usage: ftplib [-d] [-r[file]] host [-l[dir]] [-d[dir]] [-p] [file] ... 
- -d dir - -l list - -p password + Options: + -d increase debugging level + -r[file] set alternate ~/.netrc file + + Commands: + -l[dir] list directory + -d[dir] change the current directory + -p toggle passive and active mode + file retrieve the file and write it to stdout ''' if len(sys.argv) < 2: @@ -930,15 +936,14 @@ def test(): netrcobj = netrc.netrc(rcfile) except OSError: if rcfile is not None: - sys.stderr.write("Could not open account file" - " -- using anonymous login.") + print("Could not open account file -- using anonymous login.", + file=sys.stderr) else: try: userid, acct, passwd = netrcobj.authenticators(host) - except KeyError: + except (KeyError, TypeError): # no account for host - sys.stderr.write( - "No account -- using anonymous login.") + print("No account -- using anonymous login.", file=sys.stderr) ftp.login(userid, passwd, acct) for file in sys.argv[2:]: if file[:2] == '-l': @@ -951,7 +956,9 @@ def test(): ftp.set_pasv(not ftp.passiveserver) else: ftp.retrbinary('RETR ' + file, \ - sys.stdout.write, 1024) + sys.stdout.buffer.write, 1024) + sys.stdout.buffer.flush() + sys.stdout.flush() ftp.quit() From db1c18eb6220653290a3ba9ebbe1df44394a3f19 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sun, 21 Jan 2024 22:29:51 +0200 Subject: [PATCH 029/160] gh-111803: Make test_deep_nesting from test_plistlib more strict (GH-114026) It is no longer silently passed if RecursionError was raised for low recursion depth. 
--- Lib/test/test_plistlib.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_plistlib.py b/Lib/test/test_plistlib.py index f47982907def21..001f86f2893f2f 100644 --- a/Lib/test/test_plistlib.py +++ b/Lib/test/test_plistlib.py @@ -971,12 +971,12 @@ def test_cycles(self): self.assertIs(b['x'], b) def test_deep_nesting(self): - for N in [300, 100000]: + for N in [50, 300, 100_000]: chunks = [b'\xa1' + (i + 1).to_bytes(4, 'big') for i in range(N)] try: result = self.decode(*chunks, b'\x54seed', offset_size=4, ref_size=4) except RecursionError: - pass + self.assertGreater(N, sys.getrecursionlimit()) else: for i in range(N): self.assertIsInstance(result, list) From fd49e226700e2483a452c3c92da6f15d822ae054 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Sun, 21 Jan 2024 15:25:52 -0800 Subject: [PATCH 030/160] gh-114328: tty cbreak mode should not alter ICRNL (#114335) The terminal CR -> NL mapping setting should be inherited in cbreak mode as OSes do not specify altering it as part of their stty cbreak mode definition. --- Doc/library/tty.rst | 15 +++++++++++++++ Lib/test/test_tty.py | 12 +++++++++++- Lib/tty.py | 3 --- ...2024-01-19-15-48-06.gh-issue-114328.hixxW3.rst | 4 ++++ 4 files changed, 30 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-01-19-15-48-06.gh-issue-114328.hixxW3.rst diff --git a/Doc/library/tty.rst b/Doc/library/tty.rst index 20ba7d7e0a45b3..ed63561c40de24 100644 --- a/Doc/library/tty.rst +++ b/Doc/library/tty.rst @@ -35,8 +35,15 @@ The :mod:`tty` module defines the following functions: Convert the tty attribute list *mode*, which is a list like the one returned by :func:`termios.tcgetattr`, to that of a tty in cbreak mode. + This clears the ``ECHO`` and ``ICANON`` local mode flags in *mode* as well + as setting the minimum input to 1 byte with no delay. + .. versionadded:: 3.12 + .. versionchanged:: 3.12.2 + The ``ICRNL`` flag is no longer cleared. 
This matches Linux and macOS + ``stty cbreak`` behavior and what :func:`setcbreak` historically did. + .. function:: setraw(fd, when=termios.TCSAFLUSH) @@ -56,9 +63,17 @@ The :mod:`tty` module defines the following functions: :func:`termios.tcsetattr`. The return value of :func:`termios.tcgetattr` is saved before setting *fd* to cbreak mode; this value is returned. + This clears the ``ECHO`` and ``ICANON`` local mode flags as well as setting + the minimum input to 1 byte with no delay. + .. versionchanged:: 3.12 The return value is now the original tty attributes, instead of None. + .. versionchanged:: 3.12.2 + The ``ICRNL`` flag is no longer cleared. This restores the behavior + of Python 3.11 and earlier as well as matching what Linux, macOS, & BSDs + describe in their ``stty(1)`` man pages regarding cbreak mode. + .. seealso:: diff --git a/Lib/test/test_tty.py b/Lib/test/test_tty.py index af20864aac361e..4cb730c226f134 100644 --- a/Lib/test/test_tty.py +++ b/Lib/test/test_tty.py @@ -19,7 +19,6 @@ def setUp(self): self.addCleanup(termios.tcsetattr, self.fd, termios.TCSAFLUSH, self.mode) def check_cbreak(self, mode): - self.assertEqual(mode[0] & termios.ICRNL, 0) self.assertEqual(mode[3] & termios.ECHO, 0) self.assertEqual(mode[3] & termios.ICANON, 0) self.assertEqual(mode[6][termios.VMIN], 1) @@ -56,6 +55,14 @@ def test_cfmakecbreak(self): self.assertEqual(mode[2], self.mode[2]) self.assertEqual(mode[4], self.mode[4]) self.assertEqual(mode[5], self.mode[5]) + mode[tty.IFLAG] |= termios.ICRNL + tty.cfmakecbreak(mode) + self.assertEqual(mode[tty.IFLAG] & termios.ICRNL, termios.ICRNL, + msg="ICRNL should not be cleared by cbreak") + mode[tty.IFLAG] &= ~termios.ICRNL + tty.cfmakecbreak(mode) + self.assertEqual(mode[tty.IFLAG] & termios.ICRNL, 0, + msg="ICRNL should not be set by cbreak") def test_setraw(self): mode0 = termios.tcgetattr(self.fd) @@ -74,6 +81,9 @@ def test_setcbreak(self): self.assertEqual(mode1, mode0) mode2 = termios.tcgetattr(self.fd) 
self.check_cbreak(mode2) + ICRNL = termios.ICRNL + self.assertEqual(mode2[tty.IFLAG] & ICRNL, mode0[tty.IFLAG] & ICRNL, + msg="ICRNL should not be altered by cbreak") mode3 = tty.setcbreak(self.fd, termios.TCSANOW) self.assertEqual(mode3, mode2) tty.setcbreak(self.stream) diff --git a/Lib/tty.py b/Lib/tty.py index 283e5c334f5751..5a49e0400425f3 100644 --- a/Lib/tty.py +++ b/Lib/tty.py @@ -45,9 +45,6 @@ def cfmakeraw(mode): def cfmakecbreak(mode): """Make termios mode cbreak.""" - # Do not map CR to NL on input. - mode[IFLAG] &= ~(ICRNL) - # Do not echo characters; disable canonical input. mode[LFLAG] &= ~(ECHO | ICANON) diff --git a/Misc/NEWS.d/next/Library/2024-01-19-15-48-06.gh-issue-114328.hixxW3.rst b/Misc/NEWS.d/next/Library/2024-01-19-15-48-06.gh-issue-114328.hixxW3.rst new file mode 100644 index 00000000000000..42262c05fd1fbf --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-19-15-48-06.gh-issue-114328.hixxW3.rst @@ -0,0 +1,4 @@ +The :func:`tty.setcbreak` and new :func:`tty.cfmakecbreak` no longer clear +the terminal input ICRNL flag. This fixes a regression introduced in 3.12 +that no longer matched how OSes define cbreak mode in their ``stty(1)`` +manual pages. From 650f9e4c94711ff49ea4e13bf800945a6147b7e0 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 22 Jan 2024 10:42:50 +0200 Subject: [PATCH 031/160] gh-114115: Update documentation of array.array (GH-114117) Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Doc/library/array.rst | 44 ++++++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/Doc/library/array.rst b/Doc/library/array.rst index ad622627724217..a0e8bb20a098fd 100644 --- a/Doc/library/array.rst +++ b/Doc/library/array.rst @@ -79,14 +79,16 @@ The module defines the following type: ..
class:: array(typecode[, initializer]) A new array whose items are restricted by *typecode*, and initialized - from the optional *initializer* value, which must be a list, a - :term:`bytes-like object`, or iterable over elements of the - appropriate type. + from the optional *initializer* value, which must be a :class:`bytes` + or :class:`bytearray` object, a Unicode string, or iterable over elements + of the appropriate type. - If given a list or string, the initializer is passed to the new array's - :meth:`fromlist`, :meth:`frombytes`, or :meth:`fromunicode` method (see below) - to add initial items to the array. Otherwise, the iterable initializer is - passed to the :meth:`extend` method. + If given a :class:`bytes` or :class:`bytearray` object, the initializer + is passed to the new array's :meth:`frombytes` method; + if given a Unicode string, the initializer is passed to the + :meth:`fromunicode` method; + otherwise, the initializer's iterator is passed to the :meth:`extend` method + to add initial items to the array. Array objects support the ordinary sequence operations of indexing, slicing, concatenation, and multiplication. When using slice assignment, the assigned @@ -152,10 +154,11 @@ The module defines the following type: must be the right type to be appended to the array. - .. method:: frombytes(s) + .. method:: frombytes(buffer) - Appends items from the string, interpreting the string as an array of machine - values (as if it had been read from a file using the :meth:`fromfile` method). + Appends items from the :term:`bytes-like object`, interpreting + its content as an array of machine values (as if it had been read + from a file using the :meth:`fromfile` method). .. versionadded:: 3.2 :meth:`!fromstring` is renamed to :meth:`frombytes` for clarity. @@ -177,7 +180,7 @@ The module defines the following type: .. method:: fromunicode(s) - Extends this array with data from the given unicode string. 
+ Extends this array with data from the given Unicode string. The array must have type code ``'u'`` or ``'w'``; otherwise a :exc:`ValueError` is raised. Use ``array.frombytes(unicodestring.encode(enc))`` to append Unicode data to an array of some other type. @@ -239,24 +242,27 @@ The module defines the following type: .. method:: tounicode() - Convert the array to a unicode string. The array must have a type ``'u'`` or ``'w'``; + Convert the array to a Unicode string. The array must have a type ``'u'`` or ``'w'``; otherwise a :exc:`ValueError` is raised. Use ``array.tobytes().decode(enc)`` to - obtain a unicode string from an array of some other type. + obtain a Unicode string from an array of some other type. -When an array object is printed or converted to a string, it is represented as -``array(typecode, initializer)``. The *initializer* is omitted if the array is -empty, otherwise it is a string if the *typecode* is ``'u'`` or ``'w'``, -otherwise it is a list of numbers. -The string is guaranteed to be able to be converted back to an +The string representation of array objects has the form +``array(typecode, initializer)``. +The *initializer* is omitted if the array is empty, otherwise it is +a Unicode string if the *typecode* is ``'u'`` or ``'w'``, otherwise it is +a list of numbers. +The string representation is guaranteed to be able to be converted back to an array with the same type and value using :func:`eval`, so long as the :class:`~array.array` class has been imported using ``from array import array``. +Variables ``inf`` and ``nan`` must also be defined if it contains +corresponding floating point values. Examples:: array('l') array('w', 'hello \u2641') array('l', [1, 2, 3, 4, 5]) - array('d', [1.0, 2.0, 3.14]) + array('d', [1.0, 2.0, 3.14, -inf, nan]) .. 
seealso:: From d1b031cc58516e1aba823fd613528417a996f50d Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Mon, 22 Jan 2024 12:19:25 +0300 Subject: [PATCH 032/160] gh-114414: Assert PyType_GetModuleByDef result in _threadmodule (#114415) --- Modules/_threadmodule.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index afcf646e3bc19e..99f97eb6d0adcc 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -901,6 +901,7 @@ local_new(PyTypeObject *type, PyObject *args, PyObject *kw) } PyObject *module = PyType_GetModuleByDef(type, &thread_module); + assert(module != NULL); thread_module_state *state = get_thread_state(module); localobject *self = (localobject *)type->tp_alloc(type, 0); @@ -1042,6 +1043,7 @@ static int local_setattro(localobject *self, PyObject *name, PyObject *v) { PyObject *module = PyType_GetModuleByDef(Py_TYPE(self), &thread_module); + assert(module != NULL); thread_module_state *state = get_thread_state(module); PyObject *ldict = _ldict(self, state); @@ -1094,6 +1096,7 @@ static PyObject * local_getattro(localobject *self, PyObject *name) { PyObject *module = PyType_GetModuleByDef(Py_TYPE(self), &thread_module); + assert(module != NULL); thread_module_state *state = get_thread_state(module); PyObject *ldict = _ldict(self, state); From 2f2ddabd1a02e3095b751100b94b529e4e0bcd20 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Mon, 22 Jan 2024 03:56:28 -0800 Subject: [PATCH 033/160] gh-113102: Fix typo in INSTRUMENTED_RESUME (GH-114349) --- Python/bytecodes.c | 2 +- Python/generated_cases.c.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index c48f0a17c60fb1..7674ff81f64cec 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -192,7 +192,7 @@ dummy_func( ERROR_IF(err, error); if (frame->instr_ptr != this_instr) { /* Instrumentation has jumped */ - next_instr = this_instr; + next_instr = frame->instr_ptr; 
DISPATCH(); } } diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 68468728d44bf8..c4bb3aeec5e224 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -3156,7 +3156,7 @@ if (err) goto error; if (frame->instr_ptr != this_instr) { /* Instrumentation has jumped */ - next_instr = this_instr; + next_instr = frame->instr_ptr; DISPATCH(); } } From c8351a617b8970dbe0d3af721c6aea873019c466 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 22 Jan 2024 15:34:16 +0200 Subject: [PATCH 034/160] gh-113796: Add more validation checks in the csv.Dialect constructor (GH-113797) ValueError is now raised if the same character is used in different roles. --- Lib/test/test_csv.py | 67 +++++++++++++++---- ...-01-07-21-04-24.gh-issue-113796.6iNsCR.rst | 3 + Modules/_csv.c | 39 +++++++++++ 3 files changed, 96 insertions(+), 13 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-01-07-21-04-24.gh-issue-113796.6iNsCR.rst diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py index 36da86e6a2c622..69fef5945ae66f 100644 --- a/Lib/test/test_csv.py +++ b/Lib/test/test_csv.py @@ -28,14 +28,20 @@ class Test_Csv(unittest.TestCase): in TestDialectRegistry. 
""" def _test_arg_valid(self, ctor, arg): + ctor(arg) self.assertRaises(TypeError, ctor) self.assertRaises(TypeError, ctor, None) - self.assertRaises(TypeError, ctor, arg, bad_attr = 0) - self.assertRaises(TypeError, ctor, arg, delimiter = 0) - self.assertRaises(TypeError, ctor, arg, delimiter = 'XX') + self.assertRaises(TypeError, ctor, arg, bad_attr=0) + self.assertRaises(TypeError, ctor, arg, delimiter='') + self.assertRaises(TypeError, ctor, arg, escapechar='') + self.assertRaises(TypeError, ctor, arg, quotechar='') + self.assertRaises(TypeError, ctor, arg, delimiter='^^') + self.assertRaises(TypeError, ctor, arg, escapechar='^^') + self.assertRaises(TypeError, ctor, arg, quotechar='^^') self.assertRaises(csv.Error, ctor, arg, 'foo') self.assertRaises(TypeError, ctor, arg, delimiter=None) self.assertRaises(TypeError, ctor, arg, delimiter=1) + self.assertRaises(TypeError, ctor, arg, escapechar=1) self.assertRaises(TypeError, ctor, arg, quotechar=1) self.assertRaises(TypeError, ctor, arg, lineterminator=None) self.assertRaises(TypeError, ctor, arg, lineterminator=1) @@ -46,6 +52,40 @@ def _test_arg_valid(self, ctor, arg): quoting=csv.QUOTE_ALL, quotechar=None) self.assertRaises(TypeError, ctor, arg, quoting=csv.QUOTE_NONE, quotechar='') + self.assertRaises(ValueError, ctor, arg, delimiter='\n') + self.assertRaises(ValueError, ctor, arg, escapechar='\n') + self.assertRaises(ValueError, ctor, arg, quotechar='\n') + self.assertRaises(ValueError, ctor, arg, delimiter='\r') + self.assertRaises(ValueError, ctor, arg, escapechar='\r') + self.assertRaises(ValueError, ctor, arg, quotechar='\r') + ctor(arg, delimiter=' ') + ctor(arg, escapechar=' ') + ctor(arg, quotechar=' ') + ctor(arg, delimiter='\t', skipinitialspace=True) + ctor(arg, escapechar='\t', skipinitialspace=True) + ctor(arg, quotechar='\t', skipinitialspace=True) + self.assertRaises(ValueError, ctor, arg, + delimiter=' ', skipinitialspace=True) + self.assertRaises(ValueError, ctor, arg, + escapechar=' ', 
skipinitialspace=True) + self.assertRaises(ValueError, ctor, arg, + quotechar=' ', skipinitialspace=True) + ctor(arg, delimiter='^') + ctor(arg, escapechar='^') + ctor(arg, quotechar='^') + self.assertRaises(ValueError, ctor, arg, delimiter='^', escapechar='^') + self.assertRaises(ValueError, ctor, arg, delimiter='^', quotechar='^') + self.assertRaises(ValueError, ctor, arg, escapechar='^', quotechar='^') + ctor(arg, delimiter='\x85') + ctor(arg, escapechar='\x85') + ctor(arg, quotechar='\x85') + ctor(arg, lineterminator='\x85') + self.assertRaises(ValueError, ctor, arg, + delimiter='\x85', lineterminator='\x85') + self.assertRaises(ValueError, ctor, arg, + escapechar='\x85', lineterminator='\x85') + self.assertRaises(ValueError, ctor, arg, + quotechar='\x85', lineterminator='\x85') def test_reader_arg_valid(self): self._test_arg_valid(csv.reader, []) @@ -535,14 +575,6 @@ class unspecified(): finally: csv.unregister_dialect('testC') - def test_bad_dialect(self): - # Unknown parameter - self.assertRaises(TypeError, csv.reader, [], bad_attr = 0) - # Bad values - self.assertRaises(TypeError, csv.reader, [], delimiter = None) - self.assertRaises(TypeError, csv.reader, [], quoting = -1) - self.assertRaises(TypeError, csv.reader, [], quoting = 100) - def test_copy(self): for name in csv.list_dialects(): dialect = csv.get_dialect(name) @@ -1088,10 +1120,15 @@ class mydialect(csv.Dialect): '"lineterminator" must be a string') def test_invalid_chars(self): - def create_invalid(field_name, value): + def create_invalid(field_name, value, **kwargs): class mydialect(csv.Dialect): - pass + delimiter = ',' + quoting = csv.QUOTE_ALL + quotechar = '"' + lineterminator = '\r\n' setattr(mydialect, field_name, value) + for field_name, value in kwargs.items(): + setattr(mydialect, field_name, value) d = mydialect() for field_name in ("delimiter", "escapechar", "quotechar"): @@ -1100,6 +1137,10 @@ class mydialect(csv.Dialect): self.assertRaises(csv.Error, create_invalid, field_name, 
"abc") self.assertRaises(csv.Error, create_invalid, field_name, b'x') self.assertRaises(csv.Error, create_invalid, field_name, 5) + self.assertRaises(ValueError, create_invalid, field_name, "\n") + self.assertRaises(ValueError, create_invalid, field_name, "\r") + self.assertRaises(ValueError, create_invalid, field_name, " ", + skipinitialspace=True) class TestSniffer(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2024-01-07-21-04-24.gh-issue-113796.6iNsCR.rst b/Misc/NEWS.d/next/Library/2024-01-07-21-04-24.gh-issue-113796.6iNsCR.rst new file mode 100644 index 00000000000000..e9d4aba9906677 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-07-21-04-24.gh-issue-113796.6iNsCR.rst @@ -0,0 +1,3 @@ +Add more validation checks in the :class:`csv.Dialect` constructor. +:exc:`ValueError` is now raised if the same character is used in different +roles. diff --git a/Modules/_csv.c b/Modules/_csv.c index 8d941563025580..929c21584ac2ef 100644 --- a/Modules/_csv.c +++ b/Modules/_csv.c @@ -331,6 +331,33 @@ dialect_check_quoting(int quoting) return -1; } +static int +dialect_check_char(const char *name, Py_UCS4 c, DialectObj *dialect) +{ + if (c == '\r' || c == '\n' || (dialect->skipinitialspace && c == ' ')) { + PyErr_Format(PyExc_ValueError, "bad %s value", name); + return -1; + } + if (PyUnicode_FindChar( + dialect->lineterminator, c, 0, + PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) + { + PyErr_Format(PyExc_ValueError, "bad %s or lineterminator value", name); + return -1; + } + return 0; +} + + static int +dialect_check_chars(const char *name1, const char *name2, Py_UCS4 c1, Py_UCS4 c2) +{ + if (c1 == c2 && c1 != NOT_SET) { + PyErr_Format(PyExc_ValueError, "bad %s or %s value", name1, name2); + return -1; + } + return 0; +} + #define D_OFF(x) offsetof(DialectObj, x) static struct PyMemberDef Dialect_memberlist[] = { @@ -508,6 +535,18 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) PyErr_SetString(PyExc_TypeError, "lineterminator 
must be set"); goto err; } + if (dialect_check_char("delimiter", self->delimiter, self) || + dialect_check_char("escapechar", self->escapechar, self) || + dialect_check_char("quotechar", self->quotechar, self) || + dialect_check_chars("delimiter", "escapechar", + self->delimiter, self->escapechar) || + dialect_check_chars("delimiter", "quotechar", + self->delimiter, self->quotechar) || + dialect_check_chars("escapechar", "quotechar", + self->escapechar, self->quotechar)) + { + goto err; + } ret = Py_NewRef(self); err: From 8f5e7d739f56a75022dfe8fa24675b6c7b321ab5 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Mon, 22 Jan 2024 16:36:29 +0300 Subject: [PATCH 035/160] gh-108303: Move `smtpd` to `test.support` (#114368) Update test_logging.py and test_smtplib.py. --- Lib/test/{ => support}/smtpd.py | 0 Lib/test/test_logging.py | 4 +--- Lib/test/test_smtplib.py | 3 +-- 3 files changed, 2 insertions(+), 5 deletions(-) rename Lib/test/{ => support}/smtpd.py (100%) diff --git a/Lib/test/smtpd.py b/Lib/test/support/smtpd.py similarity index 100% rename from Lib/test/smtpd.py rename to Lib/test/support/smtpd.py diff --git a/Lib/test/test_logging.py b/Lib/test/test_logging.py index 0be26981184213..908e242b85f5e7 100644 --- a/Lib/test/test_logging.py +++ b/Lib/test/test_logging.py @@ -49,6 +49,7 @@ from test.support import threading_helper from test.support import warnings_helper from test.support import asyncore +from test.support import smtpd from test.support.logging_helper import TestHandler import textwrap import threading @@ -63,9 +64,6 @@ from socketserver import (ThreadingUDPServer, DatagramRequestHandler, ThreadingTCPServer, StreamRequestHandler) -with warnings.catch_warnings(): - from . 
import smtpd - try: import win32evtlog, win32evtlogutil, pywintypes except ImportError: diff --git a/Lib/test/test_smtplib.py b/Lib/test/test_smtplib.py index f2e02dab1c3ca5..4c9fc14bd43f54 100644 --- a/Lib/test/test_smtplib.py +++ b/Lib/test/test_smtplib.py @@ -22,10 +22,9 @@ from test.support import socket_helper from test.support import threading_helper from test.support import asyncore +from test.support import smtpd from unittest.mock import Mock -from . import smtpd - support.requires_working_socket(module=True) From 9f7176d360b5898003d5ca78bab1822ad67867c4 Mon Sep 17 00:00:00 2001 From: neonene <53406459+neonene@users.noreply.github.com> Date: Mon, 22 Jan 2024 22:40:36 +0900 Subject: [PATCH 036/160] gh-103092: Ensure `_ctypes.c` static types are accessed via global state (#113857) --- Modules/_ctypes/_ctypes.c | 185 ++++++++++++++++++++++-------------- Modules/_ctypes/callbacks.c | 3 +- Modules/_ctypes/callproc.c | 23 +++-- Modules/_ctypes/cfield.c | 8 +- Modules/_ctypes/ctypes.h | 40 ++++---- Modules/_ctypes/stgdict.c | 19 ++-- 6 files changed, 172 insertions(+), 106 deletions(-) diff --git a/Modules/_ctypes/_ctypes.c b/Modules/_ctypes/_ctypes.c index b51a03b5497fed..94245ae41afffc 100644 --- a/Modules/_ctypes/_ctypes.c +++ b/Modules/_ctypes/_ctypes.c @@ -128,15 +128,26 @@ bytes(cdata) #include "pycore_long.h" // _PyLong_GetZero() -ctypes_state global_state; +static PyTypeObject Union_Type; +static PyTypeObject Struct_Type; +static PyTypeObject Simple_Type; + +ctypes_state global_state = { + .PyCStgDict_Type = &PyCStgDict_Type, + .PyCData_Type = &PyCData_Type, + .Struct_Type = &Struct_Type, + .Union_Type = &Union_Type, + .PyCArray_Type = &PyCArray_Type, + .Simple_Type = &Simple_Type, + .PyCPointer_Type = &PyCPointer_Type, + .PyCFuncPtr_Type = &PyCFuncPtr_Type, +}; PyObject *PyExc_ArgError = NULL; /* This dict maps ctypes types to POINTER types */ PyObject *_ctypes_ptrtype_cache = NULL; -static PyTypeObject Simple_Type; - /* a callable object used for 
unpickling: strong reference to _ctypes._unpickle() function */ static PyObject *_unpickle; @@ -521,14 +532,16 @@ StructUnionType_new(PyTypeObject *type, PyObject *args, PyObject *kwds, int isSt /* keep this for bw compatibility */ int r = PyDict_Contains(result->tp_dict, &_Py_ID(_abstract_)); - if (r > 0) + if (r > 0) { return (PyObject *)result; + } if (r < 0) { Py_DECREF(result); return NULL; } - dict = (StgDictObject *)_PyObject_CallNoArgs((PyObject *)&PyCStgDict_Type); + ctypes_state *st = GLOBAL_STATE(); + dict = (StgDictObject *)_PyObject_CallNoArgs((PyObject *)st->PyCStgDict_Type); if (!dict) { Py_DECREF(result); return NULL; @@ -568,8 +581,9 @@ StructUnionType_new(PyTypeObject *type, PyObject *args, PyObject *kwds, int isSt else { StgDictObject *basedict = PyType_stgdict((PyObject *)result->tp_base); - if (basedict == NULL) + if (basedict == NULL) { return (PyObject *)result; + } /* copy base dict */ if (-1 == PyCStgDict_clone(dict, basedict)) { Py_DECREF(result); @@ -1023,16 +1037,19 @@ PyCPointerType_new(PyTypeObject *type, PyObject *args, PyObject *kwds) typedict = PyTuple_GetItem(args, 2); - if (!typedict) + if (!typedict) { return NULL; + } /* stgdict items size, align, length contain info about pointers itself, stgdict->proto has info about the pointed to type! */ + ctypes_state *st = GLOBAL_STATE(); stgdict = (StgDictObject *)_PyObject_CallNoArgs( - (PyObject *)&PyCStgDict_Type); - if (!stgdict) + (PyObject *)st->PyCStgDict_Type); + if (!stgdict) { return NULL; + } stgdict->size = sizeof(void *); stgdict->align = _ctypes_get_fielddesc("P")->pffi_type->alignment; stgdict->length = 1; @@ -1149,7 +1166,8 @@ PyCPointerType_from_param(PyObject *type, PyObject *value) break; } - if (PointerObject_Check(value) || ArrayObject_Check(value)) { + ctypes_state *st = GLOBAL_STATE(); + if (PointerObject_Check(st, value) || ArrayObject_Check(st, value)) { /* Array instances are also pointers when the item types are the same. 
*/ @@ -1448,11 +1466,12 @@ PyCArrayType_new(PyTypeObject *type, PyObject *args, PyObject *kwds) goto error; } + ctypes_state *st = GLOBAL_STATE(); stgdict = (StgDictObject *)_PyObject_CallNoArgs( - (PyObject *)&PyCStgDict_Type); - if (!stgdict) + (PyObject *)st->PyCStgDict_Type); + if (!stgdict) { goto error; - + } itemdict = PyType_stgdict(type_attr); if (!itemdict) { PyErr_SetString(PyExc_TypeError, @@ -1587,7 +1606,8 @@ c_wchar_p_from_param(PyObject *type, PyObject *value) if (res) { return Py_NewRef(value); } - if (ArrayObject_Check(value) || PointerObject_Check(value)) { + ctypes_state *st = GLOBAL_STATE(); + if (ArrayObject_Check(st, value) || PointerObject_Check(st, value)) { /* c_wchar array instance or pointer(c_wchar(...)) */ StgDictObject *dt = PyObject_stgdict(value); StgDictObject *dict; @@ -1597,7 +1617,6 @@ c_wchar_p_from_param(PyObject *type, PyObject *value) return Py_NewRef(value); } } - ctypes_state *st = GLOBAL_STATE(); if (PyCArg_CheckExact(st, value)) { /* byref(c_char(...)) */ PyCArgObject *a = (PyCArgObject *)value; @@ -1651,7 +1670,8 @@ c_char_p_from_param(PyObject *type, PyObject *value) if (res) { return Py_NewRef(value); } - if (ArrayObject_Check(value) || PointerObject_Check(value)) { + ctypes_state *st = GLOBAL_STATE(); + if (ArrayObject_Check(st, value) || PointerObject_Check(st, value)) { /* c_char array instance or pointer(c_char(...)) */ StgDictObject *dt = PyObject_stgdict(value); StgDictObject *dict; @@ -1661,7 +1681,6 @@ c_char_p_from_param(PyObject *type, PyObject *value) return Py_NewRef(value); } } - ctypes_state *st = GLOBAL_STATE(); if (PyCArg_CheckExact(st, value)) { /* byref(c_char(...)) */ PyCArgObject *a = (PyCArgObject *)value; @@ -1757,13 +1776,13 @@ c_void_p_from_param(PyObject *type, PyObject *value) /* c_void_p instances */ return Py_NewRef(value); } + ctypes_state *st = GLOBAL_STATE(); /* ctypes array or pointer instance */ - if (ArrayObject_Check(value) || PointerObject_Check(value)) { + if (ArrayObject_Check(st, 
value) || PointerObject_Check(st, value)) { /* Any array or pointer is accepted */ return Py_NewRef(value); } /* byref(...) */ - ctypes_state *st = GLOBAL_STATE(); if (PyCArg_CheckExact(st, value)) { /* byref(c_xxx()) */ PyCArgObject *a = (PyCArgObject *)value; @@ -1772,7 +1791,7 @@ c_void_p_from_param(PyObject *type, PyObject *value) } } /* function pointer */ - if (PyCFuncPtrObject_Check(value)) { + if (PyCFuncPtrObject_Check(st, value)) { PyCArgObject *parg; PyCFuncPtrObject *func; func = (PyCFuncPtrObject *)value; @@ -1788,7 +1807,11 @@ c_void_p_from_param(PyObject *type, PyObject *value) } /* c_char_p, c_wchar_p */ stgd = PyObject_stgdict(value); - if (stgd && CDataObject_Check(value) && stgd->proto && PyUnicode_Check(stgd->proto)) { + if (stgd + && CDataObject_Check(st, value) + && stgd->proto + && PyUnicode_Check(stgd->proto)) + { PyCArgObject *parg; switch (PyUnicode_AsUTF8(stgd->proto)[0]) { @@ -1870,8 +1893,9 @@ static PyObject *CreateSwappedType(PyTypeObject *type, PyObject *args, PyObject if (result == NULL) return NULL; + ctypes_state *st = GLOBAL_STATE(); stgdict = (StgDictObject *)_PyObject_CallNoArgs( - (PyObject *)&PyCStgDict_Type); + (PyObject *)st->PyCStgDict_Type); if (!stgdict) { Py_DECREF(result); return NULL; @@ -1981,11 +2005,12 @@ PyCSimpleType_new(PyTypeObject *type, PyObject *args, PyObject *kwds) goto error; } + ctypes_state *st = GLOBAL_STATE(); stgdict = (StgDictObject *)_PyObject_CallNoArgs( - (PyObject *)&PyCStgDict_Type); - if (!stgdict) + (PyObject *)st->PyCStgDict_Type); + if (!stgdict) { goto error; - + } stgdict->ffi_type_pointer = *fmt->pffi_type; stgdict->align = fmt->pffi_type->alignment; stgdict->length = 0; @@ -2006,7 +2031,7 @@ PyCSimpleType_new(PyTypeObject *type, PyObject *args, PyObject *kwds) stgdict->paramfunc = PyCSimpleType_paramfunc; /* - if (result->tp_base != &Simple_Type) { + if (result->tp_base != st->Simple_Type) { stgdict->setfunc = NULL; stgdict->getfunc = NULL; } @@ -2026,7 +2051,7 @@ 
PyCSimpleType_new(PyTypeObject *type, PyObject *args, PyObject *kwds) /* Install from_param class methods in ctypes base classes. Overrides the PyCSimpleType_from_param generic method. */ - if (result->tp_base == &Simple_Type) { + if (result->tp_base == st->Simple_Type) { switch (*proto_str) { case 'z': /* c_char_p */ ml = &c_char_p_method; @@ -2070,7 +2095,6 @@ PyCSimpleType_new(PyTypeObject *type, PyObject *args, PyObject *kwds) } } - ctypes_state *st = GLOBAL_STATE(); if (type == st->PyCSimpleType_Type && fmt->setfunc_swapped && fmt->getfunc_swapped) @@ -2408,11 +2432,12 @@ PyCFuncPtrType_new(PyTypeObject *type, PyObject *args, PyObject *kwds) PyTypeObject *result; StgDictObject *stgdict; + ctypes_state *st = GLOBAL_STATE(); stgdict = (StgDictObject *)_PyObject_CallNoArgs( - (PyObject *)&PyCStgDict_Type); - if (!stgdict) + (PyObject *)st->PyCStgDict_Type); + if (!stgdict) { return NULL; - + } stgdict->paramfunc = PyCFuncPtrType_paramfunc; /* We do NOT expose the function signature in the format string. 
It is impossible, generally, because the only requirement for the @@ -2623,7 +2648,8 @@ static PyMemberDef PyCData_members[] = { static PyObject * PyCData_item_type(PyObject *type) { - if (PyCArrayTypeObject_Check(type)) { + ctypes_state *st = GLOBAL_STATE(); + if (PyCArrayTypeObject_Check(st, type)) { StgDictObject *stg_dict; PyObject *elem_type; @@ -2832,14 +2858,14 @@ PyCData_FromBaseObj(PyObject *type, PyObject *base, Py_ssize_t index, char *adr) } dict->flags |= DICTFLAG_FINAL; cmem = (CDataObject *)((PyTypeObject *)type)->tp_alloc((PyTypeObject *)type, 0); - if (cmem == NULL) + if (cmem == NULL) { return NULL; - assert(CDataObject_Check(cmem)); - + } + assert(CDataObject_Check(GLOBAL_STATE(), cmem)); cmem->b_length = dict->length; cmem->b_size = dict->size; if (base) { /* use base's buffer */ - assert(CDataObject_Check(base)); + assert(CDataObject_Check(GLOBAL_STATE(), base)); cmem->b_ptr = adr; cmem->b_needsfree = 0; cmem->b_base = (CDataObject *)Py_NewRef(base); @@ -2878,9 +2904,10 @@ PyCData_AtAddress(PyObject *type, void *buf) dict->flags |= DICTFLAG_FINAL; pd = (CDataObject *)((PyTypeObject *)type)->tp_alloc((PyTypeObject *)type, 0); - if (!pd) + if (!pd) { return NULL; - assert(CDataObject_Check(pd)); + } + assert(CDataObject_Check(GLOBAL_STATE(), pd)); pd->b_ptr = (char *)buf; pd->b_length = dict->length; pd->b_size = dict->size; @@ -2895,9 +2922,11 @@ PyCData_AtAddress(PyObject *type, void *buf) int _ctypes_simple_instance(PyObject *obj) { PyTypeObject *type = (PyTypeObject *)obj; + ctypes_state *st = GLOBAL_STATE(); - if (PyCSimpleTypeObject_Check(type)) - return type->tp_base != &Simple_Type; + if (PyCSimpleTypeObject_Check(st, type)) { + return type->tp_base != st->Simple_Type; + } return 0; } @@ -2925,10 +2954,11 @@ _PyCData_set(CDataObject *dst, PyObject *type, SETFUNC setfunc, PyObject *value, CDataObject *src; int err; - if (setfunc) + if (setfunc) { return setfunc(ptr, value, size); - - if (!CDataObject_Check(value)) { + } + ctypes_state *st = 
GLOBAL_STATE(); + if (!CDataObject_Check(st, value)) { StgDictObject *dict = PyType_stgdict(type); if (dict && dict->setfunc) return dict->setfunc(ptr, value, size); @@ -2950,7 +2980,7 @@ _PyCData_set(CDataObject *dst, PyObject *type, SETFUNC setfunc, PyObject *value, size, ptr); Py_DECREF(ob); return result; - } else if (value == Py_None && PyCPointerTypeObject_Check(type)) { + } else if (value == Py_None && PyCPointerTypeObject_Check(st, type)) { *(void **)ptr = NULL; Py_RETURN_NONE; } else { @@ -2971,7 +3001,7 @@ _PyCData_set(CDataObject *dst, PyObject *type, SETFUNC setfunc, PyObject *value, src->b_ptr, size); - if (PyCPointerTypeObject_Check(type)) { + if (PyCPointerTypeObject_Check(st, type)) { /* XXX */ } @@ -2982,8 +3012,8 @@ _PyCData_set(CDataObject *dst, PyObject *type, SETFUNC setfunc, PyObject *value, return Py_NewRef(value); } - if (PyCPointerTypeObject_Check(type) - && ArrayObject_Check(value)) { + if (PyCPointerTypeObject_Check(st, type) + && ArrayObject_Check(st, value)) { StgDictObject *p1, *p2; PyObject *keep; p1 = PyObject_stgdict(value); @@ -3031,8 +3061,9 @@ PyCData_set(PyObject *dst, PyObject *type, SETFUNC setfunc, PyObject *value, { CDataObject *mem = (CDataObject *)dst; PyObject *result; + ctypes_state *st = GLOBAL_STATE(); - if (!CDataObject_Check(dst)) { + if (!CDataObject_Check(st, dst)) { PyErr_SetString(PyExc_TypeError, "not a ctype instance"); return -1; @@ -3252,13 +3283,14 @@ static int _check_outarg_type(PyObject *arg, Py_ssize_t index) { StgDictObject *dict; + ctypes_state *st = GLOBAL_STATE(); - if (PyCPointerTypeObject_Check(arg)) + if (PyCPointerTypeObject_Check(st, arg)) { return 1; - - if (PyCArrayTypeObject_Check(arg)) + } + if (PyCArrayTypeObject_Check(st, arg)) { return 1; - + } dict = PyType_stgdict(arg); if (dict /* simple pointer types, c_void_p, c_wchar_p, BSTR, ... 
*/ @@ -3627,7 +3659,9 @@ static PyObject * _byref(PyObject *obj) { PyCArgObject *parg; - if (!CDataObject_Check(obj)) { + ctypes_state *st = GLOBAL_STATE(); + + if (!CDataObject_Check(st, obj)) { PyErr_SetString(PyExc_TypeError, "expected CData instance"); return NULL; @@ -3735,6 +3769,7 @@ _build_callargs(PyCFuncPtrObject *self, PyObject *argtypes, inargs_index = 1; } #endif + ctypes_state *st = GLOBAL_STATE(); for (i = 0; i < len; ++i) { PyObject *item = PyTuple_GET_ITEM(paramflags, i); PyObject *ob; @@ -3806,11 +3841,13 @@ _build_callargs(PyCFuncPtrObject *self, PyObject *argtypes, ((PyTypeObject *)ob)->tp_name); goto error; } - if (PyCArrayTypeObject_Check(ob)) + if (PyCArrayTypeObject_Check(st, ob)) { ob = _PyObject_CallNoArgs(ob); - else + } + else { /* Create an instance of the pointed-to type */ ob = _PyObject_CallNoArgs(dict->proto); + } /* XXX Is the following correct any longer? We must not pass a byref() to the array then but @@ -3962,7 +3999,8 @@ PyCFuncPtr_call(PyCFuncPtrObject *self, PyObject *inargs, PyObject *kwds) "native com method call without 'this' parameter"); return NULL; } - if (!CDataObject_Check(this)) { + ctypes_state *st = GLOBAL_STATE(); + if (!CDataObject_Check(st, this)) { PyErr_SetString(PyExc_TypeError, "Expected a COM this pointer as first argument"); return NULL; @@ -4733,12 +4771,11 @@ PyCArrayType_from_ctype(PyObject *itemtype, Py_ssize_t length) sprintf(name, "%.200s_Array_%ld", ((PyTypeObject *)itemtype)->tp_name, (long)length); #endif - ctypes_state *st = GLOBAL_STATE(); result = PyObject_CallFunction((PyObject *)st->PyCArrayType_Type, "s(O){s:n,s:O}", name, - &PyCArray_Type, + st->PyCArray_Type, "_length_", length, "_type_", @@ -4849,8 +4886,9 @@ static PyObject * Simple_repr(CDataObject *self) { PyObject *val, *result; + ctypes_state *st = GLOBAL_STATE(); - if (Py_TYPE(self)->tp_base != &Simple_Type) { + if (Py_TYPE(self)->tp_base != st->Simple_Type) { return PyUnicode_FromFormat("<%s object at %p>", 
Py_TYPE(self)->tp_name, self); } @@ -5013,7 +5051,8 @@ Pointer_set_contents(CDataObject *self, PyObject *value, void *closure) stgdict = PyObject_stgdict((PyObject *)self); assert(stgdict); /* Cannot be NULL for pointer instances */ assert(stgdict->proto); - if (!CDataObject_Check(value)) { + ctypes_state *st = GLOBAL_STATE(); + if (!CDataObject_Check(st, value)) { int res = PyObject_IsInstance(value, stgdict->proto); if (res == -1) return -1; @@ -5395,11 +5434,14 @@ static int cast_check_pointertype(PyObject *arg) { StgDictObject *dict; + ctypes_state *st = GLOBAL_STATE(); - if (PyCPointerTypeObject_Check(arg)) + if (PyCPointerTypeObject_Check(st, arg)) { return 1; - if (PyCFuncPtrTypeObject_Check(arg)) + } + if (PyCFuncPtrTypeObject_Check(st, arg)) { return 1; + } dict = PyType_stgdict(arg); if (dict != NULL && dict->proto != NULL) { if (PyUnicode_Check(dict->proto) @@ -5432,7 +5474,8 @@ cast(void *ptr, PyObject *src, PyObject *ctype) It must certainly contain the source objects one. It must contain the source object itself. 
*/ - if (CDataObject_Check(src)) { + ctypes_state *st = GLOBAL_STATE(); + if (CDataObject_Check(st, src)) { CDataObject *obj = (CDataObject *)src; CDataObject *container; @@ -5536,9 +5579,9 @@ _ctypes_add_types(PyObject *mod) */ CREATE_TYPE(mod, st->PyCArg_Type, &carg_spec, NULL); CREATE_TYPE(mod, st->PyCThunk_Type, &cthunk_spec, NULL); - TYPE_READY(&PyCData_Type); + TYPE_READY(st->PyCData_Type); /* StgDict is derived from PyDict_Type */ - TYPE_READY_BASE(&PyCStgDict_Type, &PyDict_Type); + TYPE_READY_BASE(st->PyCStgDict_Type, &PyDict_Type); /************************************************* * @@ -5561,12 +5604,12 @@ _ctypes_add_types(PyObject *mod) * Classes using a custom metaclass */ - MOD_ADD_TYPE(&Struct_Type, st->PyCStructType_Type, &PyCData_Type); - MOD_ADD_TYPE(&Union_Type, st->UnionType_Type, &PyCData_Type); - MOD_ADD_TYPE(&PyCPointer_Type, st->PyCPointerType_Type, &PyCData_Type); - MOD_ADD_TYPE(&PyCArray_Type, st->PyCArrayType_Type, &PyCData_Type); - MOD_ADD_TYPE(&Simple_Type, st->PyCSimpleType_Type, &PyCData_Type); - MOD_ADD_TYPE(&PyCFuncPtr_Type, st->PyCFuncPtrType_Type, &PyCData_Type); + MOD_ADD_TYPE(st->Struct_Type, st->PyCStructType_Type, st->PyCData_Type); + MOD_ADD_TYPE(st->Union_Type, st->UnionType_Type, st->PyCData_Type); + MOD_ADD_TYPE(st->PyCPointer_Type, st->PyCPointerType_Type, st->PyCData_Type); + MOD_ADD_TYPE(st->PyCArray_Type, st->PyCArrayType_Type, st->PyCData_Type); + MOD_ADD_TYPE(st->Simple_Type, st->PyCSimpleType_Type, st->PyCData_Type); + MOD_ADD_TYPE(st->PyCFuncPtr_Type, st->PyCFuncPtrType_Type, st->PyCData_Type); /************************************************* * diff --git a/Modules/_ctypes/callbacks.c b/Modules/_ctypes/callbacks.c index 154e9f43983cdb..f70479435915ff 100644 --- a/Modules/_ctypes/callbacks.c +++ b/Modules/_ctypes/callbacks.c @@ -151,6 +151,7 @@ static void _CallPythonObject(void *mem, assert(nargs <= CTYPES_MAX_ARGCOUNT); PyObject **args = alloca(nargs * sizeof(PyObject *)); PyObject **cnvs = 
PySequence_Fast_ITEMS(converters); + ctypes_state *st = GLOBAL_STATE(); for (i = 0; i < nargs; i++) { PyObject *cnv = cnvs[i]; // borrowed ref StgDictObject *dict; @@ -175,7 +176,7 @@ static void _CallPythonObject(void *mem, PrintError("create argument %zd:\n", i); goto Done; } - if (!CDataObject_Check(obj)) { + if (!CDataObject_Check(st, obj)) { Py_DECREF(obj); PrintError("unexpected result of create argument %zd:\n", i); goto Done; diff --git a/Modules/_ctypes/callproc.c b/Modules/_ctypes/callproc.c index 3b11cd7f58ce4b..97d1dbaae03d4f 100644 --- a/Modules/_ctypes/callproc.c +++ b/Modules/_ctypes/callproc.c @@ -1686,11 +1686,13 @@ sizeof_func(PyObject *self, PyObject *obj) StgDictObject *dict; dict = PyType_stgdict(obj); - if (dict) + if (dict) { return PyLong_FromSsize_t(dict->size); - - if (CDataObject_Check(obj)) + } + ctypes_state *st = GLOBAL_STATE(); + if (CDataObject_Check(st, obj)) { return PyLong_FromSsize_t(((CDataObject *)obj)->b_size); + } PyErr_SetString(PyExc_TypeError, "this type has no size"); return NULL; @@ -1744,7 +1746,8 @@ byref(PyObject *self, PyObject *args) if (offset == -1 && PyErr_Occurred()) return NULL; } - if (!CDataObject_Check(obj)) { + ctypes_state *st = GLOBAL_STATE(); + if (!CDataObject_Check(st, obj)) { PyErr_Format(PyExc_TypeError, "byref() argument must be a ctypes instance, not '%s'", Py_TYPE(obj)->tp_name); @@ -1769,7 +1772,8 @@ PyDoc_STRVAR(addressof_doc, static PyObject * addressof(PyObject *self, PyObject *obj) { - if (!CDataObject_Check(obj)) { + ctypes_state *st = GLOBAL_STATE(); + if (!CDataObject_Check(st, obj)) { PyErr_SetString(PyExc_TypeError, "invalid type"); return NULL; @@ -1925,13 +1929,14 @@ create_pointer_type(PyObject *module, PyObject *cls) // found or error return result; } + ctypes_state *st = GLOBAL_STATE(); // not found if (PyUnicode_CheckExact(cls)) { PyObject *name = PyUnicode_FromFormat("LP_%U", cls); - result = PyObject_CallFunction((PyObject *)Py_TYPE(&PyCPointer_Type), + result = 
PyObject_CallFunction((PyObject *)Py_TYPE(st->PyCPointer_Type), "N(O){}", name, - &PyCPointer_Type); + st->PyCPointer_Type); if (result == NULL) return result; key = PyLong_FromVoidPtr(result); @@ -1942,10 +1947,10 @@ create_pointer_type(PyObject *module, PyObject *cls) } else if (PyType_Check(cls)) { typ = (PyTypeObject *)cls; PyObject *name = PyUnicode_FromFormat("LP_%s", typ->tp_name); - result = PyObject_CallFunction((PyObject *)Py_TYPE(&PyCPointer_Type), + result = PyObject_CallFunction((PyObject *)Py_TYPE(st->PyCPointer_Type), "N(O){sO}", name, - &PyCPointer_Type, + st->PyCPointer_Type, "_type_", cls); if (result == NULL) return result; diff --git a/Modules/_ctypes/cfield.c b/Modules/_ctypes/cfield.c index bfb40e5c5393fc..1d5b0b14bc39e5 100644 --- a/Modules/_ctypes/cfield.c +++ b/Modules/_ctypes/cfield.c @@ -111,7 +111,7 @@ PyCField_FromDesc(PyObject *desc, Py_ssize_t index, /* Field descriptors for 'c_char * n' are be scpecial cased to return a Python string instead of an Array object instance... 
*/ - if (PyCArrayTypeObject_Check(proto)) { + if (PyCArrayTypeObject_Check(st, proto)) { StgDictObject *adict = PyType_stgdict(proto); StgDictObject *idict; if (adict && adict->proto) { @@ -204,7 +204,8 @@ PyCField_set(CFieldObject *self, PyObject *inst, PyObject *value) { CDataObject *dst; char *ptr; - if (!CDataObject_Check(inst)) { + ctypes_state *st = GLOBAL_STATE(); + if (!CDataObject_Check(st, inst)) { PyErr_SetString(PyExc_TypeError, "not a ctype instance"); return -1; @@ -227,7 +228,8 @@ PyCField_get(CFieldObject *self, PyObject *inst, PyTypeObject *type) if (inst == NULL) { return Py_NewRef(self); } - if (!CDataObject_Check(inst)) { + ctypes_state *st = GLOBAL_STATE(); + if (!CDataObject_Check(st, inst)) { PyErr_SetString(PyExc_TypeError, "not a ctype instance"); return NULL; diff --git a/Modules/_ctypes/ctypes.h b/Modules/_ctypes/ctypes.h index 55e9f777788079..1989723f6f3dbb 100644 --- a/Modules/_ctypes/ctypes.h +++ b/Modules/_ctypes/ctypes.h @@ -37,9 +37,7 @@ typedef struct { PyTypeObject *PyCArg_Type; PyTypeObject *PyCField_Type; PyTypeObject *PyCThunk_Type; -#ifdef MS_WIN32 - PyTypeObject *PyComError_Type; -#endif + PyTypeObject *PyCStgDict_Type; PyTypeObject *StructParam_Type; PyTypeObject *PyCStructType_Type; PyTypeObject *UnionType_Type; @@ -47,6 +45,16 @@ typedef struct { PyTypeObject *PyCArrayType_Type; PyTypeObject *PyCSimpleType_Type; PyTypeObject *PyCFuncPtrType_Type; + PyTypeObject *PyCData_Type; + PyTypeObject *Struct_Type; + PyTypeObject *Union_Type; + PyTypeObject *PyCArray_Type; + PyTypeObject *Simple_Type; + PyTypeObject *PyCPointer_Type; + PyTypeObject *PyCFuncPtr_Type; +#ifdef MS_WIN32 + PyTypeObject *PyComError_Type; +#endif } ctypes_state; extern ctypes_state global_state; @@ -147,8 +155,8 @@ typedef struct { } PyCFuncPtrObject; extern PyTypeObject PyCStgDict_Type; -#define PyCStgDict_CheckExact(v) Py_IS_TYPE(v, &PyCStgDict_Type) -#define PyCStgDict_Check(v) PyObject_TypeCheck(v, &PyCStgDict_Type) +#define PyCStgDict_CheckExact(st, v) 
Py_IS_TYPE((v), (st)->PyCStgDict_Type) +#define PyCStgDict_Check(st, v) PyObject_TypeCheck((v), (st)->PyCStgDict_Type) extern int PyCStructUnionType_update_stgdict(PyObject *fields, PyObject *type, int isStruct); extern int PyType_stginfo(PyTypeObject *self, Py_ssize_t *psize, Py_ssize_t *palign, Py_ssize_t *plength); @@ -157,12 +165,12 @@ extern int PyObject_stginfo(PyObject *self, Py_ssize_t *psize, Py_ssize_t *palig extern PyTypeObject PyCData_Type; -#define CDataObject_CheckExact(v) Py_IS_TYPE(v, &PyCData_Type) -#define CDataObject_Check(v) PyObject_TypeCheck(v, &PyCData_Type) +#define CDataObject_CheckExact(st, v) Py_IS_TYPE((v), (st)->PyCData_Type) +#define CDataObject_Check(st, v) PyObject_TypeCheck((v), (st)->PyCData_Type) #define _CDataObject_HasExternalBuffer(v) ((v)->b_ptr != (char *)&(v)->b_value) -#define PyCSimpleTypeObject_CheckExact(v) Py_IS_TYPE(v, GLOBAL_STATE()->PyCSimpleType_Type) -#define PyCSimpleTypeObject_Check(v) PyObject_TypeCheck(v, GLOBAL_STATE()->PyCSimpleType_Type) +#define PyCSimpleTypeObject_CheckExact(st, v) Py_IS_TYPE((v), (st)->PyCSimpleType_Type) +#define PyCSimpleTypeObject_Check(st, v) PyObject_TypeCheck((v), (st)->PyCSimpleType_Type) extern struct fielddesc *_ctypes_get_fielddesc(const char *fmt); @@ -180,13 +188,13 @@ extern PyTypeObject PyCArray_Type; extern PyTypeObject PyCPointer_Type; extern PyTypeObject PyCFuncPtr_Type; -#define PyCArrayTypeObject_Check(v) PyObject_TypeCheck(v, GLOBAL_STATE()->PyCArrayType_Type) -#define ArrayObject_Check(v) PyObject_TypeCheck(v, &PyCArray_Type) -#define PointerObject_Check(v) PyObject_TypeCheck(v, &PyCPointer_Type) -#define PyCPointerTypeObject_Check(v) PyObject_TypeCheck(v, GLOBAL_STATE()->PyCPointerType_Type) -#define PyCFuncPtrObject_Check(v) PyObject_TypeCheck(v, &PyCFuncPtr_Type) -#define PyCFuncPtrTypeObject_Check(v) PyObject_TypeCheck(v, GLOBAL_STATE()->PyCFuncPtrType_Type) -#define PyCStructTypeObject_Check(v) PyObject_TypeCheck(v, GLOBAL_STATE()->PyCStructType_Type) +#define 
PyCArrayTypeObject_Check(st, v) PyObject_TypeCheck((v), (st)->PyCArrayType_Type) +#define ArrayObject_Check(st, v) PyObject_TypeCheck((v), (st)->PyCArray_Type) +#define PointerObject_Check(st, v) PyObject_TypeCheck((v), (st)->PyCPointer_Type) +#define PyCPointerTypeObject_Check(st, v) PyObject_TypeCheck((v), (st)->PyCPointerType_Type) +#define PyCFuncPtrObject_Check(st,v) PyObject_TypeCheck((v), (st)->PyCFuncPtr_Type) +#define PyCFuncPtrTypeObject_Check(st, v) PyObject_TypeCheck((v), (st)->PyCFuncPtrType_Type) +#define PyCStructTypeObject_Check(st, v) PyObject_TypeCheck((v), (st)->PyCStructType_Type) extern PyObject * PyCArrayType_from_ctype(PyObject *itemtype, Py_ssize_t length); diff --git a/Modules/_ctypes/stgdict.c b/Modules/_ctypes/stgdict.c index fb3e20e8db3e27..2397015ba65889 100644 --- a/Modules/_ctypes/stgdict.c +++ b/Modules/_ctypes/stgdict.c @@ -184,11 +184,14 @@ PyType_stgdict(PyObject *obj) { PyTypeObject *type; - if (!PyType_Check(obj)) + if (!PyType_Check(obj)) { return NULL; + } + ctypes_state *st = GLOBAL_STATE(); type = (PyTypeObject *)obj; - if (!type->tp_dict || !PyCStgDict_CheckExact(type->tp_dict)) + if (!type->tp_dict || !PyCStgDict_CheckExact(st, type->tp_dict)) { return NULL; + } return (StgDictObject *)type->tp_dict; } @@ -201,8 +204,10 @@ StgDictObject * PyObject_stgdict(PyObject *self) { PyTypeObject *type = Py_TYPE(self); - if (!type->tp_dict || !PyCStgDict_CheckExact(type->tp_dict)) + ctypes_state *st = GLOBAL_STATE(); + if (!type->tp_dict || !PyCStgDict_CheckExact(st, type->tp_dict)) { return NULL; + } return (StgDictObject *)type->tp_dict; } @@ -505,6 +510,7 @@ PyCStructUnionType_update_stgdict(PyObject *type, PyObject *fields, int isStruct if (stgdict->format == NULL) return -1; + ctypes_state *st = GLOBAL_STATE(); for (i = 0; i < len; ++i) { PyObject *name = NULL, *desc = NULL; PyObject *pair = PySequence_GetItem(fields, i); @@ -518,8 +524,9 @@ PyCStructUnionType_update_stgdict(PyObject *type, PyObject *fields, int isStruct 
Py_XDECREF(pair); return -1; } - if (PyCArrayTypeObject_Check(desc)) + if (PyCArrayTypeObject_Check(st, desc)) { arrays_seen = 1; + } dict = PyType_stgdict(desc); if (dict == NULL) { Py_DECREF(pair); @@ -806,7 +813,7 @@ PyCStructUnionType_update_stgdict(PyObject *type, PyObject *fields, int isStruct i); return -1; } - if (!PyCArrayTypeObject_Check(desc)) { + if (!PyCArrayTypeObject_Check(st, desc)) { /* Not an array. Just need an ffi_type pointer. */ num_ffi_type_pointers++; } @@ -906,7 +913,7 @@ PyCStructUnionType_update_stgdict(PyObject *type, PyObject *fields, int isStruct return -1; } assert(element_index < (ffi_ofs + len)); /* will be used below */ - if (!PyCArrayTypeObject_Check(desc)) { + if (!PyCArrayTypeObject_Check(st, desc)) { /* Not an array. Just copy over the element ffi_type. */ element_types[element_index++] = &dict->ffi_type_pointer; } From 49785b06ded19c7c4afce186bac90fea707470ea Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 22 Jan 2024 16:14:42 +0200 Subject: [PATCH 037/160] gh-102512: Turn _DummyThread into _MainThread after os.fork() called from a foreign thread (GH-113261) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Always set a _MainThread as a main thread after os.fork() is called from a thread started not by the threading module. A new _MainThread was already set as a new main thread after fork if threading.current_thread() was not called for a foreign thread before fork. Now, if it was called before fork, the implicitly created _DummyThread will be turned into _MainThread after fork. It fixes, in particular, an incompatibility of _DummyThread with the threading shutdown logic which relies on the main thread having tstate_lock.
Co-authored-by: Marek Marczykowski-Górecki --- Lib/test/test_threading.py | 98 +++++++++++++++++-- Lib/threading.py | 9 +- ...-03-08-00-02-30.gh-issue-102512.LiugDr.rst | 3 + 3 files changed, 101 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-03-08-00-02-30.gh-issue-102512.LiugDr.rst diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index 3060af44fd7e3d..7160c53d691ba2 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -115,6 +115,7 @@ def tearDown(self): class ThreadTests(BaseTestCase): + maxDiff = 9999 @cpython_only def test_name(self): @@ -676,19 +677,25 @@ def test_main_thread_after_fork(self): import os, threading from test import support + ident = threading.get_ident() pid = os.fork() if pid == 0: + print("current ident", threading.get_ident() == ident) main = threading.main_thread() - print(main.name) - print(main.ident == threading.current_thread().ident) - print(main.ident == threading.get_ident()) + print("main", main.name) + print("main ident", main.ident == ident) + print("current is main", threading.current_thread() is main) else: support.wait_process(pid, exitcode=0) """ _, out, err = assert_python_ok("-c", code) data = out.decode().replace('\r', '') self.assertEqual(err, b"") - self.assertEqual(data, "MainThread\nTrue\nTrue\n") + self.assertEqual(data, + "current ident True\n" + "main MainThread\n" + "main ident True\n" + "current is main True\n") @skip_unless_reliable_fork @unittest.skipUnless(hasattr(os, 'waitpid'), "test needs os.waitpid()") @@ -698,15 +705,17 @@ def test_main_thread_after_fork_from_nonmain_thread(self): from test import support def func(): + ident = threading.get_ident() with warnings.catch_warnings(record=True) as ws: warnings.filterwarnings( "always", category=DeprecationWarning) pid = os.fork() if pid == 0: + print("current ident", threading.get_ident() == ident) main = threading.main_thread() - print(main.name) - print(main.ident == 
threading.current_thread().ident) - print(main.ident == threading.get_ident()) + print("main", main.name, type(main).__name__) + print("main ident", main.ident == ident) + print("current is main", threading.current_thread() is main) # stdout is fully buffered because not a tty, # we have to flush before exit. sys.stdout.flush() @@ -722,7 +731,80 @@ def func(): _, out, err = assert_python_ok("-c", code) data = out.decode().replace('\r', '') self.assertEqual(err.decode('utf-8'), "") - self.assertEqual(data, "Thread-1 (func)\nTrue\nTrue\n") + self.assertEqual(data, + "current ident True\n" + "main Thread-1 (func) Thread\n" + "main ident True\n" + "current is main True\n" + ) + + @unittest.skipIf(sys.platform in platforms_to_skip, "due to known OS bug") + @support.requires_fork() + @unittest.skipUnless(hasattr(os, 'waitpid'), "test needs os.waitpid()") + def test_main_thread_after_fork_from_foreign_thread(self, create_dummy=False): + code = """if 1: + import os, threading, sys, traceback, _thread + from test import support + + def func(lock): + ident = threading.get_ident() + if %s: + # call current_thread() before fork to allocate DummyThread + current = threading.current_thread() + print("current", current.name, type(current).__name__) + print("ident in _active", ident in threading._active) + # flush before fork, so child won't flush it again + sys.stdout.flush() + pid = os.fork() + if pid == 0: + print("current ident", threading.get_ident() == ident) + main = threading.main_thread() + print("main", main.name, type(main).__name__) + print("main ident", main.ident == ident) + print("current is main", threading.current_thread() is main) + print("_dangling", [t.name for t in list(threading._dangling)]) + # stdout is fully buffered because not a tty, + # we have to flush before exit. 
+ sys.stdout.flush() + try: + threading._shutdown() + os._exit(0) + except: + traceback.print_exc() + sys.stderr.flush() + os._exit(1) + else: + try: + support.wait_process(pid, exitcode=0) + except Exception: + # avoid 'could not acquire lock for + # <_io.BufferedWriter name=''> at interpreter shutdown,' + traceback.print_exc() + sys.stderr.flush() + finally: + lock.release() + + join_lock = _thread.allocate_lock() + join_lock.acquire() + th = _thread.start_new_thread(func, (join_lock,)) + join_lock.acquire() + """ % create_dummy + # "DeprecationWarning: This process is multi-threaded, use of fork() + # may lead to deadlocks in the child" + _, out, err = assert_python_ok("-W", "ignore::DeprecationWarning", "-c", code) + data = out.decode().replace('\r', '') + self.assertEqual(err.decode(), "") + self.assertEqual(data, + ("current Dummy-1 _DummyThread\n" if create_dummy else "") + + f"ident in _active {create_dummy!s}\n" + + "current ident True\n" + "main MainThread _MainThread\n" + "main ident True\n" + "current is main True\n" + "_dangling ['MainThread']\n") + + def test_main_thread_after_fork_from_dummy_thread(self, create_dummy=False): + self.test_main_thread_after_fork_from_foreign_thread(create_dummy=True) def test_main_thread_during_shutdown(self): # bpo-31516: current_thread() should still point to the main thread diff --git a/Lib/threading.py b/Lib/threading.py index 85aff58968082d..c561800a128059 100644 --- a/Lib/threading.py +++ b/Lib/threading.py @@ -1489,7 +1489,6 @@ class _DummyThread(Thread): def __init__(self): Thread.__init__(self, name=_newname("Dummy-%d"), daemon=_daemon_threads_allowed()) - self._started.set() self._set_ident() if _HAVE_THREAD_NATIVE_ID: @@ -1508,6 +1507,14 @@ def is_alive(self): def join(self, timeout=None): raise RuntimeError("cannot join a dummy thread") + def _after_fork(self, new_ident=None): + if new_ident is not None: + self.__class__ = _MainThread + self._name = 'MainThread' + self._daemonic = False + 
self._set_tstate_lock() + Thread._after_fork(self, new_ident=new_ident) + # Global API functions diff --git a/Misc/NEWS.d/next/Library/2023-03-08-00-02-30.gh-issue-102512.LiugDr.rst b/Misc/NEWS.d/next/Library/2023-03-08-00-02-30.gh-issue-102512.LiugDr.rst new file mode 100644 index 00000000000000..659cba73cbf34e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-03-08-00-02-30.gh-issue-102512.LiugDr.rst @@ -0,0 +1,3 @@ +When :func:`os.fork` is called from a foreign thread (aka ``_DummyThread``), +the type of the thread in a child process is changed to ``_MainThread``. +Its name and daemonic status are changed as well, and it can now be joined. From 2ef520ebecf5544ba792266a5dbe4d53653a4a03 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Mon, 22 Jan 2024 18:09:10 +0300 Subject: [PATCH 038/160] gh-108303: Remove `Lib/test/shadowed_super.py` (#114372) Move code into Lib/test/test_super.py. --- Lib/test/shadowed_super.py | 7 ------- Lib/test/test_super.py | 16 +++++++++++++++- 2 files changed, 15 insertions(+), 8 deletions(-) delete mode 100644 Lib/test/shadowed_super.py diff --git a/Lib/test/shadowed_super.py b/Lib/test/shadowed_super.py deleted file mode 100644 index 2a62f667e93818..00000000000000 --- a/Lib/test/shadowed_super.py +++ /dev/null @@ -1,7 +0,0 @@ -class super: - msg = "truly super" - - -class C: - def method(self): - return super().msg diff --git a/Lib/test/test_super.py b/Lib/test/test_super.py index f8e968b9b56f82..256b416caaa584 100644 --- a/Lib/test/test_super.py +++ b/Lib/test/test_super.py @@ -1,8 +1,9 @@ """Unit tests for zero-argument super() & related machinery.""" +import textwrap import unittest from unittest.mock import patch -from test import shadowed_super +from test.support import import_helper ADAPTIVE_WARMUP_DELAY = 2 @@ -342,7 +343,20 @@ def test_super_argtype(self): super(1, int) def test_shadowed_global(self): + source = textwrap.dedent( + """ + class super: + msg = "truly super" + + class C: + def method(self): + return super().msg + """, + )
+ with import_helper.ready_to_import(name="shadowed_super", source=source): + import shadowed_super self.assertEqual(shadowed_super.C().method(), "truly super") + import_helper.unload("shadowed_super") def test_shadowed_local(self): class super: From 6d30cbee013b4182937ffa11a7c87d2a7b6b7b41 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Mon, 22 Jan 2024 18:56:30 +0300 Subject: [PATCH 039/160] Docs: Fix typo in code snippet (GH-114421) --- Doc/library/dis.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst index 7492ae85c4ea46..b97d48fafab3b6 100644 --- a/Doc/library/dis.rst +++ b/Doc/library/dis.rst @@ -576,7 +576,7 @@ operations on it as if it was a Python list. The top of the stack corresponds to Swap the top of the stack with the i-th element:: - STACK[-i], STACK[-1] = stack[-1], STACK[-i] + STACK[-i], STACK[-1] = STACK[-1], STACK[-i] .. versionadded:: 3.11 From 1b719b39b9cd125258739699ac5168d139901b48 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 22 Jan 2024 18:09:22 +0200 Subject: [PATCH 040/160] gh-114321: Expose more constants in the fcntl module (GH-114322) --- Doc/library/fcntl.rst | 41 ++++++++++---- ...-01-19-18-41-02.gh-issue-114321.yj_Xw3.rst | 2 + Modules/fcntlmodule.c | 53 +++++++++++++++++++ 3 files changed, 86 insertions(+), 10 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-01-19-18-41-02.gh-issue-114321.yj_Xw3.rst diff --git a/Doc/library/fcntl.rst b/Doc/library/fcntl.rst index 309ad652d4af34..13ad2dd7da5090 100644 --- a/Doc/library/fcntl.rst +++ b/Doc/library/fcntl.rst @@ -31,26 +31,26 @@ descriptor. raise an :exc:`OSError`. .. versionchanged:: 3.8 - The fcntl module now contains ``F_ADD_SEALS``, ``F_GET_SEALS``, and + The :mod:`!fcntl` module now contains ``F_ADD_SEALS``, ``F_GET_SEALS``, and ``F_SEAL_*`` constants for sealing of :func:`os.memfd_create` file descriptors. .. 
versionchanged:: 3.9 - On macOS, the fcntl module exposes the ``F_GETPATH`` constant, which obtains - the path of a file from a file descriptor. - On Linux(>=3.15), the fcntl module exposes the ``F_OFD_GETLK``, ``F_OFD_SETLK`` - and ``F_OFD_SETLKW`` constants, which are used when working with open file - description locks. + On macOS, the :mod:`!fcntl` module exposes the ``F_GETPATH`` constant, + which obtains the path of a file from a file descriptor. + On Linux(>=3.15), the :mod:`!fcntl` module exposes the ``F_OFD_GETLK``, + ``F_OFD_SETLK`` and ``F_OFD_SETLKW`` constants, which are used when working + with open file description locks. .. versionchanged:: 3.10 - On Linux >= 2.6.11, the fcntl module exposes the ``F_GETPIPE_SZ`` and + On Linux >= 2.6.11, the :mod:`!fcntl` module exposes the ``F_GETPIPE_SZ`` and ``F_SETPIPE_SZ`` constants, which allow to check and modify a pipe's size respectively. .. versionchanged:: 3.11 - On FreeBSD, the fcntl module exposes the ``F_DUP2FD`` and ``F_DUP2FD_CLOEXEC`` - constants, which allow to duplicate a file descriptor, the latter setting - ``FD_CLOEXEC`` flag in addition. + On FreeBSD, the :mod:`!fcntl` module exposes the ``F_DUP2FD`` and + ``F_DUP2FD_CLOEXEC`` constants, which allow to duplicate a file descriptor, + the latter setting ``FD_CLOEXEC`` flag in addition. .. versionchanged:: 3.12 On Linux >= 4.5, the :mod:`fcntl` module exposes the ``FICLONE`` and @@ -58,6 +58,27 @@ descriptor. another file by reflinking on some filesystems (e.g., btrfs, OCFS2, and XFS). This behavior is commonly referred to as "copy-on-write". +.. versionchanged:: 3.13 + On Linux >= 2.6.32, the :mod:`!fcntl` module exposes the + ``F_GETOWN_EX``, ``F_SETOWN_EX``, ``F_OWNER_TID``, ``F_OWNER_PID``, ``F_OWNER_PGRP`` constants, which allow to direct I/O availability signals + to a specific thread, process, or process group. 
+ On Linux >= 4.13, the :mod:`!fcntl` module exposes the + ``F_GET_RW_HINT``, ``F_SET_RW_HINT``, ``F_GET_FILE_RW_HINT``, + ``F_SET_FILE_RW_HINT``, and ``RWH_WRITE_LIFE_*`` constants, which allow + informing the kernel about the relative expected lifetime of writes on + a given inode or via a particular open file description. + On Linux >= 5.1 and NetBSD, the :mod:`!fcntl` module exposes the + ``F_SEAL_FUTURE_WRITE`` constant for use with ``F_ADD_SEALS`` and + ``F_GET_SEALS`` operations. + On FreeBSD, the :mod:`!fcntl` module exposes the ``F_READAHEAD``, ``F_ISUNIONSTACK``, and ``F_KINFO`` constants. + On macOS and FreeBSD, the :mod:`!fcntl` module exposes the ``F_RDAHEAD`` + constant. + On NetBSD and AIX, the :mod:`!fcntl` module exposes the ``F_CLOSEM`` + constant. + On NetBSD, the :mod:`!fcntl` module exposes the ``F_MAXFD`` constant. + On macOS and NetBSD, the :mod:`!fcntl` module exposes the ``F_GETNOSIGPIPE`` + and ``F_SETNOSIGPIPE`` constants. + The module defines the following functions: diff --git a/Misc/NEWS.d/next/Library/2024-01-19-18-41-02.gh-issue-114321.yj_Xw3.rst b/Misc/NEWS.d/next/Library/2024-01-19-18-41-02.gh-issue-114321.yj_Xw3.rst new file mode 100644 index 00000000000000..dc2934bd81a42a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-19-18-41-02.gh-issue-114321.yj_Xw3.rst @@ -0,0 +1,2 @@ +Expose more platform-specific constants in the :mod:`fcntl` module on Linux, +macOS, FreeBSD and NetBSD.
diff --git a/Modules/fcntlmodule.c b/Modules/fcntlmodule.c index fd03abf0561da6..0d16602692b62d 100644 --- a/Modules/fcntlmodule.c +++ b/Modules/fcntlmodule.c @@ -583,6 +583,30 @@ all_ins(PyObject* m) #ifdef FICLONERANGE if (PyModule_AddIntMacro(m, FICLONERANGE)) return -1; #endif +#ifdef F_GETOWN_EX + // since Linux 2.6.32 + if (PyModule_AddIntMacro(m, F_GETOWN_EX)) return -1; + if (PyModule_AddIntMacro(m, F_SETOWN_EX)) return -1; + if (PyModule_AddIntMacro(m, F_OWNER_TID)) return -1; + if (PyModule_AddIntMacro(m, F_OWNER_PID)) return -1; + if (PyModule_AddIntMacro(m, F_OWNER_PGRP)) return -1; +#endif +#ifdef F_GET_RW_HINT + // since Linux 4.13 + if (PyModule_AddIntMacro(m, F_GET_RW_HINT)) return -1; + if (PyModule_AddIntMacro(m, F_SET_RW_HINT)) return -1; + if (PyModule_AddIntMacro(m, F_GET_FILE_RW_HINT)) return -1; + if (PyModule_AddIntMacro(m, F_SET_FILE_RW_HINT)) return -1; +#ifndef RWH_WRITE_LIFE_NOT_SET // typo in Linux < 5.5 +# define RWH_WRITE_LIFE_NOT_SET RWF_WRITE_LIFE_NOT_SET +#endif + if (PyModule_AddIntMacro(m, RWH_WRITE_LIFE_NOT_SET)) return -1; + if (PyModule_AddIntMacro(m, RWH_WRITE_LIFE_NONE)) return -1; + if (PyModule_AddIntMacro(m, RWH_WRITE_LIFE_SHORT)) return -1; + if (PyModule_AddIntMacro(m, RWH_WRITE_LIFE_MEDIUM)) return -1; + if (PyModule_AddIntMacro(m, RWH_WRITE_LIFE_LONG)) return -1; + if (PyModule_AddIntMacro(m, RWH_WRITE_LIFE_EXTREME)) return -1; +#endif /* OS X specifics */ #ifdef F_FULLFSYNC @@ -599,6 +623,32 @@ all_ins(PyObject* m) #ifdef F_DUP2FD_CLOEXEC if (PyModule_AddIntMacro(m, F_DUP2FD_CLOEXEC)) return -1; #endif +#ifdef F_READAHEAD + if (PyModule_AddIntMacro(m, F_READAHEAD)) return -1; +#endif +#ifdef F_RDAHEAD + if (PyModule_AddIntMacro(m, F_RDAHEAD)) return -1; +#endif +#ifdef F_ISUNIONSTACK + if (PyModule_AddIntMacro(m, F_ISUNIONSTACK)) return -1; +#endif +#ifdef F_KINFO + if (PyModule_AddIntMacro(m, F_KINFO)) return -1; +#endif + +/* NetBSD specifics */ +#ifdef F_CLOSEM + if (PyModule_AddIntMacro(m, F_CLOSEM)) return -1; 
+#endif +#ifdef F_MAXFD + if (PyModule_AddIntMacro(m, F_MAXFD)) return -1; +#endif +#ifdef F_GETNOSIGPIPE + if (PyModule_AddIntMacro(m, F_GETNOSIGPIPE)) return -1; +#endif +#ifdef F_SETNOSIGPIPE + if (PyModule_AddIntMacro(m, F_SETNOSIGPIPE)) return -1; +#endif /* For F_{GET|SET}FL */ #ifdef FD_CLOEXEC @@ -673,6 +723,9 @@ all_ins(PyObject* m) if (PyModule_AddIntMacro(m, F_SEAL_SHRINK)) return -1; if (PyModule_AddIntMacro(m, F_SEAL_GROW)) return -1; if (PyModule_AddIntMacro(m, F_SEAL_WRITE)) return -1; +#ifdef F_SEAL_FUTURE_WRITE + if (PyModule_AddIntMacro(m, F_SEAL_FUTURE_WRITE)) return -1; +#endif #endif return 0; } From 8ccd1ba461b44ca078ab120559637bd3ffe22e50 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Mon, 22 Jan 2024 18:21:14 +0200 Subject: [PATCH 041/160] gh-101100: Fix Sphinx warnings in `reference/expressions.rst` (#114194) --- Doc/reference/expressions.rst | 52 +++++++++++++++++------------------ Doc/tools/.nitignore | 1 - 2 files changed, 26 insertions(+), 27 deletions(-) diff --git a/Doc/reference/expressions.rst b/Doc/reference/expressions.rst index e543c1228d4d19..87ebdc1ca1c9c6 100644 --- a/Doc/reference/expressions.rst +++ b/Doc/reference/expressions.rst @@ -1001,7 +1001,7 @@ but does not affect the semantics. The primary must evaluate to a callable object (user-defined functions, built-in functions, methods of built-in objects, class objects, methods of class -instances, and all objects having a :meth:`__call__` method are callable). All +instances, and all objects having a :meth:`~object.__call__` method are callable). All argument expressions are evaluated before the call is attempted. Please refer to section :ref:`function` for the syntax of formal :term:`parameter` lists. 
@@ -1159,7 +1159,7 @@ a class instance: pair: instance; call single: __call__() (object method) - The class must define a :meth:`__call__` method; the effect is then the same as + The class must define a :meth:`~object.__call__` method; the effect is then the same as if that method was called. @@ -1211,7 +1211,7 @@ Raising ``0.0`` to a negative power results in a :exc:`ZeroDivisionError`. Raising a negative number to a fractional power results in a :class:`complex` number. (In earlier versions it raised a :exc:`ValueError`.) -This operation can be customized using the special :meth:`__pow__` method. +This operation can be customized using the special :meth:`~object.__pow__` method. .. _unary: @@ -1234,7 +1234,7 @@ All unary arithmetic and bitwise operations have the same priority: single: - (minus); unary operator The unary ``-`` (minus) operator yields the negation of its numeric argument; the -operation can be overridden with the :meth:`__neg__` special method. +operation can be overridden with the :meth:`~object.__neg__` special method. .. index:: single: plus @@ -1242,7 +1242,7 @@ operation can be overridden with the :meth:`__neg__` special method. single: + (plus); unary operator The unary ``+`` (plus) operator yields its numeric argument unchanged; the -operation can be overridden with the :meth:`__pos__` special method. +operation can be overridden with the :meth:`~object.__pos__` special method. .. index:: single: inversion @@ -1251,7 +1251,7 @@ operation can be overridden with the :meth:`__pos__` special method. The unary ``~`` (invert) operator yields the bitwise inversion of its integer argument. The bitwise inversion of ``x`` is defined as ``-(x+1)``. It only applies to integral numbers or to custom objects that override the -:meth:`__invert__` special method. +:meth:`~object.__invert__` special method. @@ -1289,8 +1289,8 @@ the other must be a sequence. In the former case, the numbers are converted to a common type and then multiplied together. 
In the latter case, sequence repetition is performed; a negative repetition factor yields an empty sequence. -This operation can be customized using the special :meth:`__mul__` and -:meth:`__rmul__` methods. +This operation can be customized using the special :meth:`~object.__mul__` and +:meth:`~object.__rmul__` methods. .. index:: single: matrix multiplication @@ -1314,8 +1314,8 @@ integer; the result is that of mathematical division with the 'floor' function applied to the result. Division by zero raises the :exc:`ZeroDivisionError` exception. -This operation can be customized using the special :meth:`__truediv__` and -:meth:`__floordiv__` methods. +This operation can be customized using the special :meth:`~object.__truediv__` and +:meth:`~object.__floordiv__` methods. .. index:: single: modulo @@ -1340,7 +1340,7 @@ also overloaded by string objects to perform old-style string formatting (also known as interpolation). The syntax for string formatting is described in the Python Library Reference, section :ref:`old-string-formatting`. -The *modulo* operation can be customized using the special :meth:`__mod__` method. +The *modulo* operation can be customized using the special :meth:`~object.__mod__` method. The floor division operator, the modulo operator, and the :func:`divmod` function are not defined for complex numbers. Instead, convert to a floating @@ -1356,8 +1356,8 @@ must either both be numbers or both be sequences of the same type. In the former case, the numbers are converted to a common type and then added together. In the latter case, the sequences are concatenated. -This operation can be customized using the special :meth:`__add__` and -:meth:`__radd__` methods. +This operation can be customized using the special :meth:`~object.__add__` and +:meth:`~object.__radd__` methods. .. 
index:: single: subtraction @@ -1367,7 +1367,7 @@ This operation can be customized using the special :meth:`__add__` and The ``-`` (subtraction) operator yields the difference of its arguments. The numeric arguments are first converted to a common type. -This operation can be customized using the special :meth:`__sub__` method. +This operation can be customized using the special :meth:`~object.__sub__` method. .. _shifting: @@ -1388,8 +1388,8 @@ The shifting operations have lower priority than the arithmetic operations: These operators accept integers as arguments. They shift the first argument to the left or right by the number of bits given by the second argument. -This operation can be customized using the special :meth:`__lshift__` and -:meth:`__rshift__` methods. +This operation can be customized using the special :meth:`~object.__lshift__` and +:meth:`~object.__rshift__` methods. .. index:: pair: exception; ValueError @@ -1416,8 +1416,8 @@ Each of the three bitwise operations has a different priority level: pair: operator; & (ampersand) The ``&`` operator yields the bitwise AND of its arguments, which must be -integers or one of them must be a custom object overriding :meth:`__and__` or -:meth:`__rand__` special methods. +integers or one of them must be a custom object overriding :meth:`~object.__and__` or +:meth:`~object.__rand__` special methods. .. index:: pair: bitwise; xor @@ -1425,8 +1425,8 @@ integers or one of them must be a custom object overriding :meth:`__and__` or pair: operator; ^ (caret) The ``^`` operator yields the bitwise XOR (exclusive OR) of its arguments, which -must be integers or one of them must be a custom object overriding :meth:`__xor__` or -:meth:`__rxor__` special methods. +must be integers or one of them must be a custom object overriding :meth:`~object.__xor__` or +:meth:`~object.__rxor__` special methods. .. 
index:: pair: bitwise; or @@ -1434,8 +1434,8 @@ must be integers or one of them must be a custom object overriding :meth:`__xor_ pair: operator; | (vertical bar) The ``|`` operator yields the bitwise (inclusive) OR of its arguments, which -must be integers or one of them must be a custom object overriding :meth:`__or__` or -:meth:`__ror__` special methods. +must be integers or one of them must be a custom object overriding :meth:`~object.__or__` or +:meth:`~object.__ror__` special methods. .. _comparisons: @@ -1502,7 +1502,7 @@ comparison implementation. Because all types are (direct or indirect) subtypes of :class:`object`, they inherit the default comparison behavior from :class:`object`. Types can customize their comparison behavior by implementing -:dfn:`rich comparison methods` like :meth:`__lt__`, described in +:dfn:`rich comparison methods` like :meth:`~object.__lt__`, described in :ref:`customization`. The default behavior for equality comparison (``==`` and ``!=``) is based on @@ -1666,12 +1666,12 @@ substring of *y*. An equivalent test is ``y.find(x) != -1``. Empty strings are always considered to be a substring of any other string, so ``"" in "abc"`` will return ``True``. -For user-defined classes which define the :meth:`__contains__` method, ``x in +For user-defined classes which define the :meth:`~object.__contains__` method, ``x in y`` returns ``True`` if ``y.__contains__(x)`` returns a true value, and ``False`` otherwise. -For user-defined classes which do not define :meth:`__contains__` but do define -:meth:`__iter__`, ``x in y`` is ``True`` if some value ``z``, for which the +For user-defined classes which do not define :meth:`~object.__contains__` but do define +:meth:`~object.__iter__`, ``x in y`` is ``True`` if some value ``z``, for which the expression ``x is z or x == z`` is true, is produced while iterating over ``y``. If an exception is raised during the iteration, it is as if :keyword:`in` raised that exception. 
diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 4f2e4fdd43490c..bebd92bffad46a 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -95,7 +95,6 @@ Doc/library/xmlrpc.server.rst Doc/library/zlib.rst Doc/reference/compound_stmts.rst Doc/reference/datamodel.rst -Doc/reference/expressions.rst Doc/reference/import.rst Doc/tutorial/datastructures.rst Doc/using/windows.rst From a53e56e7d88b4f2a2943c9f191024198009fcf9e Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 22 Jan 2024 18:40:35 +0200 Subject: [PATCH 042/160] gh-75128: Ignore EADDRNOTAVAIL error in asyncio.BaseEventLoop.create_server() (GH-114420) Co-authored-by: Antoine Pitrou --- Lib/asyncio/base_events.py | 20 ++++++++++++++++--- ...4-01-22-12-10-34.gh-issue-75128.4FGlRS.rst | 2 ++ 2 files changed, 19 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-01-22-12-10-34.gh-issue-75128.4FGlRS.rst diff --git a/Lib/asyncio/base_events.py b/Lib/asyncio/base_events.py index a8870b636d1df5..c60d7688ef8c77 100644 --- a/Lib/asyncio/base_events.py +++ b/Lib/asyncio/base_events.py @@ -16,6 +16,7 @@ import collections import collections.abc import concurrent.futures +import errno import functools import heapq import itertools @@ -1585,9 +1586,22 @@ async def create_server( try: sock.bind(sa) except OSError as err: - raise OSError(err.errno, 'error while attempting ' - 'to bind on address %r: %s' - % (sa, err.strerror.lower())) from None + msg = ('error while attempting ' + 'to bind on address %r: %s' + % (sa, err.strerror.lower())) + if err.errno == errno.EADDRNOTAVAIL: + # Assume the family is not enabled (bpo-30945) + sockets.pop() + sock.close() + if self._debug: + logger.warning(msg) + continue + raise OSError(err.errno, msg) from None + + if not sockets: + raise OSError('could not bind on any address out of %r' + % ([info[4] for info in infos],)) + completed = True finally: if not completed: diff --git 
a/Misc/NEWS.d/next/Library/2024-01-22-12-10-34.gh-issue-75128.4FGlRS.rst b/Misc/NEWS.d/next/Library/2024-01-22-12-10-34.gh-issue-75128.4FGlRS.rst new file mode 100644 index 00000000000000..d875148e89b41b --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-22-12-10-34.gh-issue-75128.4FGlRS.rst @@ -0,0 +1,2 @@ +Ignore an :exc:`OSError` in :meth:`asyncio.BaseEventLoop.create_server` when +IPv6 is available but the interface cannot actually support it. From ed30a3c337f30abd2ea5357565a956ed3dc0719c Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Mon, 22 Jan 2024 17:12:06 +0000 Subject: [PATCH 043/160] gh-114083: apply optimization of LOAD_CONST instructions to the whole CFG before optimize_basic_block. (#114408) --- Lib/test/test_compile.py | 14 +- ...-01-22-09-49-02.gh-issue-114083.hf1-ku.rst | 1 + Python/flowgraph.c | 319 ++++++++++-------- 3 files changed, 191 insertions(+), 143 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-01-22-09-49-02.gh-issue-114083.hf1-ku.rst diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index 9c36f053314f9f..3b1ceceaa6305f 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -449,9 +449,17 @@ def test_condition_expression_with_dead_blocks_compiles(self): compile('if (5 if 5 else T): 0', '', 'exec') def test_condition_expression_with_redundant_comparisons_compiles(self): - # See gh-113054 - with self.assertWarns(SyntaxWarning): - compile('if 9<9<9and 9or 9:9', '', 'exec') + # See gh-113054, gh-114083 + exprs = [ + 'if 9<9<9and 9or 9:9', + 'if 9<9<9and 9or 9or 9:9', + 'if 9<9<9and 9or 9or 9or 9:9', + 'if 9<9<9and 9or 9or 9or 9or 9:9', + ] + for expr in exprs: + with self.subTest(expr=expr): + with self.assertWarns(SyntaxWarning): + compile(expr, '', 'exec') def test_dead_code_with_except_handler_compiles(self): compile(textwrap.dedent(""" diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-01-22-09-49-02.gh-issue-114083.hf1-ku.rst 
b/Misc/NEWS.d/next/Core and Builtins/2024-01-22-09-49-02.gh-issue-114083.hf1-ku.rst new file mode 100644 index 00000000000000..79be45e87b90d3 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-01-22-09-49-02.gh-issue-114083.hf1-ku.rst @@ -0,0 +1 @@ +Compiler applies folding of LOAD_CONST with following instruction in a separate pass before other optimisations. This enables jump threading in certain circumstances. diff --git a/Python/flowgraph.c b/Python/flowgraph.c index e84030c87b1b4b..2fc90b8877b475 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -472,14 +472,12 @@ next_nonempty_block(basicblock *b) /***** debugging helpers *****/ #ifndef NDEBUG -static int remove_redundant_nops(basicblock *bb); +static int remove_redundant_nops(cfg_builder *g); static bool no_redundant_nops(cfg_builder *g) { - for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { - if (remove_redundant_nops(b) != 0) { - return false; - } + if (remove_redundant_nops(g) != 0) { + return false; } return true; } @@ -1003,7 +1001,7 @@ remove_unreachable(basicblock *entryblock) { } static int -remove_redundant_nops(basicblock *bb) { +basicblock_remove_redundant_nops(basicblock *bb) { /* Remove NOPs when legal to do so. 
*/ int dest = 0; int prev_lineno = -1; @@ -1062,6 +1060,17 @@ remove_redundant_nops(basicblock *bb) { return num_removed; } +static int +remove_redundant_nops(cfg_builder *g) { + int changes = 0; + for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { + int change = basicblock_remove_redundant_nops(b); + RETURN_IF_ERROR(change); + changes += change; + } + return changes; +} + static int remove_redundant_nops_and_pairs(basicblock *entryblock) { @@ -1072,7 +1081,7 @@ remove_redundant_nops_and_pairs(basicblock *entryblock) cfg_instr *prev_instr = NULL; cfg_instr *instr = NULL; for (basicblock *b = entryblock; b != NULL; b = b->b_next) { - remove_redundant_nops(b); + RETURN_IF_ERROR(basicblock_remove_redundant_nops(b)); if (IS_LABEL(b->b_label)) { /* this block is a jump target, forget instr */ instr = NULL; @@ -1112,8 +1121,11 @@ remove_redundant_jumps(cfg_builder *g) { * non-empty block reached through normal flow control is the target * of that jump. If it is, then the jump instruction is redundant and * can be deleted. + * + * Return the number of changes applied, or -1 on error. 
*/ + int changes = 0; for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { cfg_instr *last = basicblock_last_instr(b); if (last == NULL) { @@ -1128,17 +1140,19 @@ remove_redundant_jumps(cfg_builder *g) { } basicblock *next = next_nonempty_block(b->b_next); if (jump_target == next) { - if (last->i_loc.lineno == NO_LOCATION.lineno) { + changes++; + if (last->i_loc_propagated) { b->b_iused--; } else { + assert(last->i_loc.lineno != -1); INSTR_SET_OP0(last, NOP); } } } } - return SUCCESS; + return changes; } /* Maximum size of basic block that should be copied in optimizer */ @@ -1479,16 +1493,12 @@ apply_static_swaps(basicblock *block, int i) } static int -optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts) +basicblock_optimize_load_const(PyObject *const_cache, basicblock *bb, PyObject *consts) { assert(PyDict_CheckExact(const_cache)); assert(PyList_CheckExact(consts)); - cfg_instr nop; - INSTR_SET_OP0(&nop, NOP); - cfg_instr *target = &nop; int opcode = 0; int oparg = 0; - int nextop = 0; for (int i = 0; i < bb->b_iused; i++) { cfg_instr *inst = &bb->b_instr[i]; bool is_copy_of_load_const = (opcode == LOAD_CONST && @@ -1497,118 +1507,148 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts) if (! is_copy_of_load_const) { opcode = inst->i_opcode; oparg = inst->i_oparg; - if (HAS_TARGET(opcode)) { - assert(inst->i_target->b_iused > 0); - target = &inst->i_target->b_instr[0]; - assert(!IS_ASSEMBLER_OPCODE(target->i_opcode)); - } - else { - target = &nop; - } } - nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0; assert(!IS_ASSEMBLER_OPCODE(opcode)); - switch (opcode) { - /* Remove LOAD_CONST const; conditional jump */ - case LOAD_CONST: + if (opcode != LOAD_CONST) { + continue; + } + int nextop = i+1 < bb->b_iused ? 
bb->b_instr[i+1].i_opcode : 0; + switch(nextop) { + case POP_JUMP_IF_FALSE: + case POP_JUMP_IF_TRUE: { - PyObject* cnt; - int is_true; - int jump_if_true; - switch(nextop) { - case POP_JUMP_IF_FALSE: - case POP_JUMP_IF_TRUE: - cnt = get_const_value(opcode, oparg, consts); - if (cnt == NULL) { - goto error; - } - is_true = PyObject_IsTrue(cnt); - Py_DECREF(cnt); - if (is_true == -1) { - goto error; - } - INSTR_SET_OP0(inst, NOP); - jump_if_true = nextop == POP_JUMP_IF_TRUE; - if (is_true == jump_if_true) { - bb->b_instr[i+1].i_opcode = JUMP; - } - else { - INSTR_SET_OP0(&bb->b_instr[i + 1], NOP); - } - break; - case IS_OP: - // Fold to POP_JUMP_IF_NONE: - // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_TRUE - // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_FALSE - // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_TRUE - // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_FALSE - // Fold to POP_JUMP_IF_NOT_NONE: - // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_FALSE - // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_TRUE - // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_FALSE - // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_TRUE - cnt = get_const_value(opcode, oparg, consts); - if (cnt == NULL) { - goto error; - } - if (!Py_IsNone(cnt)) { - Py_DECREF(cnt); - break; - } - if (bb->b_iused <= i + 2) { - break; - } - cfg_instr *is_instr = &bb->b_instr[i + 1]; - cfg_instr *jump_instr = &bb->b_instr[i + 2]; - // Get rid of TO_BOOL regardless: - if (jump_instr->i_opcode == TO_BOOL) { - INSTR_SET_OP0(jump_instr, NOP); - if (bb->b_iused <= i + 3) { - break; - } - jump_instr = &bb->b_instr[i + 3]; - } - bool invert = is_instr->i_oparg; - if (jump_instr->i_opcode == POP_JUMP_IF_FALSE) { - invert = !invert; - } - else if (jump_instr->i_opcode != POP_JUMP_IF_TRUE) { - break; - } - INSTR_SET_OP0(inst, NOP); - INSTR_SET_OP0(is_instr, NOP); - jump_instr->i_opcode = invert ? 
POP_JUMP_IF_NOT_NONE - : POP_JUMP_IF_NONE; - break; - case RETURN_VALUE: - INSTR_SET_OP0(inst, NOP); - INSTR_SET_OP1(&bb->b_instr[++i], RETURN_CONST, oparg); - break; - case TO_BOOL: - cnt = get_const_value(opcode, oparg, consts); - if (cnt == NULL) { - goto error; - } - is_true = PyObject_IsTrue(cnt); - Py_DECREF(cnt); - if (is_true == -1) { - goto error; - } - cnt = PyBool_FromLong(is_true); - int index = add_const(cnt, consts, const_cache); - if (index < 0) { - return ERROR; - } - INSTR_SET_OP0(inst, NOP); - INSTR_SET_OP1(&bb->b_instr[i + 1], LOAD_CONST, index); + /* Remove LOAD_CONST const; conditional jump */ + PyObject* cnt = get_const_value(opcode, oparg, consts); + if (cnt == NULL) { + return ERROR; + } + int is_true = PyObject_IsTrue(cnt); + Py_DECREF(cnt); + if (is_true == -1) { + return ERROR; + } + INSTR_SET_OP0(inst, NOP); + int jump_if_true = nextop == POP_JUMP_IF_TRUE; + if (is_true == jump_if_true) { + bb->b_instr[i+1].i_opcode = JUMP; + } + else { + INSTR_SET_OP0(&bb->b_instr[i + 1], NOP); + } + break; + } + case IS_OP: + { + // Fold to POP_JUMP_IF_NONE: + // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_TRUE + // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_FALSE + // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_TRUE + // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_FALSE + // Fold to POP_JUMP_IF_NOT_NONE: + // - LOAD_CONST(None) IS_OP(0) POP_JUMP_IF_FALSE + // - LOAD_CONST(None) IS_OP(1) POP_JUMP_IF_TRUE + // - LOAD_CONST(None) IS_OP(0) TO_BOOL POP_JUMP_IF_FALSE + // - LOAD_CONST(None) IS_OP(1) TO_BOOL POP_JUMP_IF_TRUE + PyObject *cnt = get_const_value(opcode, oparg, consts); + if (cnt == NULL) { + return ERROR; + } + if (!Py_IsNone(cnt)) { + Py_DECREF(cnt); + break; + } + if (bb->b_iused <= i + 2) { + break; + } + cfg_instr *is_instr = &bb->b_instr[i + 1]; + cfg_instr *jump_instr = &bb->b_instr[i + 2]; + // Get rid of TO_BOOL regardless: + if (jump_instr->i_opcode == TO_BOOL) { + INSTR_SET_OP0(jump_instr, NOP); + if (bb->b_iused <= i + 3) { break; + 
} + jump_instr = &bb->b_instr[i + 3]; + } + bool invert = is_instr->i_oparg; + if (jump_instr->i_opcode == POP_JUMP_IF_FALSE) { + invert = !invert; + } + else if (jump_instr->i_opcode != POP_JUMP_IF_TRUE) { + break; + } + INSTR_SET_OP0(inst, NOP); + INSTR_SET_OP0(is_instr, NOP); + jump_instr->i_opcode = invert ? POP_JUMP_IF_NOT_NONE + : POP_JUMP_IF_NONE; + break; + } + case RETURN_VALUE: + { + INSTR_SET_OP0(inst, NOP); + INSTR_SET_OP1(&bb->b_instr[++i], RETURN_CONST, oparg); + break; + } + case TO_BOOL: + { + PyObject *cnt = get_const_value(opcode, oparg, consts); + if (cnt == NULL) { + return ERROR; + } + int is_true = PyObject_IsTrue(cnt); + Py_DECREF(cnt); + if (is_true == -1) { + return ERROR; + } + cnt = PyBool_FromLong(is_true); + int index = add_const(cnt, consts, const_cache); + if (index < 0) { + return ERROR; } + INSTR_SET_OP0(inst, NOP); + INSTR_SET_OP1(&bb->b_instr[i + 1], LOAD_CONST, index); break; } - /* Try to fold tuples of constants. - Skip over BUILD_TUPLE(1) UNPACK_SEQUENCE(1). - Replace BUILD_TUPLE(2) UNPACK_SEQUENCE(2) with SWAP(2). - Replace BUILD_TUPLE(3) UNPACK_SEQUENCE(3) with SWAP(3). 
*/ + } + } + return SUCCESS; +} + +static int +optimize_load_const(PyObject *const_cache, cfg_builder *g, PyObject *consts) { + for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { + RETURN_IF_ERROR(basicblock_optimize_load_const(const_cache, b, consts)); + } + return SUCCESS; +} + +static int +optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts) +{ + assert(PyDict_CheckExact(const_cache)); + assert(PyList_CheckExact(consts)); + cfg_instr nop; + INSTR_SET_OP0(&nop, NOP); + for (int i = 0; i < bb->b_iused; i++) { + cfg_instr *inst = &bb->b_instr[i]; + cfg_instr *target; + int opcode = inst->i_opcode; + int oparg = inst->i_oparg; + if (HAS_TARGET(opcode)) { + assert(inst->i_target->b_iused > 0); + target = &inst->i_target->b_instr[0]; + assert(!IS_ASSEMBLER_OPCODE(target->i_opcode)); + } + else { + target = &nop; + } + int nextop = i+1 < bb->b_iused ? bb->b_instr[i+1].i_opcode : 0; + assert(!IS_ASSEMBLER_OPCODE(opcode)); + switch (opcode) { + /* Try to fold tuples of constants. + Skip over BUILD_TUPLE(1) UNPACK_SEQUENCE(1). + Replace BUILD_TUPLE(2) UNPACK_SEQUENCE(2) with SWAP(2). + Replace BUILD_TUPLE(3) UNPACK_SEQUENCE(3) with SWAP(3). 
*/ case BUILD_TUPLE: if (nextop == UNPACK_SEQUENCE && oparg == bb->b_instr[i+1].i_oparg) { switch(oparg) { @@ -1723,9 +1763,6 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts) continue; } break; - default: - /* All OPCODE_HAS_CONST opcodes should be handled with LOAD_CONST */ - assert (!OPCODE_HAS_CONST(inst->i_opcode)); } } @@ -1762,6 +1799,7 @@ optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache, int firstl } RETURN_IF_ERROR(remove_unreachable(g->g_entryblock)); RETURN_IF_ERROR(resolve_line_numbers(g, firstlineno)); + RETURN_IF_ERROR(optimize_load_const(const_cache, g, consts)); for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { RETURN_IF_ERROR(optimize_basic_block(const_cache, b, consts)); } @@ -1771,13 +1809,16 @@ optimize_cfg(cfg_builder *g, PyObject *consts, PyObject *const_cache, int firstl } RETURN_IF_ERROR(remove_unreachable(g->g_entryblock)); - for (int n = 0; n < 2; n++) { - for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { - remove_redundant_nops(b); - } - RETURN_IF_ERROR(remove_redundant_jumps(g)); - } - + int removed_nops, removed_jumps; + do { + /* Convergence is guaranteed because the number of + * redundant jumps and nops only decreases. 
+ */ + removed_nops = remove_redundant_nops(g); + RETURN_IF_ERROR(removed_nops); + removed_jumps = remove_redundant_jumps(g); + RETURN_IF_ERROR(removed_jumps); + } while(removed_nops + removed_jumps > 0); assert(no_redundant_jumps(g)); return SUCCESS; } @@ -1798,7 +1839,7 @@ make_super_instruction(cfg_instr *inst1, cfg_instr *inst2, int super_op) INSTR_SET_OP0(inst2, NOP); } -static void +static int insert_superinstructions(cfg_builder *g) { for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { @@ -1825,10 +1866,9 @@ insert_superinstructions(cfg_builder *g) } } } - for (basicblock *b = g->g_entryblock; b != NULL; b = b->b_next) { - remove_redundant_nops(b); - } + int res = remove_redundant_nops(g); assert(no_redundant_nops(g)); + return res; } // helper functions for add_checks_for_loads_of_unknown_variables @@ -2257,9 +2297,10 @@ push_cold_blocks_to_end(cfg_builder *g) { return SUCCESS; } -static void -convert_pseudo_ops(basicblock *entryblock) +static int +convert_pseudo_ops(cfg_builder *g) { + basicblock *entryblock = g->g_entryblock; for (basicblock *b = entryblock; b != NULL; b = b->b_next) { for (int i = 0; i < b->b_iused; i++) { cfg_instr *instr = &b->b_instr[i]; @@ -2276,9 +2317,7 @@ convert_pseudo_ops(basicblock *entryblock) } } } - for (basicblock *b = entryblock; b != NULL; b = b->b_next) { - remove_redundant_nops(b); - } + return remove_redundant_nops(g); } static inline bool @@ -2422,7 +2461,7 @@ _PyCfg_OptimizeCodeUnit(cfg_builder *g, PyObject *consts, PyObject *const_cache, RETURN_IF_ERROR( add_checks_for_loads_of_uninitialized_variables( g->g_entryblock, nlocals, nparams)); - insert_superinstructions(g); + RETURN_IF_ERROR(insert_superinstructions(g)); RETURN_IF_ERROR(push_cold_blocks_to_end(g)); assert(all_exits_have_lineno(g->g_entryblock)); @@ -2697,7 +2736,7 @@ _PyCfg_OptimizedCfgToInstructionSequence(cfg_builder *g, return ERROR; } - convert_pseudo_ops(g->g_entryblock); + RETURN_IF_ERROR(convert_pseudo_ops(g)); /* Order of basic 
blocks must have been determined by now */ From 7fc51c3f6b7b13f88480557ff14bdb1c049f9a37 Mon Sep 17 00:00:00 2001 From: AN Long Date: Tue, 23 Jan 2024 01:15:29 +0800 Subject: [PATCH 044/160] gh-114257: Ignore the FileNotFound error in ctypes.util._is_elf() (GH-114394) --- Lib/ctypes/util.py | 7 +++++-- Lib/test/test_ctypes/test_find.py | 3 +++ .../Library/2024-01-21-16-32-55.gh-issue-114257.bCFld5.rst | 2 ++ 3 files changed, 10 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-01-21-16-32-55.gh-issue-114257.bCFld5.rst diff --git a/Lib/ctypes/util.py b/Lib/ctypes/util.py index 0c2510e1619c8e..c550883e7c7d4b 100644 --- a/Lib/ctypes/util.py +++ b/Lib/ctypes/util.py @@ -96,8 +96,11 @@ def find_library(name): def _is_elf(filename): "Return True if the given file is an ELF file" elf_header = b'\x7fELF' - with open(filename, 'br') as thefile: - return thefile.read(4) == elf_header + try: + with open(filename, 'br') as thefile: + return thefile.read(4) == elf_header + except FileNotFoundError: + return False def _findLib_gcc(name): # Run GCC's linker with the -t (aka --trace) option and examine the diff --git a/Lib/test/test_ctypes/test_find.py b/Lib/test/test_ctypes/test_find.py index 66ff23e72b5e10..7732ff37308848 100644 --- a/Lib/test/test_ctypes/test_find.py +++ b/Lib/test/test_ctypes/test_find.py @@ -125,6 +125,9 @@ def test_find_library_with_ld(self): unittest.mock.patch("ctypes.util._findLib_gcc", lambda *args: None): self.assertNotEqual(find_library('c'), None) + def test_gh114257(self): + self.assertIsNone(find_library("libc")) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Library/2024-01-21-16-32-55.gh-issue-114257.bCFld5.rst b/Misc/NEWS.d/next/Library/2024-01-21-16-32-55.gh-issue-114257.bCFld5.rst new file mode 100644 index 00000000000000..6f02ff9e62617d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-21-16-32-55.gh-issue-114257.bCFld5.rst @@ -0,0 +1,2 @@ +Dismiss the :exc:`FileNotFound` error in 
:func:`ctypes.util.find_library` and +just return ``None`` on Linux. From 5cd9c6b1fca549741828288febf9d5c13293847d Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Mon, 22 Jan 2024 10:28:57 -0800 Subject: [PATCH 045/160] Fix `wasi.py build` after adding the `clean` subcommand. ({GH-114447) --- Tools/wasm/wasi.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Tools/wasm/wasi.py b/Tools/wasm/wasi.py index e71b0b302a5561..46ecae74a9ecea 100644 --- a/Tools/wasm/wasi.py +++ b/Tools/wasm/wasi.py @@ -68,7 +68,8 @@ def wrapper(context): terminal_width = 80 print("⎯" * terminal_width) print("📁", working_dir) - if clean_ok and context.clean and working_dir.exists(): + if (clean_ok and getattr(context, "clean", False) and + working_dir.exists()): print(f"🚮 Deleting directory (--clean)...") shutil.rmtree(working_dir) From e45bae7a45e5696c3ebdf477ecc948374cf8ebff Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Mon, 22 Jan 2024 14:45:15 -0500 Subject: [PATCH 046/160] GH-114448: Don't sort summarize_stats.py histograms by amount of change (GH-114449) --- Tools/scripts/summarize_stats.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py index df8a7fddfb8866..1e9dc07bae8981 100644 --- a/Tools/scripts/summarize_stats.py +++ b/Tools/scripts/summarize_stats.py @@ -460,8 +460,11 @@ class JoinMode(enum.Enum): # second column of each input table as a new column CHANGE = 1 # Join using the first column as a key, indicating the change in the second - # column of each input table as a ne column, and omit all other columns + # column of each input table as a new column, and omit all other columns CHANGE_ONE_COLUMN = 2 + # Join using the first column as a key, and indicate the change as a new + # column, but don't sort by the amount of change. 
+ CHANGE_NO_SORT = 3 class Table: @@ -484,7 +487,7 @@ def join_row(self, key: str, row_a: tuple, row_b: tuple) -> tuple: match self.join_mode: case JoinMode.SIMPLE: return (key, *row_a, *row_b) - case JoinMode.CHANGE: + case JoinMode.CHANGE | JoinMode.CHANGE_NO_SORT: return (key, *row_a, *row_b, DiffRatio(row_a[0], row_b[0])) case JoinMode.CHANGE_ONE_COLUMN: return (key, row_a[0], row_b[0], DiffRatio(row_a[0], row_b[0])) @@ -497,7 +500,7 @@ def join_columns(self, columns: Columns) -> Columns: *("Base " + x for x in columns[1:]), *("Head " + x for x in columns[1:]), ) - case JoinMode.CHANGE: + case JoinMode.CHANGE | JoinMode.CHANGE_NO_SORT: return ( columns[0], *("Base " + x for x in columns[1:]), @@ -1027,7 +1030,7 @@ def iter_optimization_tables(base_stats: Stats, head_stats: Stats | None = None) Table( ("Range", "Count:", "Ratio:"), calc_histogram_table(name, den), - JoinMode.CHANGE, + JoinMode.CHANGE_NO_SORT, ) ], ) From 412920a41efc6f3307e710d5ce61bfe00c0f3c11 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Mon, 22 Jan 2024 16:10:21 -0500 Subject: [PATCH 047/160] gh-112532: Improve mimalloc page visiting (#114133) This adds support for visiting abandoned pages in mimalloc and improves the performance of the page visiting code. Abandoned pages contain memory blocks from threads that have exited. At some point, they may be later reclaimed by other threads. We still need to visit those pages in the free-threaded GC because they contain live objects. This also reduces the overhead of visiting mimalloc pages: * Special cases for full, empty, and pages containing only a single block. * Fix free_map to use one bit instead of one byte per block. * Use fast integer division by a constant algorithm when computing block offset from block size and index. 
--- Include/internal/mimalloc/mimalloc/internal.h | 4 + Objects/mimalloc/heap.c | 114 ++++++++++++------ Objects/mimalloc/segment.c | 50 ++++++++ 3 files changed, 134 insertions(+), 34 deletions(-) diff --git a/Include/internal/mimalloc/mimalloc/internal.h b/Include/internal/mimalloc/mimalloc/internal.h index 887bf26c956982..8af841cfdffc01 100644 --- a/Include/internal/mimalloc/mimalloc/internal.h +++ b/Include/internal/mimalloc/mimalloc/internal.h @@ -120,6 +120,8 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld); bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld); void _mi_segment_thread_collect(mi_segments_tld_t* tld); +bool _mi_abandoned_pool_visit_blocks(mi_abandoned_pool_t* pool, uint8_t page_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg); + #if MI_HUGE_PAGE_ABANDON void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block); @@ -161,6 +163,8 @@ void _mi_heap_collect_abandon(mi_heap_t* heap); void _mi_heap_set_default_direct(mi_heap_t* heap); bool _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid); void _mi_heap_unsafe_destroy_all(void); +void _mi_heap_area_init(mi_heap_area_t* area, mi_page_t* page); +bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t *page, mi_block_visit_fun* visitor, void* arg); // "stats.c" void _mi_stats_done(mi_stats_t* stats); diff --git a/Objects/mimalloc/heap.c b/Objects/mimalloc/heap.c index 6468999a7d5766..164b28f0fab240 100644 --- a/Objects/mimalloc/heap.c +++ b/Objects/mimalloc/heap.c @@ -26,7 +26,7 @@ typedef bool (heap_page_visitor_fun)(mi_heap_t* heap, mi_page_queue_t* pq, mi_pa // Visit all pages in a heap; returns `false` if break was called. 
static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void* arg1, void* arg2) { - if (heap==NULL || heap->page_count==0) return 0; + if (heap==NULL || heap->page_count==0) return true; // visit all pages #if MI_DEBUG>1 @@ -521,11 +521,20 @@ typedef struct mi_heap_area_ex_s { mi_page_t* page; } mi_heap_area_ex_t; -static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_visit_fun* visitor, void* arg) { - mi_assert(xarea != NULL); - if (xarea==NULL) return true; - const mi_heap_area_t* area = &xarea->area; - mi_page_t* page = xarea->page; +static void mi_fast_divisor(size_t divisor, size_t* magic, size_t* shift) { + mi_assert_internal(divisor > 0 && divisor <= UINT32_MAX); + *shift = MI_INTPTR_BITS - mi_clz(divisor - 1); + *magic = (size_t)(((1ULL << 32) * ((1ULL << *shift) - divisor)) / divisor + 1); +} + +static size_t mi_fast_divide(size_t n, size_t magic, size_t shift) { + mi_assert_internal(n <= UINT32_MAX); + return ((((uint64_t) n * magic) >> 32) + n) >> shift; +} + +bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t *page, mi_block_visit_fun* visitor, void* arg) { + mi_assert(area != NULL); + if (area==NULL) return true; mi_assert(page != NULL); if (page == NULL) return true; @@ -537,17 +546,39 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v const size_t ubsize = mi_page_usable_block_size(page); // without padding size_t psize; uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize); + mi_heap_t* heap = mi_page_heap(page); if (page->capacity == 1) { // optimize page with one block mi_assert_internal(page->used == 1 && page->free == NULL); - return visitor(mi_page_heap(page), area, pstart, ubsize, arg); + return visitor(heap, area, pstart, ubsize, arg); + } + + if (page->used == page->capacity) { + // optimize full pages + uint8_t* block = pstart; + for (size_t i = 0; i < page->capacity; i++) { + if (!visitor(heap, area, block, ubsize, arg)) return 
false; + block += bsize; + } + return true; } // create a bitmap of free blocks. #define MI_MAX_BLOCKS (MI_SMALL_PAGE_SIZE / sizeof(void*)) - uintptr_t free_map[MI_MAX_BLOCKS / sizeof(uintptr_t)]; - memset(free_map, 0, sizeof(free_map)); + uintptr_t free_map[MI_MAX_BLOCKS / MI_INTPTR_BITS]; + size_t bmapsize = (page->capacity + MI_INTPTR_BITS - 1) / MI_INTPTR_BITS; + memset(free_map, 0, bmapsize * sizeof(uintptr_t)); + + if (page->capacity % MI_INTPTR_BITS != 0) { + size_t shift = (page->capacity % MI_INTPTR_BITS); + uintptr_t mask = (UINTPTR_MAX << shift); + free_map[bmapsize-1] = mask; + } + + // fast repeated division by the block size + size_t magic, shift; + mi_fast_divisor(bsize, &magic, &shift); #if MI_DEBUG>1 size_t free_count = 0; @@ -559,10 +590,11 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v mi_assert_internal((uint8_t*)block >= pstart && (uint8_t*)block < (pstart + psize)); size_t offset = (uint8_t*)block - pstart; mi_assert_internal(offset % bsize == 0); - size_t blockidx = offset / bsize; // Todo: avoid division? 
- mi_assert_internal( blockidx < MI_MAX_BLOCKS); - size_t bitidx = (blockidx / sizeof(uintptr_t)); - size_t bit = blockidx - (bitidx * sizeof(uintptr_t)); + size_t blockidx = mi_fast_divide(offset, magic, shift); + mi_assert_internal(blockidx == offset / bsize); + mi_assert_internal(blockidx < MI_MAX_BLOCKS); + size_t bitidx = (blockidx / MI_INTPTR_BITS); + size_t bit = blockidx - (bitidx * MI_INTPTR_BITS); free_map[bitidx] |= ((uintptr_t)1 << bit); } mi_assert_internal(page->capacity == (free_count + page->used)); @@ -571,19 +603,29 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v #if MI_DEBUG>1 size_t used_count = 0; #endif - for (size_t i = 0; i < page->capacity; i++) { - size_t bitidx = (i / sizeof(uintptr_t)); - size_t bit = i - (bitidx * sizeof(uintptr_t)); - uintptr_t m = free_map[bitidx]; - if (bit == 0 && m == UINTPTR_MAX) { - i += (sizeof(uintptr_t) - 1); // skip a run of free blocks + uint8_t* block = pstart; + for (size_t i = 0; i < bmapsize; i++) { + if (free_map[i] == 0) { + // every block is in use + for (size_t j = 0; j < MI_INTPTR_BITS; j++) { + #if MI_DEBUG>1 + used_count++; + #endif + if (!visitor(heap, area, block, ubsize, arg)) return false; + block += bsize; + } } - else if ((m & ((uintptr_t)1 << bit)) == 0) { - #if MI_DEBUG>1 - used_count++; - #endif - uint8_t* block = pstart + (i * bsize); - if (!visitor(mi_page_heap(page), area, block, ubsize, arg)) return false; + else { + uintptr_t m = ~free_map[i]; + while (m) { + #if MI_DEBUG>1 + used_count++; + #endif + size_t bitidx = mi_ctz(m); + if (!visitor(heap, area, block + (bitidx * bsize), ubsize, arg)) return false; + m &= m - 1; + } + block += bsize * MI_INTPTR_BITS; } } mi_assert_internal(page->used == used_count); @@ -592,21 +634,24 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v typedef bool (mi_heap_area_visit_fun)(const mi_heap_t* heap, const mi_heap_area_ex_t* area, void* arg); +void 
_mi_heap_area_init(mi_heap_area_t* area, mi_page_t* page) { + const size_t bsize = mi_page_block_size(page); + const size_t ubsize = mi_page_usable_block_size(page); + area->reserved = page->reserved * bsize; + area->committed = page->capacity * bsize; + area->blocks = _mi_page_start(_mi_page_segment(page), page, NULL); + area->used = page->used; // number of blocks in use (#553) + area->block_size = ubsize; + area->full_block_size = bsize; +} static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* vfun, void* arg) { MI_UNUSED(heap); MI_UNUSED(pq); mi_heap_area_visit_fun* fun = (mi_heap_area_visit_fun*)vfun; mi_heap_area_ex_t xarea; - const size_t bsize = mi_page_block_size(page); - const size_t ubsize = mi_page_usable_block_size(page); xarea.page = page; - xarea.area.reserved = page->reserved * bsize; - xarea.area.committed = page->capacity * bsize; - xarea.area.blocks = _mi_page_start(_mi_page_segment(page), page, NULL); - xarea.area.used = page->used; // number of blocks in use (#553) - xarea.area.block_size = ubsize; - xarea.area.full_block_size = bsize; + _mi_heap_area_init(&xarea.area, page); return fun(heap, &xarea, arg); } @@ -627,7 +672,7 @@ static bool mi_heap_area_visitor(const mi_heap_t* heap, const mi_heap_area_ex_t* mi_visit_blocks_args_t* args = (mi_visit_blocks_args_t*)arg; if (!args->visitor(heap, &xarea->area, NULL, xarea->area.block_size, args->arg)) return false; if (args->visit_blocks) { - return mi_heap_area_visit_blocks(xarea, args->visitor, args->arg); + return _mi_heap_area_visit_blocks(&xarea->area, xarea->page, args->visitor, args->arg); } else { return true; @@ -637,5 +682,6 @@ static bool mi_heap_area_visitor(const mi_heap_t* heap, const mi_heap_area_ex_t* // Visit all blocks in a heap bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) { mi_visit_blocks_args_t args = { visit_blocks, visitor, arg }; + 
_mi_heap_delayed_free_partial((mi_heap_t *)heap); return mi_heap_visit_areas(heap, &mi_heap_area_visitor, &args); } diff --git a/Objects/mimalloc/segment.c b/Objects/mimalloc/segment.c index d9b39b03fd6c5f..584233b8b57bb4 100644 --- a/Objects/mimalloc/segment.c +++ b/Objects/mimalloc/segment.c @@ -1614,3 +1614,53 @@ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t pag mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); return page; } + +/* ----------------------------------------------------------- + Visit blocks in abandoned segments +----------------------------------------------------------- */ + +static bool mi_segment_visit_page(mi_segment_t* segment, mi_page_t* page, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) +{ + mi_heap_area_t area; + _mi_heap_area_init(&area, page); + if (!visitor(NULL, &area, NULL, area.block_size, arg)) return false; + if (visit_blocks) { + return _mi_heap_area_visit_blocks(&area, page, visitor, arg); + } + else { + return true; + } +} + +static bool mi_segment_visit_pages(mi_segment_t* segment, uint8_t page_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) { + const mi_slice_t* end; + mi_slice_t* slice = mi_slices_start_iterate(segment, &end); + while (slice < end) { + if (mi_slice_is_used(slice)) { + mi_page_t* const page = mi_slice_to_page(slice); + if (page->tag == page_tag) { + if (!mi_segment_visit_page(segment, page, visit_blocks, visitor, arg)) return false; + } + } + slice = slice + slice->slice_count; + } + return true; +} + +// Visit all blocks in abandoned segments +bool _mi_abandoned_pool_visit_blocks(mi_abandoned_pool_t* pool, uint8_t page_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) { + // Note: this is not safe if any other thread is abandoning or claiming segments from the pool + mi_segment_t* segment = mi_tagged_segment_ptr(pool->abandoned); + while (segment != NULL) { + if (!mi_segment_visit_pages(segment, 
page_tag, visit_blocks, visitor, arg)) return false; + segment = segment->abandoned_next; + } + + segment = pool->abandoned_visited; + while (segment != NULL) { + if (!mi_segment_visit_pages(segment, page_tag, visit_blocks, visitor, arg)) return false; + segment = segment->abandoned_next; + } + + return true; +} From 665b8f365efc4e0063f764e20d3cad13635232f3 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Mon, 22 Jan 2024 21:19:16 +0000 Subject: [PATCH 048/160] gh-113655: Revert extra stack reserve in PGO builds unless UseExtraStackReserve=true (GH-114263) --- PCbuild/python.vcxproj | 2 +- PCbuild/pythonw.vcxproj | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/PCbuild/python.vcxproj b/PCbuild/python.vcxproj index 1e5ab877488e4a..4a99ffc677c287 100644 --- a/PCbuild/python.vcxproj +++ b/PCbuild/python.vcxproj @@ -99,7 +99,7 @@ 12000000 12000000 - 3000000 + 3000000 diff --git a/PCbuild/pythonw.vcxproj b/PCbuild/pythonw.vcxproj index d6cf0c97dedb09..d08c210ef8a1dc 100644 --- a/PCbuild/pythonw.vcxproj +++ b/PCbuild/pythonw.vcxproj @@ -94,7 +94,7 @@ 12000000 12000000 - 3000000 + 3000000 From 1d7bddd9612bcbaaedbc837e2936de773e855411 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Mon, 22 Jan 2024 22:40:26 +0100 Subject: [PATCH 049/160] Docs: align usage of versionadded/versionchanged with recommended practice (#114409) Co-authored-by: C.A.M. 
Gerlach Co-authored-by: Ezio Melotti --- Doc/library/argparse.rst | 4 +- Doc/library/asyncio-stream.rst | 4 +- Doc/library/bdb.rst | 4 +- Doc/library/concurrent.futures.rst | 4 +- Doc/library/configparser.rst | 16 ++-- Doc/library/datetime.rst | 20 ++-- Doc/library/difflib.rst | 8 +- Doc/library/email.policy.rst | 1 - Doc/library/functions.rst | 8 +- Doc/library/functools.rst | 6 +- Doc/library/http.client.rst | 7 +- Doc/library/http.server.rst | 22 ++--- Doc/library/logging.config.rst | 4 +- Doc/library/logging.handlers.rst | 4 +- Doc/library/logging.rst | 12 +-- Doc/library/multiprocessing.shared_memory.rst | 4 +- Doc/library/os.rst | 91 ++++++++++--------- Doc/library/pdb.rst | 30 +++--- Doc/library/pickletools.rst | 4 +- Doc/library/shutil.rst | 4 +- Doc/library/subprocess.rst | 18 ++-- Doc/library/unittest.rst | 4 +- Doc/library/urllib.parse.rst | 4 +- Doc/library/venv.rst | 6 +- Doc/library/xml.etree.elementtree.rst | 24 ++--- Doc/library/xml.sax.utils.rst | 4 +- Doc/library/zipapp.rst | 4 +- Doc/library/zipfile.rst | 24 ++--- 28 files changed, 172 insertions(+), 173 deletions(-) diff --git a/Doc/library/argparse.rst b/Doc/library/argparse.rst index fbffa71d200735..1395d457f874b0 100644 --- a/Doc/library/argparse.rst +++ b/Doc/library/argparse.rst @@ -1936,8 +1936,8 @@ FileType objects >>> parser.parse_args(['-']) Namespace(infile=<_io.TextIOWrapper name='' encoding='UTF-8'>) - .. versionadded:: 3.4 - The *encodings* and *errors* keyword arguments. + .. versionchanged:: 3.4 + Added the *encoding* and *errors* parameters. Argument groups diff --git a/Doc/library/asyncio-stream.rst b/Doc/library/asyncio-stream.rst index 0736e783bbc8c8..3427da1b43caef 100644 --- a/Doc/library/asyncio-stream.rst +++ b/Doc/library/asyncio-stream.rst @@ -77,8 +77,8 @@ and work with streams: .. versionchanged:: 3.7 Added the *ssl_handshake_timeout* parameter. - .. versionadded:: 3.8 - Added *happy_eyeballs_delay* and *interleave* parameters. + .. 
versionchanged:: 3.8 + Added the *happy_eyeballs_delay* and *interleave* parameters. .. versionchanged:: 3.10 Removed the *loop* parameter. diff --git a/Doc/library/bdb.rst b/Doc/library/bdb.rst index 4ce5c9bcde38ff..52f0ca7c013482 100644 --- a/Doc/library/bdb.rst +++ b/Doc/library/bdb.rst @@ -132,8 +132,8 @@ The :mod:`bdb` module also defines two classes: frame is considered to originate in a certain module is determined by the ``__name__`` in the frame globals. - .. versionadded:: 3.1 - The *skip* argument. + .. versionchanged:: 3.1 + Added the *skip* parameter. The following methods of :class:`Bdb` normally don't need to be overridden. diff --git a/Doc/library/concurrent.futures.rst b/Doc/library/concurrent.futures.rst index 760e1196d7cf9a..800c7f6739d8a3 100644 --- a/Doc/library/concurrent.futures.rst +++ b/Doc/library/concurrent.futures.rst @@ -171,8 +171,8 @@ And:: should be higher than the number of workers for :class:`ProcessPoolExecutor`. - .. versionadded:: 3.6 - The *thread_name_prefix* argument was added to allow users to + .. versionchanged:: 3.6 + Added the *thread_name_prefix* parameter to allow users to control the :class:`threading.Thread` names for worker threads created by the pool for easier debugging. diff --git a/Doc/library/configparser.rst b/Doc/library/configparser.rst index 0031737853e7b4..18e5bc20f3f690 100644 --- a/Doc/library/configparser.rst +++ b/Doc/library/configparser.rst @@ -1045,14 +1045,14 @@ ConfigParser Objects config.read(['site.cfg', os.path.expanduser('~/.myapp.cfg')], encoding='cp1250') - .. versionadded:: 3.2 - The *encoding* parameter. Previously, all files were read using the - default encoding for :func:`open`. + .. versionchanged:: 3.2 + Added the *encoding* parameter. + Previously, all files were read using the default encoding for :func:`open`. - .. versionadded:: 3.6.1 + .. versionchanged:: 3.6.1 The *filenames* parameter accepts a :term:`path-like object`. - .. versionadded:: 3.7 + .. 
versionchanged:: 3.7 The *filenames* parameter accepts a :class:`bytes` object. @@ -1291,9 +1291,9 @@ Exceptions that is already present or in strict parsers when a section if found more than once in a single input file, string or dictionary. - .. versionadded:: 3.2 - Optional ``source`` and ``lineno`` attributes and arguments to - :meth:`!__init__` were added. + .. versionchanged:: 3.2 + Added the optional *source* and *lineno* attributes and parameters to + :meth:`!__init__`. .. exception:: DuplicateOptionError diff --git a/Doc/library/datetime.rst b/Doc/library/datetime.rst index 3674b4bd97d39d..b36f8c19cd6040 100644 --- a/Doc/library/datetime.rst +++ b/Doc/library/datetime.rst @@ -859,8 +859,8 @@ Constructor: If an argument outside those ranges is given, :exc:`ValueError` is raised. - .. versionadded:: 3.6 - Added the ``fold`` argument. + .. versionchanged:: 3.6 + Added the *fold* parameter. Other constructors, all class methods: @@ -1258,8 +1258,8 @@ Instance methods: :class:`datetime` objects are also supported by generic function :func:`copy.replace`. - .. versionadded:: 3.6 - Added the ``fold`` argument. + .. versionchanged:: 3.6 + Added the *fold* parameter. .. method:: datetime.astimezone(tz=None) @@ -1502,8 +1502,8 @@ Instance methods: >>> dt.isoformat(timespec='microseconds') '2015-01-01T12:30:59.000000' - .. versionadded:: 3.6 - Added the *timespec* argument. + .. versionchanged:: 3.6 + Added the *timespec* parameter. .. method:: datetime.__str__() @@ -1839,8 +1839,8 @@ Instance methods: :class:`time` objects are also supported by generic function :func:`copy.replace`. - .. versionadded:: 3.6 - Added the ``fold`` argument. + .. versionchanged:: 3.6 + Added the *fold* parameter. .. method:: time.isoformat(timespec='auto') @@ -1883,8 +1883,8 @@ Instance methods: >>> dt.isoformat(timespec='auto') '12:34:56' - .. versionadded:: 3.6 - Added the *timespec* argument. + .. versionchanged:: 3.6 + Added the *timespec* parameter. .. 
method:: time.__str__() diff --git a/Doc/library/difflib.rst b/Doc/library/difflib.rst index 9abf19557f989c..d45e46448207a4 100644 --- a/Doc/library/difflib.rst +++ b/Doc/library/difflib.rst @@ -52,8 +52,8 @@ diffs. For comparing directories and files, see also, the :mod:`filecmp` module. the purpose of sequence matching. This heuristic can be turned off by setting the ``autojunk`` argument to ``False`` when creating the :class:`SequenceMatcher`. - .. versionadded:: 3.2 - The *autojunk* parameter. + .. versionchanged:: 3.2 + Added the *autojunk* parameter. .. class:: Differ @@ -383,8 +383,8 @@ The :class:`SequenceMatcher` class has this constructor: The optional argument *autojunk* can be used to disable the automatic junk heuristic. - .. versionadded:: 3.2 - The *autojunk* parameter. + .. versionchanged:: 3.2 + Added the *autojunk* parameter. SequenceMatcher objects get three data attributes: *bjunk* is the set of elements of *b* for which *isjunk* is ``True``; *bpopular* is the set of diff --git a/Doc/library/email.policy.rst b/Doc/library/email.policy.rst index fd47dd0dc5df36..f4777bb2462138 100644 --- a/Doc/library/email.policy.rst +++ b/Doc/library/email.policy.rst @@ -219,7 +219,6 @@ added matters. To illustrate:: Default: :const:`False`. .. versionadded:: 3.5 - The *mangle_from_* parameter. .. attribute:: message_factory diff --git a/Doc/library/functions.rst b/Doc/library/functions.rst index 37234f7a5a2485..27fce5aa0f1a63 100644 --- a/Doc/library/functions.rst +++ b/Doc/library/functions.rst @@ -1073,8 +1073,8 @@ are always available. They are listed here in alphabetical order. such as ``sorted(iterable, key=keyfunc, reverse=True)[0]`` and ``heapq.nlargest(1, iterable, key=keyfunc)``. - .. versionadded:: 3.4 - The *default* keyword-only argument. + .. versionchanged:: 3.4 + Added the *default* keyword-only parameter. .. versionchanged:: 3.8 The *key* can be ``None``. @@ -1111,8 +1111,8 @@ are always available. They are listed here in alphabetical order. 
such as ``sorted(iterable, key=keyfunc)[0]`` and ``heapq.nsmallest(1, iterable, key=keyfunc)``. - .. versionadded:: 3.4 - The *default* keyword-only argument. + .. versionchanged:: 3.4 + Added the *default* keyword-only parameter. .. versionchanged:: 3.8 The *key* can be ``None``. diff --git a/Doc/library/functools.rst b/Doc/library/functools.rst index 6749a5137b446f..20fcbe76c36985 100644 --- a/Doc/library/functools.rst +++ b/Doc/library/functools.rst @@ -194,7 +194,7 @@ The :mod:`functools` module defines the following functions: In contrast, the tuple arguments ``('answer', Decimal(42))`` and ``('answer', Fraction(42))`` are treated as equivalent. - The wrapped function is instrumented with a :func:`cache_parameters` + The wrapped function is instrumented with a :func:`!cache_parameters` function that returns a new :class:`dict` showing the values for *maxsize* and *typed*. This is for information purposes only. Mutating the values has no effect. @@ -275,8 +275,8 @@ The :mod:`functools` module defines the following functions: .. versionchanged:: 3.8 Added the *user_function* option. - .. versionadded:: 3.9 - Added the function :func:`cache_parameters` + .. versionchanged:: 3.9 + Added the function :func:`!cache_parameters` .. decorator:: total_ordering diff --git a/Doc/library/http.client.rst b/Doc/library/http.client.rst index 95b6c1f364bcc5..7e4502064f22a1 100644 --- a/Doc/library/http.client.rst +++ b/Doc/library/http.client.rst @@ -311,7 +311,7 @@ HTTPConnection Objects :class:`str` or bytes-like object that is not also a file as the body representation. - .. versionadded:: 3.2 + .. versionchanged:: 3.2 *body* can now be an iterable. .. versionchanged:: 3.6 @@ -461,9 +461,8 @@ also send your request step by step, by using the four functions below. This is to avoid premature termination of the read of the request by the target server due to malformed encoding. - .. versionadded:: 3.6 - Chunked encoding support. The *encode_chunked* parameter was - added. + .. 
versionchanged:: 3.6 + Added chunked encoding support and the *encode_chunked* parameter. .. method:: HTTPConnection.send(data) diff --git a/Doc/library/http.server.rst b/Doc/library/http.server.rst index 64bddd23f82933..bc59d3d17912fd 100644 --- a/Doc/library/http.server.rst +++ b/Doc/library/http.server.rst @@ -328,8 +328,8 @@ provides three different variants: or the current directory if *directory* is not provided, directly mapping the directory structure to HTTP requests. - .. versionadded:: 3.7 - The *directory* parameter. + .. versionchanged:: 3.7 + Added the *directory* parameter. .. versionchanged:: 3.9 The *directory* parameter accepts a :term:`path-like object`. @@ -438,11 +438,11 @@ to bind to localhost only:: python -m http.server --bind 127.0.0.1 -.. versionadded:: 3.4 - ``--bind`` argument was introduced. +.. versionchanged:: 3.4 + Added the ``--bind`` option. -.. versionadded:: 3.8 - ``--bind`` argument enhanced to support IPv6 +.. versionchanged:: 3.8 + Support IPv6 in the ``--bind`` option. By default, the server uses the current directory. The option ``-d/--directory`` specifies a directory to which it should serve the files. For example, @@ -450,8 +450,8 @@ the following command uses a specific directory:: python -m http.server --directory /tmp/ -.. versionadded:: 3.7 - ``--directory`` argument was introduced. +.. versionchanged:: 3.7 + Added the ``--directory`` option. By default, the server is conformant to HTTP/1.0. The option ``-p/--protocol`` specifies the HTTP version to which the server is conformant. For example, the @@ -459,8 +459,8 @@ following command runs an HTTP/1.1 conformant server:: python -m http.server --protocol HTTP/1.1 -.. versionadded:: 3.11 - ``--protocol`` argument was introduced. +.. versionchanged:: 3.11 + Added the ``--protocol`` option. .. class:: CGIHTTPRequestHandler(request, client_address, server) @@ -537,5 +537,5 @@ default :class:`BaseHTTPRequestHandler` ``.log_message`` implementation. 
This could allow remote clients connecting to your server to send nefarious control codes to your terminal. -.. versionadded:: 3.12 +.. versionchanged:: 3.12 Control characters are scrubbed in stderr logs. diff --git a/Doc/library/logging.config.rst b/Doc/library/logging.config.rst index 85a68cb11ee22c..13850c91446da5 100644 --- a/Doc/library/logging.config.rst +++ b/Doc/library/logging.config.rst @@ -127,8 +127,8 @@ in :mod:`logging` itself) and defining handlers which are declared either in application (e.g. based on command-line parameters or other aspects of the runtime environment) before being passed to ``fileConfig``. - .. versionadded:: 3.10 - The *encoding* parameter is added. + .. versionchanged:: 3.10 + Added the *encoding* parameter. .. versionchanged:: 3.12 An exception will be thrown if the provided file diff --git a/Doc/library/logging.handlers.rst b/Doc/library/logging.handlers.rst index 2dd4bd081b0429..2fe9370333beaf 100644 --- a/Doc/library/logging.handlers.rst +++ b/Doc/library/logging.handlers.rst @@ -871,8 +871,8 @@ supports sending logging messages to an email address via SMTP. A timeout can be specified for communication with the SMTP server using the *timeout* argument. - .. versionadded:: 3.3 - The *timeout* argument was added. + .. versionchanged:: 3.3 + Added the *timeout* parameter. .. method:: emit(record) diff --git a/Doc/library/logging.rst b/Doc/library/logging.rst index acdeb88a546261..4b756d10b4c586 100644 --- a/Doc/library/logging.rst +++ b/Doc/library/logging.rst @@ -615,14 +615,14 @@ Formatter Objects ``logging.Formatter('%(ip)s %(message)s', defaults={"ip": None})`` :type defaults: dict[str, Any] - .. versionadded:: 3.2 - The *style* parameter. + .. versionchanged:: 3.2 + Added the *style* parameter. - .. versionadded:: 3.8 - The *validate* parameter. + .. versionchanged:: 3.8 + Added the *validate* parameter. - .. versionadded:: 3.10 - The *defaults* parameter. + .. versionchanged:: 3.10 + Added the *defaults* parameter. .. 
method:: format(record) diff --git a/Doc/library/multiprocessing.shared_memory.rst b/Doc/library/multiprocessing.shared_memory.rst index 2e3f5be4dd2335..10d7f061fb759b 100644 --- a/Doc/library/multiprocessing.shared_memory.rst +++ b/Doc/library/multiprocessing.shared_memory.rst @@ -88,8 +88,8 @@ copying of data. *track* is ignored on Windows, which has its own tracking and automatically deletes shared memory when all handles to it have been closed. - .. versionadded:: 3.13 - The *track* parameter. + .. versionchanged:: 3.13 + Added the *track* parameter. .. method:: close() diff --git a/Doc/library/os.rst b/Doc/library/os.rst index 637191f2980a05..0008ec6a40c76f 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -1254,8 +1254,8 @@ as internal buffering of data. :meth:`~file.read` and :meth:`~file.write` methods (and many more). To wrap a file descriptor in a file object, use :func:`fdopen`. - .. versionadded:: 3.3 - The *dir_fd* argument. + .. versionchanged:: 3.3 + Added the *dir_fd* parameter. .. versionchanged:: 3.5 If the system call is interrupted and the signal handler does not raise an @@ -1991,7 +1991,7 @@ features: .. audit-event:: os.chdir path os.chdir - .. versionadded:: 3.3 + .. versionchanged:: 3.3 Added support for specifying *path* as a file descriptor on some platforms. @@ -2023,8 +2023,8 @@ features: .. availability:: Unix, not Emscripten, not WASI. - .. versionadded:: 3.3 - The *follow_symlinks* argument. + .. versionchanged:: 3.3 + Added the *follow_symlinks* parameter. .. versionchanged:: 3.6 Accepts a :term:`path-like object`. @@ -2210,8 +2210,8 @@ features: .. versionchanged:: 3.2 Added Windows support. - .. versionadded:: 3.3 - Added the *src_dir_fd*, *dst_dir_fd*, and *follow_symlinks* arguments. + .. versionchanged:: 3.3 + Added the *src_dir_fd*, *dst_dir_fd*, and *follow_symlinks* parameters. .. versionchanged:: 3.6 Accepts a :term:`path-like object` for *src* and *dst*. @@ -2375,8 +2375,8 @@ features: .. 
audit-event:: os.mkdir path,mode,dir_fd os.mkdir - .. versionadded:: 3.3 - The *dir_fd* argument. + .. versionchanged:: 3.3 + Added the *dir_fd* parameter. .. versionchanged:: 3.6 Accepts a :term:`path-like object`. @@ -2409,8 +2409,8 @@ features: .. audit-event:: os.mkdir path,mode,dir_fd os.makedirs - .. versionadded:: 3.2 - The *exist_ok* parameter. + .. versionchanged:: 3.2 + Added the *exist_ok* parameter. .. versionchanged:: 3.4.1 @@ -2443,8 +2443,8 @@ features: .. availability:: Unix, not Emscripten, not WASI. - .. versionadded:: 3.3 - The *dir_fd* argument. + .. versionchanged:: 3.3 + Added the *dir_fd* parameter. .. versionchanged:: 3.6 Accepts a :term:`path-like object`. @@ -2465,8 +2465,8 @@ features: .. availability:: Unix, not Emscripten, not WASI. - .. versionadded:: 3.3 - The *dir_fd* argument. + .. versionchanged:: 3.3 + Added the *dir_fd* parameter. .. versionchanged:: 3.6 Accepts a :term:`path-like object`. @@ -2546,8 +2546,8 @@ features: .. versionchanged:: 3.2 Added support for Windows 6.0 (Vista) symbolic links. - .. versionadded:: 3.3 - The *dir_fd* argument. + .. versionchanged:: 3.3 + Added the *dir_fd* parameter. .. versionchanged:: 3.6 Accepts a :term:`path-like object` on Unix. @@ -2577,8 +2577,8 @@ features: .. audit-event:: os.remove path,dir_fd os.remove - .. versionadded:: 3.3 - The *dir_fd* argument. + .. versionchanged:: 3.3 + Added the *dir_fd* parameter. .. versionchanged:: 3.6 Accepts a :term:`path-like object`. @@ -2628,8 +2628,8 @@ features: .. audit-event:: os.rename src,dst,src_dir_fd,dst_dir_fd os.rename - .. versionadded:: 3.3 - The *src_dir_fd* and *dst_dir_fd* arguments. + .. versionchanged:: 3.3 + Added the *src_dir_fd* and *dst_dir_fd* parameters. .. versionchanged:: 3.6 Accepts a :term:`path-like object` for *src* and *dst*. @@ -2684,8 +2684,8 @@ features: .. audit-event:: os.rmdir path,dir_fd os.rmdir - .. versionadded:: 3.3 - The *dir_fd* parameter. + .. versionchanged:: 3.3 + Added the *dir_fd* parameter. .. 
versionchanged:: 3.6 Accepts a :term:`path-like object`. @@ -2759,7 +2759,7 @@ features: .. versionadded:: 3.5 - .. versionadded:: 3.6 + .. versionchanged:: 3.6 Added support for the :term:`context manager` protocol and the :func:`~scandir.close()` method. If a :func:`scandir` iterator is neither exhausted nor explicitly closed a :exc:`ResourceWarning` will be emitted @@ -2973,9 +2973,9 @@ features: :func:`fstat` and :func:`lstat` functions. - .. versionadded:: 3.3 - Added the *dir_fd* and *follow_symlinks* arguments, specifying a file - descriptor instead of a path. + .. versionchanged:: 3.3 + Added the *dir_fd* and *follow_symlinks* parameters, + specifying a file descriptor instead of a path. .. versionchanged:: 3.6 Accepts a :term:`path-like object`. @@ -3056,16 +3056,22 @@ features: Time of most recent access expressed in nanoseconds as an integer. + .. versionadded:: 3.3 + .. attribute:: st_mtime_ns Time of most recent content modification expressed in nanoseconds as an integer. + .. versionadded:: 3.3 + .. attribute:: st_ctime_ns Time of most recent metadata change expressed in nanoseconds as an integer. + .. versionadded:: 3.3 + .. versionchanged:: 3.12 ``st_ctime_ns`` is deprecated on Windows. Use ``st_birthtime_ns`` for the file creation time. In the future, ``st_ctime`` will contain @@ -3166,6 +3172,8 @@ features: See the :const:`!FILE_ATTRIBUTE_* ` constants in the :mod:`stat` module. + .. versionadded:: 3.5 + .. attribute:: st_reparse_tag When :attr:`st_file_attributes` has the :const:`~stat.FILE_ATTRIBUTE_REPARSE_POINT` @@ -3186,13 +3194,6 @@ features: some implementations. For compatibility with older Python versions, accessing :class:`stat_result` as a tuple always returns integers. - .. versionadded:: 3.3 - Added the :attr:`st_atime_ns`, :attr:`st_mtime_ns`, and - :attr:`st_ctime_ns` members. - - .. versionadded:: 3.5 - Added the :attr:`st_file_attributes` member on Windows. - .. 
versionchanged:: 3.5 Windows now returns the file index as :attr:`st_ino` when available. @@ -3257,7 +3258,7 @@ features: .. versionchanged:: 3.2 The :const:`ST_RDONLY` and :const:`ST_NOSUID` constants were added. - .. versionadded:: 3.3 + .. versionchanged:: 3.3 Added support for specifying *path* as an open file descriptor. .. versionchanged:: 3.4 @@ -3269,8 +3270,8 @@ features: .. versionchanged:: 3.6 Accepts a :term:`path-like object`. - .. versionadded:: 3.7 - Added :attr:`f_fsid`. + .. versionchanged:: 3.7 + Added the :attr:`f_fsid` attribute. .. data:: supports_dir_fd @@ -3394,8 +3395,8 @@ features: .. versionchanged:: 3.2 Added support for Windows 6.0 (Vista) symbolic links. - .. versionadded:: 3.3 - Added the *dir_fd* argument, and now allow *target_is_directory* + .. versionchanged:: 3.3 + Added the *dir_fd* parameter, and now allow *target_is_directory* on non-Windows platforms. .. versionchanged:: 3.6 @@ -3443,8 +3444,8 @@ features: .. audit-event:: os.remove path,dir_fd os.unlink - .. versionadded:: 3.3 - The *dir_fd* parameter. + .. versionchanged:: 3.3 + Added the *dir_fd* parameter. .. versionchanged:: 3.6 Accepts a :term:`path-like object`. @@ -3482,7 +3483,7 @@ features: .. audit-event:: os.utime path,times,ns,dir_fd os.utime - .. versionadded:: 3.3 + .. versionchanged:: 3.3 Added support for specifying *path* as an open file descriptor, and the *dir_fd*, *follow_symlinks*, and *ns* parameters. @@ -4217,7 +4218,7 @@ to be ignored. .. availability:: Unix, Windows, not Emscripten, not WASI. - .. versionadded:: 3.3 + .. versionchanged:: 3.3 Added support for specifying *path* as an open file descriptor for :func:`execve`. @@ -4471,8 +4472,8 @@ written in Python, such as a mail server's external command delivery program. .. availability:: Unix, Windows, not Emscripten, not WASI. - .. versionadded:: 3.2 - Windows support. + .. versionchanged:: 3.2 + Added Windows support. .. 
function:: killpg(pgid, sig, /) diff --git a/Doc/library/pdb.rst b/Doc/library/pdb.rst index 2495dcf50bb17f..cb17acfb367619 100644 --- a/Doc/library/pdb.rst +++ b/Doc/library/pdb.rst @@ -48,7 +48,7 @@ at the location you want to break into the debugger, and then run the program. You can then step through the code following this statement, and continue running without the debugger using the :pdbcmd:`continue` command. -.. versionadded:: 3.7 +.. versionchanged:: 3.7 The built-in :func:`breakpoint()`, when called with defaults, can be used instead of ``import pdb; pdb.set_trace()``. @@ -86,12 +86,12 @@ after normal exit of the program), pdb will restart the program. Automatic restarting preserves pdb's state (such as breakpoints) and in most cases is more useful than quitting the debugger upon program's exit. -.. versionadded:: 3.2 - ``-c`` option is introduced to execute commands as if given - in a :file:`.pdbrc` file, see :ref:`debugger-commands`. +.. versionchanged:: 3.2 + Added the ``-c`` option to execute commands as if given + in a :file:`.pdbrc` file; see :ref:`debugger-commands`. -.. versionadded:: 3.7 - ``-m`` option is introduced to execute modules similar to the way +.. versionchanged:: 3.7 + Added the ``-m`` option to execute modules similar to the way ``python -m`` does. As with a script, the debugger will pause execution just before the first line of the module. @@ -209,12 +209,12 @@ access further features, you have to do this yourself: .. audit-event:: pdb.Pdb "" pdb.Pdb - .. versionadded:: 3.1 - The *skip* argument. + .. versionchanged:: 3.1 + Added the *skip* parameter. - .. versionadded:: 3.2 - The *nosigint* argument. Previously, a SIGINT handler was never set by - Pdb. + .. versionchanged:: 3.2 + Added the *nosigint* parameter. + Previously, a SIGINT handler was never set by Pdb. .. versionchanged:: 3.6 The *readrc* argument. @@ -467,8 +467,8 @@ can be overridden by the local file. 
raised or propagated is indicated by ``>>``, if it differs from the current line. - .. versionadded:: 3.2 - The ``>>`` marker. + .. versionchanged:: 3.2 + Added the ``>>`` marker. .. pdbcommand:: ll | longlist @@ -583,8 +583,8 @@ can be overridden by the local file. .. versionadded:: 3.2 - .. versionadded:: 3.13 - ``exit()`` and ``quit()`` can be used to exit :pdbcmd:`interact` + .. versionchanged:: 3.13 + ``exit()`` and ``quit()`` can be used to exit the :pdbcmd:`interact` command. .. versionchanged:: 3.13 diff --git a/Doc/library/pickletools.rst b/Doc/library/pickletools.rst index 41930f8cbe8412..9739207a224431 100644 --- a/Doc/library/pickletools.rst +++ b/Doc/library/pickletools.rst @@ -94,8 +94,8 @@ Programmatic Interface a short description. The value of *annotate* is used as a hint for the column where annotation should start. - .. versionadded:: 3.2 - The *annotate* argument. + .. versionchanged:: 3.2 + Added the *annotate* parameter. .. function:: genops(pickle) diff --git a/Doc/library/shutil.rst b/Doc/library/shutil.rst index f61ef8b0ecc7ba..d9ec2cbc47e611 100644 --- a/Doc/library/shutil.rst +++ b/Doc/library/shutil.rst @@ -289,8 +289,8 @@ Directory and files operations copy the file more efficiently. See :ref:`shutil-platform-dependent-efficient-copy-operations` section. - .. versionadded:: 3.8 - The *dirs_exist_ok* parameter. + .. versionchanged:: 3.8 + Added the *dirs_exist_ok* parameter. .. function:: rmtree(path, ignore_errors=False, onerror=None, *, onexc=None, dir_fd=None) diff --git a/Doc/library/subprocess.rst b/Doc/library/subprocess.rst index 91e9fcf0263d8d..c437ce770b37d0 100644 --- a/Doc/library/subprocess.rst +++ b/Doc/library/subprocess.rst @@ -308,10 +308,10 @@ default values. The arguments that are most commonly needed are: If text mode is not used, *stdin*, *stdout* and *stderr* will be opened as binary streams. No encoding or line ending conversion is performed. - .. versionadded:: 3.6 - Added *encoding* and *errors* parameters. + .. 
versionchanged:: 3.6 + Added the *encoding* and *errors* parameters. - .. versionadded:: 3.7 + .. versionchanged:: 3.7 Added the *text* parameter as an alias for *universal_newlines*. .. note:: @@ -684,8 +684,8 @@ functions. is only changed on platforms that support this (only Linux at this time of writing). Other platforms will ignore this parameter. - .. versionadded:: 3.10 - The ``pipesize`` parameter was added. + .. versionchanged:: 3.10 + Added the *pipesize* parameter. Popen objects are supported as context managers via the :keyword:`with` statement: on exit, standard file descriptors are closed, and the process is waited for. @@ -1538,8 +1538,8 @@ handling consistency are valid for these functions. as it did in Python 3.3.3 and earlier. exitcode has the same value as :attr:`~Popen.returncode`. - .. versionadded:: 3.11 - Added *encoding* and *errors* arguments. + .. versionchanged:: 3.11 + Added the *encoding* and *errors* parameters. .. function:: getoutput(cmd, *, encoding=None, errors=None) @@ -1556,8 +1556,8 @@ handling consistency are valid for these functions. .. versionchanged:: 3.3.4 Windows support added - .. versionadded:: 3.11 - Added *encoding* and *errors* arguments. + .. versionchanged:: 3.11 + Added the *encoding* and *errors* parameters. Notes diff --git a/Doc/library/unittest.rst b/Doc/library/unittest.rst index 491009769f5aa6..e6140ac70eb87a 100644 --- a/Doc/library/unittest.rst +++ b/Doc/library/unittest.rst @@ -2196,8 +2196,8 @@ Loading and running tests .. versionadded:: 3.2 - .. versionadded:: 3.12 - Added *durations* keyword argument. + .. versionchanged:: 3.12 + Added the *durations* keyword parameter. .. data:: defaultTestLoader diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst index 53e5f0395715d7..3c898c3e826304 100644 --- a/Doc/library/urllib.parse.rst +++ b/Doc/library/urllib.parse.rst @@ -729,8 +729,8 @@ task isn't already covered by the URL parsing functions above. .. 
versionchanged:: 3.2 *query* supports bytes and string objects. - .. versionadded:: 3.5 - *quote_via* parameter. + .. versionchanged:: 3.5 + Added the *quote_via* parameter. .. seealso:: diff --git a/Doc/library/venv.rst b/Doc/library/venv.rst index da8942c554dea1..aa18873f223a6b 100644 --- a/Doc/library/venv.rst +++ b/Doc/library/venv.rst @@ -201,13 +201,13 @@ creation according to their needs, the :class:`EnvBuilder` class. .. versionchanged:: 3.4 Added the ``with_pip`` parameter - .. versionadded:: 3.6 + .. versionchanged:: 3.6 Added the ``prompt`` parameter - .. versionadded:: 3.9 + .. versionchanged:: 3.9 Added the ``upgrade_deps`` parameter - .. versionadded:: 3.13 + .. versionchanged:: 3.13 Added the ``scm_ignore_files`` parameter Creators of third-party virtual environment tools will be free to use the diff --git a/Doc/library/xml.etree.elementtree.rst b/Doc/library/xml.etree.elementtree.rst index 57cfbb8d92244b..fe92400fb08dfd 100644 --- a/Doc/library/xml.etree.elementtree.rst +++ b/Doc/library/xml.etree.elementtree.rst @@ -705,11 +705,11 @@ Functions meaning as in :meth:`ElementTree.write`. Returns an (optionally) encoded string containing the XML data. - .. versionadded:: 3.4 - The *short_empty_elements* parameter. + .. versionchanged:: 3.4 + Added the *short_empty_elements* parameter. - .. versionadded:: 3.8 - The *xml_declaration* and *default_namespace* parameters. + .. versionchanged:: 3.8 + Added the *xml_declaration* and *default_namespace* parameters. .. versionchanged:: 3.8 The :func:`tostring` function now preserves the attribute order @@ -732,11 +732,11 @@ Functions .. versionadded:: 3.2 - .. versionadded:: 3.4 - The *short_empty_elements* parameter. + .. versionchanged:: 3.4 + Added the *short_empty_elements* parameter. - .. versionadded:: 3.8 - The *xml_declaration* and *default_namespace* parameters. + .. versionchanged:: 3.8 + Added the *xml_declaration* and *default_namespace* parameters. .. 
versionchanged:: 3.8 The :func:`tostringlist` function now preserves the attribute order @@ -858,8 +858,8 @@ Functions this is a Unicode string. If the loader fails, it can return None or raise an exception. - .. versionadded:: 3.9 - The *base_url* and *max_depth* parameters. + .. versionchanged:: 3.9 + Added the *base_url* and *max_depth* parameters. .. _elementtree-element-objects: @@ -1189,8 +1189,8 @@ ElementTree Objects :term:`file object`; make sure you do not try to write a string to a binary stream and vice versa. - .. versionadded:: 3.4 - The *short_empty_elements* parameter. + .. versionchanged:: 3.4 + Added the *short_empty_elements* parameter. .. versionchanged:: 3.8 The :meth:`write` method now preserves the attribute order specified diff --git a/Doc/library/xml.sax.utils.rst b/Doc/library/xml.sax.utils.rst index e57e76dcac7820..3a524c9c0d5a9f 100644 --- a/Doc/library/xml.sax.utils.rst +++ b/Doc/library/xml.sax.utils.rst @@ -71,8 +71,8 @@ or as base classes. content: if ``False`` (the default) they are emitted as a pair of start/end tags, if set to ``True`` they are emitted as a single self-closed tag. - .. versionadded:: 3.2 - The *short_empty_elements* parameter. + .. versionchanged:: 3.2 + Added the *short_empty_elements* parameter. .. class:: XMLFilterBase(base) diff --git a/Doc/library/zipapp.rst b/Doc/library/zipapp.rst index 104afca23a20b4..c8a059bdb1cb93 100644 --- a/Doc/library/zipapp.rst +++ b/Doc/library/zipapp.rst @@ -171,8 +171,8 @@ The module defines two convenience functions: passed to the ``zipfile.ZipFile`` class, and must supply the methods needed by that class. - .. versionadded:: 3.7 - Added the *filter* and *compressed* arguments. + .. versionchanged:: 3.7 + Added the *filter* and *compressed* parameters. .. 
function:: get_interpreter(archive) diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst index c70f2ec561de8f..b6f881fd2dfd70 100644 --- a/Doc/library/zipfile.rst +++ b/Doc/library/zipfile.rst @@ -79,9 +79,9 @@ The module defines the following items: of the last modification to the file; the fields are described in section :ref:`zipinfo-objects`. - .. versionadded:: 3.13 - A public ``.compress_level`` attribute has been added to expose the - formerly protected ``._compresslevel``. The older protected name + .. versionchanged:: 3.13 + A public :attr:`!compress_level` attribute has been added to expose the + formerly protected :attr:`!_compresslevel`. The older protected name continues to work as a property for backwards compatibility. .. function:: is_zipfile(filename) @@ -218,7 +218,7 @@ ZipFile Objects That flag takes precedence over *metadata_encoding*, which is a Python-specific extension. - .. versionadded:: 3.2 + .. versionchanged:: 3.2 Added the ability to use :class:`ZipFile` as a context manager. .. versionchanged:: 3.3 @@ -241,8 +241,8 @@ ZipFile Objects .. versionchanged:: 3.7 Add the *compresslevel* parameter. - .. versionadded:: 3.8 - The *strict_timestamps* keyword-only argument + .. versionchanged:: 3.8 + Added the *strict_timestamps* keyword-only parameter. .. versionchanged:: 3.11 Added support for specifying member name encoding for reading @@ -648,8 +648,8 @@ The :class:`PyZipFile` constructor takes the same parameters as the .. class:: PyZipFile(file, mode='r', compression=ZIP_STORED, allowZip64=True, \ optimize=-1) - .. versionadded:: 3.2 - The *optimize* parameter. + .. versionchanged:: 3.2 + Added the *optimize* parameter. .. versionchanged:: 3.4 ZIP64 extensions are enabled by default. @@ -704,8 +704,8 @@ The :class:`PyZipFile` constructor takes the same parameters as the test/bogus/__init__.pyc # Subpackage directory test/bogus/myfile.pyc # Submodule test.bogus.myfile - .. versionadded:: 3.4 - The *filterfunc* parameter. + ..
versionchanged:: 3.4 + Added the *filterfunc* parameter. .. versionchanged:: 3.6.2 The *pathname* parameter accepts a :term:`path-like object`. @@ -749,8 +749,8 @@ file: .. versionchanged:: 3.6.2 The *filename* parameter accepts a :term:`path-like object`. - .. versionadded:: 3.8 - The *strict_timestamps* keyword-only argument + .. versionchanged:: 3.8 + Added the *strict_timestamps* keyword-only parameter. Instances have the following methods and attributes: From 57255236957573e73033d2d345028f5a676533a0 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" Date: Mon, 22 Jan 2024 15:55:12 -0800 Subject: [PATCH 050/160] Add me to codeowners for hashlib & multiprocessing (#114454) I already effectively own these. (multiprocessing reluctantly, but I've spent enough time in the code of late, it is important, and am championing some changes, so I may as well be looped in there). --- .github/CODEOWNERS | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 8038206441ab9b..9587b3996a9ac2 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -50,13 +50,13 @@ Objects/exceptions.c @iritkatriel Python/traceback.c @iritkatriel # Hashing -**/*hashlib* @tiran -**/*pyhash* @tiran -**/*sha* @tiran -**/*md5* @tiran -**/*blake* @tiran -/Modules/_blake2/** @tiran -/Modules/_sha3/** @tiran +**/*hashlib* @gpshead @tiran +**/*pyhash* @gpshead @tiran +**/sha* @gpshead @tiran +Modules/md5* @gpshead @tiran +**/*blake* @gpshead @tiran +Modules/_blake2/** @gpshead @tiran +Modules/_hacl/** @gpshead # logging **/*logging* @vsajip @@ -120,6 +120,9 @@ Lib/ast.py @isidentical /Lib/unittest/mock.py @cjw296 /Lib/test/test_unittest/testmock/* @cjw296 +# multiprocessing +**/*multiprocessing* @gpshead + # SQLite 3 **/*sqlite* @berkerpeksag @erlend-aasland From 647b6cc7f16c03535cede7e1748a58ab884135b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A9ry=20Ogam?= Date: Tue, 23 Jan 2024 01:00:26 +0100 Subject: [PATCH 051/160]
Docs: minor amendments to runpy.rst (#18416) - Add missing single quote in inline code - Align parameter formatting with style guide recommendations - Fix punctuation around parenthesised sentence --- Doc/library/runpy.rst | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/Doc/library/runpy.rst b/Doc/library/runpy.rst index 406b080b7be30f..f2cb595f495f6b 100644 --- a/Doc/library/runpy.rst +++ b/Doc/library/runpy.rst @@ -32,7 +32,7 @@ The :mod:`runpy` module provides two functions: .. index:: pair: module; __main__ - Execute the code of the specified module and return the resulting module + Execute the code of the specified module and return the resulting module's globals dictionary. The module's code is first located using the standard import mechanism (refer to :pep:`302` for details) and then executed in a fresh module namespace. @@ -44,16 +44,16 @@ The :mod:`runpy` module provides two functions: returned. The optional dictionary argument *init_globals* may be used to pre-populate - the module's globals dictionary before the code is executed. The supplied - dictionary will not be modified. If any of the special global variables - below are defined in the supplied dictionary, those definitions are + the module's globals dictionary before the code is executed. + *init_globals* will not be modified. If any of the special global variables + below are defined in *init_globals*, those definitions are overridden by :func:`run_module`. The special global variables ``__name__``, ``__spec__``, ``__file__``, ``__cached__``, ``__loader__`` and ``__package__`` are set in the globals - dictionary before the module code is executed (Note that this is a + dictionary before the module code is executed. (Note that this is a minimal set of variables - other variables may be set implicitly as an - interpreter implementation detail). + interpreter implementation detail.) 
``__name__`` is set to *run_name* if this optional argument is not :const:`None`, to ``mod_name + '.__main__'`` if the named module is a @@ -61,7 +61,7 @@ The :mod:`runpy` module provides two functions: ``__spec__`` will be set appropriately for the *actually* imported module (that is, ``__spec__.name`` will always be *mod_name* or - ``mod_name + '.__main__``, never *run_name*). + ``mod_name + '.__main__'``, never *run_name*). ``__file__``, ``__cached__``, ``__loader__`` and ``__package__`` are :ref:`set as normal ` based on the module spec. @@ -104,11 +104,11 @@ The :mod:`runpy` module provides two functions: pair: module; __main__ Execute the code at the named filesystem location and return the resulting - module globals dictionary. As with a script name supplied to the CPython - command line, the supplied path may refer to a Python source file, a + module's globals dictionary. As with a script name supplied to the CPython + command line, *file_path* may refer to a Python source file, a compiled bytecode file or a valid :data:`sys.path` entry containing a :mod:`__main__` module - (e.g. a zipfile containing a top-level ``__main__.py`` file). + (e.g. a zipfile containing a top-level :file:`__main__.py` file). For a simple script, the specified code is simply executed in a fresh module namespace. For a valid :data:`sys.path` entry (typically a zipfile or @@ -119,26 +119,26 @@ The :mod:`runpy` module provides two functions: there is no such module at the specified location. The optional dictionary argument *init_globals* may be used to pre-populate - the module's globals dictionary before the code is executed. The supplied - dictionary will not be modified. If any of the special global variables - below are defined in the supplied dictionary, those definitions are + the module's globals dictionary before the code is executed. + *init_globals* will not be modified. 
If any of the special global variables + below are defined in *init_globals*, those definitions are overridden by :func:`run_path`. The special global variables ``__name__``, ``__spec__``, ``__file__``, ``__cached__``, ``__loader__`` and ``__package__`` are set in the globals - dictionary before the module code is executed (Note that this is a + dictionary before the module code is executed. (Note that this is a minimal set of variables - other variables may be set implicitly as an - interpreter implementation detail). + interpreter implementation detail.) ``__name__`` is set to *run_name* if this optional argument is not :const:`None` and to ``''`` otherwise. - If the supplied path directly references a script file (whether as source - or as precompiled byte code), then ``__file__`` will be set to the - supplied path, and ``__spec__``, ``__cached__``, ``__loader__`` and + If *file_path* directly references a script file (whether as source + or as precompiled byte code), then ``__file__`` will be set to + *file_path*, and ``__spec__``, ``__cached__``, ``__loader__`` and ``__package__`` will all be set to :const:`None`. - If the supplied path is a reference to a valid :data:`sys.path` entry, then + If *file_path* is a reference to a valid :data:`sys.path` entry, then ``__spec__`` will be set appropriately for the imported :mod:`__main__` module (that is, ``__spec__.name`` will always be ``__main__``). ``__file__``, ``__cached__``, ``__loader__`` and ``__package__`` will be @@ -146,7 +146,7 @@ The :mod:`runpy` module provides two functions: A number of alterations are also made to the :mod:`sys` module. Firstly, :data:`sys.path` may be altered as described above. ``sys.argv[0]`` is updated - with the value of ``path_name`` and ``sys.modules[__name__]`` is updated + with the value of *file_path* and ``sys.modules[__name__]`` is updated with a temporary module object for the module being executed. 
All modifications to items in :mod:`sys` are reverted before the function returns. From 9af9ac153acb4198878ad81ef438aca2b808e45d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Skytt=C3=A4?= Date: Tue, 23 Jan 2024 03:00:53 +0200 Subject: [PATCH 052/160] gh-66944: Note that the `contextlib.closing` example is for illustrative purposes (#112198) Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- Doc/library/contextlib.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Doc/library/contextlib.rst b/Doc/library/contextlib.rst index aab319cbe7405e..b73373bc2363fb 100644 --- a/Doc/library/contextlib.rst +++ b/Doc/library/contextlib.rst @@ -182,6 +182,14 @@ Functions and classes provided: without needing to explicitly close ``page``. Even if an error occurs, ``page.close()`` will be called when the :keyword:`with` block is exited. + .. note:: + + Most types managing resources support the :term:`context manager` protocol, + which closes *thing* on leaving the :keyword:`with` statement. + As such, :func:`!closing` is most useful for third party types that don't + support context managers. + This example is purely for illustration purposes, + as :func:`~urllib.request.urlopen` would normally be used in a context manager. .. function:: aclosing(thing) From 3a61d24062aaa1e13ba794360b6c765d9a1f2b06 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Tue, 23 Jan 2024 01:06:44 +0000 Subject: [PATCH 053/160] GH-99334: Explain that `PurePath.is_relative_to()` is purely lexical. (#114031) --- Doc/library/pathlib.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index b924f470e0be04..faff3bc5823cb4 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -515,6 +515,13 @@ Pure paths provide the following methods and properties: >>> p.is_relative_to('/usr') False + This method is string-based; it neither accesses the filesystem nor treats + "``..``" segments specially.
The following code is equivalent: + + >>> u = PurePath('/usr') + >>> u == p or u in p.parents + False + .. versionadded:: 3.9 .. deprecated-removed:: 3.12 3.14 From 32c227470aa6f72950b76206ffc529c258b4b8fa Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Tue, 23 Jan 2024 02:31:09 +0000 Subject: [PATCH 054/160] GH-82695: Clarify `pathlib.Path.mkdir()` documentation (#114032) Remove a double negative in the documentation of `mkdir()`'s *exist_ok* parameter. Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> --- Doc/library/pathlib.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index faff3bc5823cb4..a6d99d4a64f801 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1306,9 +1306,9 @@ call fails (for example because the path doesn't exist). If *exist_ok* is false (the default), :exc:`FileExistsError` is raised if the target directory already exists. - If *exist_ok* is true, :exc:`FileExistsError` exceptions will be - ignored (same behavior as the POSIX ``mkdir -p`` command), but only if the - last path component is not an existing non-directory file. + If *exist_ok* is true, :exc:`FileExistsError` will not be raised unless the given + path already exists in the file system and is not a directory (same + behavior as the POSIX ``mkdir -p`` command). .. versionchanged:: 3.5 The *exist_ok* parameter was added. From b822b85ac11e73bbe4417bf03ee770ab116bb42d Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Tue, 23 Jan 2024 05:30:16 +0000 Subject: [PATCH 055/160] GH-105900: Fix `pathlib.Path.symlink_to(target_is_directory=...)` docs (#114035) Clarify that *target_is_directory* only matters if the target doesn't exist. 
--- Doc/library/pathlib.rst | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index a6d99d4a64f801..fcbc0bf489b344 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1539,9 +1539,13 @@ call fails (for example because the path doesn't exist). .. method:: Path.symlink_to(target, target_is_directory=False) - Make this path a symbolic link to *target*. Under Windows, - *target_is_directory* must be true (default ``False``) if the link's target - is a directory. Under POSIX, *target_is_directory*'s value is ignored. + Make this path a symbolic link pointing to *target*. + + On Windows, a symlink represents either a file or a directory, and does not + morph to the target dynamically. If the target is present, the type of the + symlink will be created to match. Otherwise, the symlink will be created + as a directory if *target_is_directory* is ``True`` or a file symlink (the + default) otherwise. On non-Windows platforms, *target_is_directory* is ignored. :: From 8edc8029def8040ebe1caf75d815439156dd2124 Mon Sep 17 00:00:00 2001 From: Jim Porter <826865+jimporter@users.noreply.github.com> Date: Tue, 23 Jan 2024 00:53:04 -0800 Subject: [PATCH 056/160] gh-89427: Provide the original prompt value for VIRTUAL_ENV_PROMPT (GH-106726) This improves the implementation in gh-106643. Previously, venv passed "() " to the activation scripts, but we want to provide the original value so that users can inspect it in the $VIRTUAL_ENV_PROMPT env var. Note: Lib/venv/scripts/common/Activate.ps1 surrounded the prompt value with parens a second time, so no change was necessary in that file. 
--- Lib/test/test_venv.py | 8 ++++---- Lib/venv/__init__.py | 3 +-- Lib/venv/scripts/common/activate | 2 +- Lib/venv/scripts/nt/activate.bat | 2 +- Lib/venv/scripts/posix/activate.csh | 2 +- Lib/venv/scripts/posix/activate.fish | 2 +- 6 files changed, 9 insertions(+), 10 deletions(-) diff --git a/Lib/test/test_venv.py b/Lib/test/test_venv.py index 6852625c36c62b..6dda00efd7bbb6 100644 --- a/Lib/test/test_venv.py +++ b/Lib/test/test_venv.py @@ -169,7 +169,7 @@ def test_config_file_command_key(self): ('--clear', 'clear', True), ('--upgrade', 'upgrade', True), ('--upgrade-deps', 'upgrade_deps', True), - ('--prompt', 'prompt', True), + ('--prompt="foobar"', 'prompt', 'foobar'), ('--without-scm-ignore-files', 'scm_ignore_files', frozenset()), ] for opt, attr, value in options: @@ -201,7 +201,7 @@ def test_prompt(self): self.run_with_capture(builder.create, self.env_dir) context = builder.ensure_directories(self.env_dir) data = self.get_text_file_contents('pyvenv.cfg') - self.assertEqual(context.prompt, '(%s) ' % env_name) + self.assertEqual(context.prompt, env_name) self.assertNotIn("prompt = ", data) rmtree(self.env_dir) @@ -209,7 +209,7 @@ def test_prompt(self): self.run_with_capture(builder.create, self.env_dir) context = builder.ensure_directories(self.env_dir) data = self.get_text_file_contents('pyvenv.cfg') - self.assertEqual(context.prompt, '(My prompt) ') + self.assertEqual(context.prompt, 'My prompt') self.assertIn("prompt = 'My prompt'\n", data) rmtree(self.env_dir) @@ -218,7 +218,7 @@ def test_prompt(self): self.run_with_capture(builder.create, self.env_dir) context = builder.ensure_directories(self.env_dir) data = self.get_text_file_contents('pyvenv.cfg') - self.assertEqual(context.prompt, '(%s) ' % cwd) + self.assertEqual(context.prompt, cwd) self.assertIn("prompt = '%s'\n" % cwd, data) def test_upgrade_dependencies(self): diff --git a/Lib/venv/__init__.py b/Lib/venv/__init__.py index f04ca8fafcc33b..4856594755ae57 100644 --- a/Lib/venv/__init__.py +++ 
b/Lib/venv/__init__.py @@ -129,8 +129,7 @@ def create_if_needed(d): context = types.SimpleNamespace() context.env_dir = env_dir context.env_name = os.path.split(env_dir)[1] - prompt = self.prompt if self.prompt is not None else context.env_name - context.prompt = '(%s) ' % prompt + context.prompt = self.prompt if self.prompt is not None else context.env_name create_if_needed(env_dir) executable = sys._base_executable if not executable: # see gh-96861 diff --git a/Lib/venv/scripts/common/activate b/Lib/venv/scripts/common/activate index a4e0609045a9d5..cbd4873f012246 100644 --- a/Lib/venv/scripts/common/activate +++ b/Lib/venv/scripts/common/activate @@ -66,7 +66,7 @@ fi if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then _OLD_VIRTUAL_PS1="${PS1:-}" - PS1="__VENV_PROMPT__${PS1:-}" + PS1="(__VENV_PROMPT__) ${PS1:-}" export PS1 fi diff --git a/Lib/venv/scripts/nt/activate.bat b/Lib/venv/scripts/nt/activate.bat index c1c3c82ee37f10..2c98122362a060 100644 --- a/Lib/venv/scripts/nt/activate.bat +++ b/Lib/venv/scripts/nt/activate.bat @@ -16,7 +16,7 @@ if defined _OLD_VIRTUAL_PROMPT set PROMPT=%_OLD_VIRTUAL_PROMPT% if defined _OLD_VIRTUAL_PYTHONHOME set PYTHONHOME=%_OLD_VIRTUAL_PYTHONHOME% set _OLD_VIRTUAL_PROMPT=%PROMPT% -set PROMPT=__VENV_PROMPT__%PROMPT% +set PROMPT=(__VENV_PROMPT__) %PROMPT% if defined PYTHONHOME set _OLD_VIRTUAL_PYTHONHOME=%PYTHONHOME% set PYTHONHOME= diff --git a/Lib/venv/scripts/posix/activate.csh b/Lib/venv/scripts/posix/activate.csh index 9caf138a919a86..c707f1988b0acc 100644 --- a/Lib/venv/scripts/posix/activate.csh +++ b/Lib/venv/scripts/posix/activate.csh @@ -19,7 +19,7 @@ setenv VIRTUAL_ENV_PROMPT "__VENV_PROMPT__" set _OLD_VIRTUAL_PROMPT="$prompt" if (! 
"$?VIRTUAL_ENV_DISABLE_PROMPT") then - set prompt = "__VENV_PROMPT__$prompt" + set prompt = "(__VENV_PROMPT__) $prompt" endif alias pydoc python -m pydoc diff --git a/Lib/venv/scripts/posix/activate.fish b/Lib/venv/scripts/posix/activate.fish index 565df23d1e2a13..25c42756789bbc 100644 --- a/Lib/venv/scripts/posix/activate.fish +++ b/Lib/venv/scripts/posix/activate.fish @@ -57,7 +57,7 @@ if test -z "$VIRTUAL_ENV_DISABLE_PROMPT" set -l old_status $status # Output the venv prompt; color taken from the blue of the Python logo. - printf "%s%s%s" (set_color 4B8BBE) "__VENV_PROMPT__" (set_color normal) + printf "%s(%s)%s " (set_color 4B8BBE) "__VENV_PROMPT__" (set_color normal) # Restore the return status of the previous command. echo "exit $old_status" | . From e14930ff6397439759eb34ca70a3493baa845014 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Tue, 23 Jan 2024 11:07:56 +0100 Subject: [PATCH 057/160] gh-113317: Don't use global clinic instance in bad_argument() (#114330) Make it possible for a converter to have multiple includes, by collecting them in a list on the converter instance. This implies converter includes are added during template generation, so we have to add them to the clinic instance at the end of the template generation instead of in the beginning. 
--- Tools/clinic/clinic.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/Tools/clinic/clinic.py b/Tools/clinic/clinic.py index c247bd075321cd..770878a3f8d2c7 100755 --- a/Tools/clinic/clinic.py +++ b/Tools/clinic/clinic.py @@ -818,12 +818,6 @@ def output_templates( del parameters[0] converters = [p.converter for p in parameters] - # Copy includes from parameters to Clinic - for converter in converters: - include = converter.include - if include: - clinic.add_include(include.filename, include.reason, - condition=include.condition) if f.critical_section: clinic.add_include('pycore_critical_section.h', 'Py_BEGIN_CRITICAL_SECTION()') has_option_groups = parameters and (parameters[0].group or parameters[-1].group) @@ -1367,6 +1361,13 @@ def parser_body( declarations=declarations) + # Copy includes from parameters to Clinic after parse_arg() has been + # called above. + for converter in converters: + for include in converter.includes: + clinic.add_include(include.filename, include.reason, + condition=include.condition) + if new_or_init: methoddef_define = '' @@ -2988,7 +2989,6 @@ class CConverter(metaclass=CConverterAutoRegister): # Only set by self_converter. signature_name: str | None = None - include: Include | None = None broken_limited_capi: bool = False # keep in sync with self_converter.__init__! 
@@ -3008,6 +3008,7 @@ def __init__(self, self.name = ensure_legal_c_identifier(name) self.py_name = py_name self.unused = unused + self.includes: list[Include] = [] if default is not unspecified: if (self.default_type @@ -3263,8 +3264,7 @@ def bad_argument(self, displayname: str, expected: str, *, limited_capi: bool, e else: if expected_literal: expected = f'"{expected}"' - if clinic is not None: - clinic.add_include('pycore_modsupport.h', '_PyArg_BadArgument()') + self.add_include('pycore_modsupport.h', '_PyArg_BadArgument()') return f'_PyArg_BadArgument("{{{{name}}}}", "{displayname}", {expected}, {{argname}});' def format_code(self, fmt: str, *, @@ -3336,9 +3336,8 @@ def parser_name(self) -> str: def add_include(self, name: str, reason: str, *, condition: str | None = None) -> None: - if self.include is not None: - raise ValueError("a converter only supports a single include") - self.include = Include(name, reason, condition) + include = Include(name, reason, condition) + self.includes.append(include) type_checks = { '&PyLong_Type': ('PyLong_Check', 'int'), From 7d21cae964fc47afda400fc1fbbcf7984fcfe819 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Tue, 23 Jan 2024 14:05:27 +0300 Subject: [PATCH 058/160] gh-101100: Fix sphinx warnings in `Doc/library/locale.rst` (#114425) * gh-101100: Fix sphinx warnings in `Doc/library/locale.rst` * Remove `/` from signatures --- Doc/library/locale.rst | 61 +++++++++++++++++++++++++++++++++--------- Doc/tools/.nitignore | 1 - 2 files changed, 48 insertions(+), 14 deletions(-) diff --git a/Doc/library/locale.rst b/Doc/library/locale.rst index a7201199191215..414979524e57b6 100644 --- a/Doc/library/locale.rst +++ b/Doc/library/locale.rst @@ -18,7 +18,7 @@ know all the specifics of each country where the software is executed. .. 
index:: pair: module; _locale -The :mod:`locale` module is implemented on top of the :mod:`_locale` module, +The :mod:`locale` module is implemented on top of the :mod:`!_locale` module, which in turn uses an ANSI C locale implementation if available. The :mod:`locale` module defines the following exception and functions: @@ -192,7 +192,13 @@ The :mod:`locale` module defines the following exception and functions: Get a format string for :func:`time.strftime` to represent time in the am/pm format. - .. data:: DAY_1 ... DAY_7 + .. data:: DAY_1 + DAY_2 + DAY_3 + DAY_4 + DAY_5 + DAY_6 + DAY_7 Get the name of the n-th day of the week. @@ -202,15 +208,43 @@ The :mod:`locale` module defines the following exception and functions: international convention (ISO 8601) that Monday is the first day of the week. - .. data:: ABDAY_1 ... ABDAY_7 + .. data:: ABDAY_1 + ABDAY_2 + ABDAY_3 + ABDAY_4 + ABDAY_5 + ABDAY_6 + ABDAY_7 Get the abbreviated name of the n-th day of the week. - .. data:: MON_1 ... MON_12 + .. data:: MON_1 + MON_2 + MON_3 + MON_4 + MON_5 + MON_6 + MON_7 + MON_8 + MON_9 + MON_10 + MON_11 + MON_12 Get the name of the n-th month. - .. data:: ABMON_1 ... ABMON_12 + .. data:: ABMON_1 + ABMON_2 + ABMON_3 + ABMON_4 + ABMON_5 + ABMON_6 + ABMON_7 + ABMON_8 + ABMON_9 + ABMON_10 + ABMON_11 + ABMON_12 Get the abbreviated name of the n-th month. @@ -229,14 +263,14 @@ The :mod:`locale` module defines the following exception and functions: .. data:: NOEXPR - Get a regular expression that can be used with the regex(3) function to + Get a regular expression that can be used with the ``regex(3)`` function to recognize a negative response to a yes/no question. .. note:: The regular expressions for :const:`YESEXPR` and :const:`NOEXPR` use syntax suitable for the - :c:func:`regex` function from the C library, which might + ``regex`` function from the C library, which might differ from the syntax used in :mod:`re`. .. data:: CRNCYSTR @@ -581,9 +615,9 @@ the locale is ``C``). 
When Python code uses the :mod:`locale` module to change the locale, this also affects the embedding application. If the embedding application doesn't want -this to happen, it should remove the :mod:`_locale` extension module (which does +this to happen, it should remove the :mod:`!_locale` extension module (which does all the work) from the table of built-in modules in the :file:`config.c` file, -and make sure that the :mod:`_locale` module is not accessible as a shared +and make sure that the :mod:`!_locale` module is not accessible as a shared library. @@ -597,17 +631,18 @@ Access to message catalogs .. function:: dcgettext(domain, msg, category) .. function:: textdomain(domain) .. function:: bindtextdomain(domain, dir) +.. function:: bind_textdomain_codeset(domain, codeset) The locale module exposes the C library's gettext interface on systems that -provide this interface. It consists of the functions :func:`!gettext`, -:func:`!dgettext`, :func:`!dcgettext`, :func:`!textdomain`, :func:`!bindtextdomain`, -and :func:`!bind_textdomain_codeset`. These are similar to the same functions in +provide this interface. It consists of the functions :func:`gettext`, +:func:`dgettext`, :func:`dcgettext`, :func:`textdomain`, :func:`bindtextdomain`, +and :func:`bind_textdomain_codeset`. These are similar to the same functions in the :mod:`gettext` module, but use the C library's binary format for message catalogs, and the C library's search algorithms for locating message catalogs. Python applications should normally find no need to invoke these functions, and should use :mod:`gettext` instead. A known exception to this rule are applications that link with additional C libraries which internally invoke -:c:func:`gettext` or :c:func:`dcgettext`. For these applications, it may be +C functions ``gettext`` or ``dcgettext``. For these applications, it may be necessary to bind the text domain, so that the libraries can properly locate their message catalogs. 
diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index bebd92bffad46a..1b24f145c2f13b 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -50,7 +50,6 @@ Doc/library/functools.rst Doc/library/http.cookiejar.rst Doc/library/http.server.rst Doc/library/importlib.rst -Doc/library/locale.rst Doc/library/logging.config.rst Doc/library/logging.handlers.rst Doc/library/lzma.rst From 5a1ecc8cc7d3dfedd14adea1c3cdc3cfeb79f0e1 Mon Sep 17 00:00:00 2001 From: Fabio Zadrozny <117621+fabioz@users.noreply.github.com> Date: Tue, 23 Jan 2024 09:12:50 -0300 Subject: [PATCH 059/160] gh-114423: Remove DummyThread from threading._active when thread dies (GH-114424) --- Lib/test/test_threading.py | 55 +++++++++++++------ Lib/threading.py | 48 ++++++++++++---- ...-01-22-11-43-38.gh-issue-114423.6mMoPH.rst | 1 + 3 files changed, 76 insertions(+), 28 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-01-22-11-43-38.gh-issue-114423.6mMoPH.rst diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index 7160c53d691ba2..dbdc46fff1e313 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -227,8 +227,6 @@ def f(): tid = _thread.start_new_thread(f, ()) done.wait() self.assertEqual(ident[0], tid) - # Kill the "immortal" _DummyThread - del threading._active[ident[0]] # run with a small(ish) thread stack size (256 KiB) def test_various_ops_small_stack(self): @@ -256,11 +254,29 @@ def test_various_ops_large_stack(self): def test_foreign_thread(self): # Check that a "foreign" thread can use the threading module. + dummy_thread = None + error = None def f(mutex): - # Calling current_thread() forces an entry for the foreign - # thread to get made in the threading._active map. - threading.current_thread() - mutex.release() + try: + nonlocal dummy_thread + nonlocal error + # Calling current_thread() forces an entry for the foreign + # thread to get made in the threading._active map. 
+ dummy_thread = threading.current_thread() + tid = dummy_thread.ident + self.assertIn(tid, threading._active) + self.assertIsInstance(dummy_thread, threading._DummyThread) + self.assertIs(threading._active.get(tid), dummy_thread) + # gh-29376 + self.assertTrue( + dummy_thread.is_alive(), + 'Expected _DummyThread to be considered alive.' + ) + self.assertIn('_DummyThread', repr(dummy_thread)) + except BaseException as e: + error = e + finally: + mutex.release() mutex = threading.Lock() mutex.acquire() @@ -268,20 +284,25 @@ def f(mutex): tid = _thread.start_new_thread(f, (mutex,)) # Wait for the thread to finish. mutex.acquire() - self.assertIn(tid, threading._active) - self.assertIsInstance(threading._active[tid], threading._DummyThread) - #Issue 29376 - self.assertTrue(threading._active[tid].is_alive()) - self.assertRegex(repr(threading._active[tid]), '_DummyThread') - + if error is not None: + raise error + self.assertEqual(tid, dummy_thread.ident) # Issue gh-106236: with self.assertRaises(RuntimeError): - threading._active[tid].join() - threading._active[tid]._started.clear() + dummy_thread.join() + dummy_thread._started.clear() with self.assertRaises(RuntimeError): - threading._active[tid].is_alive() - - del threading._active[tid] + dummy_thread.is_alive() + # Busy wait for the following condition: after the thread dies, the + # related dummy thread must be removed from threading._active. + timeout = 5 + timeout_at = time.monotonic() + timeout + while time.monotonic() < timeout_at: + if threading._active.get(dummy_thread.ident) is not dummy_thread: + break + time.sleep(.1) + else: + self.fail('It was expected that the created threading._DummyThread was removed from threading._active.') # PyThreadState_SetAsyncExc() is a CPython-only gimmick, not (currently) # exposed at the Python level. This test relies on ctypes to get at it. 
diff --git a/Lib/threading.py b/Lib/threading.py index c561800a128059..ecf799bc26ab06 100644 --- a/Lib/threading.py +++ b/Lib/threading.py @@ -54,6 +54,13 @@ TIMEOUT_MAX = _thread.TIMEOUT_MAX del _thread +# get thread-local implementation, either from the thread +# module, or from the python fallback + +try: + from _thread import _local as local +except ImportError: + from _threading_local import local # Support for profile and trace hooks @@ -1476,10 +1483,36 @@ def __init__(self): _active[self._ident] = self +# Helper thread-local instance to detect when a _DummyThread +# is collected. Not a part of the public API. +_thread_local_info = local() + + +class _DeleteDummyThreadOnDel: + ''' + Helper class to remove a dummy thread from threading._active on __del__. + ''' + + def __init__(self, dummy_thread): + self._dummy_thread = dummy_thread + self._tident = dummy_thread.ident + # Put the thread on a thread local variable so that when + # the related thread finishes this instance is collected. + # + # Note: no other references to this instance may be created. + # If any client code creates a reference to this instance, + # the related _DummyThread will be kept forever! + _thread_local_info._track_dummy_thread_ref = self + + def __del__(self): + with _active_limbo_lock: + if _active.get(self._tident) is self._dummy_thread: + _active.pop(self._tident, None) + + # Dummy thread class to represent threads not started here. -# These aren't garbage collected when they die, nor can they be waited for. -# If they invoke anything in threading.py that calls current_thread(), they -# leave an entry in the _active dict forever after. +# These should be added to `_active` and removed automatically +# when they die, although they can't be waited for. # Their purpose is to return *something* from current_thread(). # They are marked as daemon threads so we won't wait for them # when we exit (conform previous semantics). 
@@ -1495,6 +1528,7 @@ def __init__(self): self._set_native_id() with _active_limbo_lock: _active[self._ident] = self + _DeleteDummyThreadOnDel(self) def _stop(self): pass @@ -1676,14 +1710,6 @@ def main_thread(): # XXX Figure this out for subinterpreters. (See gh-75698.) return _main_thread -# get thread-local implementation, either from the thread -# module, or from the python fallback - -try: - from _thread import _local as local -except ImportError: - from _threading_local import local - def _after_fork(): """ diff --git a/Misc/NEWS.d/next/Library/2024-01-22-11-43-38.gh-issue-114423.6mMoPH.rst b/Misc/NEWS.d/next/Library/2024-01-22-11-43-38.gh-issue-114423.6mMoPH.rst new file mode 100644 index 00000000000000..7b77b73295d948 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-22-11-43-38.gh-issue-114423.6mMoPH.rst @@ -0,0 +1 @@ +``_DummyThread`` entries in ``threading._active`` are now automatically removed when the related thread dies. From 5277d4c7dbd1baee300e494fce2738cee218c243 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Tue, 23 Jan 2024 14:53:35 +0100 Subject: [PATCH 060/160] Docs: mark up FTP.retrbinary and FTP.storbinary with param lists (#114399) Co-authored-by: Ezio Melotti --- Doc/library/ftplib.rst | 62 +++++++++++++++++++++++++++++++----------- Doc/tools/.nitignore | 1 - 2 files changed, 46 insertions(+), 17 deletions(-) diff --git a/Doc/library/ftplib.rst b/Doc/library/ftplib.rst index 6ff97008c3015b..c7251db50a9371 100644 --- a/Doc/library/ftplib.rst +++ b/Doc/library/ftplib.rst @@ -201,14 +201,27 @@ FTP objects .. method:: FTP.retrbinary(cmd, callback, blocksize=8192, rest=None) - Retrieve a file in binary transfer mode. *cmd* should be an appropriate - ``RETR`` command: ``'RETR filename'``. The *callback* function is called for - each block of data received, with a single bytes argument giving the data - block. 
The optional *blocksize* argument specifies the maximum chunk size to - read on the low-level socket object created to do the actual transfer (which - will also be the largest size of the data blocks passed to *callback*). A - reasonable default is chosen. *rest* means the same thing as in the - :meth:`transfercmd` method. + Retrieve a file in binary transfer mode. + + :param str cmd: + An appropriate ``RETR`` command: :samp:`"RETR {filename}"`. + + :param callback: + A single parameter callable that is called + for each block of data received, + with its single argument being the data as :class:`bytes`. + :type callback: :term:`callable` + + :param int blocksize: + The maximum chunk size to read on the low-level + :class:`~socket.socket` object created to do the actual transfer. + This also corresponds to the largest size of data + that will be passed to *callback*. + Defaults to ``8192``. + + :param int rest: + A ``REST`` command to be sent to the server. + See the documentation for the *rest* parameter of the :meth:`transfercmd` method. .. method:: FTP.retrlines(cmd, callback=None) @@ -232,16 +245,33 @@ FTP objects .. method:: FTP.storbinary(cmd, fp, blocksize=8192, callback=None, rest=None) - Store a file in binary transfer mode. *cmd* should be an appropriate - ``STOR`` command: ``"STOR filename"``. *fp* is a :term:`file object` - (opened in binary mode) which is read until EOF using its :meth:`~io.IOBase.read` - method in blocks of size *blocksize* to provide the data to be stored. - The *blocksize* argument defaults to 8192. *callback* is an optional single - parameter callable that is called on each block of data after it is sent. - *rest* means the same thing as in the :meth:`transfercmd` method. + Store a file in binary transfer mode. + + :param str cmd: + An appropriate ``STOR`` command: :samp:`"STOR {filename}"`.
+ + :param fp: + A file object (opened in binary mode) which is read until EOF, + using its :meth:`~io.RawIOBase.read` method in blocks of size *blocksize* + to provide the data to be stored. + :type fp: :term:`file object` + + :param int blocksize: + The read block size. + Defaults to ``8192``. + + :param callback: + A single parameter callable that is called + for each block of data sent, + with its single argument being the data as :class:`bytes`. + :type callback: :term:`callable` + + :param int rest: + A ``REST`` command to be sent to the server. + See the documentation for the *rest* parameter of the :meth:`transfercmd` method. .. versionchanged:: 3.2 - *rest* parameter added. + The *rest* parameter was added. .. method:: FTP.storlines(cmd, fp, callback=None) diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 1b24f145c2f13b..221a1f05c11e49 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -45,7 +45,6 @@ Doc/library/enum.rst Doc/library/exceptions.rst Doc/library/faulthandler.rst Doc/library/fcntl.rst -Doc/library/ftplib.rst Doc/library/functools.rst Doc/library/http.cookiejar.rst Doc/library/http.server.rst From 01105c7c4f7f01a8b1077008e61d5c7df0ab832b Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Tue, 23 Jan 2024 14:57:23 +0100 Subject: [PATCH 061/160] Docs: mark up FTP.connect() and FTP.login() with param lists (#114395) Use rst substitutions to reduce raw text duplication. Co-authored-by: Ezio Melotti Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Doc/library/ftplib.rst | 103 ++++++++++++++++++++++++++++------------- 1 file changed, 70 insertions(+), 33 deletions(-) diff --git a/Doc/library/ftplib.rst b/Doc/library/ftplib.rst index c7251db50a9371..e93a1e85598e3a 100644 --- a/Doc/library/ftplib.rst +++ b/Doc/library/ftplib.rst @@ -55,38 +55,56 @@ Reference FTP objects ^^^^^^^^^^^ +.. Use substitutions for some param docs so we don't need to repeat them + in multiple places. + +.. 
|param_doc_user| replace:: + The username to log in with (default: ``'anonymous'``). + +.. |param_doc_passwd| replace:: + The password to use when logging in. + If not given, and if *passwd* is the empty string or ``"-"``, + a password will be automatically generated. + +.. Ideally, we'd like to use the :rfc: directive, but Sphinx will not allow it. + +.. |param_doc_acct| replace:: + Account information to be used for the ``ACCT`` FTP command. + Few systems implement this. + See `RFC-959 <https://datatracker.ietf.org/doc/html/rfc959.html>`__ + for more details. + +.. |param_doc_source_address| replace:: + A 2-tuple ``(host, port)`` for the socket to bind to as its + source address before connecting. + .. class:: FTP(host='', user='', passwd='', acct='', timeout=None, \ source_address=None, *, encoding='utf-8') Return a new instance of the :class:`FTP` class. - When *host* is given, the method call :meth:`connect(host) ` - is made by the constructor. - When *user* is given, additionally the method call - :meth:`login(user, passwd, acct) ` is made. :param str host: The hostname to connect to. + If given, :code:`connect(host)` is implicitly called by the constructor. :param str user: - The username to log in with. - If empty string, ``"anonymous"`` is used. + |param_doc_user| + If given, :code:`login(user, passwd, acct)` is implicitly called + by the constructor. :param str passwd: - The password to use when logging in. - If not given, and if *passwd* is the empty string or ``"-"``, - a password will be automatically generated. + |param_doc_passwd| :param str acct: - Account information; see the ACCT FTP command. + |param_doc_acct| :param timeout: - A timeout in seconds for blocking operations like :meth:`connect`. - If not specified, the global default timeout setting will be used. + A timeout in seconds for blocking operations like :meth:`connect` + (default: the global default timeout setting).
:type timeout: int | None :param source_address: - *source_address* is a 2-tuple ``(host, port)`` for the socket - to bind to as its source address before connecting. + |param_doc_source_address| :type source_address: tuple | None :param str encoding: @@ -140,17 +158,29 @@ FTP objects .. method:: FTP.connect(host='', port=0, timeout=None, source_address=None) - Connect to the given host and port. The default port number is ``21``, as - specified by the FTP protocol specification. It is rarely needed to specify a - different port number. This function should be called only once for each - instance; it should not be called at all if a host was given when the instance - was created. All other methods can only be used after a connection has been - made. - The optional *timeout* parameter specifies a timeout in seconds for the - connection attempt. If no *timeout* is passed, the global default timeout - setting will be used. - *source_address* is a 2-tuple ``(host, port)`` for the socket to bind to as - its source address before connecting. + Connect to the given host and port. + This function should be called only once for each instance; + it should not be called if a *host* argument was given + when the :class:`FTP` instance was created. + All other :class:`!FTP` methods can only be called + after a connection has successfully been made. + + :param str host: + The host to connect to. + + :param int port: + The TCP port to connect to (default: ``21``, + as specified by the FTP protocol specification). + It is rarely needed to specify a different port number. + + :param timeout: + A timeout in seconds for the connection attempt + (default: the global default timeout setting). + :type timeout: int | None + + :param source_address: + |param_doc_source_address| + :type source_address: tuple | None .. audit-event:: ftplib.connect self,host,port ftplib.FTP.connect @@ -167,14 +197,21 @@ FTP objects .. 
method:: FTP.login(user='anonymous', passwd='', acct='') - Log in as the given *user*. The *passwd* and *acct* parameters are optional and - default to the empty string. If no *user* is specified, it defaults to - ``'anonymous'``. If *user* is ``'anonymous'``, the default *passwd* is - ``'anonymous@'``. This function should be called only once for each instance, - after a connection has been established; it should not be called at all if a - host and user were given when the instance was created. Most FTP commands are - only allowed after the client has logged in. The *acct* parameter supplies - "accounting information"; few systems implement this. + Log on to the connected FTP server. + This function should be called only once for each instance, + after a connection has been established; + it should not be called if the *host* and *user* arguments were given + when the :class:`FTP` instance was created. + Most FTP commands are only allowed after the client has logged in. + + :param str user: + |param_doc_user| + + :param str passwd: + |param_doc_passwd| + + :param str acct: + |param_doc_acct| .. 
method:: FTP.abort() From ba253a4794ae2d35a6f6df46a98a1ed38bd61268 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Tue, 23 Jan 2024 18:33:12 +0300 Subject: [PATCH 062/160] gh-108303: Move `.whl` test files to `Lib/test/wheeldata/` (#114343) --- Lib/test/support/__init__.py | 4 +++- .../setuptools-67.6.1-py3-none-any.whl | Bin .../{ => wheeldata}/wheel-0.40.0-py3-none-any.whl | Bin Makefile.pre.in | 1 + 4 files changed, 4 insertions(+), 1 deletion(-) rename Lib/test/{ => wheeldata}/setuptools-67.6.1-py3-none-any.whl (100%) rename Lib/test/{ => wheeldata}/wheel-0.40.0-py3-none-any.whl (100%) diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 8344dd1849c61d..f2e6af078a5f29 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -2193,7 +2193,9 @@ def _findwheel(pkgname): If set, the wheels are searched for in WHEEL_PKG_DIR (see ensurepip). Otherwise, they are searched for in the test directory. """ - wheel_dir = sysconfig.get_config_var('WHEEL_PKG_DIR') or TEST_HOME_DIR + wheel_dir = sysconfig.get_config_var('WHEEL_PKG_DIR') or os.path.join( + TEST_HOME_DIR, 'wheeldata', + ) filenames = os.listdir(wheel_dir) filenames = sorted(filenames, reverse=True) # approximate "newest" first for filename in filenames: diff --git a/Lib/test/setuptools-67.6.1-py3-none-any.whl b/Lib/test/wheeldata/setuptools-67.6.1-py3-none-any.whl similarity index 100% rename from Lib/test/setuptools-67.6.1-py3-none-any.whl rename to Lib/test/wheeldata/setuptools-67.6.1-py3-none-any.whl diff --git a/Lib/test/wheel-0.40.0-py3-none-any.whl b/Lib/test/wheeldata/wheel-0.40.0-py3-none-any.whl similarity index 100% rename from Lib/test/wheel-0.40.0-py3-none-any.whl rename to Lib/test/wheeldata/wheel-0.40.0-py3-none-any.whl diff --git a/Makefile.pre.in b/Makefile.pre.in index 1107259b5ae1ca..21b122ae0fcd9f 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -2318,6 +2318,7 @@ TESTSUBDIRS= idlelib/idle_test \ test/tokenizedata \ test/tracedmodules \ 
test/typinganndata \ + test/wheeldata \ test/xmltestdata \ test/xmltestdata/c14n-20 From 5f1997896d9c3ecf92e9863177c452b468a6a2c8 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 23 Jan 2024 13:05:15 -0500 Subject: [PATCH 063/160] gh-112984: Fix link error on free-threaded Windows build (GH-114455) The test_peg_generator test tried to link the python313_d.lib library, which failed because the library is now named python313t_d.lib. The underlying problem is that the "compiler" attribute was not set when we call get_libraries() from distutils. --- Tools/peg_generator/pegen/build.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Tools/peg_generator/pegen/build.py b/Tools/peg_generator/pegen/build.py index 00295c984d1bb6..67a7c0c4788e9d 100644 --- a/Tools/peg_generator/pegen/build.py +++ b/Tools/peg_generator/pegen/build.py @@ -220,6 +220,9 @@ def compile_c_extension( ) else: objects = compiler.object_filenames(extension.sources, output_dir=cmd.build_temp) + # The cmd.get_libraries() call needs a valid compiler attribute or we will + # get an incorrect library name on the free-threaded Windows build. + cmd.compiler = compiler # Now link the object files together into a "shared object" compiler.link_shared_object( objects, From 441affc9e7f419ef0b68f734505fa2f79fe653c7 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 23 Jan 2024 13:08:23 -0500 Subject: [PATCH 064/160] gh-111964: Implement stop-the-world pauses (gh-112471) The `--disable-gil` builds occasionally need to pause all but one thread. 
Some examples include: * Cyclic garbage collection, where this is often called a "stop the world event" * Before calling `fork()`, to ensure a consistent state for internal data structures * During interpreter shutdown, to ensure that daemon threads aren't accessing Python objects This adds the following functions to implement global and per-interpreter pauses: * `_PyEval_StopTheWorldAll()` and `_PyEval_StartTheWorldAll()` (for the global runtime) * `_PyEval_StopTheWorld()` and `_PyEval_StartTheWorld()` (per-interpreter) (The function names may change.) These functions are no-ops outside of the `--disable-gil` build. --- Include/cpython/pystate.h | 2 +- Include/internal/pycore_ceval.h | 1 + Include/internal/pycore_interp.h | 17 ++ Include/internal/pycore_llist.h | 3 +- Include/internal/pycore_pystate.h | 51 +++-- Include/internal/pycore_runtime.h | 7 + Include/internal/pycore_runtime_init.h | 3 + Include/pymacro.h | 3 + Python/ceval_gil.c | 9 + Python/pystate.c | 269 +++++++++++++++++++++++-- 10 files changed, 336 insertions(+), 29 deletions(-) diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index 10913943c1140d..60b056bdcc2f1f 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -102,7 +102,7 @@ struct _ts { #endif int _whence; - /* Thread state (_Py_THREAD_ATTACHED, _Py_THREAD_DETACHED, _Py_THREAD_GC). + /* Thread state (_Py_THREAD_ATTACHED, _Py_THREAD_DETACHED, _Py_THREAD_SUSPENDED). See Include/internal/pycore_pystate.h for more details. 
*/ int state; diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h index a357bfa3a26064..a66af1389541dd 100644 --- a/Include/internal/pycore_ceval.h +++ b/Include/internal/pycore_ceval.h @@ -205,6 +205,7 @@ void _PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame) #define _PY_CALLS_TO_DO_BIT 2 #define _PY_ASYNC_EXCEPTION_BIT 3 #define _PY_GC_SCHEDULED_BIT 4 +#define _PY_EVAL_PLEASE_STOP_BIT 5 /* Reserve a few bits for future use */ #define _PY_EVAL_EVENTS_BITS 8 diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 922c84543a1393..f953b8426e180a 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -41,6 +41,22 @@ struct _Py_long_state { int max_str_digits; }; +// Support for stop-the-world events. This exists in both the PyRuntime struct +// for global pauses and in each PyInterpreterState for per-interpreter pauses. +struct _stoptheworld_state { + PyMutex mutex; // Serializes stop-the-world attempts. + + // NOTE: The below fields are protected by HEAD_LOCK(runtime), not by the + // above mutex. + bool requested; // Set when a pause is requested. + bool world_stopped; // Set when the world is stopped. + bool is_global; // Set when contained in PyRuntime struct. + + PyEvent stop_event; // Set when thread_countdown reaches zero. + Py_ssize_t thread_countdown; // Number of threads that must pause. + + PyThreadState *requester; // Thread that requested the pause (may be NULL). 
+}; /* cross-interpreter data registry */ @@ -166,6 +182,7 @@ struct _is { struct _warnings_runtime_state warnings; struct atexit_state atexit; + struct _stoptheworld_state stoptheworld; #if defined(Py_GIL_DISABLED) struct _mimalloc_interp_state mimalloc; diff --git a/Include/internal/pycore_llist.h b/Include/internal/pycore_llist.h index 5fd261da05fa5d..f629902fda9ff1 100644 --- a/Include/internal/pycore_llist.h +++ b/Include/internal/pycore_llist.h @@ -37,8 +37,7 @@ struct llist_node { }; // Get the struct containing a node. -#define llist_data(node, type, member) \ - (type*)((char*)node - offsetof(type, member)) +#define llist_data(node, type, member) (_Py_CONTAINER_OF(node, type, member)) // Iterate over a list. #define llist_for_each(node, head) \ diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index 348c5c634284b0..289ef28f0dd9a9 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -21,23 +21,27 @@ extern "C" { // interpreter at the same time. Only the "bound" thread may perform the // transitions between "attached" and "detached" on its own PyThreadState. // -// The "gc" state is used to implement stop-the-world pauses, such as for -// cyclic garbage collection. It is only used in `--disable-gil` builds. It is -// similar to the "detached" state, but only the thread performing a -// stop-the-world pause may transition threads between the "detached" and "gc" -// states. A thread trying to "attach" from the "gc" state will block until -// it is transitioned back to "detached" when the stop-the-world pause is -// complete. +// The "suspended" state is used to implement stop-the-world pauses, such as +// for cyclic garbage collection. It is only used in `--disable-gil` builds. +// The "suspended" state is similar to the "detached" state in that in both +// states the thread is not allowed to call most Python APIs. 
However, unlike +// the "detached" state, a thread may not transition itself out from the +// "suspended" state. Only the thread performing a stop-the-world pause may +// transition a thread from the "suspended" state back to the "detached" state. // // State transition diagram: // // (bound thread) (stop-the-world thread) -// [attached] <-> [detached] <-> [gc] +// [attached] <-> [detached] <-> [suspended] +// | ^ +// +---------------------------->---------------------------+ +// (bound thread) // -// See `_PyThreadState_Attach()` and `_PyThreadState_Detach()`. +// The (bound thread) and (stop-the-world thread) labels indicate which thread +// is allowed to perform the transition. #define _Py_THREAD_DETACHED 0 #define _Py_THREAD_ATTACHED 1 -#define _Py_THREAD_GC 2 +#define _Py_THREAD_SUSPENDED 2 /* Check if the current thread is the main thread. @@ -140,13 +144,36 @@ _PyThreadState_GET(void) // // High-level code should generally call PyEval_RestoreThread() instead, which // calls this function. -void _PyThreadState_Attach(PyThreadState *tstate); +extern void _PyThreadState_Attach(PyThreadState *tstate); // Detaches the current thread from the interpreter. // // High-level code should generally call PyEval_SaveThread() instead, which // calls this function. -void _PyThreadState_Detach(PyThreadState *tstate); +extern void _PyThreadState_Detach(PyThreadState *tstate); + +// Detaches the current thread to the "suspended" state if a stop-the-world +// pause is in progress. +// +// If there is no stop-the-world pause in progress, then the thread switches +// to the "detached" state. +extern void _PyThreadState_Suspend(PyThreadState *tstate); + +// Perform a stop-the-world pause for all threads in all interpreters. +// +// Threads in the "attached" state are paused and moved to the "suspended" +// state. Threads in the "detached" state also become "suspended", which +// prevents them from reattaching until the stop-the-world pause is complete.
+// +// NOTE: This is a no-op outside of Py_GIL_DISABLED builds. +extern void _PyEval_StopTheWorldAll(_PyRuntimeState *runtime); +extern void _PyEval_StartTheWorldAll(_PyRuntimeState *runtime); + +// Perform a stop-the-world pause for threads in the specified interpreter. +// +// NOTE: This is a no-op outside of Py_GIL_DISABLED builds. +extern void _PyEval_StopTheWorld(PyInterpreterState *interp); +extern void _PyEval_StartTheWorld(PyInterpreterState *interp); static inline void diff --git a/Include/internal/pycore_runtime.h b/Include/internal/pycore_runtime.h index e3348296ea61b7..02ab22b967b38f 100644 --- a/Include/internal/pycore_runtime.h +++ b/Include/internal/pycore_runtime.h @@ -227,6 +227,13 @@ typedef struct pyruntimestate { struct _faulthandler_runtime_state faulthandler; struct _tracemalloc_runtime_state tracemalloc; + // The rwmutex is used to prevent overlapping global and per-interpreter + // stop-the-world events. Global stop-the-world events lock the mutex + // exclusively (as a "writer"), while per-interpreter stop-the-world events + // lock it non-exclusively (as "readers"). 
+ _PyRWMutex stoptheworld_mutex; + struct _stoptheworld_state stoptheworld; + PyPreConfig preconfig; // Audit values must be preserved when Py_Initialize()/Py_Finalize() diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index 5f47d60de37825..b4806ab09fd145 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -116,6 +116,9 @@ extern PyTypeObject _PyExc_MemoryError; }, \ .faulthandler = _faulthandler_runtime_state_INIT, \ .tracemalloc = _tracemalloc_runtime_state_INIT, \ + .stoptheworld = { \ + .is_global = 1, \ + }, \ .float_state = { \ .float_format = _py_float_format_unknown, \ .double_format = _py_float_format_unknown, \ diff --git a/Include/pymacro.h b/Include/pymacro.h index 9d264fe6eea1d4..cd6fc4eba9c2ed 100644 --- a/Include/pymacro.h +++ b/Include/pymacro.h @@ -160,6 +160,9 @@ Py_FatalError("Unreachable C code path reached") #endif +#define _Py_CONTAINER_OF(ptr, type, member) \ + (type*)((char*)ptr - offsetof(type, member)) + // Prevent using an expression as a l-value. // For example, "int x; _Py_RVALUE(x) = 1;" fails with a compiler error. #define _Py_RVALUE(EXPR) ((void)0, (EXPR)) diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index d70abbc27606b4..f3b169241535f3 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -949,6 +949,15 @@ _Py_HandlePending(PyThreadState *tstate) { PyInterpreterState *interp = tstate->interp; + /* Stop-the-world */ + if (_Py_eval_breaker_bit_is_set(interp, _PY_EVAL_PLEASE_STOP_BIT)) { + _Py_set_eval_breaker_bit(interp, _PY_EVAL_PLEASE_STOP_BIT, 0); + _PyThreadState_Suspend(tstate); + + /* The attach blocks until the stop-the-world event is complete. 
*/ + _PyThreadState_Attach(tstate); + } + /* Pending signals */ if (_Py_eval_breaker_bit_is_set(interp, _PY_SIGNALS_PENDING_BIT)) { if (handle_signals(tstate) != 0) { diff --git a/Python/pystate.c b/Python/pystate.c index 999976283da675..23ddc781434ac8 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -1336,6 +1336,11 @@ init_threadstate(_PyThreadStateImpl *_tstate, tstate->datastack_limit = NULL; tstate->what_event = -1; + if (interp->stoptheworld.requested || _PyRuntime.stoptheworld.requested) { + // Start in the suspended state if there is an ongoing stop-the-world. + tstate->state = _Py_THREAD_SUSPENDED; + } + tstate->_status.initialized = 1; } @@ -1562,6 +1567,9 @@ PyThreadState_Clear(PyThreadState *tstate) // XXX Do it as early in the function as possible. } +static void +decrement_stoptheworld_countdown(struct _stoptheworld_state *stw); + /* Common code for PyThreadState_Delete() and PyThreadState_DeleteCurrent() */ static void tstate_delete_common(PyThreadState *tstate) @@ -1585,6 +1593,16 @@ tstate_delete_common(PyThreadState *tstate) if (tstate->next) { tstate->next->prev = tstate->prev; } + if (tstate->state != _Py_THREAD_SUSPENDED) { + // Any ongoing stop-the-world request should not wait for us because + // our thread is getting deleted. 
+ if (interp->stoptheworld.requested) { + decrement_stoptheworld_countdown(&interp->stoptheworld); + } + if (runtime->stoptheworld.requested) { + decrement_stoptheworld_countdown(&runtime->stoptheworld); + } + } HEAD_UNLOCK(runtime); // XXX Unbind in PyThreadState_Clear(), or earlier @@ -1790,13 +1808,9 @@ tstate_try_attach(PyThreadState *tstate) { #ifdef Py_GIL_DISABLED int expected = _Py_THREAD_DETACHED; - if (_Py_atomic_compare_exchange_int( - &tstate->state, - &expected, - _Py_THREAD_ATTACHED)) { - return 1; - } - return 0; + return _Py_atomic_compare_exchange_int(&tstate->state, + &expected, + _Py_THREAD_ATTACHED); #else assert(tstate->state == _Py_THREAD_DETACHED); tstate->state = _Py_THREAD_ATTACHED; @@ -1815,6 +1829,20 @@ tstate_set_detached(PyThreadState *tstate) #endif } +static void +tstate_wait_attach(PyThreadState *tstate) +{ + do { + int expected = _Py_THREAD_SUSPENDED; + + // Wait until we're switched out of SUSPENDED to DETACHED. + _PyParkingLot_Park(&tstate->state, &expected, sizeof(tstate->state), + /*timeout=*/-1, NULL, /*detach=*/0); + + // Once we're back in DETACHED we can re-attach + } while (!tstate_try_attach(tstate)); +} + void _PyThreadState_Attach(PyThreadState *tstate) { @@ -1836,10 +1864,7 @@ _PyThreadState_Attach(PyThreadState *tstate) tstate_activate(tstate); if (!tstate_try_attach(tstate)) { - // TODO: Once stop-the-world GC is implemented for --disable-gil builds - // this will need to wait until the GC completes. For now, this case - // should never happen. - Py_FatalError("thread attach failed"); + tstate_wait_attach(tstate); } // Resume previous critical section. 
This acquires the lock(s) from the @@ -1853,8 +1878,8 @@ _PyThreadState_Attach(PyThreadState *tstate) #endif } -void -_PyThreadState_Detach(PyThreadState *tstate) +static void +detach_thread(PyThreadState *tstate, int detached_state) { // XXX assert(tstate_is_alive(tstate) && tstate_is_bound(tstate)); assert(tstate->state == _Py_THREAD_ATTACHED); @@ -1862,12 +1887,228 @@ _PyThreadState_Detach(PyThreadState *tstate) if (tstate->critical_section != 0) { _PyCriticalSection_SuspendAll(tstate); } - tstate_set_detached(tstate); tstate_deactivate(tstate); + tstate_set_detached(tstate); current_fast_clear(&_PyRuntime); _PyEval_ReleaseLock(tstate->interp, tstate); } +void +_PyThreadState_Detach(PyThreadState *tstate) +{ + detach_thread(tstate, _Py_THREAD_DETACHED); +} + +void +_PyThreadState_Suspend(PyThreadState *tstate) +{ + _PyRuntimeState *runtime = &_PyRuntime; + + assert(tstate->state == _Py_THREAD_ATTACHED); + + struct _stoptheworld_state *stw = NULL; + HEAD_LOCK(runtime); + if (runtime->stoptheworld.requested) { + stw = &runtime->stoptheworld; + } + else if (tstate->interp->stoptheworld.requested) { + stw = &tstate->interp->stoptheworld; + } + HEAD_UNLOCK(runtime); + + if (stw == NULL) { + // Switch directly to "detached" if there is no active stop-the-world + // request. + detach_thread(tstate, _Py_THREAD_DETACHED); + return; + } + + // Switch to "suspended" state. + detach_thread(tstate, _Py_THREAD_SUSPENDED); + + // Decrease the count of remaining threads needing to park. + HEAD_LOCK(runtime); + decrement_stoptheworld_countdown(stw); + HEAD_UNLOCK(runtime); +} + +// Decrease stop-the-world counter of remaining number of threads that need to +// pause. If we are the final thread to pause, notify the requesting thread. 
+static void +decrement_stoptheworld_countdown(struct _stoptheworld_state *stw) +{ + assert(stw->thread_countdown > 0); + if (--stw->thread_countdown == 0) { + _PyEvent_Notify(&stw->stop_event); + } +} + +#ifdef Py_GIL_DISABLED +// Interpreter for _Py_FOR_EACH_THREAD(). For global stop-the-world events, +// we start with the first interpreter and then iterate over all interpreters. +// For per-interpreter stop-the-world events, we only operate on the one +// interpreter. +static PyInterpreterState * +interp_for_stop_the_world(struct _stoptheworld_state *stw) +{ + return (stw->is_global + ? PyInterpreterState_Head() + : _Py_CONTAINER_OF(stw, PyInterpreterState, stoptheworld)); +} + +// Loops over threads for a stop-the-world event. +// For global: all threads in all interpreters +// For per-interpreter: all threads in the interpreter +#define _Py_FOR_EACH_THREAD(stw, i, t) \ + for (i = interp_for_stop_the_world((stw)); \ + i != NULL; i = ((stw->is_global) ? i->next : NULL)) \ + for (t = i->threads.head; t; t = t->next) + + +// Try to transition threads atomically from the "detached" state to the +// "suspended" state. Returns true if all threads are in the "suspended" state. +static bool +park_detached_threads(struct _stoptheworld_state *stw) +{ + int num_parked = 0; + PyInterpreterState *i; + PyThreadState *t; + _Py_FOR_EACH_THREAD(stw, i, t) { + int state = _Py_atomic_load_int_relaxed(&t->state); + if (state == _Py_THREAD_DETACHED) { + // Atomically transition to "suspended" if in "detached" state. + if (_Py_atomic_compare_exchange_int(&t->state, + &state, _Py_THREAD_SUSPENDED)) { + num_parked++; + } + } + else if (state == _Py_THREAD_ATTACHED && t != stw->requester) { + // TODO: set this per-thread, rather than per-interpreter.
+ _Py_set_eval_breaker_bit(t->interp, _PY_EVAL_PLEASE_STOP_BIT, 1); + } + } + stw->thread_countdown -= num_parked; + assert(stw->thread_countdown >= 0); + return num_parked > 0 && stw->thread_countdown == 0; +} + +static void +stop_the_world(struct _stoptheworld_state *stw) +{ + _PyRuntimeState *runtime = &_PyRuntime; + + PyMutex_Lock(&stw->mutex); + if (stw->is_global) { + _PyRWMutex_Lock(&runtime->stoptheworld_mutex); + } + else { + _PyRWMutex_RLock(&runtime->stoptheworld_mutex); + } + + HEAD_LOCK(runtime); + stw->requested = 1; + stw->thread_countdown = 0; + stw->stop_event = (PyEvent){0}; // zero-initialize (unset) + stw->requester = _PyThreadState_GET(); // may be NULL + + PyInterpreterState *i; + PyThreadState *t; + _Py_FOR_EACH_THREAD(stw, i, t) { + if (t != stw->requester) { + // Count all the other threads (we don't wait on ourself). + stw->thread_countdown++; + } + } + + if (stw->thread_countdown == 0) { + HEAD_UNLOCK(runtime); + stw->world_stopped = 1; + return; + } + + for (;;) { + // Switch threads that are detached to the "suspended" state + bool stopped_all_threads = park_detached_threads(stw); + HEAD_UNLOCK(runtime); + + if (stopped_all_threads) { + break; + } + + _PyTime_t wait_ns = 1000*1000; // 1ms (arbitrary, may need tuning) + if (PyEvent_WaitTimed(&stw->stop_event, wait_ns)) { + assert(stw->thread_countdown == 0); + break; + } + + HEAD_LOCK(runtime); + } + stw->world_stopped = 1; +} + +static void +start_the_world(struct _stoptheworld_state *stw) +{ + _PyRuntimeState *runtime = &_PyRuntime; + assert(PyMutex_IsLocked(&stw->mutex)); + + HEAD_LOCK(runtime); + stw->requested = 0; + stw->world_stopped = 0; + stw->requester = NULL; + // Switch threads back to the detached state.
+ PyInterpreterState *i; + PyThreadState *t; + _Py_FOR_EACH_THREAD(stw, i, t) { + if (t != stw->requester) { + assert(t->state == _Py_THREAD_SUSPENDED); + _Py_atomic_store_int(&t->state, _Py_THREAD_DETACHED); + _PyParkingLot_UnparkAll(&t->state); + } + } + HEAD_UNLOCK(runtime); + if (stw->is_global) { + _PyRWMutex_Unlock(&runtime->stoptheworld_mutex); + } + else { + _PyRWMutex_RUnlock(&runtime->stoptheworld_mutex); + } + PyMutex_Unlock(&stw->mutex); +} +#endif // Py_GIL_DISABLED + +void +_PyEval_StopTheWorldAll(_PyRuntimeState *runtime) +{ +#ifdef Py_GIL_DISABLED + stop_the_world(&runtime->stoptheworld); +#endif +} + +void +_PyEval_StartTheWorldAll(_PyRuntimeState *runtime) +{ +#ifdef Py_GIL_DISABLED + start_the_world(&runtime->stoptheworld); +#endif +} + +void +_PyEval_StopTheWorld(PyInterpreterState *interp) +{ +#ifdef Py_GIL_DISABLED + stop_the_world(&interp->stoptheworld); +#endif +} + +void +_PyEval_StartTheWorld(PyInterpreterState *interp) +{ +#ifdef Py_GIL_DISABLED + start_the_world(&interp->stoptheworld); +#endif +} + //---------- // other API //---------- From 925907ea362c4c014086be48625ac7dd67645cfc Mon Sep 17 00:00:00 2001 From: mpage Date: Tue, 23 Jan 2024 11:25:41 -0800 Subject: [PATCH 065/160] gh-113884: Make queue.SimpleQueue thread-safe when the GIL is disabled (#114161) * use the ParkingLot API to manage waiting threads * use Argument Clinic's critical section directive to protect queue methods * remove unnecessary overflow check Co-authored-by: Erlend E. 
Aasland --- ...-01-17-00-52-57.gh-issue-113884.CvEjUE.rst | 1 + Modules/_queuemodule.c | 202 ++++++++++-------- Modules/clinic/_queuemodule.c.h | 24 ++- 3 files changed, 137 insertions(+), 90 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-01-17-00-52-57.gh-issue-113884.CvEjUE.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-01-17-00-52-57.gh-issue-113884.CvEjUE.rst b/Misc/NEWS.d/next/Core and Builtins/2024-01-17-00-52-57.gh-issue-113884.CvEjUE.rst new file mode 100644 index 00000000000000..6a39fd2f60ab81 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-01-17-00-52-57.gh-issue-113884.CvEjUE.rst @@ -0,0 +1 @@ +Make :class:`queue.SimpleQueue` thread safe when the GIL is disabled. diff --git a/Modules/_queuemodule.c b/Modules/_queuemodule.c index 8fca3cdd0deb18..18b24855c52ad6 100644 --- a/Modules/_queuemodule.c +++ b/Modules/_queuemodule.c @@ -3,8 +3,9 @@ #endif #include "Python.h" -#include "pycore_ceval.h" // _PyEval_MakePendingCalls() +#include "pycore_ceval.h" // Py_MakePendingCalls() #include "pycore_moduleobject.h" // _PyModule_GetState() +#include "pycore_parking_lot.h" #include "pycore_time.h" // _PyTime_t #include @@ -151,7 +152,9 @@ RingBuf_Get(RingBuf *buf) return item; } -// Returns 0 on success or -1 if the buffer failed to grow +// Returns 0 on success or -1 if the buffer failed to grow. +// +// Steals a reference to item. 
static int RingBuf_Put(RingBuf *buf, PyObject *item) { @@ -164,7 +167,7 @@ RingBuf_Put(RingBuf *buf, PyObject *item) return -1; } } - buf->items[buf->put_idx] = Py_NewRef(item); + buf->items[buf->put_idx] = item; buf->put_idx = (buf->put_idx + 1) % buf->items_cap; buf->num_items++; return 0; @@ -184,9 +187,13 @@ RingBuf_IsEmpty(RingBuf *buf) typedef struct { PyObject_HEAD - PyThread_type_lock lock; - int locked; + + // Are there threads waiting for items + bool has_threads_waiting; + + // Items in the queue RingBuf buf; + PyObject *weakreflist; } simplequeueobject; @@ -209,12 +216,6 @@ simplequeue_dealloc(simplequeueobject *self) PyTypeObject *tp = Py_TYPE(self); PyObject_GC_UnTrack(self); - if (self->lock != NULL) { - /* Unlock the lock so it's safe to free it */ - if (self->locked > 0) - PyThread_release_lock(self->lock); - PyThread_free_lock(self->lock); - } (void)simplequeue_clear(self); if (self->weakreflist != NULL) PyObject_ClearWeakRefs((PyObject *) self); @@ -249,12 +250,6 @@ simplequeue_new_impl(PyTypeObject *type) self = (simplequeueobject *) type->tp_alloc(type, 0); if (self != NULL) { self->weakreflist = NULL; - self->lock = PyThread_allocate_lock(); - if (self->lock == NULL) { - Py_DECREF(self); - PyErr_SetString(PyExc_MemoryError, "can't allocate lock"); - return NULL; - } if (RingBuf_Init(&self->buf) < 0) { Py_DECREF(self); return NULL; @@ -264,7 +259,29 @@ simplequeue_new_impl(PyTypeObject *type) return (PyObject *) self; } +typedef struct { + bool handed_off; + simplequeueobject *queue; + PyObject *item; +} HandoffData; + +static void +maybe_handoff_item(HandoffData *data, PyObject **item, int has_more_waiters) +{ + if (item == NULL) { + // No threads were waiting + data->handed_off = false; + } + else { + // There was at least one waiting thread, hand off the item + *item = data->item; + data->handed_off = true; + } + data->queue->has_threads_waiting = has_more_waiters; +} + /*[clinic input] +@critical_section _queue.SimpleQueue.put item: object 
block: bool = True @@ -280,21 +297,28 @@ never blocks. They are provided for compatibility with the Queue class. static PyObject * _queue_SimpleQueue_put_impl(simplequeueobject *self, PyObject *item, int block, PyObject *timeout) -/*[clinic end generated code: output=4333136e88f90d8b input=6e601fa707a782d5]*/ +/*[clinic end generated code: output=4333136e88f90d8b input=a16dbb33363c0fa8]*/ { - /* BEGIN GIL-protected critical section */ - if (RingBuf_Put(&self->buf, item) < 0) - return NULL; - if (self->locked) { - /* A get() may be waiting, wake it up */ - self->locked = 0; - PyThread_release_lock(self->lock); + HandoffData data = { + .handed_off = 0, + .item = Py_NewRef(item), + .queue = self, + }; + if (self->has_threads_waiting) { + // Try to hand the item off directly if there are threads waiting + _PyParkingLot_Unpark(&self->has_threads_waiting, + (_Py_unpark_fn_t *)maybe_handoff_item, &data); + } + if (!data.handed_off) { + if (RingBuf_Put(&self->buf, item) < 0) { + return NULL; + } } - /* END GIL-protected critical section */ Py_RETURN_NONE; } /*[clinic input] +@critical_section _queue.SimpleQueue.put_nowait item: object @@ -307,12 +331,23 @@ for compatibility with the Queue class. static PyObject * _queue_SimpleQueue_put_nowait_impl(simplequeueobject *self, PyObject *item) -/*[clinic end generated code: output=0990536715efb1f1 input=36b1ea96756b2ece]*/ +/*[clinic end generated code: output=0990536715efb1f1 input=ce949cc2cd8a4119]*/ { return _queue_SimpleQueue_put_impl(self, item, 0, Py_None); } +static PyObject * +empty_error(PyTypeObject *cls) +{ + PyObject *module = PyType_GetModule(cls); + assert(module != NULL); + simplequeue_state *state = simplequeue_get_state(module); + PyErr_SetNone(state->EmptyError); + return NULL; +} + /*[clinic input] +@critical_section _queue.SimpleQueue.get cls: defining_class @@ -335,23 +370,15 @@ in that case). 
static PyObject * _queue_SimpleQueue_get_impl(simplequeueobject *self, PyTypeObject *cls, int block, PyObject *timeout_obj) -/*[clinic end generated code: output=5c2cca914cd1e55b input=5b4047bfbc645ec1]*/ +/*[clinic end generated code: output=5c2cca914cd1e55b input=f7836c65e5839c51]*/ { _PyTime_t endtime = 0; - _PyTime_t timeout; - PyObject *item; - PyLockStatus r; - PY_TIMEOUT_T microseconds; - PyThreadState *tstate = PyThreadState_Get(); // XXX Use PyThread_ParseTimeoutArg(). - if (block == 0) { - /* Non-blocking */ - microseconds = 0; - } - else if (timeout_obj != Py_None) { + if (block != 0 && !Py_IsNone(timeout_obj)) { /* With timeout */ + _PyTime_t timeout; if (_PyTime_FromSecondsObject(&timeout, timeout_obj, _PyTime_ROUND_CEILING) < 0) { return NULL; @@ -361,65 +388,64 @@ _queue_SimpleQueue_get_impl(simplequeueobject *self, PyTypeObject *cls, "'timeout' must be a non-negative number"); return NULL; } - microseconds = _PyTime_AsMicroseconds(timeout, - _PyTime_ROUND_CEILING); - if (microseconds > PY_TIMEOUT_MAX) { - PyErr_SetString(PyExc_OverflowError, - "timeout value is too large"); - return NULL; - } endtime = _PyDeadline_Init(timeout); } - else { - /* Infinitely blocking */ - microseconds = -1; - } - /* put() signals the queue to be non-empty by releasing the lock. - * So we simply try to acquire the lock in a loop, until the condition - * (queue non-empty) becomes true. 
- */ - while (RingBuf_IsEmpty(&self->buf)) { - /* First a simple non-blocking try without releasing the GIL */ - r = PyThread_acquire_lock_timed(self->lock, 0, 0); - if (r == PY_LOCK_FAILURE && microseconds != 0) { - Py_BEGIN_ALLOW_THREADS - r = PyThread_acquire_lock_timed(self->lock, microseconds, 1); - Py_END_ALLOW_THREADS + for (;;) { + if (!RingBuf_IsEmpty(&self->buf)) { + return RingBuf_Get(&self->buf); } - if (r == PY_LOCK_INTR && _PyEval_MakePendingCalls(tstate) < 0) { - return NULL; - } - if (r == PY_LOCK_FAILURE) { - PyObject *module = PyType_GetModule(cls); - simplequeue_state *state = simplequeue_get_state(module); - /* Timed out */ - PyErr_SetNone(state->EmptyError); - return NULL; + if (!block) { + return empty_error(cls); } - self->locked = 1; - /* Adjust timeout for next iteration (if any) */ - if (microseconds > 0) { - timeout = _PyDeadline_Get(endtime); - microseconds = _PyTime_AsMicroseconds(timeout, - _PyTime_ROUND_CEILING); + int64_t timeout_ns = -1; + if (endtime != 0) { + timeout_ns = _PyDeadline_Get(endtime); + if (timeout_ns < 0) { + return empty_error(cls); + } } - } - /* BEGIN GIL-protected critical section */ - item = RingBuf_Get(&self->buf); - if (self->locked) { - PyThread_release_lock(self->lock); - self->locked = 0; + bool waiting = 1; + self->has_threads_waiting = waiting; + + PyObject *item = NULL; + int st = _PyParkingLot_Park(&self->has_threads_waiting, &waiting, + sizeof(bool), timeout_ns, &item, + /* detach */ 1); + switch (st) { + case Py_PARK_OK: { + assert(item != NULL); + return item; + } + case Py_PARK_TIMEOUT: { + return empty_error(cls); + } + case Py_PARK_INTR: { + // Interrupted + if (Py_MakePendingCalls() < 0) { + return NULL; + } + break; + } + case Py_PARK_AGAIN: { + // This should be impossible with the current implementation of + // PyParkingLot, but would be possible if critical sections / + // the GIL were released before the thread was added to the + // internal thread queue in the parking lot. 
+ break; + } + default: { + Py_UNREACHABLE(); + } + } } - /* END GIL-protected critical section */ - - return item; } /*[clinic input] +@critical_section _queue.SimpleQueue.get_nowait cls: defining_class @@ -434,12 +460,13 @@ raise the Empty exception. static PyObject * _queue_SimpleQueue_get_nowait_impl(simplequeueobject *self, PyTypeObject *cls) -/*[clinic end generated code: output=620c58e2750f8b8a input=842f732bf04216d3]*/ +/*[clinic end generated code: output=620c58e2750f8b8a input=d48be63633fefae9]*/ { return _queue_SimpleQueue_get_impl(self, cls, 0, Py_None); } /*[clinic input] +@critical_section _queue.SimpleQueue.empty -> bool Return True if the queue is empty, False otherwise (not reliable!). @@ -447,12 +474,13 @@ Return True if the queue is empty, False otherwise (not reliable!). static int _queue_SimpleQueue_empty_impl(simplequeueobject *self) -/*[clinic end generated code: output=1a02a1b87c0ef838 input=1a98431c45fd66f9]*/ +/*[clinic end generated code: output=1a02a1b87c0ef838 input=96cb22df5a67d831]*/ { return RingBuf_IsEmpty(&self->buf); } /*[clinic input] +@critical_section _queue.SimpleQueue.qsize -> Py_ssize_t Return the approximate size of the queue (not reliable!). @@ -460,7 +488,7 @@ Return the approximate size of the queue (not reliable!). 
static Py_ssize_t _queue_SimpleQueue_qsize_impl(simplequeueobject *self) -/*[clinic end generated code: output=f9dcd9d0a90e121e input=7a74852b407868a1]*/ +/*[clinic end generated code: output=f9dcd9d0a90e121e input=e218623cb8c16a79]*/ { return RingBuf_Len(&self->buf); } diff --git a/Modules/clinic/_queuemodule.c.h b/Modules/clinic/_queuemodule.c.h index 8e2a430835e35f..b3b6b8e96c135e 100644 --- a/Modules/clinic/_queuemodule.c.h +++ b/Modules/clinic/_queuemodule.c.h @@ -6,6 +6,7 @@ preserve # include "pycore_gc.h" // PyGC_Head # include "pycore_runtime.h" // _Py_ID() #endif +#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() #include "pycore_modsupport.h" // _PyArg_NoKeywords() PyDoc_STRVAR(simplequeue_new__doc__, @@ -107,7 +108,9 @@ _queue_SimpleQueue_put(simplequeueobject *self, PyObject *const *args, Py_ssize_ } timeout = args[2]; skip_optional_pos: + Py_BEGIN_CRITICAL_SECTION(self); return_value = _queue_SimpleQueue_put_impl(self, item, block, timeout); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -165,7 +168,9 @@ _queue_SimpleQueue_put_nowait(simplequeueobject *self, PyObject *const *args, Py goto exit; } item = args[0]; + Py_BEGIN_CRITICAL_SECTION(self); return_value = _queue_SimpleQueue_put_nowait_impl(self, item); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -244,7 +249,9 @@ _queue_SimpleQueue_get(simplequeueobject *self, PyTypeObject *cls, PyObject *con } timeout_obj = args[1]; skip_optional_pos: + Py_BEGIN_CRITICAL_SECTION(self); return_value = _queue_SimpleQueue_get_impl(self, cls, block, timeout_obj); + Py_END_CRITICAL_SECTION(); exit: return return_value; @@ -269,11 +276,18 @@ _queue_SimpleQueue_get_nowait_impl(simplequeueobject *self, static PyObject * _queue_SimpleQueue_get_nowait(simplequeueobject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { + PyObject *return_value = NULL; + if (nargs) { PyErr_SetString(PyExc_TypeError, "get_nowait() takes no arguments"); - return 
NULL; + goto exit; } - return _queue_SimpleQueue_get_nowait_impl(self, cls); + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _queue_SimpleQueue_get_nowait_impl(self, cls); + Py_END_CRITICAL_SECTION(); + +exit: + return return_value; } PyDoc_STRVAR(_queue_SimpleQueue_empty__doc__, @@ -294,7 +308,9 @@ _queue_SimpleQueue_empty(simplequeueobject *self, PyObject *Py_UNUSED(ignored)) PyObject *return_value = NULL; int _return_value; + Py_BEGIN_CRITICAL_SECTION(self); _return_value = _queue_SimpleQueue_empty_impl(self); + Py_END_CRITICAL_SECTION(); if ((_return_value == -1) && PyErr_Occurred()) { goto exit; } @@ -322,7 +338,9 @@ _queue_SimpleQueue_qsize(simplequeueobject *self, PyObject *Py_UNUSED(ignored)) PyObject *return_value = NULL; Py_ssize_t _return_value; + Py_BEGIN_CRITICAL_SECTION(self); _return_value = _queue_SimpleQueue_qsize_impl(self); + Py_END_CRITICAL_SECTION(); if ((_return_value == -1) && PyErr_Occurred()) { goto exit; } @@ -331,4 +349,4 @@ _queue_SimpleQueue_qsize(simplequeueobject *self, PyObject *Py_UNUSED(ignored)) exit: return return_value; } -/*[clinic end generated code: output=457310b20cb61cf8 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=242950edc8f7dfd7 input=a9049054013a1b77]*/ From 8c265408c51609c6b4a6788cac9cc5fea7a14888 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Tue, 23 Jan 2024 20:54:44 +0100 Subject: [PATCH 066/160] Docs: use placeholders in dbm flag param docs (#114482) Also correct the default flag param for dbm.dumb.open(); it's 'c', not 'r'. --- Doc/library/dbm.rst | 103 ++++++++++++++++++-------------------------- 1 file changed, 43 insertions(+), 60 deletions(-) diff --git a/Doc/library/dbm.rst b/Doc/library/dbm.rst index cb95c61322582f..eca1c25602a018 100644 --- a/Doc/library/dbm.rst +++ b/Doc/library/dbm.rst @@ -36,6 +36,21 @@ the Oracle Berkeley DB. .. versionchanged:: 3.11 Accepts :term:`path-like object` for filename. +.. 
Substitutions for the open() flag param docs; + all submodules use the same text. + +.. |flag_r| replace:: + Open existing database for reading only. + +.. |flag_w| replace:: + Open existing database for reading and writing. + +.. |flag_c| replace:: + Open database for reading and writing, creating it if it doesn't exist. + +.. |flag_n| replace:: + Always create a new, empty database, open for reading and writing. + .. function:: open(file, flag='r', mode=0o666) Open the database file *file* and return a corresponding object. @@ -46,21 +61,13 @@ the Oracle Berkeley DB. The optional *flag* argument can be: - +---------+-------------------------------------------+ - | Value | Meaning | - +=========+===========================================+ - | ``'r'`` | Open existing database for reading only | - | | (default) | - +---------+-------------------------------------------+ - | ``'w'`` | Open existing database for reading and | - | | writing | - +---------+-------------------------------------------+ - | ``'c'`` | Open database for reading and writing, | - | | creating it if it doesn't exist | - +---------+-------------------------------------------+ - | ``'n'`` | Always create a new, empty database, open | - | | for reading and writing | - +---------+-------------------------------------------+ + .. csv-table:: + :header: "Value", "Meaning" + + ``'r'`` (default), |flag_r| + ``'w'``, |flag_w| + ``'c'``, |flag_c| + ``'n'``, |flag_n| The optional *mode* argument is the Unix mode of the file, used only when the database has to be created. It defaults to octal ``0o666`` (and will be @@ -165,21 +172,13 @@ supported. 
The optional *flag* argument can be: - +---------+-------------------------------------------+ - | Value | Meaning | - +=========+===========================================+ - | ``'r'`` | Open existing database for reading only | - | | (default) | - +---------+-------------------------------------------+ - | ``'w'`` | Open existing database for reading and | - | | writing | - +---------+-------------------------------------------+ - | ``'c'`` | Open database for reading and writing, | - | | creating it if it doesn't exist | - +---------+-------------------------------------------+ - | ``'n'`` | Always create a new, empty database, open | - | | for reading and writing | - +---------+-------------------------------------------+ + .. csv-table:: + :header: "Value", "Meaning" + + ``'r'`` (default), |flag_r| + ``'w'``, |flag_w| + ``'c'``, |flag_c| + ``'n'``, |flag_n| The following additional characters may be appended to the flag to control how the database is opened: @@ -297,21 +296,13 @@ to locate the appropriate header file to simplify building this module. The optional *flag* argument must be one of these values: - +---------+-------------------------------------------+ - | Value | Meaning | - +=========+===========================================+ - | ``'r'`` | Open existing database for reading only | - | | (default) | - +---------+-------------------------------------------+ - | ``'w'`` | Open existing database for reading and | - | | writing | - +---------+-------------------------------------------+ - | ``'c'`` | Open database for reading and writing, | - | | creating it if it doesn't exist | - +---------+-------------------------------------------+ - | ``'n'`` | Always create a new, empty database, open | - | | for reading and writing | - +---------+-------------------------------------------+ + .. 
csv-table:: + :header: "Value", "Meaning" + + ``'r'`` (default), |flag_r| + ``'w'``, |flag_w| + ``'c'``, |flag_c| + ``'n'``, |flag_n| The optional *mode* argument is the Unix mode of the file, used only when the database has to be created. It defaults to octal ``0o666`` (and will be @@ -376,21 +367,13 @@ The module defines the following: The optional *flag* argument can be: - +---------+-------------------------------------------+ - | Value | Meaning | - +=========+===========================================+ - | ``'r'`` | Open existing database for reading only | - | | (default) | - +---------+-------------------------------------------+ - | ``'w'`` | Open existing database for reading and | - | | writing | - +---------+-------------------------------------------+ - | ``'c'`` | Open database for reading and writing, | - | | creating it if it doesn't exist | - +---------+-------------------------------------------+ - | ``'n'`` | Always create a new, empty database, open | - | | for reading and writing | - +---------+-------------------------------------------+ + .. csv-table:: + :header: "Value", "Meaning" + + ``'r'``, |flag_r| + ``'w'``, |flag_w| + ``'c'`` (default), |flag_c| + ``'n'``, |flag_n| The optional *mode* argument is the Unix mode of the file, used only when the database has to be created. It defaults to octal ``0o666`` (and will be modified From ce01ab536f22a3cf095d621f3b3579c1e3567859 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Tue, 23 Jan 2024 15:14:46 -0500 Subject: [PATCH 067/160] gh-101438: Avoid reference cycle in ElementTree.iterparse. (GH-114269) The iterator returned by ElementTree.iterparse() may hold on to a file descriptor. The reference cycle prevented prompt clean-up of the file descriptor if the returned iterator was not exhausted. 
--- Lib/xml/etree/ElementTree.py | 27 ++++++++++++------- ...-01-18-22-29-28.gh-issue-101438.1-uUi_.rst | 4 +++ 2 files changed, 21 insertions(+), 10 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst diff --git a/Lib/xml/etree/ElementTree.py b/Lib/xml/etree/ElementTree.py index 42574eefd81beb..ae6575028be11c 100644 --- a/Lib/xml/etree/ElementTree.py +++ b/Lib/xml/etree/ElementTree.py @@ -99,6 +99,7 @@ import collections import collections.abc import contextlib +import weakref from . import ElementPath @@ -1223,13 +1224,14 @@ def iterparse(source, events=None, parser=None): # parser argument of iterparse is removed, this can be killed. pullparser = XMLPullParser(events=events, _parser=parser) - def iterator(source): + if not hasattr(source, "read"): + source = open(source, "rb") + close_source = True + else: close_source = False + + def iterator(source): try: - if not hasattr(source, "read"): - source = open(source, "rb") - close_source = True - yield None while True: yield from pullparser.read_events() # load event buffer @@ -1239,18 +1241,23 @@ def iterator(source): pullparser.feed(data) root = pullparser._close_and_return_root() yield from pullparser.read_events() - it.root = root + it = wr() + if it is not None: + it.root = root finally: if close_source: source.close() class IterParseIterator(collections.abc.Iterator): __next__ = iterator(source).__next__ - it = IterParseIterator() - it.root = None - del iterator, IterParseIterator - next(it) + def __del__(self): + if close_source: + source.close() + + it = IterParseIterator() + wr = weakref.ref(it) + del IterParseIterator return it diff --git a/Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst b/Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst new file mode 100644 index 00000000000000..9b69b5deb1b5a0 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-18-22-29-28.gh-issue-101438.1-uUi_.rst @@ -0,0 +1,4 @@ +Avoid 
reference cycle in ElementTree.iterparse. The iterator returned by +``ElementTree.iterparse`` may hold on to a file descriptor. The reference +cycle prevented prompt clean-up of the file descriptor if the returned +iterator was not exhausted. From d22c066b802592932f9eb18434782299e80ca42e Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Tue, 23 Jan 2024 23:27:04 +0200 Subject: [PATCH 068/160] gh-114492: Initialize struct termios before calling tcgetattr() (GH-114495) On Alpine Linux it could leave some field non-initialized. --- .../next/Library/2024-01-23-21-20-40.gh-issue-114492.vKxl5o.rst | 2 ++ Modules/termios.c | 2 ++ 2 files changed, 4 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-01-23-21-20-40.gh-issue-114492.vKxl5o.rst diff --git a/Misc/NEWS.d/next/Library/2024-01-23-21-20-40.gh-issue-114492.vKxl5o.rst b/Misc/NEWS.d/next/Library/2024-01-23-21-20-40.gh-issue-114492.vKxl5o.rst new file mode 100644 index 00000000000000..8df8299d0dffcd --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-23-21-20-40.gh-issue-114492.vKxl5o.rst @@ -0,0 +1,2 @@ +Make the result of :func:`termios.tcgetattr` reproducible on Alpine Linux. +Previously it could leave a random garbage in some fields. diff --git a/Modules/termios.c b/Modules/termios.c index c4f0fd9d50044a..69dbd88be5fcc2 100644 --- a/Modules/termios.c +++ b/Modules/termios.c @@ -98,6 +98,8 @@ termios_tcgetattr_impl(PyObject *module, int fd) struct termios mode; int r; + /* Alpine Linux can leave some fields uninitialized. */ + memset(&mode, 0, sizeof(mode)); Py_BEGIN_ALLOW_THREADS r = tcgetattr(fd, &mode); Py_END_ALLOW_THREADS From afe8f376c096d5d6e8b12fbc691ca9b35381470b Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Tue, 23 Jan 2024 14:10:04 -0800 Subject: [PATCH 069/160] gh-112075: Adapt more dict methods to Argument Clinic (#114256) * Move more dict objects to argument clinic * Improve doc strings * More doc string improvements * Update Objects/dictobject.c Co-authored-by: Erlend E. 
Aasland * Update Objects/dictobject.c Co-authored-by: Erlend E. Aasland * Update Objects/dictobject.c Co-authored-by: Erlend E. Aasland * Update Objects/dictobject.c Co-authored-by: Erlend E. Aasland * Update Objects/dictobject.c Co-authored-by: Erlend E. Aasland * Update Objects/dictobject.c Co-authored-by: Erlend E. Aasland * Update Objects/dictobject.c Co-authored-by: Erlend E. Aasland --------- Co-authored-by: Erlend E. Aasland --- Objects/clinic/dictobject.c.h | 110 ++++++++++++++++++++++- Objects/dictobject.c | 162 +++++++++++++++++----------------- 2 files changed, 191 insertions(+), 81 deletions(-) diff --git a/Objects/clinic/dictobject.c.h b/Objects/clinic/dictobject.c.h index 641514235c2341..8f532f454156de 100644 --- a/Objects/clinic/dictobject.c.h +++ b/Objects/clinic/dictobject.c.h @@ -38,6 +38,24 @@ dict_fromkeys(PyTypeObject *type, PyObject *const *args, Py_ssize_t nargs) return return_value; } +PyDoc_STRVAR(dict_copy__doc__, +"copy($self, /)\n" +"--\n" +"\n" +"Return a shallow copy of the dict."); + +#define DICT_COPY_METHODDEF \ + {"copy", (PyCFunction)dict_copy, METH_NOARGS, dict_copy__doc__}, + +static PyObject * +dict_copy_impl(PyDictObject *self); + +static PyObject * +dict_copy(PyDictObject *self, PyObject *Py_UNUSED(ignored)) +{ + return dict_copy_impl(self); +} + PyDoc_STRVAR(dict___contains____doc__, "__contains__($self, key, /)\n" "--\n" @@ -118,6 +136,24 @@ dict_setdefault(PyDictObject *self, PyObject *const *args, Py_ssize_t nargs) return return_value; } +PyDoc_STRVAR(dict_clear__doc__, +"clear($self, /)\n" +"--\n" +"\n" +"Remove all items from the dict."); + +#define DICT_CLEAR_METHODDEF \ + {"clear", (PyCFunction)dict_clear, METH_NOARGS, dict_clear__doc__}, + +static PyObject * +dict_clear_impl(PyDictObject *self); + +static PyObject * +dict_clear(PyDictObject *self, PyObject *Py_UNUSED(ignored)) +{ + return dict_clear_impl(self); +} + PyDoc_STRVAR(dict_pop__doc__, "pop($self, key, default=, /)\n" "--\n" @@ -176,6 +212,24 @@ 
dict_popitem(PyDictObject *self, PyObject *Py_UNUSED(ignored)) return dict_popitem_impl(self); } +PyDoc_STRVAR(dict___sizeof____doc__, +"__sizeof__($self, /)\n" +"--\n" +"\n" +"Return the size of the dict in memory, in bytes."); + +#define DICT___SIZEOF___METHODDEF \ + {"__sizeof__", (PyCFunction)dict___sizeof__, METH_NOARGS, dict___sizeof____doc__}, + +static PyObject * +dict___sizeof___impl(PyDictObject *self); + +static PyObject * +dict___sizeof__(PyDictObject *self, PyObject *Py_UNUSED(ignored)) +{ + return dict___sizeof___impl(self); +} + PyDoc_STRVAR(dict___reversed____doc__, "__reversed__($self, /)\n" "--\n" @@ -193,4 +247,58 @@ dict___reversed__(PyDictObject *self, PyObject *Py_UNUSED(ignored)) { return dict___reversed___impl(self); } -/*[clinic end generated code: output=17c3c4cf9a9b95a7 input=a9049054013a1b77]*/ + +PyDoc_STRVAR(dict_keys__doc__, +"keys($self, /)\n" +"--\n" +"\n" +"Return a set-like object providing a view on the dict\'s keys."); + +#define DICT_KEYS_METHODDEF \ + {"keys", (PyCFunction)dict_keys, METH_NOARGS, dict_keys__doc__}, + +static PyObject * +dict_keys_impl(PyDictObject *self); + +static PyObject * +dict_keys(PyDictObject *self, PyObject *Py_UNUSED(ignored)) +{ + return dict_keys_impl(self); +} + +PyDoc_STRVAR(dict_items__doc__, +"items($self, /)\n" +"--\n" +"\n" +"Return a set-like object providing a view on the dict\'s items."); + +#define DICT_ITEMS_METHODDEF \ + {"items", (PyCFunction)dict_items, METH_NOARGS, dict_items__doc__}, + +static PyObject * +dict_items_impl(PyDictObject *self); + +static PyObject * +dict_items(PyDictObject *self, PyObject *Py_UNUSED(ignored)) +{ + return dict_items_impl(self); +} + +PyDoc_STRVAR(dict_values__doc__, +"values($self, /)\n" +"--\n" +"\n" +"Return an object providing a view on the dict\'s values."); + +#define DICT_VALUES_METHODDEF \ + {"values", (PyCFunction)dict_values, METH_NOARGS, dict_values__doc__}, + +static PyObject * +dict_values_impl(PyDictObject *self); + +static PyObject * 
+dict_values(PyDictObject *self, PyObject *Py_UNUSED(ignored)) +{ + return dict_values_impl(self); +} +/*[clinic end generated code: output=f3ac47dfbf341b23 input=a9049054013a1b77]*/ diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 2482a918ba983b..e608b91679b568 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -2641,9 +2641,14 @@ static PyMappingMethods dict_as_mapping = { dict_ass_sub, /*mp_ass_subscript*/ }; -static PyObject * -dict_keys(PyDictObject *mp) +PyObject * +PyDict_Keys(PyObject *dict) { + if (dict == NULL || !PyDict_Check(dict)) { + PyErr_BadInternalCall(); + return NULL; + } + PyDictObject *mp = (PyDictObject *)dict; PyObject *v; Py_ssize_t n; @@ -2672,9 +2677,14 @@ dict_keys(PyDictObject *mp) return v; } -static PyObject * -dict_values(PyDictObject *mp) +PyObject * +PyDict_Values(PyObject *dict) { + if (dict == NULL || !PyDict_Check(dict)) { + PyErr_BadInternalCall(); + return NULL; + } + PyDictObject *mp = (PyDictObject *)dict; PyObject *v; Py_ssize_t n; @@ -2703,9 +2713,14 @@ dict_values(PyDictObject *mp) return v; } -static PyObject * -dict_items(PyDictObject *mp) +PyObject * +PyDict_Items(PyObject *dict) { + if (dict == NULL || !PyDict_Check(dict)) { + PyErr_BadInternalCall(); + return NULL; + } + PyDictObject *mp = (PyDictObject *)dict; PyObject *v; Py_ssize_t i, n; PyObject *item; @@ -3108,10 +3123,17 @@ _PyDict_MergeEx(PyObject *a, PyObject *b, int override) return dict_merge(interp, a, b, override); } +/*[clinic input] +dict.copy + +Return a shallow copy of the dict. 
+[clinic start generated code]*/ + static PyObject * -dict_copy(PyObject *mp, PyObject *Py_UNUSED(ignored)) +dict_copy_impl(PyDictObject *self) +/*[clinic end generated code: output=ffb782cf970a5c39 input=73935f042b639de4]*/ { - return PyDict_Copy(mp); + return PyDict_Copy((PyObject *)self); } PyObject * @@ -3217,36 +3239,6 @@ PyDict_Size(PyObject *mp) return ((PyDictObject *)mp)->ma_used; } -PyObject * -PyDict_Keys(PyObject *mp) -{ - if (mp == NULL || !PyDict_Check(mp)) { - PyErr_BadInternalCall(); - return NULL; - } - return dict_keys((PyDictObject *)mp); -} - -PyObject * -PyDict_Values(PyObject *mp) -{ - if (mp == NULL || !PyDict_Check(mp)) { - PyErr_BadInternalCall(); - return NULL; - } - return dict_values((PyDictObject *)mp); -} - -PyObject * -PyDict_Items(PyObject *mp) -{ - if (mp == NULL || !PyDict_Check(mp)) { - PyErr_BadInternalCall(); - return NULL; - } - return dict_items((PyDictObject *)mp); -} - /* Return 1 if dicts equal, 0 if not, -1 if error. * Gets out as soon as any difference is detected. * Uses only Py_EQ comparison. @@ -3512,10 +3504,18 @@ dict_setdefault_impl(PyDictObject *self, PyObject *key, return Py_XNewRef(val); } + +/*[clinic input] +dict.clear + +Remove all items from the dict. +[clinic start generated code]*/ + static PyObject * -dict_clear(PyObject *mp, PyObject *Py_UNUSED(ignored)) +dict_clear_impl(PyDictObject *self) +/*[clinic end generated code: output=5139a830df00830a input=0bf729baba97a4c2]*/ { - PyDict_Clear(mp); + PyDict_Clear((PyObject *)self); Py_RETURN_NONE; } @@ -3703,11 +3703,17 @@ _PyDict_KeysSize(PyDictKeysObject *keys) return size; } +/*[clinic input] +dict.__sizeof__ + +Return the size of the dict in memory, in bytes. 
+[clinic start generated code]*/ + static PyObject * -dict_sizeof(PyObject *self, PyObject *Py_UNUSED(ignored)) +dict___sizeof___impl(PyDictObject *self) +/*[clinic end generated code: output=44279379b3824bda input=4fec4ddfc44a4d1a]*/ { - PyDictObject *mp = (PyDictObject *)self; - return PyLong_FromSsize_t(_PyDict_SizeOf(mp)); + return PyLong_FromSsize_t(_PyDict_SizeOf(self)); } static PyObject * @@ -3739,56 +3745,31 @@ dict_ior(PyObject *self, PyObject *other) PyDoc_STRVAR(getitem__doc__, "__getitem__($self, key, /)\n--\n\nReturn self[key]."); -PyDoc_STRVAR(sizeof__doc__, -"D.__sizeof__() -> size of D in memory, in bytes"); - PyDoc_STRVAR(update__doc__, "D.update([E, ]**F) -> None. Update D from dict/iterable E and F.\n\ If E is present and has a .keys() method, then does: for k in E: D[k] = E[k]\n\ If E is present and lacks a .keys() method, then does: for k, v in E: D[k] = v\n\ In either case, this is followed by: for k in F: D[k] = F[k]"); -PyDoc_STRVAR(clear__doc__, -"D.clear() -> None. 
Remove all items from D."); - -PyDoc_STRVAR(copy__doc__, -"D.copy() -> a shallow copy of D"); - /* Forward */ -static PyObject *dictkeys_new(PyObject *, PyObject *); -static PyObject *dictitems_new(PyObject *, PyObject *); -static PyObject *dictvalues_new(PyObject *, PyObject *); - -PyDoc_STRVAR(keys__doc__, - "D.keys() -> a set-like object providing a view on D's keys"); -PyDoc_STRVAR(items__doc__, - "D.items() -> a set-like object providing a view on D's items"); -PyDoc_STRVAR(values__doc__, - "D.values() -> an object providing a view on D's values"); static PyMethodDef mapp_methods[] = { DICT___CONTAINS___METHODDEF {"__getitem__", dict_subscript, METH_O | METH_COEXIST, getitem__doc__}, - {"__sizeof__", dict_sizeof, METH_NOARGS, - sizeof__doc__}, + DICT___SIZEOF___METHODDEF DICT_GET_METHODDEF DICT_SETDEFAULT_METHODDEF DICT_POP_METHODDEF DICT_POPITEM_METHODDEF - {"keys", dictkeys_new, METH_NOARGS, - keys__doc__}, - {"items", dictitems_new, METH_NOARGS, - items__doc__}, - {"values", dictvalues_new, METH_NOARGS, - values__doc__}, + DICT_KEYS_METHODDEF + DICT_ITEMS_METHODDEF + DICT_VALUES_METHODDEF {"update", _PyCFunction_CAST(dict_update), METH_VARARGS | METH_KEYWORDS, update__doc__}, DICT_FROMKEYS_METHODDEF - {"clear", dict_clear, METH_NOARGS, - clear__doc__}, - {"copy", dict_copy, METH_NOARGS, - copy__doc__}, + DICT_CLEAR_METHODDEF + DICT_COPY_METHODDEF DICT___REVERSED___METHODDEF {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")}, {NULL, NULL} /* sentinel */ @@ -5263,10 +5244,17 @@ PyTypeObject PyDictKeys_Type = { .tp_getset = dictview_getset, }; +/*[clinic input] +dict.keys + +Return a set-like object providing a view on the dict's keys. 
+[clinic start generated code]*/ + static PyObject * -dictkeys_new(PyObject *dict, PyObject *Py_UNUSED(ignored)) +dict_keys_impl(PyDictObject *self) +/*[clinic end generated code: output=aac2830c62990358 input=42f48a7a771212a7]*/ { - return _PyDictView_New(dict, &PyDictKeys_Type); + return _PyDictView_New((PyObject *)self, &PyDictKeys_Type); } static PyObject * @@ -5368,10 +5356,17 @@ PyTypeObject PyDictItems_Type = { .tp_getset = dictview_getset, }; +/*[clinic input] +dict.items + +Return a set-like object providing a view on the dict's items. +[clinic start generated code]*/ + static PyObject * -dictitems_new(PyObject *dict, PyObject *Py_UNUSED(ignored)) +dict_items_impl(PyDictObject *self) +/*[clinic end generated code: output=88c7db7150c7909a input=87c822872eb71f5a]*/ { - return _PyDictView_New(dict, &PyDictItems_Type); + return _PyDictView_New((PyObject *)self, &PyDictItems_Type); } static PyObject * @@ -5451,10 +5446,17 @@ PyTypeObject PyDictValues_Type = { .tp_getset = dictview_getset, }; +/*[clinic input] +dict.values + +Return an object providing a view on the dict's values. +[clinic start generated code]*/ + static PyObject * -dictvalues_new(PyObject *dict, PyObject *Py_UNUSED(ignored)) +dict_values_impl(PyDictObject *self) +/*[clinic end generated code: output=ce9f2e9e8a959dd4 input=b46944f85493b230]*/ { - return _PyDictView_New(dict, &PyDictValues_Type); + return _PyDictView_New((PyObject *)self, &PyDictValues_Type); } static PyObject * From f59f90b5bccb9e7ac522bc779ab1f6bf11bb4aa3 Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Tue, 23 Jan 2024 15:48:14 -0800 Subject: [PATCH 070/160] GH-114456: lower the recursion limit under WASI for debug builds (GH-114457) Testing under wasmtime 16.0.0 w/ code from https://github.com/python/cpython/issues/114413 is how the value was found. 
--- Include/cpython/pystate.h | 11 +++++++---- Lib/test/test_dynamic.py | 3 ++- Lib/test/test_pickle.py | 4 +++- .../2024-01-22-15-10-01.gh-issue-114456.fBFEJF.rst | 1 + 4 files changed, 13 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-01-22-15-10-01.gh-issue-114456.fBFEJF.rst diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index 60b056bdcc2f1f..1dbf97660f382f 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -217,11 +217,14 @@ struct _ts { #ifdef Py_DEBUG // A debug build is likely built with low optimization level which implies // higher stack memory usage than a release build: use a lower limit. -# define Py_C_RECURSION_LIMIT 500 +# if defined(__wasi__) + // Based on wasmtime 16. +# define Py_C_RECURSION_LIMIT 150 +# else +# define Py_C_RECURSION_LIMIT 500 +# endif #elif defined(__wasi__) - // WASI has limited call stack. Python's recursion limit depends on code - // layout, optimization, and WASI runtime. Wasmtime can handle about 700 - // recursions, sometimes less. 500 is a more conservative limit. + // Based on wasmtime 16. 
# define Py_C_RECURSION_LIMIT 500 #elif defined(__s390x__) # define Py_C_RECURSION_LIMIT 800 diff --git a/Lib/test/test_dynamic.py b/Lib/test/test_dynamic.py index 0aa3be6a1bde6a..3928bbab4423c2 100644 --- a/Lib/test/test_dynamic.py +++ b/Lib/test/test_dynamic.py @@ -4,7 +4,7 @@ import sys import unittest -from test.support import swap_item, swap_attr +from test.support import is_wasi, Py_DEBUG, swap_item, swap_attr class RebindBuiltinsTests(unittest.TestCase): @@ -134,6 +134,7 @@ def test_eval_gives_lambda_custom_globals(self): self.assertEqual(foo(), 7) + @unittest.skipIf(is_wasi and Py_DEBUG, "stack depth too shallow in pydebug WASI") def test_load_global_specialization_failure_keeps_oparg(self): # https://github.com/python/cpython/issues/91625 class MyGlobals(dict): diff --git a/Lib/test/test_pickle.py b/Lib/test/test_pickle.py index f6405d6dd44ef6..b2245ddf72f708 100644 --- a/Lib/test/test_pickle.py +++ b/Lib/test/test_pickle.py @@ -402,7 +402,9 @@ def recurse(deep): check_unpickler(recurse(1), 32, 20) check_unpickler(recurse(20), 32, 20) check_unpickler(recurse(50), 64, 60) - check_unpickler(recurse(100), 128, 140) + if not (support.is_wasi and support.Py_DEBUG): + # stack depth too shallow in pydebug WASI. + check_unpickler(recurse(100), 128, 140) u = unpickler(io.BytesIO(pickle.dumps('a', 0)), encoding='ASCII', errors='strict') diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-01-22-15-10-01.gh-issue-114456.fBFEJF.rst b/Misc/NEWS.d/next/Core and Builtins/2024-01-22-15-10-01.gh-issue-114456.fBFEJF.rst new file mode 100644 index 00000000000000..2b30ad98fb5c79 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-01-22-15-10-01.gh-issue-114456.fBFEJF.rst @@ -0,0 +1 @@ +Lower the recursion limit under a debug build of WASI. 
From 82cd8fee31823b560e664f81b430a9186d6019dd Mon Sep 17 00:00:00 2001 From: Daniel Hollas Date: Wed, 24 Jan 2024 04:16:31 +0000 Subject: [PATCH 071/160] Fix a typo in the contextlib documentation (#114507) --- Doc/library/contextlib.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/contextlib.rst b/Doc/library/contextlib.rst index b73373bc2363fb..73e53aec9cbf1c 100644 --- a/Doc/library/contextlib.rst +++ b/Doc/library/contextlib.rst @@ -185,7 +185,7 @@ Functions and classes provided: .. note:: Most types managing resources support the :term:`context manager` protocol, - which closes *thing* on leaving the :keyword:`with` statment. + which closes *thing* on leaving the :keyword:`with` statement. As such, :func:`!closing` is most useful for third party types that don't support context managers. This example is purely for illustration purposes, From ce75b4c26d18dcd840fd2e7ee362a84209648d06 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 24 Jan 2024 09:13:09 +0200 Subject: [PATCH 072/160] gh-113205: test_multiprocessing.test_terminate: Give tasks a chance to start (GH-114249) --- Lib/test/_test_multiprocessing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/test/_test_multiprocessing.py b/Lib/test/_test_multiprocessing.py index 6a050fa541db1e..c0d3ca50f17d69 100644 --- a/Lib/test/_test_multiprocessing.py +++ b/Lib/test/_test_multiprocessing.py @@ -2705,6 +2705,7 @@ def test_terminate(self): p = self.Pool(3) args = [sleep_time for i in range(10_000)] result = p.map_async(time.sleep, args, chunksize=1) + time.sleep(0.2) # give some tasks a chance to start p.terminate() p.join() From 1e4f00ebd8db44a031b61eed0803b4c3d731aed7 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Wed, 24 Jan 2024 10:23:34 +0300 Subject: [PATCH 073/160] gh-101100: Fix sphinx warnings in `asyncio-task.rst` (#114469) Co-authored-by: Serhiy Storchaka --- Doc/library/asyncio-task.rst | 33 ++++++++++++++++----------------- Doc/tools/.nitignore | 1 
- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/Doc/library/asyncio-task.rst b/Doc/library/asyncio-task.rst index 797065c8ccf894..24bd36e6431b4f 100644 --- a/Doc/library/asyncio-task.rst +++ b/Doc/library/asyncio-task.rst @@ -828,23 +828,22 @@ Waiting Primitives *return_when* indicates when this function should return. It must be one of the following constants: - .. tabularcolumns:: |l|L| - - +-----------------------------+----------------------------------------+ - | Constant | Description | - +=============================+========================================+ - | :const:`FIRST_COMPLETED` | The function will return when any | - | | future finishes or is cancelled. | - +-----------------------------+----------------------------------------+ - | :const:`FIRST_EXCEPTION` | The function will return when any | - | | future finishes by raising an | - | | exception. If no future raises an | - | | exception then it is equivalent to | - | | :const:`ALL_COMPLETED`. | - +-----------------------------+----------------------------------------+ - | :const:`ALL_COMPLETED` | The function will return when all | - | | futures finish or are cancelled. | - +-----------------------------+----------------------------------------+ + .. list-table:: + :header-rows: 1 + + * - Constant + - Description + + * - .. data:: FIRST_COMPLETED + - The function will return when any future finishes or is cancelled. + + * - .. data:: FIRST_EXCEPTION + - The function will return when any future finishes by raising an + exception. If no future raises an exception + then it is equivalent to :const:`ALL_COMPLETED`. + + * - .. data:: ALL_COMPLETED + - The function will return when all futures finish or are cancelled. Unlike :func:`~asyncio.wait_for`, ``wait()`` does not cancel the futures when a timeout occurs. 
diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 221a1f05c11e49..2114ec6dfacd7d 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -27,7 +27,6 @@ Doc/library/ast.rst Doc/library/asyncio-extending.rst Doc/library/asyncio-policy.rst Doc/library/asyncio-subprocess.rst -Doc/library/asyncio-task.rst Doc/library/bdb.rst Doc/library/collections.rst Doc/library/concurrent.futures.rst From 384429d1c0cf011dcf88d4043e0328de8b063c24 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 24 Jan 2024 12:08:31 +0000 Subject: [PATCH 074/160] GH-113710: Add a tier 2 peephole optimization pass. (GH-114487) * Convert _LOAD_CONST to inline versions * Remove PEP 523 checks --- Include/internal/pycore_uop_ids.h | 7 +++--- Include/internal/pycore_uop_metadata.h | 2 ++ Python/bytecodes.c | 4 +++ Python/executor_cases.c.h | 9 +++++++ Python/optimizer.c | 6 +++++ Python/optimizer_analysis.c | 34 ++++++++++++++++++++++++++ Python/pystate.c | 10 +++++--- 7 files changed, 66 insertions(+), 6 deletions(-) diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h index 8ee90d79a13c2f..a7056586ff04c0 100644 --- a/Include/internal/pycore_uop_ids.h +++ b/Include/internal/pycore_uop_ids.h @@ -230,9 +230,10 @@ extern "C" { #define _JUMP_TO_TOP 377 #define _SAVE_RETURN_OFFSET 378 #define _CHECK_VALIDITY 379 -#define _LOAD_CONST_INLINE_BORROW 380 -#define _INTERNAL_INCREMENT_OPT_COUNTER 381 -#define MAX_UOP_ID 381 +#define _LOAD_CONST_INLINE 380 +#define _LOAD_CONST_INLINE_BORROW 381 +#define _INTERNAL_INCREMENT_OPT_COUNTER 382 +#define MAX_UOP_ID 382 #ifdef __cplusplus } diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 9bfb4f4f3a4dea..14d3382e895cdf 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -202,6 +202,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG, [_EXIT_TRACE] = HAS_DEOPT_FLAG, [_CHECK_VALIDITY] = 
HAS_DEOPT_FLAG, + [_LOAD_CONST_INLINE] = 0, [_LOAD_CONST_INLINE_BORROW] = 0, [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, }; @@ -329,6 +330,7 @@ const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { [_LOAD_ATTR_WITH_HINT] = "_LOAD_ATTR_WITH_HINT", [_LOAD_BUILD_CLASS] = "_LOAD_BUILD_CLASS", [_LOAD_CONST] = "_LOAD_CONST", + [_LOAD_CONST_INLINE] = "_LOAD_CONST_INLINE", [_LOAD_CONST_INLINE_BORROW] = "_LOAD_CONST_INLINE_BORROW", [_LOAD_DEREF] = "_LOAD_DEREF", [_LOAD_FAST] = "_LOAD_FAST", diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 7674ff81f64cec..18749ce60ecd45 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -4070,6 +4070,10 @@ dummy_func( DEOPT_IF(!current_executor->vm_data.valid); } + op(_LOAD_CONST_INLINE, (ptr/4 -- value)) { + value = Py_NewRef(ptr); + } + op(_LOAD_CONST_INLINE_BORROW, (ptr/4 -- value)) { value = ptr; } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 2b4399b25bae2b..241b9056207715 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -3390,6 +3390,15 @@ break; } + case _LOAD_CONST_INLINE: { + PyObject *value; + PyObject *ptr = (PyObject *)CURRENT_OPERAND(); + value = Py_NewRef(ptr); + stack_pointer[0] = value; + stack_pointer += 1; + break; + } + case _LOAD_CONST_INLINE_BORROW: { PyObject *value; PyObject *ptr = (PyObject *)CURRENT_OPERAND(); diff --git a/Python/optimizer.c b/Python/optimizer.c index 1551a5ef61f892..4b6ed1781b5b78 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -588,6 +588,9 @@ translate_bytecode_to_trace( ADD_TO_TRACE(uop, oparg, operand, target); if (uop == _POP_FRAME) { TRACE_STACK_POP(); + /* Set the operand to the code object returned to, + * to assist optimization passes */ + trace[trace_length-1].operand = (uintptr_t)code; DPRINTF(2, "Returning to %s (%s:%d) at byte offset %d\n", PyUnicode_AsUTF8(code->co_qualname), @@ -629,6 +632,9 @@ translate_bytecode_to_trace( instr += _PyOpcode_Caches[_PyOpcode_Deopt[opcode]] + 1; TRACE_STACK_PUSH(); 
_Py_BloomFilter_Add(dependencies, new_code); + /* Set the operand to the callee's code object, + * to assist optimization passes */ + trace[trace_length-1].operand = (uintptr_t)new_code; code = new_code; instr = _PyCode_CODE(code); DPRINTF(2, diff --git a/Python/optimizer_analysis.c b/Python/optimizer_analysis.c index 7db51f0d90a453..d1225997e10be2 100644 --- a/Python/optimizer_analysis.c +++ b/Python/optimizer_analysis.c @@ -12,6 +12,39 @@ #include #include "pycore_optimizer.h" +static void +peephole_opt(PyCodeObject *co, _PyUOpInstruction *buffer, int buffer_size) +{ + for (int pc = 0; pc < buffer_size; pc++) { + int opcode = buffer[pc].opcode; + switch(opcode) { + case _LOAD_CONST: { + assert(co != NULL); + PyObject *val = PyTuple_GET_ITEM(co->co_consts, buffer[pc].oparg); + buffer[pc].opcode = _Py_IsImmortal(val) ? _LOAD_CONST_INLINE_BORROW : _LOAD_CONST_INLINE; + buffer[pc].operand = (uintptr_t)val; + break; + } + case _CHECK_PEP_523: + { + /* Setting the eval frame function invalidates + * all executors, so no need to check dynamically */ + if (_PyInterpreterState_GET()->eval_frame == NULL) { + buffer[pc].opcode = _NOP; + } + break; + } + case _PUSH_FRAME: + case _POP_FRAME: + co = (PyCodeObject *)buffer[pc].operand; + break; + case _JUMP_TO_TOP: + case _EXIT_TRACE: + return; + } + } +} + static void remove_unneeded_uops(_PyUOpInstruction *buffer, int buffer_size) { @@ -59,6 +92,7 @@ _Py_uop_analyze_and_optimize( int curr_stacklen ) { + peephole_opt(co, buffer, buffer_size); remove_unneeded_uops(buffer, buffer_size); return 0; } diff --git a/Python/pystate.c b/Python/pystate.c index 23ddc781434ac8..548c77b7dc7ebb 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -2608,11 +2608,15 @@ _PyInterpreterState_SetEvalFrameFunc(PyInterpreterState *interp, _PyFrameEvalFunction eval_frame) { if (eval_frame == _PyEval_EvalFrameDefault) { - interp->eval_frame = NULL; + eval_frame = NULL; } - else { - interp->eval_frame = eval_frame; + if (eval_frame == 
interp->eval_frame) { + return; + } + if (eval_frame != NULL) { + _Py_Executors_InvalidateAll(interp); } + interp->eval_frame = eval_frame; } From 8744ecf5896ccf57875574a9aed46369b8d48dc1 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Wed, 24 Jan 2024 16:30:50 +0300 Subject: [PATCH 075/160] gh-101100: Fix sphinx warnings in `concurrent.futures.rst` (#114521) --- Doc/library/concurrent.futures.rst | 42 ++++++++++++++++-------------- Doc/tools/.nitignore | 1 - 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/Doc/library/concurrent.futures.rst b/Doc/library/concurrent.futures.rst index 800c7f6739d8a3..d3c7a40aa9d390 100644 --- a/Doc/library/concurrent.futures.rst +++ b/Doc/library/concurrent.futures.rst @@ -275,7 +275,8 @@ to a :class:`ProcessPoolExecutor` will result in deadlock. .. versionchanged:: 3.3 When one of the worker processes terminates abruptly, a - :exc:`BrokenProcessPool` error is now raised. Previously, behaviour + :exc:`~concurrent.futures.process.BrokenProcessPool` error is now raised. + Previously, behaviour was undefined but operations on the executor or its futures would often freeze or deadlock. @@ -493,23 +494,22 @@ Module Functions *return_when* indicates when this function should return. It must be one of the following constants: - .. tabularcolumns:: |l|L| - - +-----------------------------+----------------------------------------+ - | Constant | Description | - +=============================+========================================+ - | :const:`FIRST_COMPLETED` | The function will return when any | - | | future finishes or is cancelled. | - +-----------------------------+----------------------------------------+ - | :const:`FIRST_EXCEPTION` | The function will return when any | - | | future finishes by raising an | - | | exception. If no future raises an | - | | exception then it is equivalent to | - | | :const:`ALL_COMPLETED`. 
| - +-----------------------------+----------------------------------------+ - | :const:`ALL_COMPLETED` | The function will return when all | - | | futures finish or are cancelled. | - +-----------------------------+----------------------------------------+ + .. list-table:: + :header-rows: 1 + + * - Constant + - Description + + * - .. data:: FIRST_COMPLETED + - The function will return when any future finishes or is cancelled. + + * - .. data:: FIRST_EXCEPTION + - The function will return when any future finishes by raising an + exception. If no future raises an exception + then it is equivalent to :const:`ALL_COMPLETED`. + + * - .. data:: ALL_COMPLETED + - The function will return when all futures finish or are cancelled. .. function:: as_completed(fs, timeout=None) @@ -570,7 +570,8 @@ Exception classes .. exception:: BrokenThreadPool Derived from :exc:`~concurrent.futures.BrokenExecutor`, this exception - class is raised when one of the workers of a :class:`ThreadPoolExecutor` + class is raised when one of the workers + of a :class:`~concurrent.futures.ThreadPoolExecutor` has failed initializing. .. versionadded:: 3.7 @@ -581,7 +582,8 @@ Exception classes Derived from :exc:`~concurrent.futures.BrokenExecutor` (formerly :exc:`RuntimeError`), this exception class is raised when one of the - workers of a :class:`ProcessPoolExecutor` has terminated in a non-clean + workers of a :class:`~concurrent.futures.ProcessPoolExecutor` + has terminated in a non-clean fashion (for example, if it was killed from the outside). .. 
versionadded:: 3.3 diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 2114ec6dfacd7d..6778e57c272ffb 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -29,7 +29,6 @@ Doc/library/asyncio-policy.rst Doc/library/asyncio-subprocess.rst Doc/library/bdb.rst Doc/library/collections.rst -Doc/library/concurrent.futures.rst Doc/library/csv.rst Doc/library/datetime.rst Doc/library/dbm.rst From 51d9068ede41d49e86c9637960f212e2a0f07f4c Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Wed, 24 Jan 2024 15:40:09 +0200 Subject: [PATCH 076/160] gh-101100: Fix Sphinx warnings in `c-api/structures.rst` (#113564) Co-authored-by: Hugo van Kemenade Co-authored-by: Petr Viktorin --- Doc/c-api/structures.rst | 19 ++++++++++--------- Doc/tools/.nitignore | 1 - 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Doc/c-api/structures.rst b/Doc/c-api/structures.rst index 86c779472fd244..0032da9659636c 100644 --- a/Doc/c-api/structures.rst +++ b/Doc/c-api/structures.rst @@ -551,11 +551,11 @@ The following flags can be used with :c:member:`PyMemberDef.flags`: from ``PyObject``. Can only be used as part of :c:member:`Py_tp_members ` - :c:type:`slot ` when creating a class using negative + :c:type:`slot ` when creating a class using negative :c:member:`~PyType_Spec.basicsize`. It is mandatory in that case. - This flag is only used in :c:type:`PyTypeSlot`. + This flag is only used in :c:type:`PyType_Slot`. When setting :c:member:`~PyTypeObject.tp_members` during class creation, Python clears it and sets :c:member:`PyMemberDef.offset` to the offset from the ``PyObject`` struct. @@ -693,7 +693,8 @@ Defining Getters and Setters .. c:member:: setter set - Optional C function to set or delete the attribute, if omitted the attribute is readonly. + Optional C function to set or delete the attribute. + If ``NULL``, the attribute is read-only. .. 
c:member:: const char* doc @@ -703,18 +704,18 @@ Defining Getters and Setters Optional function pointer, providing additional data for getter and setter. - The ``get`` function takes one :c:expr:`PyObject*` parameter (the - instance) and a function pointer (the associated ``closure``):: +.. c:type:: PyObject *(*getter)(PyObject *, void *) - typedef PyObject *(*getter)(PyObject *, void *); + The ``get`` function takes one :c:expr:`PyObject*` parameter (the + instance) and a function pointer (the associated ``closure``): It should return a new reference on success or ``NULL`` with a set exception on failure. - ``set`` functions take two :c:expr:`PyObject*` parameters (the instance and - the value to be set) and a function pointer (the associated ``closure``):: +.. c:type:: int (*setter)(PyObject *, PyObject *, void *) - typedef int (*setter)(PyObject *, PyObject *, void *); + ``set`` functions take two :c:expr:`PyObject*` parameters (the instance and + the value to be set) and a function pointer (the associated ``closure``): In case the attribute should be deleted the second parameter is ``NULL``. Should return ``0`` on success or ``-1`` with a set exception on failure. 
diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 6778e57c272ffb..00b4b6919ff14a 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -14,7 +14,6 @@ Doc/c-api/memoryview.rst Doc/c-api/module.rst Doc/c-api/object.rst Doc/c-api/stable.rst -Doc/c-api/structures.rst Doc/c-api/sys.rst Doc/c-api/type.rst Doc/c-api/typeobj.rst From 127a49785247ac8af158b18e38b722e520054d71 Mon Sep 17 00:00:00 2001 From: Thomas Grainger Date: Wed, 24 Jan 2024 14:24:00 +0000 Subject: [PATCH 077/160] gh-104360: remove reference to removed module-level wrap_socket (GH-104361) * remove reference to removed module-level wrap_socket * drive by typo fix --- Doc/library/ssl.rst | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Doc/library/ssl.rst b/Doc/library/ssl.rst index e8709b516ae07a..f9648fa6744bdc 100644 --- a/Doc/library/ssl.rst +++ b/Doc/library/ssl.rst @@ -2574,12 +2574,8 @@ provided. :exc:`SSLWantReadError` if it needs more data than the incoming BIO has available. - - There is no module-level ``wrap_bio()`` call like there is for - :meth:`~SSLContext.wrap_socket`. An :class:`SSLObject` is always created - via an :class:`SSLContext`. - .. versionchanged:: 3.7 - :class:`SSLObject` instances must to created with + :class:`SSLObject` instances must be created with :meth:`~SSLContext.wrap_bio`. In earlier versions, it was possible to create instances directly. This was never documented or officially supported. From 6fadd68da5dd928847264b17f62a5b8b369c1c1e Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Wed, 24 Jan 2024 16:06:14 +0100 Subject: [PATCH 078/160] Docs: mark up the FTP_TLS() docs with param list (#114510) Also turn sentence about prot_p() into a note. 
--- Doc/library/ftplib.rst | 56 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 46 insertions(+), 10 deletions(-) diff --git a/Doc/library/ftplib.rst b/Doc/library/ftplib.rst index e93a1e85598e3a..2f98a272c297ae 100644 --- a/Doc/library/ftplib.rst +++ b/Doc/library/ftplib.rst @@ -78,6 +78,9 @@ FTP objects A 2-tuple ``(host, port)`` for the socket to bind to as its source address before connecting. +.. |param_doc_encoding| replace:: + The encoding for directories and filenames (default: ``'utf-8'``). + .. class:: FTP(host='', user='', passwd='', acct='', timeout=None, \ source_address=None, *, encoding='utf-8') @@ -108,8 +111,7 @@ FTP objects :type source_address: tuple | None :param str encoding: - The *encoding* parameter specifies the encoding - for directories and filenames. + |param_doc_encoding| The :class:`FTP` class supports the :keyword:`with` statement, e.g.: @@ -447,19 +449,53 @@ FTP_TLS objects .. class:: FTP_TLS(host='', user='', passwd='', acct='', *, context=None, \ timeout=None, source_address=None, encoding='utf-8') - A :class:`FTP` subclass which adds TLS support to FTP as described in + An :class:`FTP` subclass which adds TLS support to FTP as described in :rfc:`4217`. - Connect as usual to port 21 implicitly securing the FTP control connection - before authenticating. Securing the data connection requires the user to - explicitly ask for it by calling the :meth:`prot_p` method. *context* - is a :class:`ssl.SSLContext` object which allows bundling SSL configuration - options, certificates and private keys into a single (potentially - long-lived) structure. Please read :ref:`ssl-security` for best practices. + Connect to port 21 implicitly securing the FTP control connection + before authenticating. + + .. note:: + The user must explicitly secure the data connection + by calling the :meth:`prot_p` method. + + :param str host: + The hostname to connect to. + If given, :code:`connect(host)` is implicitly called by the constructor. 
+ + :param str user: + |param_doc_user| + If given, :code:`login(user, passwd, acct)` is implicitly called + by the constructor. + + :param str passwd: + |param_doc_passwd| + + :param str acct: + |param_doc_acct| + + :param context: + An SSL context object which allows bundling SSL configuration options, + certificates and private keys into a single, potentially long-lived, + structure. + Please read :ref:`ssl-security` for best practices. + :type context: :class:`ssl.SSLContext` + + :param timeout: + A timeout in seconds for blocking operations like :meth:`~FTP.connect` + (default: the global default timeout setting). + :type timeout: int | None + + :param source_address: + |param_doc_source_address| + :type source_address: tuple | None + + :param str encoding: + |param_doc_encoding| .. versionadded:: 3.2 .. versionchanged:: 3.3 - *source_address* parameter was added. + Added the *source_address* parameter. .. versionchanged:: 3.4 The class now supports hostname check with From 981d172f7f0613d30bef4a8934b361db7fcf0672 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 24 Jan 2024 15:10:17 +0000 Subject: [PATCH 079/160] GH-112354: `END_FOR` instruction to only pop one value. (GH-114247) * Compiler emits END_FOR; POP_TOP instead of END_FOR. To support tier 2 side exits in loops.
--- Doc/library/dis.rst | 4 +- .../pycore_global_objects_fini_generated.h | 6 + Include/internal/pycore_global_strings.h | 6 + Include/internal/pycore_opcode_metadata.h | 6 +- .../internal/pycore_runtime_init_generated.h | 6 + .../internal/pycore_unicodeobject_generated.h | 18 ++ Lib/importlib/_bootstrap_external.py | 3 +- Lib/test/test_compiler_codegen.py | 1 + Lib/test/test_dis.py | 236 +++++++++--------- ...-01-17-05-09-32.gh-issue-112354.Run9ko.rst | 2 + Programs/test_frozenmain.h | 50 ++-- Python/bytecodes.c | 24 +- Python/compile.c | 10 + Python/generated_cases.c.h | 35 +-- Python/optimizer.c | 7 +- Tools/build/generate_global_objects.py | 8 + 16 files changed, 238 insertions(+), 184 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-01-17-05-09-32.gh-issue-112354.Run9ko.rst diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst index b97d48fafab3b6..e654760fb91c65 100644 --- a/Doc/library/dis.rst +++ b/Doc/library/dis.rst @@ -546,8 +546,8 @@ operations on it as if it was a Python list. The top of the stack corresponds to .. opcode:: END_FOR - Removes the top two values from the stack. - Equivalent to ``POP_TOP``; ``POP_TOP``. + Removes the top-of-stack item. + Equivalent to ``POP_TOP``. Used to clean up at the end of loops, hence the name. .. 
versionadded:: 3.12 diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 0a24b127192c9b..e92707051c12b7 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -787,9 +787,11 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(after_in_child)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(after_in_parent)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(aggregate_class)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(alias)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(allow_code)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(append)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(argdefs)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(args)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(arguments)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(argv)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(as_integer_ratio)); @@ -913,6 +915,8 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(errors)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(event)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(eventmask)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(exc_type)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(exc_value)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(excepthook)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(exception)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(existing_file_name)); @@ -1166,6 +1170,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(seek)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(seekable)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(selectors)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(self)); 
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(send)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(sep)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(sequence)); @@ -1228,6 +1233,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(timetuple)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(top)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(trace_callback)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(traceback)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(trailers)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(translate)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(true)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index efb659c5806e6e..eb60b80c964d42 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -276,9 +276,11 @@ struct _Py_global_strings { STRUCT_FOR_ID(after_in_child) STRUCT_FOR_ID(after_in_parent) STRUCT_FOR_ID(aggregate_class) + STRUCT_FOR_ID(alias) STRUCT_FOR_ID(allow_code) STRUCT_FOR_ID(append) STRUCT_FOR_ID(argdefs) + STRUCT_FOR_ID(args) STRUCT_FOR_ID(arguments) STRUCT_FOR_ID(argv) STRUCT_FOR_ID(as_integer_ratio) @@ -402,6 +404,8 @@ struct _Py_global_strings { STRUCT_FOR_ID(errors) STRUCT_FOR_ID(event) STRUCT_FOR_ID(eventmask) + STRUCT_FOR_ID(exc_type) + STRUCT_FOR_ID(exc_value) STRUCT_FOR_ID(excepthook) STRUCT_FOR_ID(exception) STRUCT_FOR_ID(existing_file_name) @@ -655,6 +659,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(seek) STRUCT_FOR_ID(seekable) STRUCT_FOR_ID(selectors) + STRUCT_FOR_ID(self) STRUCT_FOR_ID(send) STRUCT_FOR_ID(sep) STRUCT_FOR_ID(sequence) @@ -717,6 +722,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(timetuple) STRUCT_FOR_ID(top) STRUCT_FOR_ID(trace_callback) + STRUCT_FOR_ID(traceback) STRUCT_FOR_ID(trailers) STRUCT_FOR_ID(translate) STRUCT_FOR_ID(true) diff --git a/Include/internal/pycore_opcode_metadata.h 
b/Include/internal/pycore_opcode_metadata.h index fbb448f663369a..75d7f44025328e 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -176,7 +176,7 @@ int _PyOpcode_num_popped(int opcode, int oparg) { case END_ASYNC_FOR: return 2; case END_FOR: - return 2; + return 1; case END_SEND: return 2; case ENTER_EXECUTOR: @@ -647,7 +647,7 @@ int _PyOpcode_num_pushed(int opcode, int oparg) { case INSTRUMENTED_CALL_KW: return 0; case INSTRUMENTED_END_FOR: - return 0; + return 1; case INSTRUMENTED_END_SEND: return 1; case INSTRUMENTED_FOR_ITER: @@ -1232,7 +1232,7 @@ _PyOpcode_macro_expansion[256] = { [DELETE_SUBSCR] = { .nuops = 1, .uops = { { _DELETE_SUBSCR, 0, 0 } } }, [DICT_MERGE] = { .nuops = 1, .uops = { { _DICT_MERGE, 0, 0 } } }, [DICT_UPDATE] = { .nuops = 1, .uops = { { _DICT_UPDATE, 0, 0 } } }, - [END_FOR] = { .nuops = 2, .uops = { { _POP_TOP, 0, 0 }, { _POP_TOP, 0, 0 } } }, + [END_FOR] = { .nuops = 1, .uops = { { _POP_TOP, 0, 0 } } }, [END_SEND] = { .nuops = 1, .uops = { { _END_SEND, 0, 0 } } }, [EXIT_INIT_CHECK] = { .nuops = 1, .uops = { { _EXIT_INIT_CHECK, 0, 0 } } }, [FORMAT_SIMPLE] = { .nuops = 1, .uops = { { _FORMAT_SIMPLE, 0, 0 } } }, diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index e3ebd80745e610..9b39de1d69c6c7 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -785,9 +785,11 @@ extern "C" { INIT_ID(after_in_child), \ INIT_ID(after_in_parent), \ INIT_ID(aggregate_class), \ + INIT_ID(alias), \ INIT_ID(allow_code), \ INIT_ID(append), \ INIT_ID(argdefs), \ + INIT_ID(args), \ INIT_ID(arguments), \ INIT_ID(argv), \ INIT_ID(as_integer_ratio), \ @@ -911,6 +913,8 @@ extern "C" { INIT_ID(errors), \ INIT_ID(event), \ INIT_ID(eventmask), \ + INIT_ID(exc_type), \ + INIT_ID(exc_value), \ INIT_ID(excepthook), \ INIT_ID(exception), \ INIT_ID(existing_file_name), \ @@ -1164,6 +1168,7 @@ 
extern "C" { INIT_ID(seek), \ INIT_ID(seekable), \ INIT_ID(selectors), \ + INIT_ID(self), \ INIT_ID(send), \ INIT_ID(sep), \ INIT_ID(sequence), \ @@ -1226,6 +1231,7 @@ extern "C" { INIT_ID(timetuple), \ INIT_ID(top), \ INIT_ID(trace_callback), \ + INIT_ID(traceback), \ INIT_ID(trailers), \ INIT_ID(translate), \ INIT_ID(true), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index 9fa6c896c1a328..898d386f4cfd05 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -669,6 +669,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(aggregate_class); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(alias); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(allow_code); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); @@ -678,6 +681,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(argdefs); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(args); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(arguments); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); @@ -1047,6 +1053,12 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(eventmask); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(exc_type); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(exc_value); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(excepthook); 
assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); @@ -1806,6 +1818,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(selectors); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(self); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(send); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); @@ -1992,6 +2007,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(trace_callback); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(traceback); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(trailers); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index a4d2b7e0184409..2a9aef03179f6f 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -464,6 +464,7 @@ def _write_atomic(path, data, mode=0o666): # Python 3.13a1 3565 (Oparg of YIELD_VALUE indicates whether it is in a yield-from) # Python 3.13a1 3566 (Emit JUMP_NO_INTERRUPT instead of JUMP for non-loop no-lineno cases) # Python 3.13a1 3567 (Reimplement line number propagation by the compiler) +# Python 3.13a1 3568 (Change semantics of END_FOR) # Python 3.14 will start with 3600 @@ -480,7 +481,7 @@ def _write_atomic(path, data, mode=0o666): # Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array # in PC/launcher.c must also be updated. 
-MAGIC_NUMBER = (3567).to_bytes(2, 'little') + b'\r\n' +MAGIC_NUMBER = (3568).to_bytes(2, 'little') + b'\r\n' _RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c diff --git a/Lib/test/test_compiler_codegen.py b/Lib/test/test_compiler_codegen.py index b5d1e2f9e4752c..dbeadd9ca47c63 100644 --- a/Lib/test/test_compiler_codegen.py +++ b/Lib/test/test_compiler_codegen.py @@ -49,6 +49,7 @@ def test_for_loop(self): ('JUMP', loop_lbl), exit_lbl, ('END_FOR', None), + ('POP_TOP', None), ('LOAD_CONST', 0), ('RETURN_VALUE', None), ] diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 3ae81b2f5d62b0..a5917da346dded 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -158,6 +158,7 @@ def bug708901(): %3d JUMP_BACKWARD 5 (to L1) %3d L2: END_FOR + POP_TOP RETURN_CONST 0 (None) """ % (bug708901.__code__.co_firstlineno, bug708901.__code__.co_firstlineno + 1, @@ -791,6 +792,7 @@ def foo(x): POP_TOP JUMP_BACKWARD 12 (to L2) L3: END_FOR + POP_TOP RETURN_CONST 0 (None) -- L4: CALL_INTRINSIC_1 3 (INTRINSIC_STOPITERATION_ERROR) @@ -843,6 +845,7 @@ def loop_test(): JUMP_BACKWARD 16 (to L1) %3d L2: END_FOR + POP_TOP RETURN_CONST 0 (None) """ % (loop_test.__code__.co_firstlineno, loop_test.__code__.co_firstlineno + 1, @@ -1648,122 +1651,123 @@ def _prepare_test_cases(): ] expected_opinfo_jumpy = [ - Instruction(opname='RESUME', opcode=149, arg=0, argval=0, argrepr='', offset=0, start_offset=0, starts_line=True, line_number=1, label=None, positions=None), - Instruction(opname='LOAD_GLOBAL', opcode=91, arg=1, argval='range', argrepr='range + NULL', offset=2, start_offset=2, starts_line=True, line_number=3, label=None, positions=None), - Instruction(opname='LOAD_CONST', opcode=83, arg=1, argval=10, argrepr='10', offset=12, start_offset=12, starts_line=False, line_number=3, label=None, positions=None), - Instruction(opname='CALL', opcode=53, arg=1, argval=1, argrepr='', offset=14, start_offset=14, starts_line=False, line_number=3, label=None, 
positions=None), - Instruction(opname='GET_ITER', opcode=19, arg=None, argval=None, argrepr='', offset=22, start_offset=22, starts_line=False, line_number=3, label=None, positions=None), - Instruction(opname='FOR_ITER', opcode=72, arg=30, argval=88, argrepr='to L4', offset=24, start_offset=24, starts_line=False, line_number=3, label=1, positions=None), - Instruction(opname='STORE_FAST', opcode=110, arg=0, argval='i', argrepr='i', offset=28, start_offset=28, starts_line=False, line_number=3, label=None, positions=None), - Instruction(opname='LOAD_GLOBAL', opcode=91, arg=3, argval='print', argrepr='print + NULL', offset=30, start_offset=30, starts_line=True, line_number=4, label=None, positions=None), - Instruction(opname='LOAD_FAST', opcode=85, arg=0, argval='i', argrepr='i', offset=40, start_offset=40, starts_line=False, line_number=4, label=None, positions=None), - Instruction(opname='CALL', opcode=53, arg=1, argval=1, argrepr='', offset=42, start_offset=42, starts_line=False, line_number=4, label=None, positions=None), - Instruction(opname='POP_TOP', opcode=32, arg=None, argval=None, argrepr='', offset=50, start_offset=50, starts_line=False, line_number=4, label=None, positions=None), - Instruction(opname='LOAD_FAST', opcode=85, arg=0, argval='i', argrepr='i', offset=52, start_offset=52, starts_line=True, line_number=5, label=None, positions=None), - Instruction(opname='LOAD_CONST', opcode=83, arg=2, argval=4, argrepr='4', offset=54, start_offset=54, starts_line=False, line_number=5, label=None, positions=None), - Instruction(opname='COMPARE_OP', opcode=58, arg=18, argval='<', argrepr='bool(<)', offset=56, start_offset=56, starts_line=False, line_number=5, label=None, positions=None), - Instruction(opname='POP_JUMP_IF_FALSE', opcode=97, arg=2, argval=68, argrepr='to L2', offset=60, start_offset=60, starts_line=False, line_number=5, label=None, positions=None), - Instruction(opname='JUMP_BACKWARD', opcode=77, arg=22, argval=24, argrepr='to L1', offset=64, 
start_offset=64, starts_line=True, line_number=6, label=None, positions=None), - Instruction(opname='LOAD_FAST', opcode=85, arg=0, argval='i', argrepr='i', offset=68, start_offset=68, starts_line=True, line_number=7, label=2, positions=None), - Instruction(opname='LOAD_CONST', opcode=83, arg=3, argval=6, argrepr='6', offset=70, start_offset=70, starts_line=False, line_number=7, label=None, positions=None), - Instruction(opname='COMPARE_OP', opcode=58, arg=148, argval='>', argrepr='bool(>)', offset=72, start_offset=72, starts_line=False, line_number=7, label=None, positions=None), - Instruction(opname='POP_JUMP_IF_TRUE', opcode=100, arg=2, argval=84, argrepr='to L3', offset=76, start_offset=76, starts_line=False, line_number=7, label=None, positions=None), - Instruction(opname='JUMP_BACKWARD', opcode=77, arg=30, argval=24, argrepr='to L1', offset=80, start_offset=80, starts_line=False, line_number=7, label=None, positions=None), - Instruction(opname='POP_TOP', opcode=32, arg=None, argval=None, argrepr='', offset=84, start_offset=84, starts_line=True, line_number=8, label=3, positions=None), - Instruction(opname='JUMP_FORWARD', opcode=79, arg=12, argval=112, argrepr='to L5', offset=86, start_offset=86, starts_line=False, line_number=8, label=None, positions=None), - Instruction(opname='END_FOR', opcode=11, arg=None, argval=None, argrepr='', offset=88, start_offset=88, starts_line=True, line_number=3, label=4, positions=None), - Instruction(opname='LOAD_GLOBAL', opcode=91, arg=3, argval='print', argrepr='print + NULL', offset=90, start_offset=90, starts_line=True, line_number=10, label=None, positions=None), - Instruction(opname='LOAD_CONST', opcode=83, arg=4, argval='I can haz else clause?', argrepr="'I can haz else clause?'", offset=100, start_offset=100, starts_line=False, line_number=10, label=None, positions=None), - Instruction(opname='CALL', opcode=53, arg=1, argval=1, argrepr='', offset=102, start_offset=102, starts_line=False, line_number=10, label=None, 
positions=None), - Instruction(opname='POP_TOP', opcode=32, arg=None, argval=None, argrepr='', offset=110, start_offset=110, starts_line=False, line_number=10, label=None, positions=None), - Instruction(opname='LOAD_FAST_CHECK', opcode=87, arg=0, argval='i', argrepr='i', offset=112, start_offset=112, starts_line=True, line_number=11, label=5, positions=None), - Instruction(opname='TO_BOOL', opcode=40, arg=None, argval=None, argrepr='', offset=114, start_offset=114, starts_line=False, line_number=11, label=None, positions=None), - Instruction(opname='POP_JUMP_IF_FALSE', opcode=97, arg=40, argval=206, argrepr='to L9', offset=122, start_offset=122, starts_line=False, line_number=11, label=None, positions=None), - Instruction(opname='LOAD_GLOBAL', opcode=91, arg=3, argval='print', argrepr='print + NULL', offset=126, start_offset=126, starts_line=True, line_number=12, label=6, positions=None), - Instruction(opname='LOAD_FAST', opcode=85, arg=0, argval='i', argrepr='i', offset=136, start_offset=136, starts_line=False, line_number=12, label=None, positions=None), - Instruction(opname='CALL', opcode=53, arg=1, argval=1, argrepr='', offset=138, start_offset=138, starts_line=False, line_number=12, label=None, positions=None), - Instruction(opname='POP_TOP', opcode=32, arg=None, argval=None, argrepr='', offset=146, start_offset=146, starts_line=False, line_number=12, label=None, positions=None), - Instruction(opname='LOAD_FAST', opcode=85, arg=0, argval='i', argrepr='i', offset=148, start_offset=148, starts_line=True, line_number=13, label=None, positions=None), - Instruction(opname='LOAD_CONST', opcode=83, arg=5, argval=1, argrepr='1', offset=150, start_offset=150, starts_line=False, line_number=13, label=None, positions=None), - Instruction(opname='BINARY_OP', opcode=45, arg=23, argval=23, argrepr='-=', offset=152, start_offset=152, starts_line=False, line_number=13, label=None, positions=None), - Instruction(opname='STORE_FAST', opcode=110, arg=0, argval='i', argrepr='i', 
offset=156, start_offset=156, starts_line=False, line_number=13, label=None, positions=None), - Instruction(opname='LOAD_FAST', opcode=85, arg=0, argval='i', argrepr='i', offset=158, start_offset=158, starts_line=True, line_number=14, label=None, positions=None), - Instruction(opname='LOAD_CONST', opcode=83, arg=3, argval=6, argrepr='6', offset=160, start_offset=160, starts_line=False, line_number=14, label=None, positions=None), - Instruction(opname='COMPARE_OP', opcode=58, arg=148, argval='>', argrepr='bool(>)', offset=162, start_offset=162, starts_line=False, line_number=14, label=None, positions=None), - Instruction(opname='POP_JUMP_IF_FALSE', opcode=97, arg=2, argval=174, argrepr='to L7', offset=166, start_offset=166, starts_line=False, line_number=14, label=None, positions=None), - Instruction(opname='JUMP_BACKWARD', opcode=77, arg=31, argval=112, argrepr='to L5', offset=170, start_offset=170, starts_line=True, line_number=15, label=None, positions=None), - Instruction(opname='LOAD_FAST', opcode=85, arg=0, argval='i', argrepr='i', offset=174, start_offset=174, starts_line=True, line_number=16, label=7, positions=None), - Instruction(opname='LOAD_CONST', opcode=83, arg=2, argval=4, argrepr='4', offset=176, start_offset=176, starts_line=False, line_number=16, label=None, positions=None), - Instruction(opname='COMPARE_OP', opcode=58, arg=18, argval='<', argrepr='bool(<)', offset=178, start_offset=178, starts_line=False, line_number=16, label=None, positions=None), - Instruction(opname='POP_JUMP_IF_FALSE', opcode=97, arg=1, argval=188, argrepr='to L8', offset=182, start_offset=182, starts_line=False, line_number=16, label=None, positions=None), - Instruction(opname='JUMP_FORWARD', opcode=79, arg=20, argval=228, argrepr='to L10', offset=186, start_offset=186, starts_line=True, line_number=17, label=None, positions=None), - Instruction(opname='LOAD_FAST', opcode=85, arg=0, argval='i', argrepr='i', offset=188, start_offset=188, starts_line=True, line_number=11, 
label=8, positions=None), - Instruction(opname='TO_BOOL', opcode=40, arg=None, argval=None, argrepr='', offset=190, start_offset=190, starts_line=False, line_number=11, label=None, positions=None), - Instruction(opname='POP_JUMP_IF_FALSE', opcode=97, arg=2, argval=206, argrepr='to L9', offset=198, start_offset=198, starts_line=False, line_number=11, label=None, positions=None), - Instruction(opname='JUMP_BACKWARD', opcode=77, arg=40, argval=126, argrepr='to L6', offset=202, start_offset=202, starts_line=False, line_number=11, label=None, positions=None), - Instruction(opname='LOAD_GLOBAL', opcode=91, arg=3, argval='print', argrepr='print + NULL', offset=206, start_offset=206, starts_line=True, line_number=19, label=9, positions=None), - Instruction(opname='LOAD_CONST', opcode=83, arg=6, argval='Who let lolcatz into this test suite?', argrepr="'Who let lolcatz into this test suite?'", offset=216, start_offset=216, starts_line=False, line_number=19, label=None, positions=None), - Instruction(opname='CALL', opcode=53, arg=1, argval=1, argrepr='', offset=218, start_offset=218, starts_line=False, line_number=19, label=None, positions=None), - Instruction(opname='POP_TOP', opcode=32, arg=None, argval=None, argrepr='', offset=226, start_offset=226, starts_line=False, line_number=19, label=None, positions=None), - Instruction(opname='NOP', opcode=30, arg=None, argval=None, argrepr='', offset=228, start_offset=228, starts_line=True, line_number=20, label=10, positions=None), - Instruction(opname='LOAD_CONST', opcode=83, arg=5, argval=1, argrepr='1', offset=230, start_offset=230, starts_line=True, line_number=21, label=None, positions=None), - Instruction(opname='LOAD_CONST', opcode=83, arg=7, argval=0, argrepr='0', offset=232, start_offset=232, starts_line=False, line_number=21, label=None, positions=None), - Instruction(opname='BINARY_OP', opcode=45, arg=11, argval=11, argrepr='/', offset=234, start_offset=234, starts_line=False, line_number=21, label=None, 
positions=None), - Instruction(opname='POP_TOP', opcode=32, arg=None, argval=None, argrepr='', offset=238, start_offset=238, starts_line=False, line_number=21, label=None, positions=None), - Instruction(opname='LOAD_FAST', opcode=85, arg=0, argval='i', argrepr='i', offset=240, start_offset=240, starts_line=True, line_number=25, label=None, positions=None), - Instruction(opname='BEFORE_WITH', opcode=2, arg=None, argval=None, argrepr='', offset=242, start_offset=242, starts_line=False, line_number=25, label=None, positions=None), - Instruction(opname='STORE_FAST', opcode=110, arg=1, argval='dodgy', argrepr='dodgy', offset=244, start_offset=244, starts_line=False, line_number=25, label=None, positions=None), - Instruction(opname='LOAD_GLOBAL', opcode=91, arg=3, argval='print', argrepr='print + NULL', offset=246, start_offset=246, starts_line=True, line_number=26, label=None, positions=None), - Instruction(opname='LOAD_CONST', opcode=83, arg=8, argval='Never reach this', argrepr="'Never reach this'", offset=256, start_offset=256, starts_line=False, line_number=26, label=None, positions=None), - Instruction(opname='CALL', opcode=53, arg=1, argval=1, argrepr='', offset=258, start_offset=258, starts_line=False, line_number=26, label=None, positions=None), - Instruction(opname='POP_TOP', opcode=32, arg=None, argval=None, argrepr='', offset=266, start_offset=266, starts_line=False, line_number=26, label=None, positions=None), - Instruction(opname='LOAD_CONST', opcode=83, arg=0, argval=None, argrepr='None', offset=268, start_offset=268, starts_line=True, line_number=25, label=None, positions=None), - Instruction(opname='LOAD_CONST', opcode=83, arg=0, argval=None, argrepr='None', offset=270, start_offset=270, starts_line=False, line_number=25, label=None, positions=None), - Instruction(opname='LOAD_CONST', opcode=83, arg=0, argval=None, argrepr='None', offset=272, start_offset=272, starts_line=False, line_number=25, label=None, positions=None), - Instruction(opname='CALL', 
opcode=53, arg=2, argval=2, argrepr='', offset=274, start_offset=274, starts_line=False, line_number=25, label=None, positions=None), - Instruction(opname='POP_TOP', opcode=32, arg=None, argval=None, argrepr='', offset=282, start_offset=282, starts_line=False, line_number=25, label=None, positions=None), - Instruction(opname='LOAD_GLOBAL', opcode=91, arg=3, argval='print', argrepr='print + NULL', offset=284, start_offset=284, starts_line=True, line_number=28, label=11, positions=None), - Instruction(opname='LOAD_CONST', opcode=83, arg=10, argval="OK, now we're done", argrepr='"OK, now we\'re done"', offset=294, start_offset=294, starts_line=False, line_number=28, label=None, positions=None), - Instruction(opname='CALL', opcode=53, arg=1, argval=1, argrepr='', offset=296, start_offset=296, starts_line=False, line_number=28, label=None, positions=None), - Instruction(opname='POP_TOP', opcode=32, arg=None, argval=None, argrepr='', offset=304, start_offset=304, starts_line=False, line_number=28, label=None, positions=None), - Instruction(opname='RETURN_CONST', opcode=103, arg=0, argval=None, argrepr='None', offset=306, start_offset=306, starts_line=False, line_number=28, label=None, positions=None), - Instruction(opname='PUSH_EXC_INFO', opcode=33, arg=None, argval=None, argrepr='', offset=308, start_offset=308, starts_line=True, line_number=25, label=None, positions=None), - Instruction(opname='WITH_EXCEPT_START', opcode=44, arg=None, argval=None, argrepr='', offset=310, start_offset=310, starts_line=False, line_number=25, label=None, positions=None), - Instruction(opname='TO_BOOL', opcode=40, arg=None, argval=None, argrepr='', offset=312, start_offset=312, starts_line=False, line_number=25, label=None, positions=None), - Instruction(opname='POP_JUMP_IF_TRUE', opcode=100, arg=1, argval=326, argrepr='to L12', offset=320, start_offset=320, starts_line=False, line_number=25, label=None, positions=None), - Instruction(opname='RERAISE', opcode=102, arg=2, argval=2, 
argrepr='', offset=324, start_offset=324, starts_line=False, line_number=25, label=None, positions=None), - Instruction(opname='POP_TOP', opcode=32, arg=None, argval=None, argrepr='', offset=326, start_offset=326, starts_line=False, line_number=25, label=12, positions=None), - Instruction(opname='POP_EXCEPT', opcode=31, arg=None, argval=None, argrepr='', offset=328, start_offset=328, starts_line=False, line_number=25, label=None, positions=None), - Instruction(opname='POP_TOP', opcode=32, arg=None, argval=None, argrepr='', offset=330, start_offset=330, starts_line=False, line_number=25, label=None, positions=None), - Instruction(opname='POP_TOP', opcode=32, arg=None, argval=None, argrepr='', offset=332, start_offset=332, starts_line=False, line_number=25, label=None, positions=None), - Instruction(opname='JUMP_BACKWARD_NO_INTERRUPT', opcode=78, arg=26, argval=284, argrepr='to L11', offset=334, start_offset=334, starts_line=False, line_number=25, label=None, positions=None), - Instruction(opname='COPY', opcode=61, arg=3, argval=3, argrepr='', offset=336, start_offset=336, starts_line=True, line_number=None, label=None, positions=None), - Instruction(opname='POP_EXCEPT', opcode=31, arg=None, argval=None, argrepr='', offset=338, start_offset=338, starts_line=False, line_number=None, label=None, positions=None), - Instruction(opname='RERAISE', opcode=102, arg=1, argval=1, argrepr='', offset=340, start_offset=340, starts_line=False, line_number=None, label=None, positions=None), - Instruction(opname='PUSH_EXC_INFO', opcode=33, arg=None, argval=None, argrepr='', offset=342, start_offset=342, starts_line=False, line_number=None, label=None, positions=None), - Instruction(opname='LOAD_GLOBAL', opcode=91, arg=4, argval='ZeroDivisionError', argrepr='ZeroDivisionError', offset=344, start_offset=344, starts_line=True, line_number=22, label=None, positions=None), - Instruction(opname='CHECK_EXC_MATCH', opcode=7, arg=None, argval=None, argrepr='', offset=354, start_offset=354, 
starts_line=False, line_number=22, label=None, positions=None), - Instruction(opname='POP_JUMP_IF_FALSE', opcode=97, arg=14, argval=388, argrepr='to L13', offset=356, start_offset=356, starts_line=False, line_number=22, label=None, positions=None), - Instruction(opname='POP_TOP', opcode=32, arg=None, argval=None, argrepr='', offset=360, start_offset=360, starts_line=False, line_number=22, label=None, positions=None), - Instruction(opname='LOAD_GLOBAL', opcode=91, arg=3, argval='print', argrepr='print + NULL', offset=362, start_offset=362, starts_line=True, line_number=23, label=None, positions=None), - Instruction(opname='LOAD_CONST', opcode=83, arg=9, argval='Here we go, here we go, here we go...', argrepr="'Here we go, here we go, here we go...'", offset=372, start_offset=372, starts_line=False, line_number=23, label=None, positions=None), - Instruction(opname='CALL', opcode=53, arg=1, argval=1, argrepr='', offset=374, start_offset=374, starts_line=False, line_number=23, label=None, positions=None), - Instruction(opname='POP_TOP', opcode=32, arg=None, argval=None, argrepr='', offset=382, start_offset=382, starts_line=False, line_number=23, label=None, positions=None), - Instruction(opname='POP_EXCEPT', opcode=31, arg=None, argval=None, argrepr='', offset=384, start_offset=384, starts_line=False, line_number=23, label=None, positions=None), - Instruction(opname='JUMP_BACKWARD_NO_INTERRUPT', opcode=78, arg=52, argval=284, argrepr='to L11', offset=386, start_offset=386, starts_line=False, line_number=23, label=None, positions=None), - Instruction(opname='RERAISE', opcode=102, arg=0, argval=0, argrepr='', offset=388, start_offset=388, starts_line=True, line_number=22, label=13, positions=None), - Instruction(opname='COPY', opcode=61, arg=3, argval=3, argrepr='', offset=390, start_offset=390, starts_line=True, line_number=None, label=None, positions=None), - Instruction(opname='POP_EXCEPT', opcode=31, arg=None, argval=None, argrepr='', offset=392, start_offset=392, 
starts_line=False, line_number=None, label=None, positions=None), - Instruction(opname='RERAISE', opcode=102, arg=1, argval=1, argrepr='', offset=394, start_offset=394, starts_line=False, line_number=None, label=None, positions=None), - Instruction(opname='PUSH_EXC_INFO', opcode=33, arg=None, argval=None, argrepr='', offset=396, start_offset=396, starts_line=False, line_number=None, label=None, positions=None), - Instruction(opname='LOAD_GLOBAL', opcode=91, arg=3, argval='print', argrepr='print + NULL', offset=398, start_offset=398, starts_line=True, line_number=28, label=None, positions=None), - Instruction(opname='LOAD_CONST', opcode=83, arg=10, argval="OK, now we're done", argrepr='"OK, now we\'re done"', offset=408, start_offset=408, starts_line=False, line_number=28, label=None, positions=None), - Instruction(opname='CALL', opcode=53, arg=1, argval=1, argrepr='', offset=410, start_offset=410, starts_line=False, line_number=28, label=None, positions=None), - Instruction(opname='POP_TOP', opcode=32, arg=None, argval=None, argrepr='', offset=418, start_offset=418, starts_line=False, line_number=28, label=None, positions=None), - Instruction(opname='RERAISE', opcode=102, arg=0, argval=0, argrepr='', offset=420, start_offset=420, starts_line=False, line_number=28, label=None, positions=None), - Instruction(opname='COPY', opcode=61, arg=3, argval=3, argrepr='', offset=422, start_offset=422, starts_line=True, line_number=None, label=None, positions=None), - Instruction(opname='POP_EXCEPT', opcode=31, arg=None, argval=None, argrepr='', offset=424, start_offset=424, starts_line=False, line_number=None, label=None, positions=None), - Instruction(opname='RERAISE', opcode=102, arg=1, argval=1, argrepr='', offset=426, start_offset=426, starts_line=False, line_number=None, label=None, positions=None), + Instruction(opname='RESUME', opcode=149, arg=0, argval=0, argrepr='', offset=0, start_offset=0, starts_line=True, line_number=1, label=None, positions=None, 
cache_info=None), + Instruction(opname='LOAD_GLOBAL', opcode=91, arg=1, argval='range', argrepr='range + NULL', offset=2, start_offset=2, starts_line=True, line_number=3, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), + Instruction(opname='LOAD_CONST', opcode=83, arg=1, argval=10, argrepr='10', offset=12, start_offset=12, starts_line=False, line_number=3, label=None, positions=None, cache_info=None), + Instruction(opname='CALL', opcode=53, arg=1, argval=1, argrepr='', offset=14, start_offset=14, starts_line=False, line_number=3, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), + Instruction(opname='GET_ITER', opcode=19, arg=None, argval=None, argrepr='', offset=22, start_offset=22, starts_line=False, line_number=3, label=None, positions=None, cache_info=None), + Instruction(opname='FOR_ITER', opcode=72, arg=30, argval=88, argrepr='to L4', offset=24, start_offset=24, starts_line=False, line_number=3, label=1, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), + Instruction(opname='STORE_FAST', opcode=110, arg=0, argval='i', argrepr='i', offset=28, start_offset=28, starts_line=False, line_number=3, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_GLOBAL', opcode=91, arg=3, argval='print', argrepr='print + NULL', offset=30, start_offset=30, starts_line=True, line_number=4, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), + Instruction(opname='LOAD_FAST', opcode=85, arg=0, argval='i', argrepr='i', offset=40, start_offset=40, starts_line=False, line_number=4, label=None, positions=None, cache_info=None), + Instruction(opname='CALL', opcode=53, arg=1, argval=1, argrepr='', offset=42, start_offset=42, 
starts_line=False, line_number=4, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), + Instruction(opname='POP_TOP', opcode=32, arg=None, argval=None, argrepr='', offset=50, start_offset=50, starts_line=False, line_number=4, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_FAST', opcode=85, arg=0, argval='i', argrepr='i', offset=52, start_offset=52, starts_line=True, line_number=5, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_CONST', opcode=83, arg=2, argval=4, argrepr='4', offset=54, start_offset=54, starts_line=False, line_number=5, label=None, positions=None, cache_info=None), + Instruction(opname='COMPARE_OP', opcode=58, arg=18, argval='<', argrepr='bool(<)', offset=56, start_offset=56, starts_line=False, line_number=5, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), + Instruction(opname='POP_JUMP_IF_FALSE', opcode=97, arg=2, argval=68, argrepr='to L2', offset=60, start_offset=60, starts_line=False, line_number=5, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), + Instruction(opname='JUMP_BACKWARD', opcode=77, arg=22, argval=24, argrepr='to L1', offset=64, start_offset=64, starts_line=True, line_number=6, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), + Instruction(opname='LOAD_FAST', opcode=85, arg=0, argval='i', argrepr='i', offset=68, start_offset=68, starts_line=True, line_number=7, label=2, positions=None, cache_info=None), + Instruction(opname='LOAD_CONST', opcode=83, arg=3, argval=6, argrepr='6', offset=70, start_offset=70, starts_line=False, line_number=7, label=None, positions=None, cache_info=None), + Instruction(opname='COMPARE_OP', opcode=58, arg=148, argval='>', argrepr='bool(>)', offset=72, start_offset=72, starts_line=False, line_number=7, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), + Instruction(opname='POP_JUMP_IF_TRUE', opcode=100, 
arg=2, argval=84, argrepr='to L3', offset=76, start_offset=76, starts_line=False, line_number=7, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), + Instruction(opname='JUMP_BACKWARD', opcode=77, arg=30, argval=24, argrepr='to L1', offset=80, start_offset=80, starts_line=False, line_number=7, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), + Instruction(opname='POP_TOP', opcode=32, arg=None, argval=None, argrepr='', offset=84, start_offset=84, starts_line=True, line_number=8, label=3, positions=None, cache_info=None), + Instruction(opname='JUMP_FORWARD', opcode=79, arg=13, argval=114, argrepr='to L5', offset=86, start_offset=86, starts_line=False, line_number=8, label=None, positions=None, cache_info=None), + Instruction(opname='END_FOR', opcode=11, arg=None, argval=None, argrepr='', offset=88, start_offset=88, starts_line=True, line_number=3, label=4, positions=None, cache_info=None), + Instruction(opname='POP_TOP', opcode=32, arg=None, argval=None, argrepr='', offset=90, start_offset=90, starts_line=False, line_number=3, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_GLOBAL', opcode=91, arg=3, argval='print', argrepr='print + NULL', offset=92, start_offset=92, starts_line=True, line_number=10, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), + Instruction(opname='LOAD_CONST', opcode=83, arg=4, argval='I can haz else clause?', argrepr="'I can haz else clause?'", offset=102, start_offset=102, starts_line=False, line_number=10, label=None, positions=None, cache_info=None), + Instruction(opname='CALL', opcode=53, arg=1, argval=1, argrepr='', offset=104, start_offset=104, starts_line=False, line_number=10, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), + Instruction(opname='POP_TOP', opcode=32, arg=None, 
argval=None, argrepr='', offset=112, start_offset=112, starts_line=False, line_number=10, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_FAST_CHECK', opcode=87, arg=0, argval='i', argrepr='i', offset=114, start_offset=114, starts_line=True, line_number=11, label=5, positions=None, cache_info=None), + Instruction(opname='TO_BOOL', opcode=40, arg=None, argval=None, argrepr='', offset=116, start_offset=116, starts_line=False, line_number=11, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('version', 2, b'\x00\x00\x00\x00')]), + Instruction(opname='POP_JUMP_IF_FALSE', opcode=97, arg=40, argval=208, argrepr='to L9', offset=124, start_offset=124, starts_line=False, line_number=11, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), + Instruction(opname='LOAD_GLOBAL', opcode=91, arg=3, argval='print', argrepr='print + NULL', offset=128, start_offset=128, starts_line=True, line_number=12, label=6, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), + Instruction(opname='LOAD_FAST', opcode=85, arg=0, argval='i', argrepr='i', offset=138, start_offset=138, starts_line=False, line_number=12, label=None, positions=None, cache_info=None), + Instruction(opname='CALL', opcode=53, arg=1, argval=1, argrepr='', offset=140, start_offset=140, starts_line=False, line_number=12, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), + Instruction(opname='POP_TOP', opcode=32, arg=None, argval=None, argrepr='', offset=148, start_offset=148, starts_line=False, line_number=12, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_FAST', opcode=85, arg=0, argval='i', argrepr='i', offset=150, start_offset=150, starts_line=True, line_number=13, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_CONST', opcode=83, 
arg=5, argval=1, argrepr='1', offset=152, start_offset=152, starts_line=False, line_number=13, label=None, positions=None, cache_info=None), + Instruction(opname='BINARY_OP', opcode=45, arg=23, argval=23, argrepr='-=', offset=154, start_offset=154, starts_line=False, line_number=13, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), + Instruction(opname='STORE_FAST', opcode=110, arg=0, argval='i', argrepr='i', offset=158, start_offset=158, starts_line=False, line_number=13, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_FAST', opcode=85, arg=0, argval='i', argrepr='i', offset=160, start_offset=160, starts_line=True, line_number=14, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_CONST', opcode=83, arg=3, argval=6, argrepr='6', offset=162, start_offset=162, starts_line=False, line_number=14, label=None, positions=None, cache_info=None), + Instruction(opname='COMPARE_OP', opcode=58, arg=148, argval='>', argrepr='bool(>)', offset=164, start_offset=164, starts_line=False, line_number=14, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), + Instruction(opname='POP_JUMP_IF_FALSE', opcode=97, arg=2, argval=176, argrepr='to L7', offset=168, start_offset=168, starts_line=False, line_number=14, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), + Instruction(opname='JUMP_BACKWARD', opcode=77, arg=31, argval=114, argrepr='to L5', offset=172, start_offset=172, starts_line=True, line_number=15, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), + Instruction(opname='LOAD_FAST', opcode=85, arg=0, argval='i', argrepr='i', offset=176, start_offset=176, starts_line=True, line_number=16, label=7, positions=None, cache_info=None), + Instruction(opname='LOAD_CONST', opcode=83, arg=2, argval=4, argrepr='4', offset=178, start_offset=178, starts_line=False, line_number=16, label=None, positions=None, cache_info=None), + Instruction(opname='COMPARE_OP', 
opcode=58, arg=18, argval='<', argrepr='bool(<)', offset=180, start_offset=180, starts_line=False, line_number=16, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), + Instruction(opname='POP_JUMP_IF_FALSE', opcode=97, arg=1, argval=190, argrepr='to L8', offset=184, start_offset=184, starts_line=False, line_number=16, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), + Instruction(opname='JUMP_FORWARD', opcode=79, arg=20, argval=230, argrepr='to L10', offset=188, start_offset=188, starts_line=True, line_number=17, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_FAST', opcode=85, arg=0, argval='i', argrepr='i', offset=190, start_offset=190, starts_line=True, line_number=11, label=8, positions=None, cache_info=None), + Instruction(opname='TO_BOOL', opcode=40, arg=None, argval=None, argrepr='', offset=192, start_offset=192, starts_line=False, line_number=11, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('version', 2, b'\x00\x00\x00\x00')]), + Instruction(opname='POP_JUMP_IF_FALSE', opcode=97, arg=2, argval=208, argrepr='to L9', offset=200, start_offset=200, starts_line=False, line_number=11, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), + Instruction(opname='JUMP_BACKWARD', opcode=77, arg=40, argval=128, argrepr='to L6', offset=204, start_offset=204, starts_line=False, line_number=11, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), + Instruction(opname='LOAD_GLOBAL', opcode=91, arg=3, argval='print', argrepr='print + NULL', offset=208, start_offset=208, starts_line=True, line_number=19, label=9, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), + Instruction(opname='LOAD_CONST', opcode=83, arg=6, argval='Who let lolcatz into this test suite?', argrepr="'Who let lolcatz into this test suite?'", offset=218, 
start_offset=218, starts_line=False, line_number=19, label=None, positions=None, cache_info=None), + Instruction(opname='CALL', opcode=53, arg=1, argval=1, argrepr='', offset=220, start_offset=220, starts_line=False, line_number=19, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), + Instruction(opname='POP_TOP', opcode=32, arg=None, argval=None, argrepr='', offset=228, start_offset=228, starts_line=False, line_number=19, label=None, positions=None, cache_info=None), + Instruction(opname='NOP', opcode=30, arg=None, argval=None, argrepr='', offset=230, start_offset=230, starts_line=True, line_number=20, label=10, positions=None, cache_info=None), + Instruction(opname='LOAD_CONST', opcode=83, arg=5, argval=1, argrepr='1', offset=232, start_offset=232, starts_line=True, line_number=21, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_CONST', opcode=83, arg=7, argval=0, argrepr='0', offset=234, start_offset=234, starts_line=False, line_number=21, label=None, positions=None, cache_info=None), + Instruction(opname='BINARY_OP', opcode=45, arg=11, argval=11, argrepr='/', offset=236, start_offset=236, starts_line=False, line_number=21, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), + Instruction(opname='POP_TOP', opcode=32, arg=None, argval=None, argrepr='', offset=240, start_offset=240, starts_line=False, line_number=21, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_FAST', opcode=85, arg=0, argval='i', argrepr='i', offset=242, start_offset=242, starts_line=True, line_number=25, label=None, positions=None, cache_info=None), + Instruction(opname='BEFORE_WITH', opcode=2, arg=None, argval=None, argrepr='', offset=244, start_offset=244, starts_line=False, line_number=25, label=None, positions=None, cache_info=None), + Instruction(opname='STORE_FAST', opcode=110, arg=1, argval='dodgy', argrepr='dodgy', offset=246, start_offset=246, 
starts_line=False, line_number=25, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_GLOBAL', opcode=91, arg=3, argval='print', argrepr='print + NULL', offset=248, start_offset=248, starts_line=True, line_number=26, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), + Instruction(opname='LOAD_CONST', opcode=83, arg=8, argval='Never reach this', argrepr="'Never reach this'", offset=258, start_offset=258, starts_line=False, line_number=26, label=None, positions=None, cache_info=None), + Instruction(opname='CALL', opcode=53, arg=1, argval=1, argrepr='', offset=260, start_offset=260, starts_line=False, line_number=26, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), + Instruction(opname='POP_TOP', opcode=32, arg=None, argval=None, argrepr='', offset=268, start_offset=268, starts_line=False, line_number=26, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_CONST', opcode=83, arg=0, argval=None, argrepr='None', offset=270, start_offset=270, starts_line=True, line_number=25, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_CONST', opcode=83, arg=0, argval=None, argrepr='None', offset=272, start_offset=272, starts_line=False, line_number=25, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_CONST', opcode=83, arg=0, argval=None, argrepr='None', offset=274, start_offset=274, starts_line=False, line_number=25, label=None, positions=None, cache_info=None), + Instruction(opname='CALL', opcode=53, arg=2, argval=2, argrepr='', offset=276, start_offset=276, starts_line=False, line_number=25, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), + Instruction(opname='POP_TOP', opcode=32, arg=None, argval=None, argrepr='', offset=284, 
start_offset=284, starts_line=False, line_number=25, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_GLOBAL', opcode=91, arg=3, argval='print', argrepr='print + NULL', offset=286, start_offset=286, starts_line=True, line_number=28, label=11, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), + Instruction(opname='LOAD_CONST', opcode=83, arg=10, argval="OK, now we're done", argrepr='"OK, now we\'re done"', offset=296, start_offset=296, starts_line=False, line_number=28, label=None, positions=None, cache_info=None), + Instruction(opname='CALL', opcode=53, arg=1, argval=1, argrepr='', offset=298, start_offset=298, starts_line=False, line_number=28, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), + Instruction(opname='POP_TOP', opcode=32, arg=None, argval=None, argrepr='', offset=306, start_offset=306, starts_line=False, line_number=28, label=None, positions=None, cache_info=None), + Instruction(opname='RETURN_CONST', opcode=103, arg=0, argval=None, argrepr='None', offset=308, start_offset=308, starts_line=False, line_number=28, label=None, positions=None, cache_info=None), + Instruction(opname='PUSH_EXC_INFO', opcode=33, arg=None, argval=None, argrepr='', offset=310, start_offset=310, starts_line=True, line_number=25, label=None, positions=None, cache_info=None), + Instruction(opname='WITH_EXCEPT_START', opcode=44, arg=None, argval=None, argrepr='', offset=312, start_offset=312, starts_line=False, line_number=25, label=None, positions=None, cache_info=None), + Instruction(opname='TO_BOOL', opcode=40, arg=None, argval=None, argrepr='', offset=314, start_offset=314, starts_line=False, line_number=25, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('version', 2, b'\x00\x00\x00\x00')]), + Instruction(opname='POP_JUMP_IF_TRUE', opcode=100, arg=1, 
argval=328, argrepr='to L12', offset=322, start_offset=322, starts_line=False, line_number=25, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), + Instruction(opname='RERAISE', opcode=102, arg=2, argval=2, argrepr='', offset=326, start_offset=326, starts_line=False, line_number=25, label=None, positions=None, cache_info=None), + Instruction(opname='POP_TOP', opcode=32, arg=None, argval=None, argrepr='', offset=328, start_offset=328, starts_line=False, line_number=25, label=12, positions=None, cache_info=None), + Instruction(opname='POP_EXCEPT', opcode=31, arg=None, argval=None, argrepr='', offset=330, start_offset=330, starts_line=False, line_number=25, label=None, positions=None, cache_info=None), + Instruction(opname='POP_TOP', opcode=32, arg=None, argval=None, argrepr='', offset=332, start_offset=332, starts_line=False, line_number=25, label=None, positions=None, cache_info=None), + Instruction(opname='POP_TOP', opcode=32, arg=None, argval=None, argrepr='', offset=334, start_offset=334, starts_line=False, line_number=25, label=None, positions=None, cache_info=None), + Instruction(opname='JUMP_BACKWARD_NO_INTERRUPT', opcode=78, arg=26, argval=286, argrepr='to L11', offset=336, start_offset=336, starts_line=False, line_number=25, label=None, positions=None, cache_info=None), + Instruction(opname='COPY', opcode=61, arg=3, argval=3, argrepr='', offset=338, start_offset=338, starts_line=True, line_number=None, label=None, positions=None, cache_info=None), + Instruction(opname='POP_EXCEPT', opcode=31, arg=None, argval=None, argrepr='', offset=340, start_offset=340, starts_line=False, line_number=None, label=None, positions=None, cache_info=None), + Instruction(opname='RERAISE', opcode=102, arg=1, argval=1, argrepr='', offset=342, start_offset=342, starts_line=False, line_number=None, label=None, positions=None, cache_info=None), + Instruction(opname='PUSH_EXC_INFO', opcode=33, arg=None, argval=None, argrepr='', offset=344, start_offset=344, 
starts_line=False, line_number=None, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_GLOBAL', opcode=91, arg=4, argval='ZeroDivisionError', argrepr='ZeroDivisionError', offset=346, start_offset=346, starts_line=True, line_number=22, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), + Instruction(opname='CHECK_EXC_MATCH', opcode=7, arg=None, argval=None, argrepr='', offset=356, start_offset=356, starts_line=False, line_number=22, label=None, positions=None, cache_info=None), + Instruction(opname='POP_JUMP_IF_FALSE', opcode=97, arg=14, argval=390, argrepr='to L13', offset=358, start_offset=358, starts_line=False, line_number=22, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00')]), + Instruction(opname='POP_TOP', opcode=32, arg=None, argval=None, argrepr='', offset=362, start_offset=362, starts_line=False, line_number=22, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_GLOBAL', opcode=91, arg=3, argval='print', argrepr='print + NULL', offset=364, start_offset=364, starts_line=True, line_number=23, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), + Instruction(opname='LOAD_CONST', opcode=83, arg=9, argval='Here we go, here we go, here we go...', argrepr="'Here we go, here we go, here we go...'", offset=374, start_offset=374, starts_line=False, line_number=23, label=None, positions=None, cache_info=None), + Instruction(opname='CALL', opcode=53, arg=1, argval=1, argrepr='', offset=376, start_offset=376, starts_line=False, line_number=23, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), + Instruction(opname='POP_TOP', opcode=32, arg=None, argval=None, argrepr='', offset=384, 
start_offset=384, starts_line=False, line_number=23, label=None, positions=None, cache_info=None), + Instruction(opname='POP_EXCEPT', opcode=31, arg=None, argval=None, argrepr='', offset=386, start_offset=386, starts_line=False, line_number=23, label=None, positions=None, cache_info=None), + Instruction(opname='JUMP_BACKWARD_NO_INTERRUPT', opcode=78, arg=52, argval=286, argrepr='to L11', offset=388, start_offset=388, starts_line=False, line_number=23, label=None, positions=None, cache_info=None), + Instruction(opname='RERAISE', opcode=102, arg=0, argval=0, argrepr='', offset=390, start_offset=390, starts_line=True, line_number=22, label=13, positions=None, cache_info=None), + Instruction(opname='COPY', opcode=61, arg=3, argval=3, argrepr='', offset=392, start_offset=392, starts_line=True, line_number=None, label=None, positions=None, cache_info=None), + Instruction(opname='POP_EXCEPT', opcode=31, arg=None, argval=None, argrepr='', offset=394, start_offset=394, starts_line=False, line_number=None, label=None, positions=None, cache_info=None), + Instruction(opname='RERAISE', opcode=102, arg=1, argval=1, argrepr='', offset=396, start_offset=396, starts_line=False, line_number=None, label=None, positions=None, cache_info=None), + Instruction(opname='PUSH_EXC_INFO', opcode=33, arg=None, argval=None, argrepr='', offset=398, start_offset=398, starts_line=False, line_number=None, label=None, positions=None, cache_info=None), + Instruction(opname='LOAD_GLOBAL', opcode=91, arg=3, argval='print', argrepr='print + NULL', offset=400, start_offset=400, starts_line=True, line_number=28, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('index', 1, b'\x00\x00'), ('module_keys_version', 1, b'\x00\x00'), ('builtin_keys_version', 1, b'\x00\x00')]), + Instruction(opname='LOAD_CONST', opcode=83, arg=10, argval="OK, now we're done", argrepr='"OK, now we\'re done"', offset=410, start_offset=410, starts_line=False, line_number=28, label=None, positions=None, 
cache_info=None), + Instruction(opname='CALL', opcode=53, arg=1, argval=1, argrepr='', offset=412, start_offset=412, starts_line=False, line_number=28, label=None, positions=None, cache_info=[('counter', 1, b'\x00\x00'), ('func_version', 2, b'\x00\x00\x00\x00')]), + Instruction(opname='POP_TOP', opcode=32, arg=None, argval=None, argrepr='', offset=420, start_offset=420, starts_line=False, line_number=28, label=None, positions=None, cache_info=None), + Instruction(opname='RERAISE', opcode=102, arg=0, argval=0, argrepr='', offset=422, start_offset=422, starts_line=False, line_number=28, label=None, positions=None, cache_info=None), + Instruction(opname='COPY', opcode=61, arg=3, argval=3, argrepr='', offset=424, start_offset=424, starts_line=True, line_number=None, label=None, positions=None, cache_info=None), + Instruction(opname='POP_EXCEPT', opcode=31, arg=None, argval=None, argrepr='', offset=426, start_offset=426, starts_line=False, line_number=None, label=None, positions=None, cache_info=None), + Instruction(opname='RERAISE', opcode=102, arg=1, argval=1, argrepr='', offset=428, start_offset=428, starts_line=False, line_number=None, label=None, positions=None, cache_info=None), ] # One last piece of inspect fodder to check the default line number handling diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-01-17-05-09-32.gh-issue-112354.Run9ko.rst b/Misc/NEWS.d/next/Core and Builtins/2024-01-17-05-09-32.gh-issue-112354.Run9ko.rst new file mode 100644 index 00000000000000..ed45ba49c3ad42 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-01-17-05-09-32.gh-issue-112354.Run9ko.rst @@ -0,0 +1,2 @@ +The ``END_FOR`` instruction now pops only one value. This is to better +support side exits in loops. 
diff --git a/Programs/test_frozenmain.h b/Programs/test_frozenmain.h index 4fb78cf632d70e..657e9345cf5ab7 100644 --- a/Programs/test_frozenmain.h +++ b/Programs/test_frozenmain.h @@ -1,7 +1,7 @@ // Auto-generated by Programs/freeze_test_frozenmain.py unsigned char M_test_frozenmain[] = { 227,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0, - 0,0,0,0,0,243,164,0,0,0,149,0,83,0,83,1, + 0,0,0,0,0,243,166,0,0,0,149,0,83,0,83,1, 75,0,114,0,83,0,83,1,75,1,114,1,92,2,34,0, 83,2,53,1,0,0,0,0,0,0,32,0,92,2,34,0, 83,3,92,0,82,6,0,0,0,0,0,0,0,0,0,0, @@ -11,28 +11,28 @@ unsigned char M_test_frozenmain[] = { 0,0,83,4,5,0,0,0,114,5,83,5,19,0,72,20, 0,0,114,6,92,2,34,0,83,6,92,6,14,0,83,7, 92,5,92,6,5,0,0,0,14,0,51,4,53,1,0,0, - 0,0,0,0,32,0,77,22,0,0,11,0,103,1,41,8, - 233,0,0,0,0,78,122,18,70,114,111,122,101,110,32,72, - 101,108,108,111,32,87,111,114,108,100,122,8,115,121,115,46, - 97,114,103,118,218,6,99,111,110,102,105,103,41,5,218,12, - 112,114,111,103,114,97,109,95,110,97,109,101,218,10,101,120, - 101,99,117,116,97,98,108,101,218,15,117,115,101,95,101,110, - 118,105,114,111,110,109,101,110,116,218,17,99,111,110,102,105, - 103,117,114,101,95,99,95,115,116,100,105,111,218,14,98,117, - 102,102,101,114,101,100,95,115,116,100,105,111,122,7,99,111, - 110,102,105,103,32,122,2,58,32,41,7,218,3,115,121,115, - 218,17,95,116,101,115,116,105,110,116,101,114,110,97,108,99, - 97,112,105,218,5,112,114,105,110,116,218,4,97,114,103,118, - 218,11,103,101,116,95,99,111,110,102,105,103,115,114,3,0, - 0,0,218,3,107,101,121,169,0,243,0,0,0,0,250,18, - 116,101,115,116,95,102,114,111,122,101,110,109,97,105,110,46, - 112,121,250,8,60,109,111,100,117,108,101,62,114,18,0,0, - 0,1,0,0,0,115,99,0,0,0,240,3,1,1,1,243, - 8,0,1,11,219,0,24,225,0,5,208,6,26,212,0,27, - 217,0,5,128,106,144,35,151,40,145,40,212,0,27,216,9, - 26,215,9,38,210,9,38,211,9,40,168,24,209,9,50,128, - 6,240,2,6,12,2,242,0,7,1,42,128,67,241,14,0, - 5,10,136,71,144,67,144,53,152,2,152,54,160,35,153,59, - 
152,45,208,10,40,214,4,41,241,15,7,1,42,114,16,0, - 0,0, + 0,0,0,0,32,0,77,22,0,0,11,0,32,0,103,1, + 41,8,233,0,0,0,0,78,122,18,70,114,111,122,101,110, + 32,72,101,108,108,111,32,87,111,114,108,100,122,8,115,121, + 115,46,97,114,103,118,218,6,99,111,110,102,105,103,41,5, + 218,12,112,114,111,103,114,97,109,95,110,97,109,101,218,10, + 101,120,101,99,117,116,97,98,108,101,218,15,117,115,101,95, + 101,110,118,105,114,111,110,109,101,110,116,218,17,99,111,110, + 102,105,103,117,114,101,95,99,95,115,116,100,105,111,218,14, + 98,117,102,102,101,114,101,100,95,115,116,100,105,111,122,7, + 99,111,110,102,105,103,32,122,2,58,32,41,7,218,3,115, + 121,115,218,17,95,116,101,115,116,105,110,116,101,114,110,97, + 108,99,97,112,105,218,5,112,114,105,110,116,218,4,97,114, + 103,118,218,11,103,101,116,95,99,111,110,102,105,103,115,114, + 3,0,0,0,218,3,107,101,121,169,0,243,0,0,0,0, + 250,18,116,101,115,116,95,102,114,111,122,101,110,109,97,105, + 110,46,112,121,250,8,60,109,111,100,117,108,101,62,114,18, + 0,0,0,1,0,0,0,115,99,0,0,0,240,3,1,1, + 1,243,8,0,1,11,219,0,24,225,0,5,208,6,26,212, + 0,27,217,0,5,128,106,144,35,151,40,145,40,212,0,27, + 216,9,26,215,9,38,210,9,38,211,9,40,168,24,209,9, + 50,128,6,240,2,6,12,2,242,0,7,1,42,128,67,241, + 14,0,5,10,136,71,144,67,144,53,152,2,152,54,160,35, + 153,59,152,45,208,10,40,214,4,41,242,15,7,1,42,114, + 16,0,0,0, }; diff --git a/Python/bytecodes.c b/Python/bytecodes.c index 18749ce60ecd45..fef3cd4ff7d781 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -265,9 +265,9 @@ dummy_func( res = NULL; } - macro(END_FOR) = POP_TOP + POP_TOP; + macro(END_FOR) = POP_TOP; - inst(INSTRUMENTED_END_FOR, (receiver, value --)) { + inst(INSTRUMENTED_END_FOR, (receiver, value -- receiver)) { TIER_ONE_ONLY /* Need to create a fake StopIteration error here, * to conform to PEP 380 */ @@ -2550,8 +2550,8 @@ dummy_func( next_instr[oparg].op.code == INSTRUMENTED_END_FOR); Py_DECREF(iter); STACK_SHRINK(1); - /* Jump forward oparg, then skip following 
END_FOR instruction */ - JUMPBY(oparg + 1); + /* Jump forward oparg, then skip following END_FOR and POP_TOP instruction */ + JUMPBY(oparg + 2); DISPATCH(); } // Common case: no jump, leave it to the code generator @@ -2599,8 +2599,8 @@ dummy_func( next_instr[oparg].op.code == INSTRUMENTED_END_FOR); STACK_SHRINK(1); Py_DECREF(iter); - /* Skip END_FOR */ - target = next_instr + oparg + 1; + /* Skip END_FOR and POP_TOP */ + target = next_instr + oparg + 2; } INSTRUMENTED_JUMP(this_instr, target, PY_MONITORING_EVENT_BRANCH); } @@ -2621,8 +2621,8 @@ dummy_func( } Py_DECREF(iter); STACK_SHRINK(1); - /* Jump forward oparg, then skip following END_FOR instruction */ - JUMPBY(oparg + 1); + /* Jump forward oparg, then skip following END_FOR and POP_TOP instructions */ + JUMPBY(oparg + 2); DISPATCH(); } } @@ -2667,8 +2667,8 @@ dummy_func( } Py_DECREF(iter); STACK_SHRINK(1); - /* Jump forward oparg, then skip following END_FOR instruction */ - JUMPBY(oparg + 1); + /* Jump forward oparg, then skip following END_FOR and POP_TOP instructions */ + JUMPBY(oparg + 2); DISPATCH(); } } @@ -2709,8 +2709,8 @@ dummy_func( if (r->len <= 0) { STACK_SHRINK(1); Py_DECREF(r); - // Jump over END_FOR instruction. - JUMPBY(oparg + 1); + // Jump over END_FOR and POP_TOP instructions. + JUMPBY(oparg + 2); DISPATCH(); } } diff --git a/Python/compile.c b/Python/compile.c index 2a6291ccb51b0c..7cf05dd0683119 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -3075,7 +3075,12 @@ compiler_for(struct compiler *c, stmt_ty s) ADDOP_JUMP(c, NO_LOCATION, JUMP, start); USE_LABEL(c, cleanup); + /* It is important for instrumentation that the `END_FOR` comes first. + * Iteration over a generator will jump to the first of these instructions, + * but a non-generator will jump to a later instruction. 
+ */ ADDOP(c, NO_LOCATION, END_FOR); + ADDOP(c, NO_LOCATION, POP_TOP); compiler_pop_fblock(c, FOR_LOOP, start); @@ -5390,7 +5395,12 @@ compiler_sync_comprehension_generator(struct compiler *c, location loc, ADDOP_JUMP(c, elt_loc, JUMP, start); USE_LABEL(c, anchor); + /* It is important for instrumentation that the `END_FOR` comes first. + * Iteration over a generator will jump to the first of these instructions, + * but a non-generator will jump to a later instruction. + */ ADDOP(c, NO_LOCATION, END_FOR); + ADDOP(c, NO_LOCATION, POP_TOP); } return SUCCESS; diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index c4bb3aeec5e224..16f1db30620d72 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -2342,17 +2342,9 @@ next_instr += 1; INSTRUCTION_STATS(END_FOR); PyObject *value; - // _POP_TOP value = stack_pointer[-1]; - { - Py_DECREF(value); - } - // _POP_TOP - value = stack_pointer[-2]; - { - Py_DECREF(value); - } - stack_pointer += -2; + Py_DECREF(value); + stack_pointer += -1; DISPATCH(); } @@ -2505,8 +2497,8 @@ next_instr[oparg].op.code == INSTRUMENTED_END_FOR); Py_DECREF(iter); STACK_SHRINK(1); - /* Jump forward oparg, then skip following END_FOR instruction */ - JUMPBY(oparg + 1); + /* Jump forward oparg, then skip following END_FOR and POP_TOP instruction */ + JUMPBY(oparg + 2); DISPATCH(); } // Common case: no jump, leave it to the code generator @@ -2567,8 +2559,8 @@ } Py_DECREF(iter); STACK_SHRINK(1); - /* Jump forward oparg, then skip following END_FOR instruction */ - JUMPBY(oparg + 1); + /* Jump forward oparg, then skip following END_FOR and POP_TOP instructions */ + JUMPBY(oparg + 2); DISPATCH(); } } @@ -2608,8 +2600,8 @@ if (r->len <= 0) { STACK_SHRINK(1); Py_DECREF(r); - // Jump over END_FOR instruction. - JUMPBY(oparg + 1); + // Jump over END_FOR and POP_TOP instructions. 
+ JUMPBY(oparg + 2); DISPATCH(); } } @@ -2655,8 +2647,8 @@ } Py_DECREF(iter); STACK_SHRINK(1); - /* Jump forward oparg, then skip following END_FOR instruction */ - JUMPBY(oparg + 1); + /* Jump forward oparg, then skip following END_FOR and POP_TOP instructions */ + JUMPBY(oparg + 2); DISPATCH(); } } @@ -2952,9 +2944,8 @@ } PyErr_SetRaisedException(NULL); } - Py_DECREF(receiver); Py_DECREF(value); - stack_pointer += -2; + stack_pointer += -1; DISPATCH(); } @@ -3005,8 +2996,8 @@ next_instr[oparg].op.code == INSTRUMENTED_END_FOR); STACK_SHRINK(1); Py_DECREF(iter); - /* Skip END_FOR */ - target = next_instr + oparg + 1; + /* Skip END_FOR and POP_TOP */ + target = next_instr + oparg + 2; } INSTRUMENTED_JUMP(this_instr, target, PY_MONITORING_EVENT_BRANCH); DISPATCH(); diff --git a/Python/optimizer.c b/Python/optimizer.c index 4b6ed1781b5b78..db615068ff517f 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -572,9 +572,10 @@ translate_bytecode_to_trace( uop = _PyUOp_Replacements[uop]; assert(uop != 0); if (uop == _FOR_ITER_TIER_TWO) { - target += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1 + extended; - assert(_PyCode_CODE(code)[target-1].op.code == END_FOR || - _PyCode_CODE(code)[target-1].op.code == INSTRUMENTED_END_FOR); + target += 1 + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 2 + extended; + assert(_PyCode_CODE(code)[target-2].op.code == END_FOR || + _PyCode_CODE(code)[target-2].op.code == INSTRUMENTED_END_FOR); + assert(_PyCode_CODE(code)[target-1].op.code == POP_TOP); } break; default: diff --git a/Tools/build/generate_global_objects.py b/Tools/build/generate_global_objects.py index ded19ee489e79b..33d1b323fc1753 100644 --- a/Tools/build/generate_global_objects.py +++ b/Tools/build/generate_global_objects.py @@ -123,6 +123,14 @@ '__rdivmod__', '__buffer__', '__release_buffer__', + + #Workarounds for GH-108918 + 'alias', + 'args', + 'exc_type', + 'exc_value', + 'self', + 'traceback', ] NON_GENERATED_IMMORTAL_OBJECTS = [ From 
191531f352ce387a2d3a61544fb6feefab754d4a Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Wed, 24 Jan 2024 20:14:15 +0300 Subject: [PATCH 080/160] Update outdated comment in ``Python/bytecodes.c`` (#114522) --- Python/bytecodes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index fef3cd4ff7d781..ebd5b06abb2d4e 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1,6 +1,6 @@ // This file contains instruction definitions. -// It is read by Tools/cases_generator/generate_cases.py -// to generate Python/generated_cases.c.h. +// It is read by generators stored in Tools/cases_generator/ +// to generate Python/generated_cases.c.h and others. // Note that there is some dummy C code at the top and bottom of the file // to fool text editors like VS Code into believing this is valid C code. // The actual instruction definitions start at // BEGIN BYTECODES //. From 6888cccac0776d965cc38a7240e1bdbacb952b91 Mon Sep 17 00:00:00 2001 From: plokmijnuhby <39633434+plokmijnuhby@users.noreply.github.com> Date: Wed, 24 Jan 2024 19:58:34 +0000 Subject: [PATCH 081/160] gh-108731: Add description of __slots__ to MemberDescriptorType docs (GH-108745) --- Doc/library/types.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Doc/library/types.rst b/Doc/library/types.rst index 8ce67cf77253c3..c8c981024c1aeb 100644 --- a/Doc/library/types.rst +++ b/Doc/library/types.rst @@ -398,6 +398,10 @@ Standard names are defined for the following types: data members which use standard conversion functions; it has the same purpose as the :class:`property` type, but for classes defined in extension modules. + In addition, when a class is defined with a :attr:`~object.__slots__` attribute, then for + each slot, an instance of :class:`!MemberDescriptorType` will be added as an attribute + on the class. This allows the slot to appear in the class's :attr:`~object.__dict__`. + .. 
impl-detail:: In other implementations of Python, this type may be identical to From d5c21c12c17b6e4db2378755af8e3699516da187 Mon Sep 17 00:00:00 2001 From: Vincent Cunningham Date: Wed, 24 Jan 2024 16:23:28 -0800 Subject: [PATCH 082/160] gh-100107: Make py.exe launcher ignore app aliases that launch Microsoft Store (GH-114358) --- ...-01-23-00-05-05.gh-issue-100107.lkbP_Q.rst | 1 + PC/launcher2.c | 69 +++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 Misc/NEWS.d/next/Windows/2024-01-23-00-05-05.gh-issue-100107.lkbP_Q.rst diff --git a/Misc/NEWS.d/next/Windows/2024-01-23-00-05-05.gh-issue-100107.lkbP_Q.rst b/Misc/NEWS.d/next/Windows/2024-01-23-00-05-05.gh-issue-100107.lkbP_Q.rst new file mode 100644 index 00000000000000..388d61a2b3bd6d --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2024-01-23-00-05-05.gh-issue-100107.lkbP_Q.rst @@ -0,0 +1 @@ +The ``py.exe`` launcher will no longer attempt to run the Microsoft Store redirector when launching a script containing a ``/usr/bin/env`` shebang diff --git a/PC/launcher2.c b/PC/launcher2.c index 2a8f8a101fc8a6..e426eccd700044 100644 --- a/PC/launcher2.c +++ b/PC/launcher2.c @@ -572,6 +572,21 @@ findArgv0End(const wchar_t *buffer, int bufferLength) *** COMMAND-LINE PARSING *** \******************************************************************************/ +// Adapted from https://stackoverflow.com/a/65583702 +typedef struct AppExecLinkFile { // For tag IO_REPARSE_TAG_APPEXECLINK + DWORD reparseTag; + WORD reparseDataLength; + WORD reserved; + ULONG version; + wchar_t stringList[MAX_PATH * 4]; // Multistring (Consecutive UTF-16 strings each ending with a NUL) + /* There are normally 4 strings here. Ex: + Package ID: L"Microsoft.DesktopAppInstaller_8wekyb3d8bbwe" + Entry Point: L"Microsoft.DesktopAppInstaller_8wekyb3d8bbwe!PythonRedirector" + Executable: L"C:\Program Files\WindowsApps\Microsoft.DesktopAppInstaller_1.17.106910_x64__8wekyb3d8bbwe\AppInstallerPythonRedirector.exe" + Applic. 
Type: L"0" // Integer as ASCII. "0" = Desktop bridge application; Else sandboxed UWP application + */ +} AppExecLinkFile; + int parseCommandLine(SearchInfo *search) @@ -763,6 +778,55 @@ _shebangStartsWith(const wchar_t *buffer, int bufferLength, const wchar_t *prefi } +int +ensure_no_redirector_stub(wchar_t* filename, wchar_t* buffer) +{ + // Make sure we didn't find a reparse point that will open the Microsoft Store + // If we did, pretend there was no shebang and let normal handling take over + WIN32_FIND_DATAW findData; + HANDLE hFind = FindFirstFileW(buffer, &findData); + if (!hFind) { + // Let normal handling take over + debug(L"# Did not find %s on PATH\n", filename); + return RC_NO_SHEBANG; + } + + FindClose(hFind); + + if (!(findData.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT && + findData.dwReserved0 & IO_REPARSE_TAG_APPEXECLINK)) { + return 0; + } + + HANDLE hReparsePoint = CreateFileW(buffer, 0, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_FLAG_OPEN_REPARSE_POINT, NULL); + if (!hReparsePoint) { + // Let normal handling take over + debug(L"# Did not find %s on PATH\n", filename); + return RC_NO_SHEBANG; + } + + AppExecLinkFile appExecLink; + + if (!DeviceIoControl(hReparsePoint, FSCTL_GET_REPARSE_POINT, NULL, 0, &appExecLink, sizeof(appExecLink), NULL, NULL)) { + // Let normal handling take over + debug(L"# Did not find %s on PATH\n", filename); + CloseHandle(hReparsePoint); + return RC_NO_SHEBANG; + } + + CloseHandle(hReparsePoint); + + const wchar_t* redirectorPackageId = L"Microsoft.DesktopAppInstaller_8wekyb3d8bbwe"; + + if (0 == wcscmp(appExecLink.stringList, redirectorPackageId)) { + debug(L"# ignoring redirector that would launch store\n"); + return RC_NO_SHEBANG; + } + + return 0; +} + + int searchPath(SearchInfo *search, const wchar_t *shebang, int shebangLength) { @@ -826,6 +890,11 @@ searchPath(SearchInfo *search, const wchar_t *shebang, int shebangLength) return RC_BAD_VIRTUAL_PATH; } + int result = ensure_no_redirector_stub(filename, 
buffer); + if (result) { + return result; + } + // Check that we aren't going to call ourselves again // If we are, pretend there was no shebang and let normal handling take over if (GetModuleFileNameW(NULL, filename, MAXLEN) && From c63c6142f9146e1e977f4c824c56e8979e6aca87 Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Thu, 25 Jan 2024 00:38:34 +0000 Subject: [PATCH 083/160] gh-114272: Fix or skip tests that fail due to spaces in paths (GH-114451) --- Lib/test/test_asyncio/test_subprocess.py | 7 ++- Lib/test/test_launcher.py | 55 +++++++++++++++--------- Lib/test/test_os.py | 7 ++- Lib/test/test_webbrowser.py | 1 + 4 files changed, 45 insertions(+), 25 deletions(-) diff --git a/Lib/test/test_asyncio/test_subprocess.py b/Lib/test/test_asyncio/test_subprocess.py index 859d2932c33fed..808b21c6617551 100644 --- a/Lib/test/test_asyncio/test_subprocess.py +++ b/Lib/test/test_asyncio/test_subprocess.py @@ -207,7 +207,7 @@ def test_kill(self): def test_kill_issue43884(self): if sys.platform == 'win32': - blocking_shell_command = f'{sys.executable} -c "import time; time.sleep(2)"' + blocking_shell_command = f'"{sys.executable}" -c "import time; time.sleep(2)"' else: blocking_shell_command = 'sleep 1; sleep 1' creationflags = 0 @@ -745,7 +745,10 @@ async def check_stdout_output(self, coro, output): def test_create_subprocess_env_shell(self) -> None: async def main() -> None: - cmd = f'''{sys.executable} -c "import os, sys; sys.stdout.write(os.getenv('FOO'))"''' + executable = sys.executable + if sys.platform == "win32": + executable = f'"{executable}"' + cmd = f'''{executable} -c "import os, sys; sys.stdout.write(os.getenv('FOO'))"''' env = os.environ.copy() env["FOO"] = "bar" proc = await asyncio.create_subprocess_shell( diff --git a/Lib/test/test_launcher.py b/Lib/test/test_launcher.py index 3da6173cfd3f13..2528a51240fbf7 100644 --- a/Lib/test/test_launcher.py +++ b/Lib/test/test_launcher.py @@ -90,6 +90,12 @@ "test-command=TEST_EXE.exe", ]) + +def quote(s): + s = 
str(s) + return f'"{s}"' if " " in s else s + + def create_registry_data(root, data): def _create_registry_data(root, key, value): if isinstance(value, dict): @@ -542,10 +548,10 @@ def test_virtualenv_with_env(self): data1 = self.run_py([], env={**env, "PY_PYTHON": "PythonTestSuite/3"}) data2 = self.run_py(["-V:PythonTestSuite/3"], env={**env, "PY_PYTHON": "PythonTestSuite/3"}) # Compare stdout, because stderr goes via ascii - self.assertEqual(data1["stdout"].strip(), str(venv_exe)) + self.assertEqual(data1["stdout"].strip(), quote(venv_exe)) self.assertEqual(data1["SearchInfo.lowPriorityTag"], "True") # Ensure passing the argument doesn't trigger the same behaviour - self.assertNotEqual(data2["stdout"].strip(), str(venv_exe)) + self.assertNotEqual(data2["stdout"].strip(), quote(venv_exe)) self.assertNotEqual(data2["SearchInfo.lowPriorityTag"], "True") def test_py_shebang(self): @@ -554,7 +560,7 @@ def test_py_shebang(self): data = self.run_py([script, "-postarg"]) self.assertEqual("PythonTestSuite", data["SearchInfo.company"]) self.assertEqual("3.100", data["SearchInfo.tag"]) - self.assertEqual(f"X.Y.exe -prearg {script} -postarg", data["stdout"].strip()) + self.assertEqual(f"X.Y.exe -prearg {quote(script)} -postarg", data["stdout"].strip()) def test_python_shebang(self): with self.py_ini(TEST_PY_DEFAULTS): @@ -562,7 +568,7 @@ def test_python_shebang(self): data = self.run_py([script, "-postarg"]) self.assertEqual("PythonTestSuite", data["SearchInfo.company"]) self.assertEqual("3.100", data["SearchInfo.tag"]) - self.assertEqual(f"X.Y.exe -prearg {script} -postarg", data["stdout"].strip()) + self.assertEqual(f"X.Y.exe -prearg {quote(script)} -postarg", data["stdout"].strip()) def test_py2_shebang(self): with self.py_ini(TEST_PY_DEFAULTS): @@ -570,7 +576,8 @@ def test_py2_shebang(self): data = self.run_py([script, "-postarg"]) self.assertEqual("PythonTestSuite", data["SearchInfo.company"]) self.assertEqual("3.100-32", data["SearchInfo.tag"]) - 
self.assertEqual(f"X.Y-32.exe -prearg {script} -postarg", data["stdout"].strip()) + self.assertEqual(f"X.Y-32.exe -prearg {quote(script)} -postarg", + data["stdout"].strip()) def test_py3_shebang(self): with self.py_ini(TEST_PY_DEFAULTS): @@ -578,7 +585,8 @@ def test_py3_shebang(self): data = self.run_py([script, "-postarg"]) self.assertEqual("PythonTestSuite", data["SearchInfo.company"]) self.assertEqual("3.100-arm64", data["SearchInfo.tag"]) - self.assertEqual(f"X.Y-arm64.exe -X fake_arg_for_test -prearg {script} -postarg", data["stdout"].strip()) + self.assertEqual(f"X.Y-arm64.exe -X fake_arg_for_test -prearg {quote(script)} -postarg", + data["stdout"].strip()) def test_py_shebang_nl(self): with self.py_ini(TEST_PY_DEFAULTS): @@ -586,7 +594,8 @@ def test_py_shebang_nl(self): data = self.run_py([script, "-postarg"]) self.assertEqual("PythonTestSuite", data["SearchInfo.company"]) self.assertEqual("3.100", data["SearchInfo.tag"]) - self.assertEqual(f"X.Y.exe -prearg {script} -postarg", data["stdout"].strip()) + self.assertEqual(f"X.Y.exe -prearg {quote(script)} -postarg", + data["stdout"].strip()) def test_py2_shebang_nl(self): with self.py_ini(TEST_PY_DEFAULTS): @@ -594,7 +603,8 @@ def test_py2_shebang_nl(self): data = self.run_py([script, "-postarg"]) self.assertEqual("PythonTestSuite", data["SearchInfo.company"]) self.assertEqual("3.100-32", data["SearchInfo.tag"]) - self.assertEqual(f"X.Y-32.exe -prearg {script} -postarg", data["stdout"].strip()) + self.assertEqual(f"X.Y-32.exe -prearg {quote(script)} -postarg", + data["stdout"].strip()) def test_py3_shebang_nl(self): with self.py_ini(TEST_PY_DEFAULTS): @@ -602,7 +612,8 @@ def test_py3_shebang_nl(self): data = self.run_py([script, "-postarg"]) self.assertEqual("PythonTestSuite", data["SearchInfo.company"]) self.assertEqual("3.100-arm64", data["SearchInfo.tag"]) - self.assertEqual(f"X.Y-arm64.exe -X fake_arg_for_test -prearg {script} -postarg", data["stdout"].strip()) + self.assertEqual(f"X.Y-arm64.exe -X 
fake_arg_for_test -prearg {quote(script)} -postarg", + data["stdout"].strip()) def test_py_shebang_short_argv0(self): with self.py_ini(TEST_PY_DEFAULTS): @@ -630,7 +641,8 @@ def test_search_path(self): [script, "-postarg"], env={"PATH": f"{exe.parent};{os.getenv('PATH')}"}, ) - self.assertEqual(f"{exe} -prearg {script} -postarg", data["stdout"].strip()) + self.assertEqual(f"{quote(exe)} -prearg {quote(script)} -postarg", + data["stdout"].strip()) def test_search_path_exe(self): # Leave the .exe on the name to ensure we don't add it a second time @@ -643,7 +655,8 @@ def test_search_path_exe(self): [script, "-postarg"], env={"PATH": f"{exe.parent};{os.getenv('PATH')}"}, ) - self.assertEqual(f"{exe} -prearg {script} -postarg", data["stdout"].strip()) + self.assertEqual(f"{quote(exe)} -prearg {quote(script)} -postarg", + data["stdout"].strip()) def test_recursive_search_path(self): stem = self.get_py_exe().stem @@ -654,7 +667,7 @@ def test_recursive_search_path(self): env={"PATH": f"{self.get_py_exe().parent};{os.getenv('PATH')}"}, ) # The recursive search is ignored and we get normal "py" behavior - self.assertEqual(f"X.Y.exe {script}", data["stdout"].strip()) + self.assertEqual(f"X.Y.exe {quote(script)}", data["stdout"].strip()) def test_install(self): data = self.run_py(["-V:3.10"], env={"PYLAUNCHER_ALWAYS_INSTALL": "1"}, expect_returncode=111) @@ -674,7 +687,7 @@ def test_literal_shebang_absolute(self): with self.script("#! C:/some_random_app -witharg") as script: data = self.run_py([script]) self.assertEqual( - f"C:\\some_random_app -witharg {script}", + f"C:\\some_random_app -witharg {quote(script)}", data["stdout"].strip(), ) @@ -682,7 +695,7 @@ def test_literal_shebang_relative(self): with self.script("#! 
..\\some_random_app -witharg") as script: data = self.run_py([script]) self.assertEqual( - f"{script.parent.parent}\\some_random_app -witharg {script}", + f"{quote(script.parent.parent / 'some_random_app')} -witharg {quote(script)}", data["stdout"].strip(), ) @@ -690,14 +703,14 @@ def test_literal_shebang_quoted(self): with self.script('#! "some random app" -witharg') as script: data = self.run_py([script]) self.assertEqual( - f'"{script.parent}\\some random app" -witharg {script}', + f"{quote(script.parent / 'some random app')} -witharg {quote(script)}", data["stdout"].strip(), ) with self.script('#! some" random "app -witharg') as script: data = self.run_py([script]) self.assertEqual( - f'"{script.parent}\\some random app" -witharg {script}', + f"{quote(script.parent / 'some random app')} -witharg {quote(script)}", data["stdout"].strip(), ) @@ -705,7 +718,7 @@ def test_literal_shebang_quoted_escape(self): with self.script('#! some\\" random "app -witharg') as script: data = self.run_py([script]) self.assertEqual( - f'"{script.parent}\\some\\ random app" -witharg {script}', + f"{quote(script.parent / 'some/ random app')} -witharg {quote(script)}", data["stdout"].strip(), ) @@ -714,7 +727,7 @@ def test_literal_shebang_command(self): with self.script('#! test-command arg1') as script: data = self.run_py([script]) self.assertEqual( - f"TEST_EXE.exe arg1 {script}", + f"TEST_EXE.exe arg1 {quote(script)}", data["stdout"].strip(), ) @@ -723,7 +736,7 @@ def test_literal_shebang_invalid_template(self): data = self.run_py([script]) expect = script.parent / "/usr/bin/not-python" self.assertEqual( - f"{expect} arg1 {script}", + f"{quote(expect)} arg1 {quote(script)}", data["stdout"].strip(), ) @@ -746,8 +759,8 @@ def test_shebang_command_in_venv(self): with self.script(f'#! 
/usr/bin/env {stem} arg1') as script: data = self.run_py([script], env=env) - self.assertEqual(data["stdout"].strip(), f"{venv_exe} arg1 {script}") + self.assertEqual(data["stdout"].strip(), f"{quote(venv_exe)} arg1 {quote(script)}") with self.script(f'#! /usr/bin/env {exe.stem} arg1') as script: data = self.run_py([script], env=env) - self.assertEqual(data["stdout"].strip(), f"{exe} arg1 {script}") + self.assertEqual(data["stdout"].strip(), f"{quote(exe)} arg1 {quote(script)}") diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index 98b30d2108a1a1..ed1f304c6c8cac 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -4596,8 +4596,11 @@ def test_pipe_spawnl(self): with open(filename, "w") as fp: print(code, file=fp, end="") - cmd = [sys.executable, filename] - exitcode = os.spawnl(os.P_WAIT, cmd[0], *cmd) + executable = sys.executable + cmd = [executable, filename] + if os.name == "nt" and " " in cmd[0]: + cmd[0] = f'"{cmd[0]}"' + exitcode = os.spawnl(os.P_WAIT, executable, *cmd) self.assertEqual(exitcode, 0) diff --git a/Lib/test/test_webbrowser.py b/Lib/test/test_webbrowser.py index ca481c57c3d972..8c074cb28a87e3 100644 --- a/Lib/test/test_webbrowser.py +++ b/Lib/test/test_webbrowser.py @@ -307,6 +307,7 @@ def test_get(self): webbrowser.get('fakebrowser') self.assertIsNotNone(webbrowser._tryorder) + @unittest.skipIf(" " in sys.executable, "test assumes no space in path (GH-114452)") def test_synthesize(self): webbrowser = import_helper.import_fresh_module('webbrowser') name = os.path.basename(sys.executable).lower() From ea3cd0498c443e93be441736c804258e93d21edd Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Thu, 25 Jan 2024 06:10:51 -0500 Subject: [PATCH 084/160] gh-114312: Collect stats for unlikely events (GH-114493) --- Include/cpython/pystats.h | 14 ++++++ Include/internal/pycore_code.h | 2 + Include/internal/pycore_interp.h | 29 ++++++++++++ Lib/test/test_optimizer.py | 75 ++++++++++++++++++++++++++++++++ Modules/_testinternalcapi.c | 
16 +++++++ Objects/funcobject.c | 9 ++++ Objects/typeobject.c | 3 ++ Python/pylifecycle.c | 18 ++++++++ Python/pystate.c | 1 + Python/specialize.c | 11 +++++ Tools/scripts/summarize_stats.py | 22 +++++++++- 11 files changed, 199 insertions(+), 1 deletion(-) create mode 100644 Lib/test/test_optimizer.py diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h index ba67eefef3e37a..bf0cfe4cb695b4 100644 --- a/Include/cpython/pystats.h +++ b/Include/cpython/pystats.h @@ -122,11 +122,25 @@ typedef struct _optimization_stats { uint64_t optimized_trace_length_hist[_Py_UOP_HIST_SIZE]; } OptimizationStats; +typedef struct _rare_event_stats { + /* Setting an object's class, obj.__class__ = ... */ + uint64_t set_class; + /* Setting the bases of a class, cls.__bases__ = ... */ + uint64_t set_bases; + /* Setting the PEP 523 frame eval function, _PyInterpreterState_SetFrameEvalFunc() */ + uint64_t set_eval_frame_func; + /* Modifying the builtins, __builtins__.__dict__[var] = ... */ + uint64_t builtin_dict; + /* Modifying a function, e.g. func.__defaults__ = ..., etc. 
*/ + uint64_t func_modification; +} RareEventStats; + typedef struct _stats { OpcodeStats opcode_stats[256]; CallStats call_stats; ObjectStats object_stats; OptimizationStats optimization_stats; + RareEventStats rare_event_stats; GCStats *gc_stats; } PyStats; diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 73df6c3568ffe0..fdd5918228455d 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -295,6 +295,7 @@ extern int _PyStaticCode_Init(PyCodeObject *co); _Py_stats->optimization_stats.name[bucket]++; \ } \ } while (0) +#define RARE_EVENT_STAT_INC(name) do { if (_Py_stats) _Py_stats->rare_event_stats.name++; } while (0) // Export for '_opcode' shared extension PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void); @@ -313,6 +314,7 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void); #define UOP_STAT_INC(opname, name) ((void)0) #define OPT_UNSUPPORTED_OPCODE(opname) ((void)0) #define OPT_HIST(length, name) ((void)0) +#define RARE_EVENT_STAT_INC(name) ((void)0) #endif // !Py_STATS // Utility functions for reading/writing 32/64-bit values in the inline caches. diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index f953b8426e180a..662a18d93f329d 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -60,6 +60,21 @@ struct _stoptheworld_state { /* cross-interpreter data registry */ +/* Tracks some rare events per-interpreter, used by the optimizer to turn on/off + specific optimizations. */ +typedef struct _rare_events { + /* Setting an object's class, obj.__class__ = ... */ + uint8_t set_class; + /* Setting the bases of a class, cls.__bases__ = ... */ + uint8_t set_bases; + /* Setting the PEP 523 frame eval function, _PyInterpreterState_SetFrameEvalFunc() */ + uint8_t set_eval_frame_func; + /* Modifying the builtins, __builtins__.__dict__[var] = ... 
*/ + uint8_t builtin_dict; + int builtins_dict_watcher_id; + /* Modifying a function, e.g. func.__defaults__ = ..., etc. */ + uint8_t func_modification; +} _rare_events; /* interpreter state */ @@ -217,6 +232,7 @@ struct _is { uint16_t optimizer_resume_threshold; uint16_t optimizer_backedge_threshold; uint32_t next_func_version; + _rare_events rare_events; _Py_GlobalMonitors monitors; bool sys_profile_initialized; @@ -347,6 +363,19 @@ PyAPI_FUNC(PyStatus) _PyInterpreterState_New( PyInterpreterState **pinterp); +#define RARE_EVENT_INTERP_INC(interp, name) \ + do { \ + /* saturating add */ \ + if (interp->rare_events.name < UINT8_MAX) interp->rare_events.name++; \ + RARE_EVENT_STAT_INC(name); \ + } while (0); \ + +#define RARE_EVENT_INC(name) \ + do { \ + PyInterpreterState *interp = PyInterpreterState_Get(); \ + RARE_EVENT_INTERP_INC(interp, name); \ + } while (0); \ + #ifdef __cplusplus } #endif diff --git a/Lib/test/test_optimizer.py b/Lib/test/test_optimizer.py new file mode 100644 index 00000000000000..b56bf3cfd9560e --- /dev/null +++ b/Lib/test/test_optimizer.py @@ -0,0 +1,75 @@ +import _testinternalcapi +import unittest +import types + + +class TestRareEventCounters(unittest.TestCase): + def test_set_class(self): + class A: + pass + class B: + pass + a = A() + + orig_counter = _testinternalcapi.get_rare_event_counters()["set_class"] + a.__class__ = B + self.assertEqual( + orig_counter + 1, + _testinternalcapi.get_rare_event_counters()["set_class"] + ) + + def test_set_bases(self): + class A: + pass + class B: + pass + class C(B): + pass + + orig_counter = _testinternalcapi.get_rare_event_counters()["set_bases"] + C.__bases__ = (A,) + self.assertEqual( + orig_counter + 1, + _testinternalcapi.get_rare_event_counters()["set_bases"] + ) + + def test_set_eval_frame_func(self): + orig_counter = _testinternalcapi.get_rare_event_counters()["set_eval_frame_func"] + _testinternalcapi.set_eval_frame_record([]) + self.assertEqual( + orig_counter + 1, + 
_testinternalcapi.get_rare_event_counters()["set_eval_frame_func"] + ) + _testinternalcapi.set_eval_frame_default() + + def test_builtin_dict(self): + orig_counter = _testinternalcapi.get_rare_event_counters()["builtin_dict"] + if isinstance(__builtins__, types.ModuleType): + builtins = __builtins__.__dict__ + else: + builtins = __builtins__ + builtins["FOO"] = 42 + self.assertEqual( + orig_counter + 1, + _testinternalcapi.get_rare_event_counters()["builtin_dict"] + ) + del builtins["FOO"] + + def test_func_modification(self): + def func(x=0): + pass + + for attribute in ( + "__code__", + "__defaults__", + "__kwdefaults__" + ): + orig_counter = _testinternalcapi.get_rare_event_counters()["func_modification"] + setattr(func, attribute, getattr(func, attribute)) + self.assertEqual( + orig_counter + 1, + _testinternalcapi.get_rare_event_counters()["func_modification"] + ) + +if __name__ == "__main__": + unittest.main() diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 7d277df164d3ec..2c32c691afa583 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -1635,6 +1635,21 @@ get_type_module_name(PyObject *self, PyObject *type) return _PyType_GetModuleName((PyTypeObject *)type); } +static PyObject * +get_rare_event_counters(PyObject *self, PyObject *type) +{ + PyInterpreterState *interp = PyInterpreterState_Get(); + + return Py_BuildValue( + "{sksksksksk}", + "set_class", interp->rare_events.set_class, + "set_bases", interp->rare_events.set_bases, + "set_eval_frame_func", interp->rare_events.set_eval_frame_func, + "builtin_dict", interp->rare_events.builtin_dict, + "func_modification", interp->rare_events.func_modification + ); +} + #ifdef Py_GIL_DISABLED static PyObject * @@ -1711,6 +1726,7 @@ static PyMethodDef module_functions[] = { {"restore_crossinterp_data", restore_crossinterp_data, METH_VARARGS}, _TESTINTERNALCAPI_TEST_LONG_NUMBITS_METHODDEF {"get_type_module_name", get_type_module_name, METH_O}, + 
{"get_rare_event_counters", get_rare_event_counters, METH_NOARGS}, #ifdef Py_GIL_DISABLED {"py_thread_id", get_py_thread_id, METH_NOARGS}, #endif diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 2620dc69bfd79b..08b2823d8cf024 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -53,6 +53,15 @@ handle_func_event(PyFunction_WatchEvent event, PyFunctionObject *func, if (interp->active_func_watchers) { notify_func_watchers(interp, event, func, new_value); } + switch (event) { + case PyFunction_EVENT_MODIFY_CODE: + case PyFunction_EVENT_MODIFY_DEFAULTS: + case PyFunction_EVENT_MODIFY_KWDEFAULTS: + RARE_EVENT_INTERP_INC(interp, func_modification); + break; + default: + break; + } } int diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 3a35a5b5975898..a8c3b8896d36eb 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -1371,6 +1371,7 @@ type_set_bases(PyTypeObject *type, PyObject *new_bases, void *context) res = 0; } + RARE_EVENT_INC(set_bases); Py_DECREF(old_bases); Py_DECREF(old_base); @@ -5842,6 +5843,8 @@ object_set_class(PyObject *self, PyObject *value, void *closure) Py_SET_TYPE(self, newto); if (oldto->tp_flags & Py_TPFLAGS_HEAPTYPE) Py_DECREF(oldto); + + RARE_EVENT_INC(set_class); return 0; } else { diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 0d5eec06e9b458..261622adc4cc77 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -605,6 +605,12 @@ init_interp_create_gil(PyThreadState *tstate, int gil) _PyEval_InitGIL(tstate, own_gil); } +static int +builtins_dict_watcher(PyDict_WatchEvent event, PyObject *dict, PyObject *key, PyObject *new_value) +{ + RARE_EVENT_INC(builtin_dict); + return 0; +} static PyStatus pycore_create_interpreter(_PyRuntimeState *runtime, @@ -1266,6 +1272,14 @@ init_interp_main(PyThreadState *tstate) } } + if ((interp->rare_events.builtins_dict_watcher_id = PyDict_AddWatcher(&builtins_dict_watcher)) == -1) { + return _PyStatus_ERR("failed to add builtin dict 
watcher"); + } + + if (PyDict_Watch(interp->rare_events.builtins_dict_watcher_id, interp->builtins) != 0) { + return _PyStatus_ERR("failed to set builtin dict watcher"); + } + assert(!_PyErr_Occurred(tstate)); return _PyStatus_OK(); @@ -1592,6 +1606,10 @@ static void finalize_modules(PyThreadState *tstate) { PyInterpreterState *interp = tstate->interp; + + // Stop collecting stats on __builtin__ modifications during teardown + PyDict_Unwatch(interp->rare_events.builtins_dict_watcher_id, interp->builtins); + PyObject *modules = _PyImport_GetModules(interp); if (modules == NULL) { // Already done diff --git a/Python/pystate.c b/Python/pystate.c index 548c77b7dc7ebb..c9b521351444a7 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -2616,6 +2616,7 @@ _PyInterpreterState_SetEvalFrameFunc(PyInterpreterState *interp, if (eval_frame != NULL) { _Py_Executors_InvalidateAll(interp); } + RARE_EVENT_INC(set_eval_frame_func); interp->eval_frame = eval_frame; } diff --git a/Python/specialize.c b/Python/specialize.c index 13e0440dd9dd0d..a9efbe0453b94e 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -267,6 +267,16 @@ print_optimization_stats(FILE *out, OptimizationStats *stats) } } +static void +print_rare_event_stats(FILE *out, RareEventStats *stats) +{ + fprintf(out, "Rare event (set_class): %" PRIu64 "\n", stats->set_class); + fprintf(out, "Rare event (set_bases): %" PRIu64 "\n", stats->set_bases); + fprintf(out, "Rare event (set_eval_frame_func): %" PRIu64 "\n", stats->set_eval_frame_func); + fprintf(out, "Rare event (builtin_dict): %" PRIu64 "\n", stats->builtin_dict); + fprintf(out, "Rare event (func_modification): %" PRIu64 "\n", stats->func_modification); +} + static void print_stats(FILE *out, PyStats *stats) { @@ -275,6 +285,7 @@ print_stats(FILE *out, PyStats *stats) print_object_stats(out, &stats->object_stats); print_gc_stats(out, stats->gc_stats); print_optimization_stats(out, &stats->optimization_stats); + print_rare_event_stats(out, 
&stats->rare_event_stats); } void diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py index 1e9dc07bae8981..9b7e7b999ea7c7 100644 --- a/Tools/scripts/summarize_stats.py +++ b/Tools/scripts/summarize_stats.py @@ -412,6 +412,14 @@ def get_histogram(self, prefix: str) -> list[tuple[int, int]]: rows.sort() return rows + def get_rare_events(self) -> list[tuple[str, int]]: + prefix = "Rare event " + return [ + (key[len(prefix) + 1:-1], val) + for key, val in self._data.items() + if key.startswith(prefix) + ] + class Count(int): def markdown(self) -> str: @@ -1064,6 +1072,17 @@ def iter_optimization_tables(base_stats: Stats, head_stats: Stats | None = None) ) +def rare_event_section() -> Section: + def calc_rare_event_table(stats: Stats) -> Table: + return [(x, Count(y)) for x, y in stats.get_rare_events()] + + return Section( + "Rare events", + "Counts of rare/unlikely events", + [Table(("Event", "Count:"), calc_rare_event_table, JoinMode.CHANGE)], + ) + + def meta_stats_section() -> Section: def calc_rows(stats: Stats) -> Rows: return [("Number of data files", Count(stats.get("__nfiles__")))] @@ -1085,6 +1104,7 @@ def calc_rows(stats: Stats) -> Rows: object_stats_section(), gc_stats_section(), optimization_section(), + rare_event_section(), meta_stats_section(), ] @@ -1162,7 +1182,7 @@ def output_stats(inputs: list[Path], json_output=str | None): case 1: data = load_raw_data(Path(inputs[0])) if json_output is not None: - with open(json_output, 'w', encoding='utf-8') as f: + with open(json_output, "w", encoding="utf-8") as f: save_raw_data(data, f) # type: ignore stats = Stats(data) output_markdown(sys.stdout, LAYOUT, stats) From 0315941441f1e5f944a758c67eb1763b00e6e462 Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Thu, 25 Jan 2024 12:54:19 +0000 Subject: [PATCH 085/160] gh-114265: remove i_loc_propagated, jump threading does not consider line numbers anymore (#114535) --- 
Lib/test/test_peepholer.py | 3 +- Python/flowgraph.c | 88 ++++++++++++++++++++------------------ 2 files changed, 49 insertions(+), 42 deletions(-) diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py index 76a6f25c34bbd3..2ea186c85c8823 100644 --- a/Lib/test/test_peepholer.py +++ b/Lib/test/test_peepholer.py @@ -1150,10 +1150,11 @@ def get_insts(lno1, lno2, op1, op2): lno1, lno2 = (4, 5) with self.subTest(lno = (lno1, lno2), ops = (op1, op2)): insts = get_insts(lno1, lno2, op1, op2) + op = 'JUMP' if 'JUMP' in (op1, op2) else 'JUMP_NO_INTERRUPT' expected_insts = [ ('LOAD_NAME', 0, 10), ('NOP', 0, 4), - (op2, 0, 5), + (op, 0, 5), ] self.cfg_optimization_test(insts, expected_insts, consts=list(range(5))) diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 2fc90b8877b475..de831358eb9ac8 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -29,7 +29,6 @@ typedef struct _PyCfgInstruction { int i_opcode; int i_oparg; _PyCompilerSrcLocation i_loc; - unsigned i_loc_propagated : 1; /* location was set by propagate_line_numbers */ struct _PyCfgBasicblock *i_target; /* target block (if jump instruction) */ struct _PyCfgBasicblock *i_except; /* target block when exception is raised */ } cfg_instr; @@ -146,6 +145,16 @@ basicblock_next_instr(basicblock *b) return b->b_iused++; } +static cfg_instr * +basicblock_last_instr(const basicblock *b) { + assert(b->b_iused >= 0); + if (b->b_iused > 0) { + assert(b->b_instr != NULL); + return &b->b_instr[b->b_iused - 1]; + } + return NULL; +} + /* Allocate a new block and return a pointer to it. Returns NULL on error. 
*/ @@ -186,6 +195,22 @@ basicblock_addop(basicblock *b, int opcode, int oparg, location loc) return SUCCESS; } +static int +basicblock_add_jump(basicblock *b, int opcode, basicblock *target, location loc) +{ + cfg_instr *last = basicblock_last_instr(b); + if (last && is_jump(last)) { + return ERROR; + } + + RETURN_IF_ERROR( + basicblock_addop(b, opcode, target->b_label.id, loc)); + last = basicblock_last_instr(b); + assert(last && last->i_opcode == opcode); + last->i_target = target; + return SUCCESS; +} + static inline int basicblock_append_instructions(basicblock *target, basicblock *source) { @@ -199,16 +224,6 @@ basicblock_append_instructions(basicblock *target, basicblock *source) return SUCCESS; } -static cfg_instr * -basicblock_last_instr(const basicblock *b) { - assert(b->b_iused >= 0); - if (b->b_iused > 0) { - assert(b->b_instr != NULL); - return &b->b_instr[b->b_iused - 1]; - } - return NULL; -} - static inline int basicblock_nofallthrough(const basicblock *b) { cfg_instr *last = basicblock_last_instr(b); @@ -560,8 +575,8 @@ normalize_jumps_in_block(cfg_builder *g, basicblock *b) { if (backwards_jump == NULL) { return ERROR; } - basicblock_addop(backwards_jump, JUMP, target->b_label.id, last->i_loc); - backwards_jump->b_instr[0].i_target = target; + RETURN_IF_ERROR( + basicblock_add_jump(backwards_jump, JUMP, target, last->i_loc)); last->i_opcode = reversed_opcode; last->i_target = b->b_next; @@ -1141,13 +1156,7 @@ remove_redundant_jumps(cfg_builder *g) { basicblock *next = next_nonempty_block(b->b_next); if (jump_target == next) { changes++; - if (last->i_loc_propagated) { - b->b_iused--; - } - else { - assert(last->i_loc.lineno != -1); - INSTR_SET_OP0(last, NOP); - } + INSTR_SET_OP0(last, NOP); } } } @@ -1184,23 +1193,23 @@ inline_small_exit_blocks(basicblock *bb) { // target->i_target using the provided opcode. Return whether or not the // optimization was successful. 
static bool -jump_thread(cfg_instr *inst, cfg_instr *target, int opcode) +jump_thread(basicblock *bb, cfg_instr *inst, cfg_instr *target, int opcode) { assert(is_jump(inst)); assert(is_jump(target)); + assert(inst == basicblock_last_instr(bb)); // bpo-45773: If inst->i_target == target->i_target, then nothing actually // changes (and we fall into an infinite loop): - if (inst->i_loc.lineno == -1) assert(inst->i_loc_propagated); - if (target->i_loc.lineno == -1) assert(target->i_loc_propagated); - if ((inst->i_loc.lineno == target->i_loc.lineno || - inst->i_loc_propagated || target->i_loc_propagated) && - inst->i_target != target->i_target) - { - inst->i_target = target->i_target; - inst->i_opcode = opcode; - if (inst->i_loc_propagated && !target->i_loc_propagated) { - inst->i_loc = target->i_loc; - } + if (inst->i_target != target->i_target) { + /* Change inst to NOP and append a jump to target->i_target. The + * NOP will be removed later if it's not needed for the lineno. + */ + INSTR_SET_OP0(inst, NOP); + + RETURN_IF_ERROR( + basicblock_add_jump( + bb, opcode, target->i_target, target->i_loc)); + return true; } return false; @@ -1673,29 +1682,29 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts) case POP_JUMP_IF_NONE: switch (target->i_opcode) { case JUMP: - i -= jump_thread(inst, target, inst->i_opcode); + i -= jump_thread(bb, inst, target, inst->i_opcode); } break; case POP_JUMP_IF_FALSE: switch (target->i_opcode) { case JUMP: - i -= jump_thread(inst, target, POP_JUMP_IF_FALSE); + i -= jump_thread(bb, inst, target, POP_JUMP_IF_FALSE); } break; case POP_JUMP_IF_TRUE: switch (target->i_opcode) { case JUMP: - i -= jump_thread(inst, target, POP_JUMP_IF_TRUE); + i -= jump_thread(bb, inst, target, POP_JUMP_IF_TRUE); } break; case JUMP: case JUMP_NO_INTERRUPT: switch (target->i_opcode) { case JUMP: - i -= jump_thread(inst, target, JUMP); + i -= jump_thread(bb, inst, target, JUMP); continue; case JUMP_NO_INTERRUPT: - i -= jump_thread(inst, 
target, opcode); + i -= jump_thread(bb, inst, target, opcode); continue; } break; @@ -1707,7 +1716,7 @@ optimize_basic_block(PyObject *const_cache, basicblock *bb, PyObject *consts) * of FOR_ITER. */ /* - i -= jump_thread(inst, target, FOR_ITER); + i -= jump_thread(bb, inst, target, FOR_ITER); */ } break; @@ -2410,7 +2419,6 @@ propagate_line_numbers(basicblock *entryblock) { for (int i = 0; i < b->b_iused; i++) { if (b->b_instr[i].i_loc.lineno < 0) { b->b_instr[i].i_loc = prev_location; - b->b_instr[i].i_loc_propagated = 1; } else { prev_location = b->b_instr[i].i_loc; @@ -2420,7 +2428,6 @@ propagate_line_numbers(basicblock *entryblock) { if (b->b_next->b_iused > 0) { if (b->b_next->b_instr[0].i_loc.lineno < 0) { b->b_next->b_instr[0].i_loc = prev_location; - b->b_next->b_instr[0].i_loc_propagated = 1; } } } @@ -2429,7 +2436,6 @@ propagate_line_numbers(basicblock *entryblock) { if (target->b_predecessors == 1) { if (target->b_instr[0].i_loc.lineno < 0) { target->b_instr[0].i_loc = prev_location; - target->b_instr[0].i_loc_propagated = 1; } } } From e721adf4bd47b20ba0a93ad6471084de31bf20c7 Mon Sep 17 00:00:00 2001 From: AN Long Date: Thu, 25 Jan 2024 22:35:05 +0800 Subject: [PATCH 086/160] gh-77465: Increase test coverage for the numbers module (GH-111738) Co-authored-by: Serhiy Storchaka --- Lib/test/test_abstract_numbers.py | 158 +++++++++++++++++++++++++++++- 1 file changed, 157 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_abstract_numbers.py b/Lib/test/test_abstract_numbers.py index 2e06f0d16fdd05..72232b670cdb89 100644 --- a/Lib/test/test_abstract_numbers.py +++ b/Lib/test/test_abstract_numbers.py @@ -1,14 +1,34 @@ """Unit tests for numbers.py.""" +import abc import math import operator import unittest -from numbers import Complex, Real, Rational, Integral +from numbers import Complex, Real, Rational, Integral, Number + + +def concretize(cls): + def not_implemented(*args, **kwargs): + raise NotImplementedError() + + for name in dir(cls): + try: + 
value = getattr(cls, name) + if value.__isabstractmethod__: + setattr(cls, name, not_implemented) + except AttributeError: + pass + abc.update_abstractmethods(cls) + return cls + class TestNumbers(unittest.TestCase): def test_int(self): self.assertTrue(issubclass(int, Integral)) + self.assertTrue(issubclass(int, Rational)) + self.assertTrue(issubclass(int, Real)) self.assertTrue(issubclass(int, Complex)) + self.assertTrue(issubclass(int, Number)) self.assertEqual(7, int(7).real) self.assertEqual(0, int(7).imag) @@ -18,8 +38,11 @@ def test_int(self): self.assertEqual(1, int(7).denominator) def test_float(self): + self.assertFalse(issubclass(float, Integral)) self.assertFalse(issubclass(float, Rational)) self.assertTrue(issubclass(float, Real)) + self.assertTrue(issubclass(float, Complex)) + self.assertTrue(issubclass(float, Number)) self.assertEqual(7.3, float(7.3).real) self.assertEqual(0, float(7.3).imag) @@ -27,8 +50,11 @@ def test_float(self): self.assertEqual(-7.3, float(-7.3).conjugate()) def test_complex(self): + self.assertFalse(issubclass(complex, Integral)) + self.assertFalse(issubclass(complex, Rational)) self.assertFalse(issubclass(complex, Real)) self.assertTrue(issubclass(complex, Complex)) + self.assertTrue(issubclass(complex, Number)) c1, c2 = complex(3, 2), complex(4,1) # XXX: This is not ideal, but see the comment in math_trunc(). 
@@ -40,5 +66,135 @@ def test_complex(self): self.assertRaises(TypeError, int, c1) +class TestNumbersDefaultMethods(unittest.TestCase): + def test_complex(self): + @concretize + class MyComplex(Complex): + def __init__(self, real, imag): + self.r = real + self.i = imag + + @property + def real(self): + return self.r + + @property + def imag(self): + return self.i + + def __add__(self, other): + if isinstance(other, Complex): + return MyComplex(self.imag + other.imag, + self.real + other.real) + raise NotImplementedError + + def __neg__(self): + return MyComplex(-self.real, -self.imag) + + def __eq__(self, other): + if isinstance(other, Complex): + return self.imag == other.imag and self.real == other.real + if isinstance(other, Number): + return self.imag == 0 and self.real == other.real + + # test __bool__ + self.assertTrue(bool(MyComplex(1, 1))) + self.assertTrue(bool(MyComplex(0, 1))) + self.assertTrue(bool(MyComplex(1, 0))) + self.assertFalse(bool(MyComplex(0, 0))) + + # test __sub__ + self.assertEqual(MyComplex(2, 3) - complex(1, 2), MyComplex(1, 1)) + + # test __rsub__ + self.assertEqual(complex(2, 3) - MyComplex(1, 2), MyComplex(1, 1)) + + def test_real(self): + @concretize + class MyReal(Real): + def __init__(self, n): + self.n = n + + def __pos__(self): + return self.n + + def __float__(self): + return float(self.n) + + def __floordiv__(self, other): + return self.n // other + + def __rfloordiv__(self, other): + return other // self.n + + def __mod__(self, other): + return self.n % other + + def __rmod__(self, other): + return other % self.n + + # test __divmod__ + self.assertEqual(divmod(MyReal(3), 2), (1, 1)) + + # test __rdivmod__ + self.assertEqual(divmod(3, MyReal(2)), (1, 1)) + + # test __complex__ + self.assertEqual(complex(MyReal(1)), 1+0j) + + # test real + self.assertEqual(MyReal(3).real, 3) + + # test imag + self.assertEqual(MyReal(3).imag, 0) + + # test conjugate + self.assertEqual(MyReal(123).conjugate(), 123) + + + def test_rational(self): + 
@concretize + class MyRational(Rational): + def __init__(self, numerator, denominator): + self.n = numerator + self.d = denominator + + @property + def numerator(self): + return self.n + + @property + def denominator(self): + return self.d + + # test__float__ + self.assertEqual(float(MyRational(5, 2)), 2.5) + + + def test_integral(self): + @concretize + class MyIntegral(Integral): + def __init__(self, n): + self.n = n + + def __pos__(self): + return self.n + + def __int__(self): + return self.n + + # test __index__ + self.assertEqual(operator.index(MyIntegral(123)), 123) + + # test __float__ + self.assertEqual(float(MyIntegral(123)), 123.0) + + # test numerator + self.assertEqual(MyIntegral(123).numerator, 123) + + # test denominator + self.assertEqual(MyIntegral(123).denominator, 1) + + if __name__ == "__main__": unittest.main() From 07ef63fb6a0fb996d5f56c79f4ccd7a1887a6b2b Mon Sep 17 00:00:00 2001 From: Ned Batchelder Date: Thu, 25 Jan 2024 09:38:43 -0500 Subject: [PATCH 087/160] Doc/library/sys.monitoring.rst: remove contradictory paragraph. (GH-113619) --- Doc/library/sys.monitoring.rst | 3 --- 1 file changed, 3 deletions(-) diff --git a/Doc/library/sys.monitoring.rst b/Doc/library/sys.monitoring.rst index 762581b7eda7f1..4980227c60b21e 100644 --- a/Doc/library/sys.monitoring.rst +++ b/Doc/library/sys.monitoring.rst @@ -75,9 +75,6 @@ following IDs are pre-defined to make co-operation of tools easier:: sys.monitoring.PROFILER_ID = 2 sys.monitoring.OPTIMIZER_ID = 5 -There is no obligation to set an ID, nor is there anything preventing a tool -from using an ID even it is already in use. -However, tools are encouraged to use a unique ID and respect other tools. 
Events ------ From 8278fa2f5625b41be91191d18ee8eeab904a54ff Mon Sep 17 00:00:00 2001 From: Tian Gao Date: Thu, 25 Jan 2024 08:48:50 -0800 Subject: [PATCH 088/160] gh-111051: Check if file is modifed during debugging in `pdb` (#111052) --- Lib/pdb.py | 21 +++++ Lib/test/test_pdb.py | 81 +++++++++++++++++++ ...-10-19-02-08-12.gh-issue-111051.8h1Dpk.rst | 1 + 3 files changed, 103 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2023-10-19-02-08-12.gh-issue-111051.8h1Dpk.rst diff --git a/Lib/pdb.py b/Lib/pdb.py index 68f810620f8826..6f7719eb9ba6c5 100755 --- a/Lib/pdb.py +++ b/Lib/pdb.py @@ -233,6 +233,8 @@ class Pdb(bdb.Bdb, cmd.Cmd): # but in case there are recursions, we stop at 999. MAX_CHAINED_EXCEPTION_DEPTH = 999 + _file_mtime_table = {} + def __init__(self, completekey='tab', stdin=None, stdout=None, skip=None, nosigint=False, readrc=True): bdb.Bdb.__init__(self, skip=skip) @@ -437,6 +439,20 @@ def _cmdloop(self): except KeyboardInterrupt: self.message('--KeyboardInterrupt--') + def _validate_file_mtime(self): + """Check if the source file of the current frame has been modified since + the last time we saw it. If so, give a warning.""" + try: + filename = self.curframe.f_code.co_filename + mtime = os.path.getmtime(filename) + except Exception: + return + if (filename in self._file_mtime_table and + mtime != self._file_mtime_table[filename]): + self.message(f"*** WARNING: file '{filename}' was edited, " + "running stale code until the program is rerun") + self._file_mtime_table[filename] = mtime + # Called before loop, handles display expressions # Set up convenience variable containers def preloop(self): @@ -681,6 +697,7 @@ def onecmd(self, line): a breakpoint command list definition. 
""" if not self.commands_defining: + self._validate_file_mtime() return cmd.Cmd.onecmd(self, line) else: return self.handle_command_def(line) @@ -2021,6 +2038,10 @@ def _run(self, target: Union[_ModuleTarget, _ScriptTarget]): __main__.__dict__.clear() __main__.__dict__.update(target.namespace) + # Clear the mtime table for program reruns, assume all the files + # are up to date. + self._file_mtime_table.clear() + self.run(target.code) def _format_exc(self, exc: BaseException): diff --git a/Lib/test/test_pdb.py b/Lib/test/test_pdb.py index 03487aa6ffd81f..c64df62c761471 100644 --- a/Lib/test/test_pdb.py +++ b/Lib/test/test_pdb.py @@ -3056,6 +3056,87 @@ def test_blocks_at_first_code_line(self): self.assertTrue(any("__main__.py(4)()" in l for l in stdout.splitlines()), stdout) + def test_file_modified_after_execution(self): + script = """ + print("hello") + """ + + commands = """ + filename = $_frame.f_code.co_filename + f = open(filename, "w") + f.write("print('goodbye')") + f.close() + ll + """ + + stdout, stderr = self.run_pdb_script(script, commands) + self.assertIn("WARNING:", stdout) + self.assertIn("was edited", stdout) + + def test_file_modified_after_execution_with_multiple_instances(self): + script = """ + import pdb; pdb.Pdb().set_trace() + with open(__file__, "w") as f: + f.write("print('goodbye')\\n" * 5) + import pdb; pdb.Pdb().set_trace() + """ + + commands = """ + continue + continue + """ + + filename = 'main.py' + with open(filename, 'w') as f: + f.write(textwrap.dedent(script)) + self.addCleanup(os_helper.unlink, filename) + self.addCleanup(os_helper.rmtree, '__pycache__') + cmd = [sys.executable, filename] + with subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stdin=subprocess.PIPE, + stderr=subprocess.STDOUT, + env = {**os.environ, 'PYTHONIOENCODING': 'utf-8'}, + ) as proc: + stdout, _ = proc.communicate(str.encode(commands)) + stdout = stdout and bytes.decode(stdout) + + self.assertEqual(proc.returncode, 0) + self.assertIn("WARNING:", 
stdout) + self.assertIn("was edited", stdout) + + def test_file_modified_after_execution_with_restart(self): + script = """ + import random + # Any code with a source to step into so this script is not checked + # for changes when it's being changed + random.randint(1, 4) + print("hello") + """ + + commands = """ + ll + n + s + filename = $_frame.f_back.f_code.co_filename + def change_file(content, filename): + with open(filename, "w") as f: + f.write(f"print({content})") + + change_file('world', filename) + restart + ll + """ + + stdout, stderr = self.run_pdb_script(script, commands) + # Make sure the code is running correctly and the file is edited + self.assertIn("hello", stdout) + self.assertIn("world", stdout) + # The file was edited, but restart should clear the state and consider + # the file as up to date + self.assertNotIn("WARNING:", stdout) + def test_relative_imports(self): self.module_name = 't_main' os_helper.rmtree(self.module_name) diff --git a/Misc/NEWS.d/next/Library/2023-10-19-02-08-12.gh-issue-111051.8h1Dpk.rst b/Misc/NEWS.d/next/Library/2023-10-19-02-08-12.gh-issue-111051.8h1Dpk.rst new file mode 100644 index 00000000000000..adb3241b89ae3e --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-10-19-02-08-12.gh-issue-111051.8h1Dpk.rst @@ -0,0 +1 @@ +Added check for file modification during debugging with :mod:`pdb` From 4850410b60183dac021ded219a49be140fe5fefe Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Thu, 25 Jan 2024 09:34:03 -0800 Subject: [PATCH 089/160] gh-112075: Add try-incref functions from nogil branch for use in dict thread safety (#114512) * Bring in a subset of biased reference counting: https://github.com/colesbury/nogil/commit/b6b12a9a94e The NoGIL branch has functions for attempting to do an incref on an object which may or may not be in flight. This just brings those functions over so that they will be usable from in the dict implementation to get items w/o holding a lock. 
There's a handful of small simple modifications: Adding inline to the force inline functions to avoid a warning, and switching from _Py_ALWAYS_INLINE to Py_ALWAYS_INLINE as that's available Remove _Py_REF_LOCAL_SHIFT as it doesn't exist yet (and is currently 0 in the 3.12 nogil branch anyway) ob_ref_shared is currently Py_ssize_t and not uint32_t, so use that _PY_LIKELY doesn't exist, so drop it _Py_ThreadLocal becomes _Py_IsOwnedByCurrentThread Add '_PyInterpreterState_GET()' to _Py_IncRefTotal calls. Co-Authored-By: Sam Gross --- Include/internal/pycore_object.h | 136 +++++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index f413b8451e5ab4..ec14c07a2991ff 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -376,6 +376,142 @@ static inline void _PyObject_GC_UNTRACK( _PyObject_GC_UNTRACK(__FILE__, __LINE__, _PyObject_CAST(op)) #endif +#ifdef Py_GIL_DISABLED + +/* Tries to increment an object's reference count + * + * This is a specialized version of _Py_TryIncref that only succeeds if the + * object is immortal or local to this thread. It does not handle the case + * where the reference count modification requires an atomic operation. This + * allows call sites to specialize for the immortal/local case. 
+ */ +static inline int +_Py_TryIncrefFast(PyObject *op) { + uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local); + local += 1; + if (local == 0) { + // immortal + return 1; + } + if (_Py_IsOwnedByCurrentThread(op)) { + _Py_INCREF_STAT_INC(); + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, local); +#ifdef Py_REF_DEBUG + _Py_IncRefTotal(_PyInterpreterState_GET()); +#endif + return 1; + } + return 0; +} + +static inline int +_Py_TryIncRefShared(PyObject *op) +{ + Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&op->ob_ref_shared); + for (;;) { + // If the shared refcount is zero and the object is either merged + // or may not have weak references, then we cannot incref it. + if (shared == 0 || shared == _Py_REF_MERGED) { + return 0; + } + + if (_Py_atomic_compare_exchange_ssize( + &op->ob_ref_shared, + &shared, + shared + (1 << _Py_REF_SHARED_SHIFT))) { +#ifdef Py_REF_DEBUG + _Py_IncRefTotal(_PyInterpreterState_GET()); +#endif + _Py_INCREF_STAT_INC(); + return 1; + } + } +} + +/* Tries to incref the object op and ensures that *src still points to it. */ +static inline int +_Py_TryIncref(PyObject **src, PyObject *op) +{ + if (_Py_TryIncrefFast(op)) { + return 1; + } + if (!_Py_TryIncRefShared(op)) { + return 0; + } + if (op != _Py_atomic_load_ptr(src)) { + Py_DECREF(op); + return 0; + } + return 1; +} + +/* Loads and increfs an object from ptr, which may contain a NULL value. + Safe with concurrent (atomic) updates to ptr. + NOTE: The writer must set maybe-weakref on the stored object! */ +static inline PyObject * +_Py_XGetRef(PyObject **ptr) +{ + for (;;) { + PyObject *value = _Py_atomic_load_ptr(ptr); + if (value == NULL) { + return value; + } + if (_Py_TryIncref(ptr, value)) { + return value; + } + } +} + +/* Attempts to loads and increfs an object from ptr. Returns NULL + on failure, which may be due to a NULL value or a concurrent update. 
*/ +static inline PyObject * +_Py_TryXGetRef(PyObject **ptr) +{ + PyObject *value = _Py_atomic_load_ptr(ptr); + if (value == NULL) { + return value; + } + if (_Py_TryIncref(ptr, value)) { + return value; + } + return NULL; +} + +/* Like Py_NewRef but also optimistically sets _Py_REF_MAYBE_WEAKREF + on objects owned by a different thread. */ +static inline PyObject * +_Py_NewRefWithLock(PyObject *op) +{ + if (_Py_TryIncrefFast(op)) { + return op; + } + _Py_INCREF_STAT_INC(); + for (;;) { + Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&op->ob_ref_shared); + Py_ssize_t new_shared = shared + (1 << _Py_REF_SHARED_SHIFT); + if ((shared & _Py_REF_SHARED_FLAG_MASK) == 0) { + new_shared |= _Py_REF_MAYBE_WEAKREF; + } + if (_Py_atomic_compare_exchange_ssize( + &op->ob_ref_shared, + &shared, + new_shared)) { + return op; + } + } +} + +static inline PyObject * +_Py_XNewRefWithLock(PyObject *obj) +{ + if (obj == NULL) { + return NULL; + } + return _Py_NewRefWithLock(obj); +} + +#endif + #ifdef Py_REF_DEBUG extern void _PyInterpreterState_FinalizeRefTotal(PyInterpreterState *); extern void _Py_FinalizeRefTotal(_PyRuntimeState *); From b52fc70d1ab3be7866ab71065bae61a03a28bfae Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 25 Jan 2024 13:27:36 -0500 Subject: [PATCH 090/160] gh-112529: Implement GC for free-threaded builds (#114262) * gh-112529: Implement GC for free-threaded builds This implements a mark and sweep GC for the free-threaded builds of CPython. The implementation relies on mimalloc to find GC tracked objects (i.e., "containers"). 
--- Include/internal/pycore_freelist.h | 10 + Include/internal/pycore_gc.h | 35 +- Include/internal/pycore_object.h | 8 + Include/internal/pycore_object_stack.h | 84 + Include/object.h | 4 +- Lib/gzip.py | 2 +- Lib/test/test_gc.py | 13 +- Lib/test/test_io.py | 4 +- Makefile.pre.in | 3 + ...-01-18-20-20-37.gh-issue-112529.oVNvDG.rst | 3 + PCbuild/_freeze_module.vcxproj | 1 + PCbuild/_freeze_module.vcxproj.filters | 3 + PCbuild/pythoncore.vcxproj | 1 + PCbuild/pythoncore.vcxproj.filters | 3 + Python/gc.c | 8 +- Python/gc_free_threading.c | 1701 ++++++++++++++++- Python/object_stack.c | 87 + Python/pystate.c | 4 +- 18 files changed, 1952 insertions(+), 22 deletions(-) create mode 100644 Include/internal/pycore_object_stack.h create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-01-18-20-20-37.gh-issue-112529.oVNvDG.rst create mode 100644 Python/object_stack.c diff --git a/Include/internal/pycore_freelist.h b/Include/internal/pycore_freelist.h index 4ab93ee2bf6c32..dfb12839affedf 100644 --- a/Include/internal/pycore_freelist.h +++ b/Include/internal/pycore_freelist.h @@ -20,6 +20,7 @@ extern "C" { # define PyFloat_MAXFREELIST 100 # define PyContext_MAXFREELIST 255 # define _PyAsyncGen_MAXFREELIST 80 +# define _PyObjectStackChunk_MAXFREELIST 4 #else # define PyTuple_NFREELISTS 0 # define PyTuple_MAXFREELIST 0 @@ -27,6 +28,7 @@ extern "C" { # define PyFloat_MAXFREELIST 0 # define PyContext_MAXFREELIST 0 # define _PyAsyncGen_MAXFREELIST 0 +# define _PyObjectStackChunk_MAXFREELIST 0 #endif struct _Py_list_state { @@ -93,6 +95,13 @@ struct _Py_async_gen_state { #endif }; +struct _PyObjectStackChunk; + +struct _Py_object_stack_state { + struct _PyObjectStackChunk *free_list; + Py_ssize_t numfree; +}; + typedef struct _Py_freelist_state { struct _Py_float_state float_state; struct _Py_tuple_state tuple_state; @@ -100,6 +109,7 @@ typedef struct _Py_freelist_state { struct _Py_slice_state slice_state; struct _Py_context_state context_state; struct _Py_async_gen_state 
async_gen_state; + struct _Py_object_stack_state object_stack_state; } _PyFreeListState; #ifdef __cplusplus diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index d53de97709a782..b362a294a59042 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -37,10 +37,22 @@ static inline PyObject* _Py_FROM_GC(PyGC_Head *gc) { } +/* Bit flags for ob_gc_bits (in Py_GIL_DISABLED builds) */ +#ifdef Py_GIL_DISABLED +# define _PyGC_BITS_TRACKED (1) +# define _PyGC_BITS_FINALIZED (2) +# define _PyGC_BITS_UNREACHABLE (4) +# define _PyGC_BITS_FROZEN (8) +#endif + /* True if the object is currently tracked by the GC. */ static inline int _PyObject_GC_IS_TRACKED(PyObject *op) { +#ifdef Py_GIL_DISABLED + return (op->ob_gc_bits & _PyGC_BITS_TRACKED) != 0; +#else PyGC_Head *gc = _Py_AS_GC(op); return (gc->_gc_next != 0); +#endif } #define _PyObject_GC_IS_TRACKED(op) _PyObject_GC_IS_TRACKED(_Py_CAST(PyObject*, op)) @@ -107,24 +119,29 @@ static inline void _PyGCHead_SET_PREV(PyGC_Head *gc, PyGC_Head *prev) { gc->_gc_prev = ((gc->_gc_prev & ~_PyGC_PREV_MASK) | uprev); } -static inline int _PyGCHead_FINALIZED(PyGC_Head *gc) { - return ((gc->_gc_prev & _PyGC_PREV_MASK_FINALIZED) != 0); -} -static inline void _PyGCHead_SET_FINALIZED(PyGC_Head *gc) { - gc->_gc_prev |= _PyGC_PREV_MASK_FINALIZED; -} - static inline int _PyGC_FINALIZED(PyObject *op) { +#ifdef Py_GIL_DISABLED + return (op->ob_gc_bits & _PyGC_BITS_FINALIZED) != 0; +#else PyGC_Head *gc = _Py_AS_GC(op); - return _PyGCHead_FINALIZED(gc); + return ((gc->_gc_prev & _PyGC_PREV_MASK_FINALIZED) != 0); +#endif } static inline void _PyGC_SET_FINALIZED(PyObject *op) { +#ifdef Py_GIL_DISABLED + op->ob_gc_bits |= _PyGC_BITS_FINALIZED; +#else PyGC_Head *gc = _Py_AS_GC(op); - _PyGCHead_SET_FINALIZED(gc); + gc->_gc_prev |= _PyGC_PREV_MASK_FINALIZED; +#endif } static inline void _PyGC_CLEAR_FINALIZED(PyObject *op) { +#ifdef Py_GIL_DISABLED + op->ob_gc_bits &= ~_PyGC_BITS_FINALIZED; +#else PyGC_Head 
*gc = _Py_AS_GC(op); gc->_gc_prev &= ~_PyGC_PREV_MASK_FINALIZED; +#endif } diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index ec14c07a2991ff..4e52ffc77c5956 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -322,6 +322,9 @@ static inline void _PyObject_GC_TRACK( "object is in generation which is garbage collected", filename, lineno, __func__); +#ifdef Py_GIL_DISABLED + op->ob_gc_bits |= _PyGC_BITS_TRACKED; +#else PyInterpreterState *interp = _PyInterpreterState_GET(); PyGC_Head *generation0 = interp->gc.generation0; PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev); @@ -329,6 +332,7 @@ static inline void _PyObject_GC_TRACK( _PyGCHead_SET_PREV(gc, last); _PyGCHead_SET_NEXT(gc, generation0); generation0->_gc_prev = (uintptr_t)gc; +#endif } /* Tell the GC to stop tracking this object. @@ -352,6 +356,9 @@ static inline void _PyObject_GC_UNTRACK( "object not tracked by the garbage collector", filename, lineno, __func__); +#ifdef Py_GIL_DISABLED + op->ob_gc_bits &= ~_PyGC_BITS_TRACKED; +#else PyGC_Head *gc = _Py_AS_GC(op); PyGC_Head *prev = _PyGCHead_PREV(gc); PyGC_Head *next = _PyGCHead_NEXT(gc); @@ -359,6 +366,7 @@ static inline void _PyObject_GC_UNTRACK( _PyGCHead_SET_PREV(next, prev); gc->_gc_next = 0; gc->_gc_prev &= _PyGC_PREV_MASK_FINALIZED; +#endif } // Macros to accept any type for the parameter, and to automatically pass diff --git a/Include/internal/pycore_object_stack.h b/Include/internal/pycore_object_stack.h new file mode 100644 index 00000000000000..1dc1c1591525de --- /dev/null +++ b/Include/internal/pycore_object_stack.h @@ -0,0 +1,84 @@ +#ifndef Py_INTERNAL_OBJECT_STACK_H +#define Py_INTERNAL_OBJECT_STACK_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +// _PyObjectStack is a stack of Python objects implemented as a linked list of +// fixed size buffers. 
+ +// Chosen so that _PyObjectStackChunk is a power-of-two size. +#define _Py_OBJECT_STACK_CHUNK_SIZE 254 + +typedef struct _PyObjectStackChunk { + struct _PyObjectStackChunk *prev; + Py_ssize_t n; + PyObject *objs[_Py_OBJECT_STACK_CHUNK_SIZE]; +} _PyObjectStackChunk; + +typedef struct _PyObjectStack { + _PyObjectStackChunk *head; +} _PyObjectStack; + + +extern _PyObjectStackChunk * +_PyObjectStackChunk_New(void); + +extern void +_PyObjectStackChunk_Free(_PyObjectStackChunk *); + +extern void +_PyObjectStackChunk_ClearFreeList(_PyFreeListState *state, int is_finalization); + +// Push an item onto the stack. Return -1 on allocation failure, 0 on success. +static inline int +_PyObjectStack_Push(_PyObjectStack *stack, PyObject *obj) +{ + _PyObjectStackChunk *buf = stack->head; + if (buf == NULL || buf->n == _Py_OBJECT_STACK_CHUNK_SIZE) { + buf = _PyObjectStackChunk_New(); + if (buf == NULL) { + return -1; + } + buf->prev = stack->head; + buf->n = 0; + stack->head = buf; + } + + assert(buf->n >= 0 && buf->n < _Py_OBJECT_STACK_CHUNK_SIZE); + buf->objs[buf->n] = obj; + buf->n++; + return 0; +} + +// Pop the top item from the stack. Return NULL if the stack is empty. 
+static inline PyObject * +_PyObjectStack_Pop(_PyObjectStack *stack) +{ + _PyObjectStackChunk *buf = stack->head; + if (buf == NULL) { + return NULL; + } + assert(buf->n > 0 && buf->n <= _Py_OBJECT_STACK_CHUNK_SIZE); + buf->n--; + PyObject *obj = buf->objs[buf->n]; + if (buf->n == 0) { + stack->head = buf->prev; + _PyObjectStackChunk_Free(buf); + } + return obj; +} + +// Remove all items from the stack +extern void +_PyObjectStack_Clear(_PyObjectStack *stack); + +#ifdef __cplusplus +} +#endif +#endif // !Py_INTERNAL_OBJECT_STACK_H diff --git a/Include/object.h b/Include/object.h index 48f1ddf7510887..ef3fb721c2b012 100644 --- a/Include/object.h +++ b/Include/object.h @@ -212,7 +212,9 @@ struct _object { struct _PyMutex { uint8_t v; }; struct _object { - uintptr_t ob_tid; // thread id (or zero) + // ob_tid stores the thread id (or zero). It is also used by the GC to + // store linked lists and the computed "gc_refs" refcount. + uintptr_t ob_tid; uint16_t _padding; struct _PyMutex ob_mutex; // per-object lock uint8_t ob_gc_bits; // gc-related state diff --git a/Lib/gzip.py b/Lib/gzip.py index 177f9080dc5af8..fda93e0261e028 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -349,7 +349,7 @@ def closed(self): def close(self): fileobj = self.fileobj - if fileobj is None: + if fileobj is None or self._buffer.closed: return try: if self.mode == WRITE: diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 1d71dd9e262a6a..b01f344cb14a1a 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1,7 +1,7 @@ import unittest import unittest.mock from test.support import (verbose, refcount_test, - cpython_only, requires_subprocess) + cpython_only, requires_subprocess, Py_GIL_DISABLED) from test.support.import_helper import import_module from test.support.os_helper import temp_dir, TESTFN, unlink from test.support.script_helper import assert_python_ok, make_script @@ -815,6 +815,15 @@ def test_freeze(self): self.assertEqual(gc.get_freeze_count(), 0) def 
test_get_objects(self): + gc.collect() + l = [] + l.append(l) + self.assertTrue( + any(l is element for element in gc.get_objects()) + ) + + @unittest.skipIf(Py_GIL_DISABLED, 'need generational GC') + def test_get_objects_generations(self): gc.collect() l = [] l.append(l) @@ -1225,7 +1234,7 @@ def test_refcount_errors(self): p.stderr.close() # Verify that stderr has a useful error message: self.assertRegex(stderr, - br'gc\.c:[0-9]+: gc_decref: Assertion "gc_get_refs\(g\) > 0" failed.') + br'gc.*\.c:[0-9]+: .*: Assertion "gc_get_refs\(.+\) .*" failed.') self.assertRegex(stderr, br'refcount is too small') # "address : 0x7fb5062efc18" diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 936edea3cad70c..9e28b936e00bd5 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -3652,10 +3652,8 @@ def _check_create_at_shutdown(self, **kwargs): codecs.lookup('utf-8') class C: - def __init__(self): - self.buf = io.BytesIO() def __del__(self): - io.TextIOWrapper(self.buf, **{kwargs}) + io.TextIOWrapper(io.BytesIO(), **{kwargs}) print("ok") c = C() """.format(iomod=iomod, kwargs=kwargs) diff --git a/Makefile.pre.in b/Makefile.pre.in index 21b122ae0fcd9f..37a8b06987c710 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -439,6 +439,7 @@ PYTHON_OBJS= \ Python/modsupport.o \ Python/mysnprintf.o \ Python/mystrtoul.o \ + Python/object_stack.o \ Python/optimizer.o \ Python/optimizer_analysis.o \ Python/parking_lot.o \ @@ -1832,6 +1833,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_frame.h \ $(srcdir)/Include/internal/pycore_freelist.h \ $(srcdir)/Include/internal/pycore_function.h \ + $(srcdir)/Include/internal/pycore_gc.h \ $(srcdir)/Include/internal/pycore_genobject.h \ $(srcdir)/Include/internal/pycore_getopt.h \ $(srcdir)/Include/internal/pycore_gil.h \ @@ -1853,6 +1855,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_namespace.h \ $(srcdir)/Include/internal/pycore_object.h \ $(srcdir)/Include/internal/pycore_object_alloc.h \ + 
$(srcdir)/Include/internal/pycore_object_stack.h \ $(srcdir)/Include/internal/pycore_object_state.h \ $(srcdir)/Include/internal/pycore_obmalloc.h \ $(srcdir)/Include/internal/pycore_obmalloc_init.h \ diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-01-18-20-20-37.gh-issue-112529.oVNvDG.rst b/Misc/NEWS.d/next/Core and Builtins/2024-01-18-20-20-37.gh-issue-112529.oVNvDG.rst new file mode 100644 index 00000000000000..b3aa43801da488 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-01-18-20-20-37.gh-issue-112529.oVNvDG.rst @@ -0,0 +1,3 @@ +The free-threaded build now has its own thread-safe GC implementation that +uses mimalloc to find GC tracked objects. It is non-generational, unlike the +existing GC implementation. diff --git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj index 610581bc96cb1a..dde801fc0fd525 100644 --- a/PCbuild/_freeze_module.vcxproj +++ b/PCbuild/_freeze_module.vcxproj @@ -230,6 +230,7 @@ + diff --git a/PCbuild/_freeze_module.vcxproj.filters b/PCbuild/_freeze_module.vcxproj.filters index 3141913c043869..90ccb954b424bc 100644 --- a/PCbuild/_freeze_module.vcxproj.filters +++ b/PCbuild/_freeze_module.vcxproj.filters @@ -289,6 +289,9 @@ Source Files + + Source Files + Source Files diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 57275fb2039ee0..e0b9fc137457a0 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -591,6 +591,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 51cbb079b5b550..fd79436f5add97 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -1355,6 +1355,9 @@ Python + + Python + Python diff --git a/Python/gc.c b/Python/gc.c index 14870505ef1308..46646760291526 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -15,6 +15,8 @@ #include "pycore_weakref.h" // _PyWeakref_ClearRef() #include "pydtrace.h" +#ifndef Py_GIL_DISABLED + typedef struct _gc_runtime_state GCState; #ifdef 
Py_DEBUG @@ -964,10 +966,10 @@ finalize_garbage(PyThreadState *tstate, PyGC_Head *collectable) PyGC_Head *gc = GC_NEXT(collectable); PyObject *op = FROM_GC(gc); gc_list_move(gc, &seen); - if (!_PyGCHead_FINALIZED(gc) && + if (!_PyGC_FINALIZED(op) && (finalize = Py_TYPE(op)->tp_finalize) != NULL) { - _PyGCHead_SET_FINALIZED(gc); + _PyGC_SET_FINALIZED(op); Py_INCREF(op); finalize(op); assert(!_PyErr_Occurred(tstate)); @@ -1942,3 +1944,5 @@ PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg) done: gcstate->enabled = origenstate; } + +#endif // Py_GIL_DISABLED diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 207a43b68d21f5..f2cd84981461a4 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -1,9 +1,1704 @@ +// Cyclic garbage collector implementation for free-threaded build. #include "Python.h" -#include "pycore_pystate.h" // _PyFreeListState_GET() -#include "pycore_tstate.h" // _PyThreadStateImpl +#include "pycore_ceval.h" // _Py_set_eval_breaker_bit() +#include "pycore_context.h" +#include "pycore_dict.h" // _PyDict_MaybeUntrack() +#include "pycore_initconfig.h" +#include "pycore_interp.h" // PyInterpreterState.gc +#include "pycore_object.h" +#include "pycore_object_alloc.h" // _PyObject_MallocWithType() +#include "pycore_object_stack.h" +#include "pycore_pyerrors.h" +#include "pycore_pystate.h" // _PyThreadState_GET() +#include "pycore_tstate.h" // _PyThreadStateImpl +#include "pycore_weakref.h" // _PyWeakref_ClearRef() +#include "pydtrace.h" #ifdef Py_GIL_DISABLED +typedef struct _gc_runtime_state GCState; + +#ifdef Py_DEBUG +# define GC_DEBUG +#endif + +// Automatically choose the generation that needs collecting. +#define GENERATION_AUTO (-1) + +// A linked-list of objects using the `ob_tid` field as the next pointer. 
+struct worklist { + uintptr_t head; +}; + +struct worklist_iter { + uintptr_t *ptr; // pointer to current object + uintptr_t *next; // next value of ptr +}; + +struct visitor_args { + size_t offset; // offset of PyObject from start of block +}; + +// Per-collection state +struct collection_state { + struct visitor_args base; + PyInterpreterState *interp; + GCState *gcstate; + Py_ssize_t collected; + Py_ssize_t uncollectable; + struct worklist unreachable; + struct worklist legacy_finalizers; + struct worklist wrcb_to_call; + struct worklist objs_to_decref; +}; + +// iterate over a worklist +#define WORKSTACK_FOR_EACH(stack, op) \ + for ((op) = (PyObject *)(stack)->head; (op) != NULL; (op) = (PyObject *)(op)->ob_tid) + +// iterate over a worklist with support for removing the current object +#define WORKSTACK_FOR_EACH_ITER(stack, iter, op) \ + for (worklist_iter_init((iter), &(stack)->head), (op) = (PyObject *)(*(iter)->ptr); \ + (op) != NULL; \ + worklist_iter_init((iter), (iter)->next), (op) = (PyObject *)(*(iter)->ptr)) + +static void +worklist_push(struct worklist *worklist, PyObject *op) +{ + assert(op->ob_tid == 0); + op->ob_tid = worklist->head; + worklist->head = (uintptr_t)op; +} + +static PyObject * +worklist_pop(struct worklist *worklist) +{ + PyObject *op = (PyObject *)worklist->head; + if (op != NULL) { + worklist->head = op->ob_tid; + op->ob_tid = 0; + } + return op; +} + +static void +worklist_iter_init(struct worklist_iter *iter, uintptr_t *next) +{ + iter->ptr = next; + PyObject *op = (PyObject *)*(iter->ptr); + if (op) { + iter->next = &op->ob_tid; + } +} + +static void +worklist_remove(struct worklist_iter *iter) +{ + PyObject *op = (PyObject *)*(iter->ptr); + *(iter->ptr) = op->ob_tid; + op->ob_tid = 0; + iter->next = iter->ptr; +} + +static inline int +gc_is_unreachable(PyObject *op) +{ + return (op->ob_gc_bits & _PyGC_BITS_UNREACHABLE) != 0; +} + +static void +gc_set_unreachable(PyObject *op) +{ + op->ob_gc_bits |= _PyGC_BITS_UNREACHABLE; +} + 
+static void +gc_clear_unreachable(PyObject *op) +{ + op->ob_gc_bits &= ~_PyGC_BITS_UNREACHABLE; +} + +// Initialize the `ob_tid` field to zero if the object is not already +// initialized as unreachable. +static void +gc_maybe_init_refs(PyObject *op) +{ + if (!gc_is_unreachable(op)) { + gc_set_unreachable(op); + op->ob_tid = 0; + } +} + +static inline Py_ssize_t +gc_get_refs(PyObject *op) +{ + return (Py_ssize_t)op->ob_tid; +} + +static inline void +gc_add_refs(PyObject *op, Py_ssize_t refs) +{ + assert(_PyObject_GC_IS_TRACKED(op)); + op->ob_tid += refs; +} + +static inline void +gc_decref(PyObject *op) +{ + op->ob_tid -= 1; +} + +// Merge refcounts while the world is stopped. +static void +merge_refcount(PyObject *op, Py_ssize_t extra) +{ + assert(_PyInterpreterState_GET()->stoptheworld.world_stopped); + + Py_ssize_t refcount = Py_REFCNT(op); + refcount += extra; + +#ifdef Py_REF_DEBUG + _Py_AddRefTotal(_PyInterpreterState_GET(), extra); +#endif + + // No atomics necessary; all other threads in this interpreter are paused. + op->ob_tid = 0; + op->ob_ref_local = 0; + op->ob_ref_shared = _Py_REF_SHARED(refcount, _Py_REF_MERGED); +} + +static void +gc_restore_tid(PyObject *op) +{ + mi_segment_t *segment = _mi_ptr_segment(op); + if (_Py_REF_IS_MERGED(op->ob_ref_shared)) { + op->ob_tid = 0; + } + else { + // NOTE: may change ob_tid if the object was re-initialized by + // a different thread or its segment was abandoned and reclaimed. + // The segment thread id might be zero, in which case we should + // ensure the refcounts are now merged. + op->ob_tid = segment->thread_id; + if (op->ob_tid == 0) { + merge_refcount(op, 0); + } + } +} + +static void +gc_restore_refs(PyObject *op) +{ + if (gc_is_unreachable(op)) { + gc_restore_tid(op); + gc_clear_unreachable(op); + } +} + +// Given a mimalloc memory block return the PyObject stored in it or NULL if +// the block is not allocated or the object is not tracked or is immortal. 
+static PyObject * +op_from_block(void *block, void *arg, bool include_frozen) +{ + struct visitor_args *a = arg; + if (block == NULL) { + return NULL; + } + PyObject *op = (PyObject *)((char*)block + a->offset); + assert(PyObject_IS_GC(op)); + if (!_PyObject_GC_IS_TRACKED(op)) { + return NULL; + } + if (!include_frozen && (op->ob_gc_bits & _PyGC_BITS_FROZEN) != 0) { + return NULL; + } + return op; +} + +static int +gc_visit_heaps_lock_held(PyInterpreterState *interp, mi_block_visit_fun *visitor, + struct visitor_args *arg) +{ + // Offset of PyObject header from start of memory block. + Py_ssize_t offset_base = sizeof(PyGC_Head); + if (_PyMem_DebugEnabled()) { + // The debug allocator adds two words at the beginning of each block. + offset_base += 2 * sizeof(size_t); + } + + // Objects with Py_TPFLAGS_PREHEADER have two extra fields + Py_ssize_t offset_pre = offset_base + 2 * sizeof(PyObject*); + + // visit each thread's heaps for GC objects + for (PyThreadState *p = interp->threads.head; p != NULL; p = p->next) { + struct _mimalloc_thread_state *m = &((_PyThreadStateImpl *)p)->mimalloc; + + arg->offset = offset_base; + if (!mi_heap_visit_blocks(&m->heaps[_Py_MIMALLOC_HEAP_GC], true, + visitor, arg)) { + return -1; + } + arg->offset = offset_pre; + if (!mi_heap_visit_blocks(&m->heaps[_Py_MIMALLOC_HEAP_GC_PRE], true, + visitor, arg)) { + return -1; + } + } + + // visit blocks in the per-interpreter abandoned pool (from dead threads) + mi_abandoned_pool_t *pool = &interp->mimalloc.abandoned_pool; + arg->offset = offset_base; + if (!_mi_abandoned_pool_visit_blocks(pool, _Py_MIMALLOC_HEAP_GC, true, + visitor, arg)) { + return -1; + } + arg->offset = offset_pre; + if (!_mi_abandoned_pool_visit_blocks(pool, _Py_MIMALLOC_HEAP_GC_PRE, true, + visitor, arg)) { + return -1; + } + return 0; +} + +// Visits all GC objects in the interpreter's heaps. +// NOTE: It is not safe to allocate or free any mimalloc managed memory while +// this function is running. 
+static int +gc_visit_heaps(PyInterpreterState *interp, mi_block_visit_fun *visitor, + struct visitor_args *arg) +{ + // Other threads in the interpreter must be paused so that we can safely + // traverse their heaps. + assert(interp->stoptheworld.world_stopped); + + int err; + HEAD_LOCK(&_PyRuntime); + err = gc_visit_heaps_lock_held(interp, visitor, arg); + HEAD_UNLOCK(&_PyRuntime); + return err; +} + +// Subtract an incoming reference from the computed "gc_refs" refcount. +static int +visit_decref(PyObject *op, void *arg) +{ + if (_PyObject_GC_IS_TRACKED(op) && !_Py_IsImmortal(op)) { + // If update_refs hasn't reached this object yet, mark it + // as (tentatively) unreachable and initialize ob_tid to zero. + gc_maybe_init_refs(op); + gc_decref(op); + } + return 0; +} + +// Compute the number of external references to objects in the heap +// by subtracting internal references from the refcount. The difference is +// computed in the ob_tid field (we restore it later). +static bool +update_refs(const mi_heap_t *heap, const mi_heap_area_t *area, + void *block, size_t block_size, void *args) +{ + PyObject *op = op_from_block(block, args, false); + if (op == NULL) { + return true; + } + + // Exclude immortal objects from garbage collection + if (_Py_IsImmortal(op)) { + op->ob_tid = 0; + _PyObject_GC_UNTRACK(op); + gc_clear_unreachable(op); + return true; + } + + // Untrack tuples and dicts as necessary in this pass. + if (PyTuple_CheckExact(op)) { + _PyTuple_MaybeUntrack(op); + if (!_PyObject_GC_IS_TRACKED(op)) { + gc_restore_refs(op); + return true; + } + } + else if (PyDict_CheckExact(op)) { + _PyDict_MaybeUntrack(op); + if (!_PyObject_GC_IS_TRACKED(op)) { + gc_restore_refs(op); + return true; + } + } + + Py_ssize_t refcount = Py_REFCNT(op); + _PyObject_ASSERT(op, refcount >= 0); + + // Add the actual refcount to ob_tid. + gc_maybe_init_refs(op); + gc_add_refs(op, refcount); + + // Subtract internal references from ob_tid. 
Objects with ob_tid > 0 + // are directly reachable from outside containers, and so can't be + // collected. + Py_TYPE(op)->tp_traverse(op, visit_decref, NULL); + return true; +} + +static int +visit_clear_unreachable(PyObject *op, _PyObjectStack *stack) +{ + if (gc_is_unreachable(op)) { + _PyObject_ASSERT(op, _PyObject_GC_IS_TRACKED(op)); + gc_clear_unreachable(op); + return _PyObjectStack_Push(stack, op); + } + return 0; +} + +// Transitively clear the unreachable bit on all objects reachable from op. +static int +mark_reachable(PyObject *op) +{ + _PyObjectStack stack = { NULL }; + do { + traverseproc traverse = Py_TYPE(op)->tp_traverse; + if (traverse(op, (visitproc)&visit_clear_unreachable, &stack) < 0) { + _PyObjectStack_Clear(&stack); + return -1; + } + op = _PyObjectStack_Pop(&stack); + } while (op != NULL); + return 0; +} + +#ifdef GC_DEBUG +static bool +validate_gc_objects(const mi_heap_t *heap, const mi_heap_area_t *area, + void *block, size_t block_size, void *args) +{ + PyObject *op = op_from_block(block, args, false); + if (op == NULL) { + return true; + } + + _PyObject_ASSERT(op, gc_is_unreachable(op)); + _PyObject_ASSERT_WITH_MSG(op, gc_get_refs(op) >= 0, + "refcount is too small"); + return true; +} +#endif + +static bool +mark_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area, + void *block, size_t block_size, void *args) +{ + PyObject *op = op_from_block(block, args, false); + if (op == NULL) { + return true; + } + + if (gc_is_unreachable(op) && gc_get_refs(op) != 0) { + // Object is reachable but currently marked as unreachable. + // Mark it as reachable and traverse its pointers to find + // any other object that may be directly reachable from it. + gc_clear_unreachable(op); + + // Transitively mark reachable objects by clearing the unreachable flag. + if (mark_reachable(op) < 0) { + return false; + } + } + + return true; +} + +/* Return true if object has a pre-PEP 442 finalization method. 
*/ +static int +has_legacy_finalizer(PyObject *op) +{ + return Py_TYPE(op)->tp_del != NULL; +} + +static bool +scan_heap_visitor(const mi_heap_t *heap, const mi_heap_area_t *area, + void *block, size_t block_size, void *args) +{ + PyObject *op = op_from_block(block, args, false); + if (op == NULL) { + return true; + } + + struct collection_state *state = (struct collection_state *)args; + if (gc_is_unreachable(op)) { + // Merge and add one to the refcount to prevent deallocation while we + // are holding on to it in a worklist. + merge_refcount(op, 1); + + if (has_legacy_finalizer(op)) { + // would be unreachable, but has legacy finalizer + gc_clear_unreachable(op); + worklist_push(&state->legacy_finalizers, op); + } + else { + worklist_push(&state->unreachable, op); + } + } + else { + // object is reachable, restore `ob_tid`; we're done with these objects + gc_restore_tid(op); + state->gcstate->long_lived_total++; + } + + return true; +} + +static int +move_legacy_finalizer_reachable(struct collection_state *state); + +static int +deduce_unreachable_heap(PyInterpreterState *interp, + struct collection_state *state) +{ + // Identify objects that are directly reachable from outside the GC heap + // by computing the difference between the refcount and the number of + // incoming references. + gc_visit_heaps(interp, &update_refs, &state->base); + +#ifdef GC_DEBUG + // Check that all objects are marked as unreachable and that the computed + // reference count difference (stored in `ob_tid`) is non-negative. + gc_visit_heaps(interp, &validate_gc_objects, &state->base); +#endif + + // Transitively mark reachable objects by clearing the + // _PyGC_BITS_UNREACHABLE flag. + if (gc_visit_heaps(interp, &mark_heap_visitor, &state->base) < 0) { + return -1; + } + + // Identify remaining unreachable objects and push them onto a stack. + // Restores ob_tid for reachable objects. 
+ gc_visit_heaps(interp, &scan_heap_visitor, &state->base); + + if (state->legacy_finalizers.head) { + // There may be objects reachable from legacy finalizers that are in + // the unreachable set. We need to mark them as reachable. + if (move_legacy_finalizer_reachable(state) < 0) { + return -1; + } + } + + return 0; +} + +static int +move_legacy_finalizer_reachable(struct collection_state *state) +{ + // Clear the unreachable bit on all objects transitively reachable + // from the objects with legacy finalizers. + PyObject *op; + WORKSTACK_FOR_EACH(&state->legacy_finalizers, op) { + if (mark_reachable(op) < 0) { + return -1; + } + } + + // Move the reachable objects from the unreachable worklist to the legacy + // finalizer worklist. + struct worklist_iter iter; + WORKSTACK_FOR_EACH_ITER(&state->unreachable, &iter, op) { + if (!gc_is_unreachable(op)) { + worklist_remove(&iter); + worklist_push(&state->legacy_finalizers, op); + } + } + + return 0; +} + +// Clear all weakrefs to unreachable objects. Weakrefs with callbacks are +// enqueued in `wrcb_to_call`, but not invoked yet. +static void +clear_weakrefs(struct collection_state *state) +{ + PyObject *op; + WORKSTACK_FOR_EACH(&state->unreachable, op) { + if (PyWeakref_Check(op)) { + // Clear weakrefs that are themselves unreachable to ensure their + // callbacks will not be executed later from a `tp_clear()` + // inside delete_garbage(). That would be unsafe: it could + // resurrect a dead object or access an already cleared object. + // See bpo-38006 for one example. + _PyWeakref_ClearRef((PyWeakReference *)op); + } + + if (!_PyType_SUPPORTS_WEAKREFS(Py_TYPE(op))) { + continue; + } + + // NOTE: This is never triggered for static types so we can avoid the + // (slightly) more costly _PyObject_GET_WEAKREFS_LISTPTR(). + PyWeakReference **wrlist = _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op); + + // `op` may have some weakrefs. 
March over the list, clear + // all the weakrefs, and enqueue the weakrefs with callbacks + // that must be called into wrcb_to_call. + for (PyWeakReference *wr = *wrlist; wr != NULL; wr = *wrlist) { + // _PyWeakref_ClearRef clears the weakref but leaves + // the callback pointer intact. Obscure: it also + // changes *wrlist. + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == op); + _PyWeakref_ClearRef(wr); + _PyObject_ASSERT((PyObject *)wr, wr->wr_object == Py_None); + + // We do not invoke callbacks for weakrefs that are themselves + // unreachable. This is partly for historical reasons: weakrefs + // predate safe object finalization, and a weakref that is itself + // unreachable may have a callback that resurrects other + // unreachable objects. + if (wr->wr_callback == NULL || gc_is_unreachable((PyObject *)wr)) { + continue; + } + + // Create a new reference so that wr can't go away before we can + // process it again. + merge_refcount((PyObject *)wr, 1); + + // Enqueue weakref to be called later. + worklist_push(&state->wrcb_to_call, (PyObject *)wr); + } + } +} + +static void +call_weakref_callbacks(struct collection_state *state) +{ + // Invoke the callbacks we decided to honor. 
+ PyObject *op; + while ((op = worklist_pop(&state->wrcb_to_call)) != NULL) { + _PyObject_ASSERT(op, PyWeakref_Check(op)); + + PyWeakReference *wr = (PyWeakReference *)op; + PyObject *callback = wr->wr_callback; + _PyObject_ASSERT(op, callback != NULL); + + /* copy-paste of weakrefobject.c's handle_callback() */ + PyObject *temp = PyObject_CallOneArg(callback, (PyObject *)wr); + if (temp == NULL) { + PyErr_WriteUnraisable(callback); + } + else { + Py_DECREF(temp); + } + + gc_restore_tid(op); + Py_DECREF(op); // drop worklist reference + } +} + + +static GCState * +get_gc_state(void) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + return &interp->gc; +} + + +void +_PyGC_InitState(GCState *gcstate) +{ +} + + +PyStatus +_PyGC_Init(PyInterpreterState *interp) +{ + GCState *gcstate = &interp->gc; + + gcstate->garbage = PyList_New(0); + if (gcstate->garbage == NULL) { + return _PyStatus_NO_MEMORY(); + } + + gcstate->callbacks = PyList_New(0); + if (gcstate->callbacks == NULL) { + return _PyStatus_NO_MEMORY(); + } + + return _PyStatus_OK(); +} + +static void +debug_cycle(const char *msg, PyObject *op) +{ + PySys_FormatStderr("gc: %s <%s %p>\n", + msg, Py_TYPE(op)->tp_name, op); +} + +/* Run first-time finalizers (if any) on all the objects in collectable. + * Note that this may remove some (or even all) of the objects from the + * list, due to refcounts falling to 0. + */ +static void +finalize_garbage(struct collection_state *state) +{ + // NOTE: the unreachable worklist holds a strong reference to the object + // to prevent it from being deallocated while we are holding on to it. + PyObject *op; + WORKSTACK_FOR_EACH(&state->unreachable, op) { + if (!_PyGC_FINALIZED(op)) { + destructor finalize = Py_TYPE(op)->tp_finalize; + if (finalize != NULL) { + _PyGC_SET_FINALIZED(op); + finalize(op); + assert(!_PyErr_Occurred(_PyThreadState_GET())); + } + } + } +} + +// Break reference cycles by clearing the containers involved. 
+static void +delete_garbage(struct collection_state *state) +{ + PyThreadState *tstate = _PyThreadState_GET(); + GCState *gcstate = state->gcstate; + + assert(!_PyErr_Occurred(tstate)); + + PyObject *op; + while ((op = worklist_pop(&state->objs_to_decref)) != NULL) { + Py_DECREF(op); + } + + while ((op = worklist_pop(&state->unreachable)) != NULL) { + _PyObject_ASSERT(op, gc_is_unreachable(op)); + + // Clear the unreachable flag. + gc_clear_unreachable(op); + + if (!_PyObject_GC_IS_TRACKED(op)) { + // Object might have been untracked by some other tp_clear() call. + Py_DECREF(op); // drop the reference from the worklist + continue; + } + + state->collected++; + + if (gcstate->debug & _PyGC_DEBUG_SAVEALL) { + assert(gcstate->garbage != NULL); + if (PyList_Append(gcstate->garbage, op) < 0) { + _PyErr_Clear(tstate); + } + } + else { + inquiry clear = Py_TYPE(op)->tp_clear; + if (clear != NULL) { + (void) clear(op); + if (_PyErr_Occurred(tstate)) { + PyErr_FormatUnraisable("Exception ignored in tp_clear of %s", + Py_TYPE(op)->tp_name); + } + } + } + + Py_DECREF(op); // drop the reference from the worklist + } +} + +static void +handle_legacy_finalizers(struct collection_state *state) +{ + GCState *gcstate = state->gcstate; + assert(gcstate->garbage != NULL); + + PyObject *op; + while ((op = worklist_pop(&state->legacy_finalizers)) != NULL) { + state->uncollectable++; + + if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE) { + debug_cycle("uncollectable", op); + } + + if ((gcstate->debug & _PyGC_DEBUG_SAVEALL) || has_legacy_finalizer(op)) { + if (PyList_Append(gcstate->garbage, op) < 0) { + PyErr_Clear(); + } + } + Py_DECREF(op); // drop worklist reference + } +} + +// Show stats for objects in each generations +static void +show_stats_each_generations(GCState *gcstate) +{ + // TODO +} + +// Traversal callback for handle_resurrected_objects. 
+static int +visit_decref_unreachable(PyObject *op, void *data) +{ + if (gc_is_unreachable(op) && _PyObject_GC_IS_TRACKED(op)) { + op->ob_ref_local -= 1; + } + return 0; +} + +// Handle objects that may have resurrected after a call to 'finalize_garbage'. +static int +handle_resurrected_objects(struct collection_state *state) +{ + // First, find externally reachable objects by computing the reference + // count difference in ob_ref_local. We can't use ob_tid here because + // that's already used to store the unreachable worklist. + PyObject *op; + struct worklist_iter iter; + WORKSTACK_FOR_EACH_ITER(&state->unreachable, &iter, op) { + assert(gc_is_unreachable(op)); + assert(_Py_REF_IS_MERGED(op->ob_ref_shared)); + + if (!_PyObject_GC_IS_TRACKED(op)) { + // Object was untracked by a finalizer. Schedule it for a Py_DECREF + // after we finish with the stop-the-world pause. + gc_clear_unreachable(op); + worklist_remove(&iter); + worklist_push(&state->objs_to_decref, op); + continue; + } + + Py_ssize_t refcount = (op->ob_ref_shared >> _Py_REF_SHARED_SHIFT); + if (refcount > INT32_MAX) { + // The refcount is too big to fit in `ob_ref_local`. Mark the + // object as immortal and bail out. + gc_clear_unreachable(op); + worklist_remove(&iter); + _Py_SetImmortal(op); + continue; + } + + op->ob_ref_local += (uint32_t)refcount; + + // Subtract one to account for the reference from the worklist. + op->ob_ref_local -= 1; + + traverseproc traverse = Py_TYPE(op)->tp_traverse; + (void) traverse(op, + (visitproc)visit_decref_unreachable, + NULL); + } + + // Find resurrected objects + bool any_resurrected = false; + WORKSTACK_FOR_EACH(&state->unreachable, op) { + int32_t gc_refs = (int32_t)op->ob_ref_local; + op->ob_ref_local = 0; // restore ob_ref_local + + _PyObject_ASSERT(op, gc_refs >= 0); + + if (gc_is_unreachable(op) && gc_refs > 0) { + // Clear the unreachable flag on any transitively reachable objects + // from this one. 
+ any_resurrected = true; + gc_clear_unreachable(op); + if (mark_reachable(op) < 0) { + return -1; + } + } + } + + if (any_resurrected) { + // Remove resurrected objects from the unreachable list. + WORKSTACK_FOR_EACH_ITER(&state->unreachable, &iter, op) { + if (!gc_is_unreachable(op)) { + _PyObject_ASSERT(op, Py_REFCNT(op) > 1); + worklist_remove(&iter); + merge_refcount(op, -1); // remove worklist reference + } + } + } + +#ifdef GC_DEBUG + WORKSTACK_FOR_EACH(&state->unreachable, op) { + _PyObject_ASSERT(op, gc_is_unreachable(op)); + _PyObject_ASSERT(op, _PyObject_GC_IS_TRACKED(op)); + _PyObject_ASSERT(op, op->ob_ref_local == 0); + _PyObject_ASSERT(op, _Py_REF_IS_MERGED(op->ob_ref_shared)); + } +#endif + + return 0; +} + + +/* Invoke progress callbacks to notify clients that garbage collection + * is starting or stopping + */ +static void +invoke_gc_callback(PyThreadState *tstate, const char *phase, + int generation, Py_ssize_t collected, + Py_ssize_t uncollectable) +{ + assert(!_PyErr_Occurred(tstate)); + + /* we may get called very early */ + GCState *gcstate = &tstate->interp->gc; + if (gcstate->callbacks == NULL) { + return; + } + + /* The local variable cannot be rebound, check it for sanity */ + assert(PyList_CheckExact(gcstate->callbacks)); + PyObject *info = NULL; + if (PyList_GET_SIZE(gcstate->callbacks) != 0) { + info = Py_BuildValue("{sisnsn}", + "generation", generation, + "collected", collected, + "uncollectable", uncollectable); + if (info == NULL) { + PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks"); + return; + } + } + + PyObject *phase_obj = PyUnicode_FromString(phase); + if (phase_obj == NULL) { + Py_XDECREF(info); + PyErr_FormatUnraisable("Exception ignored on invoking gc callbacks"); + return; + } + + PyObject *stack[] = {phase_obj, info}; + for (Py_ssize_t i=0; i<PyList_GET_SIZE(gcstate->callbacks); i++) { + PyObject *r, *cb = PyList_GET_ITEM(gcstate->callbacks, i); + Py_INCREF(cb); /* make sure cb doesn't go away */ + r = PyObject_Vectorcall(cb, 
stack, 2, NULL); + if (r == NULL) { + PyErr_WriteUnraisable(cb); + } + else { + Py_DECREF(r); + } + Py_DECREF(cb); + } + Py_DECREF(phase_obj); + Py_XDECREF(info); + assert(!_PyErr_Occurred(tstate)); +} + + +/* Find the oldest generation (highest numbered) where the count + * exceeds the threshold. Objects in that generation and + * generations younger than it will be collected. */ +static int +gc_select_generation(GCState *gcstate) +{ + for (int i = NUM_GENERATIONS-1; i >= 0; i--) { + if (gcstate->generations[i].count > gcstate->generations[i].threshold) { + /* Avoid quadratic performance degradation in number + of tracked objects (see also issue #4074): + + To limit the cost of garbage collection, there are two strategies; + - make each collection faster, e.g. by scanning fewer objects + - do less collections + This heuristic is about the latter strategy. + + In addition to the various configurable thresholds, we only trigger a + full collection if the ratio + + long_lived_pending / long_lived_total + + is above a given value (hardwired to 25%). + + The reason is that, while "non-full" collections (i.e., collections of + the young and middle generations) will always examine roughly the same + number of objects -- determined by the aforementioned thresholds --, + the cost of a full collection is proportional to the total number of + long-lived objects, which is virtually unbounded. + + Indeed, it has been remarked that doing a full collection every + <constant number> of object creations entails a dramatic performance + degradation in workloads which consist in creating and storing lots of + long-lived objects (e.g. building a large list of GC-tracked objects would + show quadratic performance, instead of linear as expected: see issue #4074). 
+ + Using the above ratio, instead, yields amortized linear performance in + the total number of objects (the effect of which can be summarized + thusly: "each full garbage collection is more and more costly as the + number of objects grows, but we do fewer and fewer of them"). + + This heuristic was suggested by Martin von Löwis on python-dev in + June 2008. His original analysis and proposal can be found at: + http://mail.python.org/pipermail/python-dev/2008-June/080579.html + */ + if (i == NUM_GENERATIONS - 1 + && gcstate->long_lived_pending < gcstate->long_lived_total / 4) + { + continue; + } + return i; + } + } + return -1; +} + +static void +cleanup_worklist(struct worklist *worklist) +{ + PyObject *op; + while ((op = worklist_pop(worklist)) != NULL) { + gc_restore_tid(op); + gc_clear_unreachable(op); + Py_DECREF(op); + } +} + +static void +gc_collect_internal(PyInterpreterState *interp, struct collection_state *state) +{ + _PyEval_StopTheWorld(interp); + // Find unreachable objects + int err = deduce_unreachable_heap(interp, state); + if (err < 0) { + _PyEval_StartTheWorld(interp); + goto error; + } + + // Print debugging information. + if (interp->gc.debug & _PyGC_DEBUG_COLLECTABLE) { + PyObject *op; + WORKSTACK_FOR_EACH(&state->unreachable, op) { + debug_cycle("collectable", op); + } + } + + // Clear weakrefs and enqueue callbacks (but do not call them). + clear_weakrefs(state); + _PyEval_StartTheWorld(interp); + + // Call weakref callbacks and finalizers after unpausing other threads to + // avoid potential deadlocks. + call_weakref_callbacks(state); + finalize_garbage(state); + + // Handle any objects that may have resurrected after the finalization. + _PyEval_StopTheWorld(interp); + err = handle_resurrected_objects(state); + _PyEval_StartTheWorld(interp); + + if (err < 0) { + goto error; + } + + // Call tp_clear on objects in the unreachable set. This will cause + // the reference cycles to be broken. It may also cause some objects + // to be freed. 
+ delete_garbage(state); + + // Append objects with legacy finalizers to the "gc.garbage" list. + handle_legacy_finalizers(state); + return; + +error: + cleanup_worklist(&state->unreachable); + cleanup_worklist(&state->legacy_finalizers); + cleanup_worklist(&state->wrcb_to_call); + cleanup_worklist(&state->objs_to_decref); + PyErr_NoMemory(); + PyErr_FormatUnraisable("Out of memory during garbage collection"); +} + +/* This is the main function. Read this to understand how the + * collection process works. */ +static Py_ssize_t +gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) +{ + int i; + Py_ssize_t m = 0; /* # objects collected */ + Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */ + _PyTime_t t1 = 0; /* initialize to prevent a compiler warning */ + GCState *gcstate = &tstate->interp->gc; + + // gc_collect_main() must not be called before _PyGC_Init + // or after _PyGC_Fini() + assert(gcstate->garbage != NULL); + assert(!_PyErr_Occurred(tstate)); + + int expected = 0; + if (!_Py_atomic_compare_exchange_int(&gcstate->collecting, &expected, 1)) { + // Don't start a garbage collection if one is already in progress. + return 0; + } + + if (generation == GENERATION_AUTO) { + // Select the oldest generation that needs collecting. We will collect + // objects from that generation and all generations younger than it. + generation = gc_select_generation(gcstate); + if (generation < 0) { + // No generation needs to be collected. 
+ _Py_atomic_store_int(&gcstate->collecting, 0); + return 0; + } + } + + assert(generation >= 0 && generation < NUM_GENERATIONS); + +#ifdef Py_STATS + if (_Py_stats) { + _Py_stats->object_stats.object_visits = 0; + } +#endif + GC_STAT_ADD(generation, collections, 1); + + if (reason != _Py_GC_REASON_SHUTDOWN) { + invoke_gc_callback(tstate, "start", generation, 0, 0); + } + + if (gcstate->debug & _PyGC_DEBUG_STATS) { + PySys_WriteStderr("gc: collecting generation %d...\n", generation); + show_stats_each_generations(gcstate); + t1 = _PyTime_GetPerfCounter(); + } + + if (PyDTrace_GC_START_ENABLED()) { + PyDTrace_GC_START(generation); + } + + /* update collection and allocation counters */ + if (generation+1 < NUM_GENERATIONS) { + gcstate->generations[generation+1].count += 1; + } + for (i = 0; i <= generation; i++) { + gcstate->generations[i].count = 0; + } + + PyInterpreterState *interp = tstate->interp; + + struct collection_state state = { + .interp = interp, + .gcstate = gcstate, + }; + + gc_collect_internal(interp, &state); + + m = state.collected; + n = state.uncollectable; + + if (gcstate->debug & _PyGC_DEBUG_STATS) { + double d = _PyTime_AsSecondsDouble(_PyTime_GetPerfCounter() - t1); + PySys_WriteStderr( + "gc: done, %zd unreachable, %zd uncollectable, %.4fs elapsed\n", + n+m, n, d); + } + + // Clear free lists in all threads + _PyGC_ClearAllFreeLists(interp); + + if (_PyErr_Occurred(tstate)) { + if (reason == _Py_GC_REASON_SHUTDOWN) { + _PyErr_Clear(tstate); + } + else { + PyErr_FormatUnraisable("Exception ignored in garbage collection"); + } + } + + /* Update stats */ + struct gc_generation_stats *stats = &gcstate->generation_stats[generation]; + stats->collections++; + stats->collected += m; + stats->uncollectable += n; + + GC_STAT_ADD(generation, objects_collected, m); +#ifdef Py_STATS + if (_Py_stats) { + GC_STAT_ADD(generation, object_visits, + _Py_stats->object_stats.object_visits); + _Py_stats->object_stats.object_visits = 0; + } +#endif + + if 
(PyDTrace_GC_DONE_ENABLED()) { + PyDTrace_GC_DONE(n + m); + } + + if (reason != _Py_GC_REASON_SHUTDOWN) { + invoke_gc_callback(tstate, "stop", generation, m, n); + } + + assert(!_PyErr_Occurred(tstate)); + _Py_atomic_store_int(&gcstate->collecting, 0); + return n + m; +} + +struct get_referrers_args { + struct visitor_args base; + PyObject *objs; + struct worklist results; +}; + +static int +referrersvisit(PyObject* obj, void *arg) +{ + PyObject *objs = arg; + Py_ssize_t i; + for (i = 0; i < PyTuple_GET_SIZE(objs); i++) { + if (PyTuple_GET_ITEM(objs, i) == obj) { + return 1; + } + } + return 0; +} + +static bool +visit_get_referrers(const mi_heap_t *heap, const mi_heap_area_t *area, + void *block, size_t block_size, void *args) +{ + PyObject *op = op_from_block(block, args, true); + if (op == NULL) { + return true; + } + + struct get_referrers_args *arg = (struct get_referrers_args *)args; + if (Py_TYPE(op)->tp_traverse(op, referrersvisit, arg->objs)) { + op->ob_tid = 0; // we will restore the refcount later + worklist_push(&arg->results, op); + } + + return true; +} + +PyObject * +_PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs) +{ + PyObject *result = PyList_New(0); + if (!result) { + return NULL; + } + + _PyEval_StopTheWorld(interp); + + // Append all objects to a worklist. This abuses ob_tid. We will restore + // it later. NOTE: We can't append to the PyListObject during + // gc_visit_heaps() because PyList_Append() may reclaim an abandoned + // mimalloc segments while we are traversing them. 
+ struct get_referrers_args args = { .objs = objs }; + gc_visit_heaps(interp, &visit_get_referrers, &args.base); + + bool error = false; + PyObject *op; + while ((op = worklist_pop(&args.results)) != NULL) { + gc_restore_tid(op); + if (op != objs && PyList_Append(result, op) < 0) { + error = true; + break; + } + } + + // In case of error, clear the remaining worklist + while ((op = worklist_pop(&args.results)) != NULL) { + gc_restore_tid(op); + } + + _PyEval_StartTheWorld(interp); + + if (error) { + Py_DECREF(result); + return NULL; + } + + return result; +} + +struct get_objects_args { + struct visitor_args base; + struct worklist objects; +}; + +static bool +visit_get_objects(const mi_heap_t *heap, const mi_heap_area_t *area, + void *block, size_t block_size, void *args) +{ + PyObject *op = op_from_block(block, args, true); + if (op == NULL) { + return true; + } + + struct get_objects_args *arg = (struct get_objects_args *)args; + op->ob_tid = 0; // we will restore the refcount later + worklist_push(&arg->objects, op); + + return true; +} + +PyObject * +_PyGC_GetObjects(PyInterpreterState *interp, Py_ssize_t generation) +{ + PyObject *result = PyList_New(0); + if (!result) { + return NULL; + } + + _PyEval_StopTheWorld(interp); + + // Append all objects to a worklist. This abuses ob_tid. We will restore + // it later. NOTE: We can't append to the list during gc_visit_heaps() + // because PyList_Append() may reclaim an abandoned mimalloc segment + // while we are traversing it. 
+ struct get_objects_args args = { 0 }; + gc_visit_heaps(interp, &visit_get_objects, &args.base); + + bool error = false; + PyObject *op; + while ((op = worklist_pop(&args.objects)) != NULL) { + gc_restore_tid(op); + if (op != result && PyList_Append(result, op) < 0) { + error = true; + break; + } + } + + // In case of error, clear the remaining worklist + while ((op = worklist_pop(&args.objects)) != NULL) { + gc_restore_tid(op); + } + + _PyEval_StartTheWorld(interp); + + if (error) { + Py_DECREF(result); + return NULL; + } + + return result; +} + +static bool +visit_freeze(const mi_heap_t *heap, const mi_heap_area_t *area, + void *block, size_t block_size, void *args) +{ + PyObject *op = op_from_block(block, args, true); + if (op != NULL) { + op->ob_gc_bits |= _PyGC_BITS_FROZEN; + } + return true; +} + +void +_PyGC_Freeze(PyInterpreterState *interp) +{ + struct visitor_args args; + _PyEval_StopTheWorld(interp); + gc_visit_heaps(interp, &visit_freeze, &args); + _PyEval_StartTheWorld(interp); +} + +static bool +visit_unfreeze(const mi_heap_t *heap, const mi_heap_area_t *area, + void *block, size_t block_size, void *args) +{ + PyObject *op = op_from_block(block, args, true); + if (op != NULL) { + op->ob_gc_bits &= ~_PyGC_BITS_FROZEN; + } + return true; +} + +void +_PyGC_Unfreeze(PyInterpreterState *interp) +{ + struct visitor_args args; + _PyEval_StopTheWorld(interp); + gc_visit_heaps(interp, &visit_unfreeze, &args); + _PyEval_StartTheWorld(interp); +} + +struct count_frozen_args { + struct visitor_args base; + Py_ssize_t count; +}; + +static bool +visit_count_frozen(const mi_heap_t *heap, const mi_heap_area_t *area, + void *block, size_t block_size, void *args) +{ + PyObject *op = op_from_block(block, args, true); + if (op != NULL && (op->ob_gc_bits & _PyGC_BITS_FROZEN) != 0) { + struct count_frozen_args *arg = (struct count_frozen_args *)args; + arg->count++; + } + return true; +} + +Py_ssize_t +_PyGC_GetFreezeCount(PyInterpreterState *interp) +{ + struct 
count_frozen_args args = { .count = 0 }; + _PyEval_StopTheWorld(interp); + gc_visit_heaps(interp, &visit_count_frozen, &args.base); + _PyEval_StartTheWorld(interp); + return args.count; +} + +/* C API for controlling the state of the garbage collector */ +int +PyGC_Enable(void) +{ + GCState *gcstate = get_gc_state(); + int old_state = gcstate->enabled; + gcstate->enabled = 1; + return old_state; +} + +int +PyGC_Disable(void) +{ + GCState *gcstate = get_gc_state(); + int old_state = gcstate->enabled; + gcstate->enabled = 0; + return old_state; +} + +int +PyGC_IsEnabled(void) +{ + GCState *gcstate = get_gc_state(); + return gcstate->enabled; +} + +/* Public API to invoke gc.collect() from C */ +Py_ssize_t +PyGC_Collect(void) +{ + PyThreadState *tstate = _PyThreadState_GET(); + GCState *gcstate = &tstate->interp->gc; + + if (!gcstate->enabled) { + return 0; + } + + Py_ssize_t n; + PyObject *exc = _PyErr_GetRaisedException(tstate); + n = gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_MANUAL); + _PyErr_SetRaisedException(tstate, exc); + + return n; +} + +Py_ssize_t +_PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) +{ + return gc_collect_main(tstate, generation, reason); +} + +Py_ssize_t +_PyGC_CollectNoFail(PyThreadState *tstate) +{ + /* Ideally, this function is only called on interpreter shutdown, + and therefore not recursively. Unfortunately, when there are daemon + threads, a daemon thread can start a cyclic garbage collection + during interpreter shutdown (and then never finish it). + See http://bugs.python.org/issue8713#msg195178 for an example. 
+ */ + return gc_collect_main(tstate, NUM_GENERATIONS - 1, _Py_GC_REASON_SHUTDOWN); +} + +void +_PyGC_DumpShutdownStats(PyInterpreterState *interp) +{ + GCState *gcstate = &interp->gc; + if (!(gcstate->debug & _PyGC_DEBUG_SAVEALL) + && gcstate->garbage != NULL && PyList_GET_SIZE(gcstate->garbage) > 0) { + const char *message; + if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE) { + message = "gc: %zd uncollectable objects at shutdown"; + } + else { + message = "gc: %zd uncollectable objects at shutdown; " \ + "use gc.set_debug(gc.DEBUG_UNCOLLECTABLE) to list them"; + } + /* PyErr_WarnFormat does too many things and we are at shutdown, + the warnings module's dependencies (e.g. linecache) may be gone + already. */ + if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0, + "gc", NULL, message, + PyList_GET_SIZE(gcstate->garbage))) + { + PyErr_WriteUnraisable(NULL); + } + if (gcstate->debug & _PyGC_DEBUG_UNCOLLECTABLE) { + PyObject *repr = NULL, *bytes = NULL; + repr = PyObject_Repr(gcstate->garbage); + if (!repr || !(bytes = PyUnicode_EncodeFSDefault(repr))) { + PyErr_WriteUnraisable(gcstate->garbage); + } + else { + PySys_WriteStderr( + " %s\n", + PyBytes_AS_STRING(bytes) + ); + } + Py_XDECREF(repr); + Py_XDECREF(bytes); + } + } +} + + +void +_PyGC_Fini(PyInterpreterState *interp) +{ + GCState *gcstate = &interp->gc; + Py_CLEAR(gcstate->garbage); + Py_CLEAR(gcstate->callbacks); + + /* We expect that none of this interpreters objects are shared + with other interpreters. + See https://github.com/python/cpython/issues/90228. 
*/ +} + +/* for debugging */ + +#ifdef Py_DEBUG +static int +visit_validate(PyObject *op, void *parent_raw) +{ + PyObject *parent = _PyObject_CAST(parent_raw); + if (_PyObject_IsFreed(op)) { + _PyObject_ASSERT_FAILED_MSG(parent, + "PyObject_GC_Track() object is not valid"); + } + return 0; +} +#endif + + +/* extension modules might be compiled with GC support so these + functions must always be available */ + +void +PyObject_GC_Track(void *op_raw) +{ + PyObject *op = _PyObject_CAST(op_raw); + if (_PyObject_GC_IS_TRACKED(op)) { + _PyObject_ASSERT_FAILED_MSG(op, + "object already tracked " + "by the garbage collector"); + } + _PyObject_GC_TRACK(op); + +#ifdef Py_DEBUG + /* Check that the object is valid: validate objects traversed + by tp_traverse() */ + traverseproc traverse = Py_TYPE(op)->tp_traverse; + (void)traverse(op, visit_validate, op); +#endif +} + +void +PyObject_GC_UnTrack(void *op_raw) +{ + PyObject *op = _PyObject_CAST(op_raw); + /* Obscure: the Py_TRASHCAN mechanism requires that we be able to + * call PyObject_GC_UnTrack twice on an object. 
+ */ + if (_PyObject_GC_IS_TRACKED(op)) { + _PyObject_GC_UNTRACK(op); + } +} + +int +PyObject_IS_GC(PyObject *obj) +{ + return _PyObject_IS_GC(obj); +} + +void +_Py_ScheduleGC(PyInterpreterState *interp) +{ + _Py_set_eval_breaker_bit(interp, _PY_GC_SCHEDULED_BIT, 1); +} + +void +_PyObject_GC_Link(PyObject *op) +{ + PyThreadState *tstate = _PyThreadState_GET(); + GCState *gcstate = &tstate->interp->gc; + gcstate->generations[0].count++; /* number of allocated GC objects */ + if (gcstate->generations[0].count > gcstate->generations[0].threshold && + gcstate->enabled && + gcstate->generations[0].threshold && + !_Py_atomic_load_int_relaxed(&gcstate->collecting) && + !_PyErr_Occurred(tstate)) + { + _Py_ScheduleGC(tstate->interp); + } +} + +void +_Py_RunGC(PyThreadState *tstate) +{ + gc_collect_main(tstate, GENERATION_AUTO, _Py_GC_REASON_HEAP); +} + +static PyObject * +gc_alloc(PyTypeObject *tp, size_t basicsize, size_t presize) +{ + PyThreadState *tstate = _PyThreadState_GET(); + if (basicsize > PY_SSIZE_T_MAX - presize) { + return _PyErr_NoMemory(tstate); + } + size_t size = presize + basicsize; + char *mem = _PyObject_MallocWithType(tp, size); + if (mem == NULL) { + return _PyErr_NoMemory(tstate); + } + ((PyObject **)mem)[0] = NULL; + ((PyObject **)mem)[1] = NULL; + PyObject *op = (PyObject *)(mem + presize); + _PyObject_GC_Link(op); + return op; +} + +PyObject * +_PyObject_GC_New(PyTypeObject *tp) +{ + size_t presize = _PyType_PreHeaderSize(tp); + PyObject *op = gc_alloc(tp, _PyObject_SIZE(tp), presize); + if (op == NULL) { + return NULL; + } + _PyObject_Init(op, tp); + return op; +} + +PyVarObject * +_PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems) +{ + PyVarObject *op; + + if (nitems < 0) { + PyErr_BadInternalCall(); + return NULL; + } + size_t presize = _PyType_PreHeaderSize(tp); + size_t size = _PyObject_VAR_SIZE(tp, nitems); + op = (PyVarObject *)gc_alloc(tp, size, presize); + if (op == NULL) { + return NULL; + } + _PyObject_InitVar(op, tp, nitems); + 
return op; +} + +PyObject * +PyUnstable_Object_GC_NewWithExtraData(PyTypeObject *tp, size_t extra_size) +{ + size_t presize = _PyType_PreHeaderSize(tp); + PyObject *op = gc_alloc(tp, _PyObject_SIZE(tp) + extra_size, presize); + if (op == NULL) { + return NULL; + } + memset(op, 0, _PyObject_SIZE(tp) + extra_size); + _PyObject_Init(op, tp); + return op; +} + +PyVarObject * +_PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems) +{ + const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems); + const size_t presize = _PyType_PreHeaderSize(((PyObject *)op)->ob_type); + _PyObject_ASSERT((PyObject *)op, !_PyObject_GC_IS_TRACKED(op)); + if (basicsize > (size_t)PY_SSIZE_T_MAX - presize) { + return (PyVarObject *)PyErr_NoMemory(); + } + char *mem = (char *)op - presize; + mem = (char *)_PyObject_ReallocWithType(Py_TYPE(op), mem, presize + basicsize); + if (mem == NULL) { + return (PyVarObject *)PyErr_NoMemory(); + } + op = (PyVarObject *) (mem + presize); + Py_SET_SIZE(op, nitems); + return op; +} + +void +PyObject_GC_Del(void *op) +{ + size_t presize = _PyType_PreHeaderSize(((PyObject *)op)->ob_type); + if (_PyObject_GC_IS_TRACKED(op)) { +#ifdef Py_DEBUG + PyObject *exc = PyErr_GetRaisedException(); + if (PyErr_WarnExplicitFormat(PyExc_ResourceWarning, "gc", 0, + "gc", NULL, "Object of type %s is not untracked before destruction", + ((PyObject*)op)->ob_type->tp_name)) { + PyErr_WriteUnraisable(NULL); + } + PyErr_SetRaisedException(exc); +#endif + } + GCState *gcstate = get_gc_state(); + if (gcstate->generations[0].count > 0) { + gcstate->generations[0].count--; + } + PyObject_Free(((char *)op)-presize); +} + +int +PyObject_GC_IsTracked(PyObject* obj) +{ + if (_PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj)) { + return 1; + } + return 0; +} + +int +PyObject_GC_IsFinalized(PyObject *obj) +{ + if (_PyObject_IS_GC(obj) && _PyGC_FINALIZED(obj)) { + return 1; + } + return 0; +} + +struct custom_visitor_args { + struct visitor_args base; + gcvisitobjects_t callback; + 
void *arg; +}; + +static bool +custom_visitor_wrapper(const mi_heap_t *heap, const mi_heap_area_t *area, + void *block, size_t block_size, void *args) +{ + PyObject *op = op_from_block(block, args, false); + if (op == NULL) { + return true; + } + + struct custom_visitor_args *wrapper = (struct custom_visitor_args *)args; + if (!wrapper->callback(op, wrapper->arg)) { + return false; + } + + return true; +} + +void +PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void *arg) +{ + PyInterpreterState *interp = _PyInterpreterState_GET(); + struct custom_visitor_args wrapper = { + .callback = callback, + .arg = arg, + }; + + _PyEval_StopTheWorld(interp); + gc_visit_heaps(interp, &custom_visitor_wrapper, &wrapper.base); + _PyEval_StartTheWorld(interp); +} + /* Clear all free lists * All free lists are cleared during the collection of the highest generation. * Allocated items in the free list may keep a pymalloc arena occupied. @@ -25,4 +1720,4 @@ _PyGC_ClearAllFreeLists(PyInterpreterState *interp) HEAD_UNLOCK(&_PyRuntime); } -#endif +#endif // Py_GIL_DISABLED diff --git a/Python/object_stack.c b/Python/object_stack.c new file mode 100644 index 00000000000000..66b37bcbb44475 --- /dev/null +++ b/Python/object_stack.c @@ -0,0 +1,87 @@ +// Stack of Python objects + +#include "Python.h" +#include "pycore_freelist.h" +#include "pycore_pystate.h" +#include "pycore_object_stack.h" + +extern _PyObjectStackChunk *_PyObjectStackChunk_New(void); +extern void _PyObjectStackChunk_Free(_PyObjectStackChunk *); + +static struct _Py_object_stack_state * +get_state(void) +{ + _PyFreeListState *state = _PyFreeListState_GET(); + return &state->object_stack_state; +} + +_PyObjectStackChunk * +_PyObjectStackChunk_New(void) +{ + _PyObjectStackChunk *buf; + struct _Py_object_stack_state *state = get_state(); + if (state->numfree > 0) { + buf = state->free_list; + state->free_list = buf->prev; + state->numfree--; + } + else { + // NOTE: we use PyMem_RawMalloc() here because this is used by 
the GC + // during mimalloc heap traversal. In that context, it is not safe to + // allocate mimalloc memory, such as via PyMem_Malloc(). + buf = PyMem_RawMalloc(sizeof(_PyObjectStackChunk)); + if (buf == NULL) { + return NULL; + } + } + buf->prev = NULL; + buf->n = 0; + return buf; +} + +void +_PyObjectStackChunk_Free(_PyObjectStackChunk *buf) +{ + assert(buf->n == 0); + struct _Py_object_stack_state *state = get_state(); + if (state->numfree >= 0 && + state->numfree < _PyObjectStackChunk_MAXFREELIST) + { + buf->prev = state->free_list; + state->free_list = buf; + state->numfree++; + } + else { + PyMem_RawFree(buf); + } +} + +void +_PyObjectStack_Clear(_PyObjectStack *queue) +{ + while (queue->head != NULL) { + _PyObjectStackChunk *buf = queue->head; + buf->n = 0; + queue->head = buf->prev; + _PyObjectStackChunk_Free(buf); + } +} + +void +_PyObjectStackChunk_ClearFreeList(_PyFreeListState *free_lists, int is_finalization) +{ + if (!is_finalization) { + // Ignore requests to clear the free list during GC. We use object + // stacks during GC, so emptying the free-list is counterproductive. 
+ return; + } + + struct _Py_object_stack_state *state = &free_lists->object_stack_state; + while (state->numfree > 0) { + _PyObjectStackChunk *buf = state->free_list; + state->free_list = buf->prev; + state->numfree--; + PyMem_RawFree(buf); + } + state->numfree = -1; +} diff --git a/Python/pystate.c b/Python/pystate.c index c9b521351444a7..5ad5b6f3fcc634 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -10,6 +10,7 @@ #include "pycore_frame.h" #include "pycore_initconfig.h" // _PyStatus_OK() #include "pycore_object.h" // _PyType_InitCache() +#include "pycore_object_stack.h" // _PyObjectStackChunk_ClearFreeList() #include "pycore_parking_lot.h" // _PyParkingLot_AfterFork() #include "pycore_pyerrors.h" // _PyErr_Clear() #include "pycore_pylifecycle.h" // _PyAST_Fini() @@ -1468,6 +1469,7 @@ _Py_ClearFreeLists(_PyFreeListState *state, int is_finalization) _PyList_ClearFreeList(state, is_finalization); _PyContext_ClearFreeList(state, is_finalization); _PyAsyncGen_ClearFreeLists(state, is_finalization); + _PyObjectStackChunk_ClearFreeList(state, is_finalization); } void @@ -2055,7 +2057,6 @@ start_the_world(struct _stoptheworld_state *stw) HEAD_LOCK(runtime); stw->requested = 0; stw->world_stopped = 0; - stw->requester = NULL; // Switch threads back to the detached state. PyInterpreterState *i; PyThreadState *t; @@ -2066,6 +2067,7 @@ start_the_world(struct _stoptheworld_state *stw) _PyParkingLot_UnparkAll(&t->state); } } + stw->requester = NULL; HEAD_UNLOCK(runtime); if (stw->is_global) { _PyRWMutex_Unlock(&runtime->stoptheworld_mutex); From d96358ff9de646dbf64dfdfed46d510da7ec4803 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Thu, 25 Jan 2024 22:46:32 +0300 Subject: [PATCH 091/160] gh-114315: Make `threading.Lock` a real class, not a factory function (#114479) `threading.Lock` is now the underlying class and is constructable rather than the old factory function. 
This allows for type annotations to refer to it which had no non-ugly way to be expressed prior to this. --------- Co-authored-by: Alex Waygood Co-authored-by: Gregory P. Smith --- Doc/library/threading.rst | 7 ++-- Lib/test/test_threading.py | 20 ++++++++--- Lib/threading.py | 4 +-- ...-01-23-14-11-49.gh-issue-114315.KeVdzl.rst | 2 ++ Modules/_threadmodule.c | 33 ++++++++++++++++--- 5 files changed, 52 insertions(+), 14 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-01-23-14-11-49.gh-issue-114315.KeVdzl.rst diff --git a/Doc/library/threading.rst b/Doc/library/threading.rst index b85b7f008d1594..5fbf9379b8202c 100644 --- a/Doc/library/threading.rst +++ b/Doc/library/threading.rst @@ -534,9 +534,10 @@ All methods are executed atomically. lock, subsequent attempts to acquire it block, until it is released; any thread may release it. - Note that ``Lock`` is actually a factory function which returns an instance - of the most efficient version of the concrete Lock class that is supported - by the platform. + .. versionchanged:: 3.13 + ``Lock`` is now a class. In earlier Pythons, ``Lock`` was a factory + function which returned an instance of the underlying private lock + type. .. 
method:: acquire(blocking=True, timeout=-1) diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index dbdc46fff1e313..1ab223b81e939e 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -171,11 +171,21 @@ def test_args_argument(self): t.start() t.join() - @cpython_only - def test_disallow_instantiation(self): - # Ensure that the type disallows instantiation (bpo-43916) - lock = threading.Lock() - test.support.check_disallow_instantiation(self, type(lock)) + def test_lock_no_args(self): + threading.Lock() # works + self.assertRaises(TypeError, threading.Lock, 1) + self.assertRaises(TypeError, threading.Lock, a=1) + self.assertRaises(TypeError, threading.Lock, 1, 2, a=1, b=2) + + def test_lock_no_subclass(self): + # Intentionally disallow subclasses of threading.Lock because they have + # never been allowed, so why start now just because the type is public? + with self.assertRaises(TypeError): + class MyLock(threading.Lock): pass + + def test_lock_or_none(self): + import types + self.assertIsInstance(threading.Lock | None, types.UnionType) # Create a bunch of threads, let each do some work, wait until all are # done. diff --git a/Lib/threading.py b/Lib/threading.py index ecf799bc26ab06..00b95f8d92a1f0 100644 --- a/Lib/threading.py +++ b/Lib/threading.py @@ -5,7 +5,6 @@ import _thread import functools import warnings -import _weakref from time import monotonic as _time from _weakrefset import WeakSet @@ -37,6 +36,7 @@ _start_joinable_thread = _thread.start_joinable_thread _daemon_threads_allowed = _thread.daemon_threads_allowed _allocate_lock = _thread.allocate_lock +_LockType = _thread.LockType _set_sentinel = _thread._set_sentinel get_ident = _thread.get_ident _is_main_interpreter = _thread._is_main_interpreter @@ -115,7 +115,7 @@ def gettrace(): # Synchronization classes -Lock = _allocate_lock +Lock = _LockType def RLock(*args, **kwargs): """Factory function that returns a new reentrant lock. 
diff --git a/Misc/NEWS.d/next/Library/2024-01-23-14-11-49.gh-issue-114315.KeVdzl.rst b/Misc/NEWS.d/next/Library/2024-01-23-14-11-49.gh-issue-114315.KeVdzl.rst new file mode 100644 index 00000000000000..a8a19fc525d019 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-23-14-11-49.gh-issue-114315.KeVdzl.rst @@ -0,0 +1,2 @@ +Make :class:`threading.Lock` a real class, not a factory function. Add +``__new__`` to ``_thread.lock`` type. diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index 99f97eb6d0adcc..5cceb84658deb7 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -5,6 +5,7 @@ #include "Python.h" #include "pycore_interp.h" // _PyInterpreterState.threads.count #include "pycore_moduleobject.h" // _PyModule_GetState() +#include "pycore_modsupport.h" // _PyArg_NoKeywords() #include "pycore_pylifecycle.h" #include "pycore_pystate.h" // _PyThreadState_SetCurrent() #include "pycore_sysmodule.h" // _PySys_GetAttr() @@ -349,6 +350,27 @@ lock__at_fork_reinit(lockobject *self, PyObject *Py_UNUSED(args)) } #endif /* HAVE_FORK */ +static lockobject *newlockobject(PyObject *module); + +static PyObject * +lock_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) +{ + // convert to AC? 
+ if (!_PyArg_NoKeywords("lock", kwargs)) { + goto error; + } + if (!_PyArg_CheckPositional("lock", PyTuple_GET_SIZE(args), 0, 0)) { + goto error; + } + + PyObject *module = PyType_GetModuleByDef(type, &thread_module); + assert(module != NULL); + return (PyObject *)newlockobject(module); + +error: + return NULL; +} + static PyMethodDef lock_methods[] = { {"acquire_lock", _PyCFunction_CAST(lock_PyThread_acquire_lock), @@ -398,6 +420,7 @@ static PyType_Slot lock_type_slots[] = { {Py_tp_methods, lock_methods}, {Py_tp_traverse, lock_traverse}, {Py_tp_members, lock_type_members}, + {Py_tp_new, lock_new}, {0, 0} }; @@ -405,7 +428,7 @@ static PyType_Spec lock_type_spec = { .name = "_thread.lock", .basicsize = sizeof(lockobject), .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | - Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_IMMUTABLETYPE), + Py_TPFLAGS_IMMUTABLETYPE), .slots = lock_type_slots, }; @@ -1442,8 +1465,6 @@ A subthread can use this function to interrupt the main thread.\n\ Note: the default signal handler for SIGINT raises ``KeyboardInterrupt``." 
); -static lockobject *newlockobject(PyObject *module); - static PyObject * thread_PyThread_allocate_lock(PyObject *module, PyObject *Py_UNUSED(ignored)) { @@ -1841,10 +1862,14 @@ thread_module_exec(PyObject *module) } // Lock - state->lock_type = (PyTypeObject *)PyType_FromSpec(&lock_type_spec); + state->lock_type = (PyTypeObject *)PyType_FromModuleAndSpec(module, &lock_type_spec, NULL); if (state->lock_type == NULL) { return -1; } + if (PyModule_AddType(module, state->lock_type) < 0) { + return -1; + } + // Old alias: lock -> LockType if (PyDict_SetItemString(d, "LockType", (PyObject *)state->lock_type) < 0) { return -1; } From 33ae9895d4ac0d88447e529038bc4725ddd8c291 Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Thu, 25 Jan 2024 23:00:52 +0300 Subject: [PATCH 092/160] gh-114561: Mark some tests in ``test_wincosoleio`` with `requires_resource('console')` decorator (GH-114565) --- Lib/test/test_winconsoleio.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_winconsoleio.py b/Lib/test/test_winconsoleio.py index 72ff9606908ed5..209e4464e1a5c0 100644 --- a/Lib/test/test_winconsoleio.py +++ b/Lib/test/test_winconsoleio.py @@ -6,7 +6,7 @@ import sys import tempfile import unittest -from test.support import os_helper +from test.support import os_helper, requires_resource if sys.platform != 'win32': raise unittest.SkipTest("test only relevant on win32") @@ -150,6 +150,7 @@ def assertStdinRoundTrip(self, text): sys.stdin = old_stdin self.assertEqual(actual, text) + @requires_resource('console') def test_input(self): # ASCII self.assertStdinRoundTrip('abc123') @@ -164,6 +165,7 @@ def test_input_nonbmp(self): # Non-BMP self.assertStdinRoundTrip('\U00100000\U0010ffff\U0010fffd') + @requires_resource('console') def test_partial_reads(self): # Test that reading less than 1 full character works when stdin # contains multibyte UTF-8 sequences @@ -199,6 +201,7 @@ def test_partial_surrogate_reads(self): self.assertEqual(actual, 
expected, 'stdin.read({})'.format(read_count)) + @requires_resource('console') def test_ctrl_z(self): with open('CONIN$', 'rb', buffering=0) as stdin: source = '\xC4\x1A\r\n'.encode('utf-16-le') From ac5e53e15057bc0326a03f56e400ce345d1cebeb Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Thu, 25 Jan 2024 20:06:48 +0000 Subject: [PATCH 093/160] gh-107901: compiler replaces POP_BLOCK instruction by NOPs before optimisations (#114530) --- Python/flowgraph.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Python/flowgraph.c b/Python/flowgraph.c index de831358eb9ac8..96610b3cb11a43 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -903,6 +903,7 @@ label_exception_targets(basicblock *entryblock) { } else if (instr->i_opcode == POP_BLOCK) { handler = pop_except_block(except_stack); + INSTR_SET_OP0(instr, NOP); } else if (is_jump(instr)) { instr->i_except = handler; @@ -2313,7 +2314,7 @@ convert_pseudo_ops(cfg_builder *g) for (basicblock *b = entryblock; b != NULL; b = b->b_next) { for (int i = 0; i < b->b_iused; i++) { cfg_instr *instr = &b->b_instr[i]; - if (is_block_push(instr) || instr->i_opcode == POP_BLOCK) { + if (is_block_push(instr)) { INSTR_SET_OP0(instr, NOP); } else if (instr->i_opcode == LOAD_CLOSURE) { From 841eacd07646e643f87d7f063106633a25315910 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Fri, 26 Jan 2024 05:49:37 +0900 Subject: [PATCH 094/160] Add CODEOWNERS for dbm (gh-114555) --- .github/CODEOWNERS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 9587b3996a9ac2..4984170f0d17ff 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -42,6 +42,9 @@ Lib/test/test_type_*.py @JelleZijlstra Lib/test/test_capi/test_misc.py @markshannon @gvanrossum Tools/c-analyzer/ @ericsnowcurrently +# dbm +**/*dbm* @corona10 @erlend-aasland @serhiy-storchaka + # Exceptions Lib/traceback.py @iritkatriel Lib/test/test_except*.py @iritkatriel 
From b69548a0f52418b8a2cf7c7a885fdd7d3bfb1b0b Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Fri, 26 Jan 2024 01:12:46 +0000 Subject: [PATCH 095/160] GH-73435: Add `pathlib.PurePath.full_match()` (#114350) In 49f90ba we added support for the recursive wildcard `**` in `pathlib.PurePath.match()`. This should allow arbitrary prefix and suffix matching, like `p.match('foo/**')` or `p.match('**/foo')`, but there's a problem: for relative patterns only, `match()` implicitly inserts a `**` token on the left hand side, causing all patterns to match from the right. As a result, it's impossible to match relative patterns from the left: `PurePath('foo/bar').match('bar/**')` is true! This commit reverts the changes to `match()`, and instead adds a new `full_match()` method that: - Allows empty patterns - Supports the recursive wildcard `**` - Matches the *entire* path when given a relative pattern --- Doc/library/glob.rst | 5 +- Doc/library/pathlib.rst | 60 +++++++------- Doc/whatsnew/3.13.rst | 3 +- Lib/pathlib/__init__.py | 7 ++ Lib/pathlib/_abc.py | 54 +++++++++---- Lib/test/test_pathlib/test_pathlib_abc.py | 98 +++++++++++++++++------ 6 files changed, 155 insertions(+), 72 deletions(-) diff --git a/Doc/library/glob.rst b/Doc/library/glob.rst index 6e4f72c19ff4c9..19a0bbba8966ba 100644 --- a/Doc/library/glob.rst +++ b/Doc/library/glob.rst @@ -147,8 +147,9 @@ The :mod:`glob` module defines the following functions: .. seealso:: - :meth:`pathlib.PurePath.match` and :meth:`pathlib.Path.glob` methods, - which call this function to implement pattern matching and globbing. + :meth:`pathlib.PurePath.full_match` and :meth:`pathlib.Path.glob` + methods, which call this function to implement pattern matching and + globbing. .. 
versionadded:: 3.13 diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index fcbc0bf489b344..2f4ff4efec47f8 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -559,55 +559,55 @@ Pure paths provide the following methods and properties: PureWindowsPath('c:/Program Files') -.. method:: PurePath.match(pattern, *, case_sensitive=None) +.. method:: PurePath.full_match(pattern, *, case_sensitive=None) Match this path against the provided glob-style pattern. Return ``True`` - if matching is successful, ``False`` otherwise. - - If *pattern* is relative, the path can be either relative or absolute, - and matching is done from the right:: + if matching is successful, ``False`` otherwise. For example:: - >>> PurePath('a/b.py').match('*.py') - True - >>> PurePath('/a/b/c.py').match('b/*.py') + >>> PurePath('a/b.py').full_match('a/*.py') True - >>> PurePath('/a/b/c.py').match('a/*.py') + >>> PurePath('a/b.py').full_match('*.py') False + >>> PurePath('/a/b/c.py').full_match('/a/**') + True + >>> PurePath('/a/b/c.py').full_match('**/*.py') + True - If *pattern* is absolute, the path must be absolute, and the whole path - must match:: + As with other methods, case-sensitivity follows platform defaults:: - >>> PurePath('/a.py').match('/*.py') - True - >>> PurePath('a/b.py').match('/*.py') + >>> PurePosixPath('b.py').full_match('*.PY') False + >>> PureWindowsPath('b.py').full_match('*.PY') + True - The *pattern* may be another path object; this speeds up matching the same - pattern against multiple files:: + Set *case_sensitive* to ``True`` or ``False`` to override this behaviour. - >>> pattern = PurePath('*.py') - >>> PurePath('a/b.py').match(pattern) - True + .. versionadded:: 3.13 - .. versionchanged:: 3.12 - Accepts an object implementing the :class:`os.PathLike` interface. - As with other methods, case-sensitivity follows platform defaults:: +.. 
method:: PurePath.match(pattern, *, case_sensitive=None) - >>> PurePosixPath('b.py').match('*.PY') - False - >>> PureWindowsPath('b.py').match('*.PY') + Match this path against the provided non-recursive glob-style pattern. + Return ``True`` if matching is successful, ``False`` otherwise. + + This method is similar to :meth:`~PurePath.full_match`, but empty patterns + aren't allowed (:exc:`ValueError` is raised), the recursive wildcard + "``**``" isn't supported (it acts like non-recursive "``*``"), and if a + relative pattern is provided, then matching is done from the right:: + + >>> PurePath('a/b.py').match('*.py') + True + >>> PurePath('/a/b/c.py').match('b/*.py') True + >>> PurePath('/a/b/c.py').match('a/*.py') + False - Set *case_sensitive* to ``True`` or ``False`` to override this behaviour. + .. versionchanged:: 3.12 + The *pattern* parameter accepts a :term:`path-like object`. .. versionchanged:: 3.12 The *case_sensitive* parameter was added. - .. versionchanged:: 3.13 - Support for the recursive wildcard "``**``" was added. In previous - versions, it acted like the non-recursive wildcard "``*``". - .. method:: PurePath.relative_to(other, walk_up=False) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 40f0cd37fe9318..8c2bb05920d5b6 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -336,7 +336,8 @@ pathlib object from a 'file' URI (``file:/``). (Contributed by Barney Gale in :gh:`107465`.) -* Add support for recursive wildcards in :meth:`pathlib.PurePath.match`. +* Add :meth:`pathlib.PurePath.full_match` for matching paths with + shell-style wildcards, including the recursive wildcard "``**``". (Contributed by Barney Gale in :gh:`73435`.) 
* Add *follow_symlinks* keyword-only argument to :meth:`pathlib.Path.glob`, diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py index b043aed12b3849..eee82ef26bc7e7 100644 --- a/Lib/pathlib/__init__.py +++ b/Lib/pathlib/__init__.py @@ -490,6 +490,13 @@ def _pattern_stack(self): parts.reverse() return parts + @property + def _pattern_str(self): + """The path expressed as a string, for use in pattern-matching.""" + # The string representation of an empty path is a single dot ('.'). Empty + # paths shouldn't match wildcards, so we change it to the empty string. + path_str = str(self) + return '' if path_str == '.' else path_str # Subclassing os.PathLike makes isinstance() checks slower, # which in turn makes Path construction slower. Register instead! diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 553e1a399061d3..6303a18680befc 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -47,8 +47,8 @@ def _is_case_sensitive(pathmod): re = glob = None -@functools.lru_cache(maxsize=256) -def _compile_pattern(pat, sep, case_sensitive): +@functools.lru_cache(maxsize=512) +def _compile_pattern(pat, sep, case_sensitive, recursive=True): """Compile given glob pattern to a re.Pattern object (observing case sensitivity).""" global re, glob @@ -56,10 +56,7 @@ def _compile_pattern(pat, sep, case_sensitive): import re, glob flags = re.NOFLAG if case_sensitive else re.IGNORECASE - regex = glob.translate(pat, recursive=True, include_hidden=True, seps=sep) - # The string representation of an empty path is a single dot ('.'). Empty - # paths shouldn't match wildcards, so we consume it with an atomic group. 
- regex = r'(\.\Z)?+' + regex + regex = glob.translate(pat, recursive=recursive, include_hidden=True, seps=sep) return re.compile(regex, flags=flags).match @@ -441,23 +438,48 @@ def _pattern_stack(self): raise NotImplementedError("Non-relative patterns are unsupported") return parts + @property + def _pattern_str(self): + """The path expressed as a string, for use in pattern-matching.""" + return str(self) + def match(self, path_pattern, *, case_sensitive=None): """ - Return True if this path matches the given pattern. + Return True if this path matches the given pattern. If the pattern is + relative, matching is done from the right; otherwise, the entire path + is matched. The recursive wildcard '**' is *not* supported by this + method. """ if not isinstance(path_pattern, PurePathBase): path_pattern = self.with_segments(path_pattern) if case_sensitive is None: case_sensitive = _is_case_sensitive(self.pathmod) sep = path_pattern.pathmod.sep - if path_pattern.anchor: - pattern_str = str(path_pattern) - elif path_pattern.parts: - pattern_str = str('**' / path_pattern) - else: + path_parts = self.parts[::-1] + pattern_parts = path_pattern.parts[::-1] + if not pattern_parts: raise ValueError("empty pattern") - match = _compile_pattern(pattern_str, sep, case_sensitive) - return match(str(self)) is not None + if len(path_parts) < len(pattern_parts): + return False + if len(path_parts) > len(pattern_parts) and path_pattern.anchor: + return False + for path_part, pattern_part in zip(path_parts, pattern_parts): + match = _compile_pattern(pattern_part, sep, case_sensitive, recursive=False) + if match(path_part) is None: + return False + return True + + def full_match(self, pattern, *, case_sensitive=None): + """ + Return True if this path matches the given glob-style pattern. The + pattern is matched against the entire path. 
+ """ + if not isinstance(pattern, PurePathBase): + pattern = self.with_segments(pattern) + if case_sensitive is None: + case_sensitive = _is_case_sensitive(self.pathmod) + match = _compile_pattern(pattern._pattern_str, pattern.pathmod.sep, case_sensitive) + return match(self._pattern_str) is not None @@ -781,8 +803,8 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None): if filter_paths: # Filter out paths that don't match pattern. prefix_len = len(str(self._make_child_relpath('_'))) - 1 - match = _compile_pattern(str(pattern), sep, case_sensitive) - paths = (path for path in paths if match(str(path), prefix_len)) + match = _compile_pattern(pattern._pattern_str, sep, case_sensitive) + paths = (path for path in paths if match(path._pattern_str, prefix_len)) return paths def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=None): diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 199718a8a69c5a..364f776dbb1413 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -249,29 +249,8 @@ def test_match_common(self): self.assertFalse(P('/ab.py').match('/a/*.py')) self.assertFalse(P('/a/b/c.py').match('/a/*.py')) # Multi-part glob-style pattern. 
- self.assertTrue(P('a').match('**')) - self.assertTrue(P('c.py').match('**')) - self.assertTrue(P('a/b/c.py').match('**')) - self.assertTrue(P('/a/b/c.py').match('**')) - self.assertTrue(P('/a/b/c.py').match('/**')) - self.assertTrue(P('/a/b/c.py').match('/a/**')) - self.assertTrue(P('/a/b/c.py').match('**/*.py')) - self.assertTrue(P('/a/b/c.py').match('/**/*.py')) + self.assertFalse(P('/a/b/c.py').match('/**/*.py')) self.assertTrue(P('/a/b/c.py').match('/a/**/*.py')) - self.assertTrue(P('/a/b/c.py').match('/a/b/**/*.py')) - self.assertTrue(P('/a/b/c.py').match('/**/**/**/**/*.py')) - self.assertFalse(P('c.py').match('**/a.py')) - self.assertFalse(P('c.py').match('c/**')) - self.assertFalse(P('a/b/c.py').match('**/a')) - self.assertFalse(P('a/b/c.py').match('**/a/b')) - self.assertFalse(P('a/b/c.py').match('**/a/b/c')) - self.assertFalse(P('a/b/c.py').match('**/a/b/c.')) - self.assertFalse(P('a/b/c.py').match('**/a/b/c./**')) - self.assertFalse(P('a/b/c.py').match('**/a/b/c./**')) - self.assertFalse(P('a/b/c.py').match('/a/b/c.py/**')) - self.assertFalse(P('a/b/c.py').match('/**/a/b/c.py')) - self.assertRaises(ValueError, P('a').match, '**a/b/c') - self.assertRaises(ValueError, P('a').match, 'a/b/c**') # Case-sensitive flag self.assertFalse(P('A.py').match('a.PY', case_sensitive=True)) self.assertTrue(P('A.py').match('a.PY', case_sensitive=False)) @@ -279,9 +258,82 @@ def test_match_common(self): self.assertTrue(P('/a/b/c.py').match('/A/*/*.Py', case_sensitive=False)) # Matching against empty path self.assertFalse(P('').match('*')) - self.assertTrue(P('').match('**')) + self.assertFalse(P('').match('**')) self.assertFalse(P('').match('**/*')) + def test_full_match_common(self): + P = self.cls + # Simple relative pattern. 
+ self.assertTrue(P('b.py').full_match('b.py')) + self.assertFalse(P('a/b.py').full_match('b.py')) + self.assertFalse(P('/a/b.py').full_match('b.py')) + self.assertFalse(P('a.py').full_match('b.py')) + self.assertFalse(P('b/py').full_match('b.py')) + self.assertFalse(P('/a.py').full_match('b.py')) + self.assertFalse(P('b.py/c').full_match('b.py')) + # Wildcard relative pattern. + self.assertTrue(P('b.py').full_match('*.py')) + self.assertFalse(P('a/b.py').full_match('*.py')) + self.assertFalse(P('/a/b.py').full_match('*.py')) + self.assertFalse(P('b.pyc').full_match('*.py')) + self.assertFalse(P('b./py').full_match('*.py')) + self.assertFalse(P('b.py/c').full_match('*.py')) + # Multi-part relative pattern. + self.assertTrue(P('ab/c.py').full_match('a*/*.py')) + self.assertFalse(P('/d/ab/c.py').full_match('a*/*.py')) + self.assertFalse(P('a.py').full_match('a*/*.py')) + self.assertFalse(P('/dab/c.py').full_match('a*/*.py')) + self.assertFalse(P('ab/c.py/d').full_match('a*/*.py')) + # Absolute pattern. + self.assertTrue(P('/b.py').full_match('/*.py')) + self.assertFalse(P('b.py').full_match('/*.py')) + self.assertFalse(P('a/b.py').full_match('/*.py')) + self.assertFalse(P('/a/b.py').full_match('/*.py')) + # Multi-part absolute pattern. + self.assertTrue(P('/a/b.py').full_match('/a/*.py')) + self.assertFalse(P('/ab.py').full_match('/a/*.py')) + self.assertFalse(P('/a/b/c.py').full_match('/a/*.py')) + # Multi-part glob-style pattern. 
+ self.assertTrue(P('a').full_match('**')) + self.assertTrue(P('c.py').full_match('**')) + self.assertTrue(P('a/b/c.py').full_match('**')) + self.assertTrue(P('/a/b/c.py').full_match('**')) + self.assertTrue(P('/a/b/c.py').full_match('/**')) + self.assertTrue(P('/a/b/c.py').full_match('/a/**')) + self.assertTrue(P('/a/b/c.py').full_match('**/*.py')) + self.assertTrue(P('/a/b/c.py').full_match('/**/*.py')) + self.assertTrue(P('/a/b/c.py').full_match('/a/**/*.py')) + self.assertTrue(P('/a/b/c.py').full_match('/a/b/**/*.py')) + self.assertTrue(P('/a/b/c.py').full_match('/**/**/**/**/*.py')) + self.assertFalse(P('c.py').full_match('**/a.py')) + self.assertFalse(P('c.py').full_match('c/**')) + self.assertFalse(P('a/b/c.py').full_match('**/a')) + self.assertFalse(P('a/b/c.py').full_match('**/a/b')) + self.assertFalse(P('a/b/c.py').full_match('**/a/b/c')) + self.assertFalse(P('a/b/c.py').full_match('**/a/b/c.')) + self.assertFalse(P('a/b/c.py').full_match('**/a/b/c./**')) + self.assertFalse(P('a/b/c.py').full_match('**/a/b/c./**')) + self.assertFalse(P('a/b/c.py').full_match('/a/b/c.py/**')) + self.assertFalse(P('a/b/c.py').full_match('/**/a/b/c.py')) + self.assertRaises(ValueError, P('a').full_match, '**a/b/c') + self.assertRaises(ValueError, P('a').full_match, 'a/b/c**') + # Case-sensitive flag + self.assertFalse(P('A.py').full_match('a.PY', case_sensitive=True)) + self.assertTrue(P('A.py').full_match('a.PY', case_sensitive=False)) + self.assertFalse(P('c:/a/B.Py').full_match('C:/A/*.pY', case_sensitive=True)) + self.assertTrue(P('/a/b/c.py').full_match('/A/*/*.Py', case_sensitive=False)) + # Matching against empty path + self.assertFalse(P('').full_match('*')) + self.assertTrue(P('').full_match('**')) + self.assertFalse(P('').full_match('**/*')) + # Matching with empty pattern + self.assertTrue(P('').full_match('')) + self.assertTrue(P('.').full_match('.')) + self.assertFalse(P('/').full_match('')) + self.assertFalse(P('/').full_match('.')) + 
self.assertFalse(P('foo').full_match('')) + self.assertFalse(P('foo').full_match('.')) + def test_parts_common(self): # `parts` returns a tuple. sep = self.sep From 456e274578dc9863f42ab24d62adc0d8c511b50f Mon Sep 17 00:00:00 2001 From: Mariusz Felisiak Date: Fri, 26 Jan 2024 09:33:13 +0100 Subject: [PATCH 096/160] gh-112451: Prohibit subclassing of datetime.timezone. (#114190) This is consistent with C-extension datetime.timezone. --- Lib/_pydatetime.py | 3 +++ Lib/test/datetimetester.py | 4 ++++ .../Library/2024-01-24-20-11-46.gh-issue-112451.7YrG4p.rst | 2 ++ 3 files changed, 9 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-01-24-20-11-46.gh-issue-112451.7YrG4p.rst diff --git a/Lib/_pydatetime.py b/Lib/_pydatetime.py index bca2acf1fc88cf..355145387e355b 100644 --- a/Lib/_pydatetime.py +++ b/Lib/_pydatetime.py @@ -2347,6 +2347,9 @@ def __new__(cls, offset, name=_Omitted): "timedelta(hours=24).") return cls._create(offset, name) + def __init_subclass__(cls): + raise TypeError("type 'datetime.timezone' is not an acceptable base type") + @classmethod def _create(cls, offset, name=None): self = tzinfo.__new__(cls) diff --git a/Lib/test/datetimetester.py b/Lib/test/datetimetester.py index 8bda17358db87f..53ad5e57ada017 100644 --- a/Lib/test/datetimetester.py +++ b/Lib/test/datetimetester.py @@ -301,6 +301,10 @@ def test_inheritance(self): self.assertIsInstance(timezone.utc, tzinfo) self.assertIsInstance(self.EST, tzinfo) + def test_cannot_subclass(self): + with self.assertRaises(TypeError): + class MyTimezone(timezone): pass + def test_utcoffset(self): dummy = self.DT for h in [0, 1.5, 12]: diff --git a/Misc/NEWS.d/next/Library/2024-01-24-20-11-46.gh-issue-112451.7YrG4p.rst b/Misc/NEWS.d/next/Library/2024-01-24-20-11-46.gh-issue-112451.7YrG4p.rst new file mode 100644 index 00000000000000..126ca36a3b7cb1 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-24-20-11-46.gh-issue-112451.7YrG4p.rst @@ -0,0 +1,2 @@ +Prohibit subclassing pure-Python 
:class:`datetime.timezone`. This is consistent +with C-extension implementation. Patch by Mariusz Felisiak. From 582d95e8bb0b78bf1b6b9a12371108b9993d3b84 Mon Sep 17 00:00:00 2001 From: Seth Michael Larson Date: Fri, 26 Jan 2024 03:48:13 -0600 Subject: [PATCH 097/160] gh-114250: Fetch metadata for pip and its vendored dependencies from PyPI (#114450) --- Misc/sbom.spdx.json | 624 +++++++++++++++++++++++++++++++++++ Tools/build/generate_sbom.py | 263 ++++++++++++--- 2 files changed, 837 insertions(+), 50 deletions(-) diff --git a/Misc/sbom.spdx.json b/Misc/sbom.spdx.json index 5b3cd04ffa7f74..94566772338b10 100644 --- a/Misc/sbom.spdx.json +++ b/Misc/sbom.spdx.json @@ -1695,6 +1695,510 @@ "primaryPackagePurpose": "SOURCE", "versionInfo": "2.5.1" }, + { + "SPDXID": "SPDXRef-PACKAGE-cachecontrol", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "95dedbec849f46dda3137866dc28b9d133fc9af55f5b805ab1291833e4457aa4" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/1d/e3/a22348e6226dcd585d5a4b5f0175b3a16dabfd3912cbeb02f321d00e56c7/cachecontrol-0.13.1-py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/cachecontrol@0.13.1", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "cachecontrol", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "0.13.1" + }, + { + "SPDXID": "SPDXRef-PACKAGE-colorama", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/colorama@0.4.6", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "colorama", + "primaryPackagePurpose": "SOURCE", + 
"versionInfo": "0.4.6" + }, + { + "SPDXID": "SPDXRef-PACKAGE-distlib", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "f35c4b692542ca110de7ef0bea44d73981caeb34ca0b9b6b2e6d7790dda8f80e" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/76/cb/6bbd2b10170ed991cf64e8c8b85e01f2fb38f95d1bc77617569e0b0b26ac/distlib-0.3.6-py2.py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/distlib@0.3.6", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "distlib", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "0.3.6" + }, + { + "SPDXID": "SPDXRef-PACKAGE-distro", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "99522ca3e365cac527b44bde033f64c6945d90eb9f769703caaec52b09bbd3ff" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/f4/2c/c90a3adaf0ddb70afe193f5ebfb539612af57cffe677c3126be533df3098/distro-1.8.0-py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/distro@1.8.0", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "distro", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "1.8.0" + }, + { + "SPDXID": "SPDXRef-PACKAGE-msgpack", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "525228efd79bb831cf6830a732e2e80bc1b05436b086d4264814b4b2955b2fa9" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/9f/4a/36d936e54cf71e23ad276564465f6a54fb129e3d61520b76e13e0bb29167/msgpack-1.0.5-cp310-cp310-macosx_10_9_universal2.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/msgpack@1.0.5", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "msgpack", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "1.0.5" + }, + { + "SPDXID": "SPDXRef-PACKAGE-packaging", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": 
"ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/05/8e/8de486cbd03baba4deef4142bd643a3e7bbe954a784dc1bb17142572d127/packaging-21.3-py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/packaging@21.3", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "packaging", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "21.3" + }, + { + "SPDXID": "SPDXRef-PACKAGE-platformdirs", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "cec7b889196b9144d088e4c57d9ceef7374f6c39694ad1577a0aab50d27ea28c" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/9e/d8/563a9fc17153c588c8c2042d2f0f84a89057cdb1c30270f589c88b42d62c/platformdirs-3.8.1-py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/platformdirs@3.8.1", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "platformdirs", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "3.8.1" + }, + { + "SPDXID": "SPDXRef-PACKAGE-pyparsing", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "d554a96d1a7d3ddaf7183104485bc19fd80543ad6ac5bdb6426719d766fb06c1" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/a4/24/6ae4c9c45cf99d96b06b5d99e25526c060303171fb0aea9da2bfd7dbde93/pyparsing-3.1.0-py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/pyparsing@3.1.0", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "pyparsing", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "3.1.0" + }, + { + "SPDXID": "SPDXRef-PACKAGE-pyproject-hooks", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "283c11acd6b928d2f6a7c73fa0d01cb2bdc5f07c57a2eeb6e83d5e56b97976f8" + } + ], + "downloadLocation": 
"https://files.pythonhosted.org/packages/d5/ea/9ae603de7fbb3df820b23a70f6aff92bf8c7770043254ad8d2dc9d6bcba4/pyproject_hooks-1.0.0-py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/pyproject-hooks@1.0.0", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "pyproject-hooks", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "1.0.0" + }, + { + "SPDXID": "SPDXRef-PACKAGE-requests", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/70/8e/0e2d847013cb52cd35b38c009bb167a1a26b2ce6cd6965bf26b47bc0bf44/requests-2.31.0-py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/requests@2.31.0", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "requests", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "2.31.0" + }, + { + "SPDXID": "SPDXRef-PACKAGE-certifi", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/4c/dd/2234eab22353ffc7d94e8d13177aaa050113286e93e7b40eae01fbf7c3d9/certifi-2023.7.22-py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/certifi@2023.7.22", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "certifi", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "2023.7.22" + }, + { + "SPDXID": "SPDXRef-PACKAGE-chardet", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "362777fb014af596ad31334fde1e8c327dfdb076e1960d1694662d46a6917ab9" + } + ], + "downloadLocation": 
"https://files.pythonhosted.org/packages/74/8f/8fc49109009e8d2169d94d72e6b1f4cd45c13d147ba7d6170fb41f22b08f/chardet-5.1.0-py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/chardet@5.1.0", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "chardet", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "5.1.0" + }, + { + "SPDXID": "SPDXRef-PACKAGE-idna", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/fc/34/3030de6f1370931b9dbb4dad48f6ab1015ab1d32447850b9fc94e60097be/idna-3.4-py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/idna@3.4", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "idna", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "3.4" + }, + { + "SPDXID": "SPDXRef-PACKAGE-rich", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "8f87bc7ee54675732fa66a05ebfe489e27264caeeff3728c945d25971b6485ec" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/fc/1e/482e5eec0b89b593e81d78f819a9412849814e22225842b598908e7ac560/rich-13.4.2-py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/rich@13.4.2", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "rich", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "13.4.2" + }, + { + "SPDXID": "SPDXRef-PACKAGE-pygments", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "db2db3deb4b4179f399a09054b023b6a586b76499d36965813c71aa8ed7b5fd1" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/34/a7/37c8d68532ba71549db4212cb036dbd6161b40e463aba336770e80c72f84/Pygments-2.15.1-py3-none-any.whl", + "externalRefs": [ + { + 
"referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/pygments@2.15.1", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "pygments", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "2.15.1" + }, + { + "SPDXID": "SPDXRef-PACKAGE-typing-extensions", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/ec/6b/63cc3df74987c36fe26157ee12e09e8f9db4de771e0f3404263117e75b95/typing_extensions-4.7.1-py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/typing_extensions@4.7.1", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "typing_extensions", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "4.7.1" + }, + { + "SPDXID": "SPDXRef-PACKAGE-resolvelib", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "d2da45d1a8dfee81bdd591647783e340ef3bcb104b54c383f70d422ef5cc7dbf" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/d2/fc/e9ccf0521607bcd244aa0b3fbd574f71b65e9ce6a112c83af988bbbe2e23/resolvelib-1.0.1-py2.py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/resolvelib@1.0.1", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "resolvelib", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "1.0.1" + }, + { + "SPDXID": "SPDXRef-PACKAGE-setuptools", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "11e52c67415a381d10d6b462ced9cfb97066179f0e871399e006c4ab101fc85f" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/c7/42/be1c7bbdd83e1bfb160c94b9cafd8e25efc7400346cf7ccdbdb452c467fa/setuptools-68.0.0-py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/setuptools@68.0.0", + 
"referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "setuptools", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "68.0.0" + }, + { + "SPDXID": "SPDXRef-PACKAGE-six", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/d9/5a/e7c31adbe875f2abbb91bd84cf2dc52d792b5a01506781dbcf25c91daf11/six-1.16.0-py2.py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/six@1.16.0", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "six", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "1.16.0" + }, + { + "SPDXID": "SPDXRef-PACKAGE-tenacity", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "2f277afb21b851637e8f52e6a613ff08734c347dc19ade928e519d7d2d8569b0" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/e7/b0/c23bd61e1b32c9b96fbca996c87784e196a812da8d621d8d04851f6c8181/tenacity-8.2.2-py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/tenacity@8.2.2", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "tenacity", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "8.2.2" + }, + { + "SPDXID": "SPDXRef-PACKAGE-tomli", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/97/75/10a9ebee3fd790d20926a90a2547f0bf78f371b2f13aa822c759680ca7b9/tomli-2.0.1-py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/tomli@2.0.1", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "tomli", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "2.0.1" + }, + { + "SPDXID": 
"SPDXRef-PACKAGE-truststore", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "e37a5642ae9fc48caa8f120b6283d77225d600d224965a672c9e8ef49ce4bb4c" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/20/56/7811d5439b6a56374f274a8672d8f18b4deadadeb3a9f0c86424b98b6f96/truststore-0.8.0-py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/truststore@0.8.0", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "truststore", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "0.8.0" + }, + { + "SPDXID": "SPDXRef-PACKAGE-webencodings", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/f4/24/2a3e3df732393fed8b3ebf2ec078f05546de641fe1b667ee316ec1dcf3b7/webencodings-0.5.1-py2.py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/webencodings@0.5.1", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "webencodings", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "0.5.1" + }, + { + "SPDXID": "SPDXRef-PACKAGE-urllib3", + "checksums": [ + { + "algorithm": "SHA256", + "checksumValue": "94a757d178c9be92ef5539b8840d48dc9cf1b2709c9d6b588232a055c524458b" + } + ], + "downloadLocation": "https://files.pythonhosted.org/packages/48/fe/a5c6cc46e9fe9171d7ecf0f33ee7aae14642f8d74baa7af4d7840f9358be/urllib3-1.26.17-py2.py3-none-any.whl", + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": "pkg:pypi/urllib3@1.26.17", + "referenceType": "purl" + } + ], + "licenseConcluded": "MIT", + "name": "urllib3", + "primaryPackagePurpose": "SOURCE", + "versionInfo": "1.26.17" + }, { "SPDXID": "SPDXRef-PACKAGE-pip", "checksums": [ @@ -1724,6 +2228,126 @@ } ], "relationships": [ + { + "relatedSpdxElement": 
"SPDXRef-PACKAGE-cachecontrol", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-certifi", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-chardet", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-colorama", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-distlib", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-distro", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-idna", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-msgpack", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-packaging", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-platformdirs", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-pygments", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-pyparsing", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-pyproject-hooks", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-requests", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-resolvelib", + "relationshipType": 
"DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-rich", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-setuptools", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-six", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-tenacity", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-tomli", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-truststore", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-typing-extensions", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-urllib3", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, + { + "relatedSpdxElement": "SPDXRef-PACKAGE-webencodings", + "relationshipType": "DEPENDS_ON", + "spdxElementId": "SPDXRef-PACKAGE-pip" + }, { "relatedSpdxElement": "SPDXRef-FILE-Modules-expat-COPYING", "relationshipType": "CONTAINS", diff --git a/Tools/build/generate_sbom.py b/Tools/build/generate_sbom.py index 317d48fee3a9d4..aceb13f141cba4 100644 --- a/Tools/build/generate_sbom.py +++ b/Tools/build/generate_sbom.py @@ -8,6 +8,7 @@ import subprocess import sys import typing +import zipfile from urllib.request import urlopen CPYTHON_ROOT_DIR = pathlib.Path(__file__).parent.parent.parent @@ -16,10 +17,16 @@ # the license expression is a valid SPDX license expression: # See: https://spdx.org/licenses ALLOWED_LICENSE_EXPRESSIONS = { - "MIT", - "CC0-1.0", "Apache-2.0", + "Apache-2.0 OR BSD-2-Clause", "BSD-2-Clause", + 
"BSD-3-Clause", + "CC0-1.0", + "ISC", + "LGPL-2.1-only", + "MIT", + "MPL-2.0", + "Python-2.0.1", } # Properties which are required for our purposes. @@ -31,14 +38,13 @@ "checksums", "licenseConcluded", "externalRefs", - "originator", "primaryPackagePurpose", ]) class PackageFiles(typing.NamedTuple): """Structure for describing the files of a package""" - include: list[str] + include: list[str] | None exclude: list[str] | None = None @@ -118,62 +124,209 @@ def filter_gitignored_paths(paths: list[str]) -> list[str]: return sorted([line.split()[-1] for line in git_check_ignore_lines if line.startswith("::")]) +def fetch_package_metadata_from_pypi(project: str, version: str, filename: str | None = None) -> tuple[str, str] | None: + """ + Fetches the SHA256 checksum and download location from PyPI. + If we're given a filename then we match with that, otherwise we use wheels. + """ + # Get pip's download location from PyPI. Check that the checksum is correct too. + try: + raw_text = urlopen(f"https://pypi.org/pypi/{project}/{version}/json").read() + release_metadata = json.loads(raw_text) + url: dict[str, typing.Any] + + # Look for a matching artifact filename and then check + # its remote checksum to the local one. + for url in release_metadata["urls"]: + # pip can only use Python-only dependencies, so there's + # no risk of picking the 'incorrect' wheel here. + if ( + (filename is None and url["packagetype"] == "bdist_wheel") + or (filename is not None and url["filename"] == filename) + ): + break + else: + raise ValueError(f"No matching filename on PyPI for '{filename}'") + + # Successfully found the download URL for the matching artifact. + download_url = url["url"] + checksum_sha256 = url["digests"]["sha256"] + return download_url, checksum_sha256 + + except (OSError, ValueError) as e: + # Fail if we're running in CI where we should have an internet connection. 
+ error_if( + "CI" in os.environ, + f"Couldn't fetch metadata for project '{project}' from PyPI: {e}" + ) + return None + + +def find_ensurepip_pip_wheel() -> pathlib.Path | None: + """Try to find the pip wheel bundled in ensurepip. If missing return None""" + + ensurepip_bundled_dir = CPYTHON_ROOT_DIR / "Lib/ensurepip/_bundled" + + pip_wheels = [] + try: + for wheel_filename in os.listdir(ensurepip_bundled_dir): + if wheel_filename.startswith("pip-"): + pip_wheels.append(wheel_filename) + else: + print(f"Unexpected wheel in ensurepip: '{wheel_filename}'") + sys.exit(1) + + # Ignore this error, likely caused by downstream distributors + # deleting the 'ensurepip/_bundled' directory. + except FileNotFoundError: + pass + + if len(pip_wheels) == 0: + return None + elif len(pip_wheels) > 1: + print("Multiple pip wheels detected in 'Lib/ensurepip/_bundled'") + sys.exit(1) + # Otherwise return the one pip wheel. + return ensurepip_bundled_dir / pip_wheels[0] + + +def maybe_remove_pip_and_deps_from_sbom(sbom_data: dict[str, typing.Any]) -> None: + """ + Removes pip and its dependencies from the SBOM data + if the pip wheel is removed from ensurepip. This is done + by redistributors of Python and pip. + """ + + # If there's a wheel we don't remove anything. + if find_ensurepip_pip_wheel() is not None: + return + + # Otherwise we traverse the relationships + # to find dependent packages to remove. + sbom_pip_spdx_id = spdx_id("SPDXRef-PACKAGE-pip") + sbom_spdx_ids_to_remove = {sbom_pip_spdx_id} + + # Find all package SPDXIDs that pip depends on. + for sbom_relationship in sbom_data["relationships"]: + if ( + sbom_relationship["relationshipType"] == "DEPENDS_ON" + and sbom_relationship["spdxElementId"] == sbom_pip_spdx_id + ): + sbom_spdx_ids_to_remove.add(sbom_relationship["relatedSpdxElement"]) + + # Remove all the packages and relationships. 
+ sbom_data["packages"] = [ + sbom_package for sbom_package in sbom_data["packages"] + if sbom_package["SPDXID"] not in sbom_spdx_ids_to_remove + ] + sbom_data["relationships"] = [ + sbom_relationship for sbom_relationship in sbom_data["relationships"] + if sbom_relationship["relatedSpdxElement"] not in sbom_spdx_ids_to_remove + ] + + def discover_pip_sbom_package(sbom_data: dict[str, typing.Any]) -> None: """pip is a part of a packaging ecosystem (Python, surprise!) so it's actually automatable to discover the metadata we need like the version and checksums - so let's do that on behalf of our friends at the PyPA. + so let's do that on behalf of our friends at the PyPA. This function also + discovers vendored packages within pip and fetches their metadata. """ global PACKAGE_TO_FILES - ensurepip_bundled_dir = CPYTHON_ROOT_DIR / "Lib/ensurepip/_bundled" - pip_wheels = [] - - # Find the hopefully one pip wheel in the bundled directory. - for wheel_filename in os.listdir(ensurepip_bundled_dir): - if wheel_filename.startswith("pip-"): - pip_wheels.append(wheel_filename) - if len(pip_wheels) != 1: - print("Zero or multiple pip wheels detected in 'Lib/ensurepip/_bundled'") - sys.exit(1) - pip_wheel_filename = pip_wheels[0] + pip_wheel_filepath = find_ensurepip_pip_wheel() + if pip_wheel_filepath is None: + return # There's no pip wheel, nothing to discover. # Add the wheel filename to the list of files so the SBOM file # and relationship generator can work its magic on the wheel too. PACKAGE_TO_FILES["pip"] = PackageFiles( - include=[f"Lib/ensurepip/_bundled/{pip_wheel_filename}"] + include=[str(pip_wheel_filepath.relative_to(CPYTHON_ROOT_DIR))] ) # Wheel filename format puts the version right after the project name. 
- pip_version = pip_wheel_filename.split("-")[1] + pip_version = pip_wheel_filepath.name.split("-")[1] pip_checksum_sha256 = hashlib.sha256( - (ensurepip_bundled_dir / pip_wheel_filename).read_bytes() + pip_wheel_filepath.read_bytes() ).hexdigest() - # Get pip's download location from PyPI. Check that the checksum is correct too. - try: - raw_text = urlopen(f"https://pypi.org/pypi/pip/{pip_version}/json").read() - pip_release_metadata = json.loads(raw_text) - url: dict[str, typing.Any] + pip_metadata = fetch_package_metadata_from_pypi( + project="pip", + version=pip_version, + filename=pip_wheel_filepath.name, + ) + # We couldn't fetch any metadata from PyPI, + # so we give up on verifying if we're not in CI. + if pip_metadata is None: + return + + pip_download_url, pip_actual_sha256 = pip_metadata + if pip_actual_sha256 != pip_checksum_sha256: + raise ValueError("Unexpected") + + # Parse 'pip/_vendor/vendor.txt' from the wheel for sub-dependencies. + with zipfile.ZipFile(pip_wheel_filepath) as whl: + vendor_txt_data = whl.read("pip/_vendor/vendor.txt").decode() + + # With this version regex we're assuming that pip isn't using pre-releases. + # If any version doesn't match we get a failure below, so we're safe doing this. + version_pin_re = re.compile(r"^([a-zA-Z0-9_.-]+)==([0-9.]*[0-9])$") + sbom_pip_dependency_spdx_ids = set() + for line in vendor_txt_data.splitlines(): + line = line.partition("#")[0].strip() # Strip comments and whitespace. + if not line: # Skip empty lines. + continue + + # Non-empty lines we must be able to match. + match = version_pin_re.match(line) + error_if(match is None, f"Couldn't parse line from pip vendor.txt: '{line}'") + assert match is not None # Make mypy happy. + + # Parse out and normalize the project name. + project_name, project_version = match.groups() + project_name = project_name.lower() + + # At this point if pip's metadata fetch succeeded we should + # expect this request to also succeed. 
+ project_metadata = ( + fetch_package_metadata_from_pypi(project_name, project_version) + ) + assert project_metadata is not None + project_download_url, project_checksum_sha256 = project_metadata + + # Update our SBOM data with what we received from PyPI. + # Don't overwrite any existing values. + sbom_project_spdx_id = spdx_id(f"SPDXRef-PACKAGE-{project_name}") + sbom_pip_dependency_spdx_ids.add(sbom_project_spdx_id) + for package in sbom_data["packages"]: + if package["SPDXID"] != sbom_project_spdx_id: + continue - # Look for a matching artifact filename and then check - # its remote checksum to the local one. - for url in pip_release_metadata["urls"]: - if url["filename"] == pip_wheel_filename: + # Only thing missing from this blob is the `licenseConcluded`, + # that needs to be triaged by human maintainers if the list changes. + package.update({ + "SPDXID": sbom_project_spdx_id, + "name": project_name, + "versionInfo": project_version, + "downloadLocation": project_download_url, + "checksums": [ + {"algorithm": "SHA256", "checksumValue": project_checksum_sha256} + ], + "externalRefs": [ + { + "referenceCategory": "PACKAGE_MANAGER", + "referenceLocator": f"pkg:pypi/{project_name}@{project_version}", + "referenceType": "purl", + }, + ], + "primaryPackagePurpose": "SOURCE" + }) break - else: - raise ValueError(f"No matching filename on PyPI for '{pip_wheel_filename}'") - if url["digests"]["sha256"] != pip_checksum_sha256: - raise ValueError(f"Local pip checksum doesn't match artifact on PyPI") - - # Successfully found the download URL for the matching artifact. - pip_download_url = url["url"] - except (OSError, ValueError) as e: - print(f"Couldn't fetch pip's metadata from PyPI: {e}") - sys.exit(1) + PACKAGE_TO_FILES[project_name] = PackageFiles(include=None) # Remove pip from the existing SBOM packages if it's there # and then overwrite its entry with our own generated one. 
+ sbom_pip_spdx_id = spdx_id("SPDXRef-PACKAGE-pip") sbom_data["packages"] = [ sbom_package for sbom_package in sbom_data["packages"] @@ -181,7 +334,7 @@ def discover_pip_sbom_package(sbom_data: dict[str, typing.Any]) -> None: ] sbom_data["packages"].append( { - "SPDXID": spdx_id("SPDXRef-PACKAGE-pip"), + "SPDXID": sbom_pip_spdx_id, "name": "pip", "versionInfo": pip_version, "originator": "Organization: Python Packaging Authority", @@ -205,12 +358,27 @@ def discover_pip_sbom_package(sbom_data: dict[str, typing.Any]) -> None: "primaryPackagePurpose": "SOURCE", } ) + for sbom_dep_spdx_id in sorted(sbom_pip_dependency_spdx_ids): + sbom_data["relationships"].append({ + "spdxElementId": sbom_pip_spdx_id, + "relatedSpdxElement": sbom_dep_spdx_id, + "relationshipType": "DEPENDS_ON" + }) def main() -> None: sbom_path = CPYTHON_ROOT_DIR / "Misc/sbom.spdx.json" sbom_data = json.loads(sbom_path.read_bytes()) + # Check if pip should be removed if the wheel is missing. + # We can't reset the SBOM relationship data until checking this. + maybe_remove_pip_and_deps_from_sbom(sbom_data) + + # We regenerate all of this information. Package information + # should be preserved though since that is edited by humans. + sbom_data["files"] = [] + sbom_data["relationships"] = [] + # Insert pip's SBOM metadata from the wheel. 
discover_pip_sbom_package(sbom_data) @@ -227,9 +395,10 @@ def main() -> None: "name" not in package, "Package is missing the 'name' field" ) + missing_required_keys = REQUIRED_PROPERTIES_PACKAGE - set(package.keys()) error_if( - set(package.keys()) != REQUIRED_PROPERTIES_PACKAGE, - f"Package '{package['name']}' is missing required fields", + bool(missing_required_keys), + f"Package '{package['name']}' is missing required fields: {missing_required_keys}", ) error_if( package["SPDXID"] != spdx_id(f"SPDXRef-PACKAGE-{package['name']}"), @@ -257,15 +426,11 @@ def main() -> None: f"License identifier '{license_concluded}' not in SBOM tool allowlist" ) - # Regenerate file information from current data. - sbom_files = [] - sbom_relationships = [] - # We call 'sorted()' here a lot to avoid filesystem scan order issues. for name, files in sorted(PACKAGE_TO_FILES.items()): package_spdx_id = spdx_id(f"SPDXRef-PACKAGE-{name}") exclude = files.exclude or () - for include in sorted(files.include): + for include in sorted(files.include or ()): # Find all the paths and then filter them through .gitignore. paths = glob.glob(include, root_dir=CPYTHON_ROOT_DIR, recursive=True) paths = filter_gitignored_paths(paths) @@ -285,7 +450,7 @@ def main() -> None: checksum_sha256 = hashlib.sha256(data).hexdigest() file_spdx_id = spdx_id(f"SPDXRef-FILE-{path}") - sbom_files.append({ + sbom_data["files"].append({ "SPDXID": file_spdx_id, "fileName": path, "checksums": [ @@ -295,15 +460,13 @@ def main() -> None: }) # Tie each file back to its respective package. 
- sbom_relationships.append({ + sbom_data["relationships"].append({ "spdxElementId": package_spdx_id, "relatedSpdxElement": file_spdx_id, "relationshipType": "CONTAINS", }) # Update the SBOM on disk - sbom_data["files"] = sbom_files - sbom_data["relationships"] = sbom_relationships sbom_path.write_text(json.dumps(sbom_data, indent=2, sort_keys=True)) From 01d970c1b8acf3ccf199d5de151a635ffd9d8c61 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Fri, 26 Jan 2024 12:55:22 +0300 Subject: [PATCH 098/160] gh-101100: Fix sphinx warnings in `c-api/file.rst` (#114546) Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> --- Doc/c-api/file.rst | 9 +++++++-- Doc/c-api/object.rst | 8 ++++++++ Doc/tools/.nitignore | 2 -- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/Doc/c-api/file.rst b/Doc/c-api/file.rst index b36c800e00444a..0a03841e467cad 100644 --- a/Doc/c-api/file.rst +++ b/Doc/c-api/file.rst @@ -65,8 +65,13 @@ the :mod:`io` APIs instead. Overrides the normal behavior of :func:`io.open_code` to pass its parameter through the provided handler. - The handler is a function of type :c:expr:`PyObject *(\*)(PyObject *path, - void *userData)`, where *path* is guaranteed to be :c:type:`PyUnicodeObject`. + The handler is a function of type: + + .. c:type:: Py_OpenCodeHookFunction + + Equivalent of :c:expr:`PyObject *(\*)(PyObject *path, + void *userData)`, where *path* is guaranteed to be + :c:type:`PyUnicodeObject`. The *userData* pointer is passed into the hook function. Since hook functions may be called from different runtimes, this pointer should not diff --git a/Doc/c-api/object.rst b/Doc/c-api/object.rst index 8a179690d048e3..4f656779c80b1a 100644 --- a/Doc/c-api/object.rst +++ b/Doc/c-api/object.rst @@ -19,6 +19,14 @@ Object Protocol to NotImplemented and return it). +.. c:macro:: Py_PRINT_RAW + + Flag to be used with multiple functions that print the object (like + :c:func:`PyObject_Print` and :c:func:`PyFile_WriteObject`). 
+ If passed, these function would use the :func:`str` of the object + instead of the :func:`repr`. + + .. c:function:: int PyObject_Print(PyObject *o, FILE *fp, int flags) Print an object *o*, on file *fp*. Returns ``-1`` on error. The flags argument diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 00b4b6919ff14a..d56a44ad09a6f8 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -4,7 +4,6 @@ Doc/c-api/descriptor.rst Doc/c-api/exceptions.rst -Doc/c-api/file.rst Doc/c-api/float.rst Doc/c-api/gcsupport.rst Doc/c-api/init.rst @@ -12,7 +11,6 @@ Doc/c-api/init_config.rst Doc/c-api/intro.rst Doc/c-api/memoryview.rst Doc/c-api/module.rst -Doc/c-api/object.rst Doc/c-api/stable.rst Doc/c-api/sys.rst Doc/c-api/type.rst From 06c5de36f222b926bbc94831536096b974bd5e77 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Fri, 26 Jan 2024 11:05:08 +0100 Subject: [PATCH 099/160] Docs: reword dbm.gnu introduction (#114548) Also... - consistently spell GDBM as GDBM - silence gdbm class refs - improve accuracy of dbm.gdbm.open() spec --- Doc/library/dbm.rst | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/Doc/library/dbm.rst b/Doc/library/dbm.rst index eca1c25602a018..0a9d28a41c6d7a 100644 --- a/Doc/library/dbm.rst +++ b/Doc/library/dbm.rst @@ -137,27 +137,28 @@ then prints out the contents of the database:: The individual submodules are described in the following sections. -:mod:`dbm.gnu` --- GNU's reinterpretation of dbm ------------------------------------------------- +:mod:`dbm.gnu` --- GNU database manager +--------------------------------------- .. module:: dbm.gnu :platform: Unix - :synopsis: GNU's reinterpretation of dbm. + :synopsis: GNU database manager **Source code:** :source:`Lib/dbm/gnu.py` -------------- -This module is quite similar to the :mod:`dbm` module, but uses the GNU library -``gdbm`` instead to provide some additional functionality. 
Please note that the -file formats created by :mod:`dbm.gnu` and :mod:`dbm.ndbm` are incompatible. +The :mod:`dbm.gnu` module provides an interface to the :abbr:`GDBM (GNU dbm)` +library, similar to the :mod:`dbm.ndbm` module, but with additional +functionality like crash tolerance. -The :mod:`dbm.gnu` module provides an interface to the GNU DBM library. -``dbm.gnu.gdbm`` objects behave like mappings (dictionaries), except that keys and -values are always converted to bytes before storing. Printing a ``gdbm`` -object doesn't print the -keys and values, and the :meth:`items` and :meth:`values` methods are not -supported. +:class:`!gdbm` objects behave similar to :term:`mappings <mapping>`, +except that keys and values are always converted to :class:`bytes` before storing, +and the :meth:`!items` and :meth:`!values` methods are not supported. + +.. note:: + The file formats created by :mod:`dbm.gnu` and :mod:`dbm.ndbm` are + incompatible and can not be used interchangeably. .. exception:: error @@ -165,9 +166,9 @@ supported. raised for general mapping errors like specifying an incorrect key. -.. function:: open(filename[, flag[, mode]]) +.. function:: open(filename, flag="r", mode=0o666, /) - Open a ``gdbm`` database and return a :class:`gdbm` object. The *filename* + Open a GDBM database and return a :class:`!gdbm` object. The *filename* argument is the name of the database file. The optional *flag* argument can be: @@ -196,14 +197,14 @@ supported. | ``'u'`` | Do not lock database. | +---------+--------------------------------------------+ - Not all flags are valid for all versions of ``gdbm``. The module constant + Not all flags are valid for all versions of GDBM. The module constant :const:`open_flags` is a string of supported flag characters. The exception :exc:`error` is raised if an invalid flag is specified. The optional *mode* argument is the Unix mode of the file, used only when the database has to be created. It defaults to octal ``0o666``. 
- In addition to the dictionary-like methods, ``gdbm`` objects have the + In addition to the dictionary-like methods, :class:`gdbm` objects have the following methods: .. versionchanged:: 3.11 @@ -212,7 +213,7 @@ supported. .. method:: gdbm.firstkey() It's possible to loop over every key in the database using this method and the - :meth:`nextkey` method. The traversal is ordered by ``gdbm``'s internal + :meth:`nextkey` method. The traversal is ordered by GDBM's internal hash values, and won't be sorted by the key values. This method returns the starting key. @@ -230,7 +231,7 @@ supported. .. method:: gdbm.reorganize() If you have carried out a lot of deletions and would like to shrink the space - used by the ``gdbm`` file, this routine will reorganize the database. ``gdbm`` + used by the GDBM file, this routine will reorganize the database. :class:`!gdbm` objects will not shorten the length of a database file except by using this reorganization; otherwise, deleted file space will be kept and reused as new (key, value) pairs are added. @@ -242,11 +243,11 @@ supported. .. method:: gdbm.close() - Close the ``gdbm`` database. + Close the GDBM database. .. method:: gdbm.clear() - Remove all items from the ``gdbm`` database. + Remove all items from the GDBM database. .. 
versionadded:: 3.13 From d0f7f5c41d71758c59f9372a192e927d73cf7c27 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Fri, 26 Jan 2024 05:10:03 -0500 Subject: [PATCH 100/160] gh-114312: Fix rare event counter tests on aarch64 (GH-114554) --- Modules/_testinternalcapi.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 2c32c691afa583..c4a648a1816392 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -1642,11 +1642,11 @@ get_rare_event_counters(PyObject *self, PyObject *type) return Py_BuildValue( "{sksksksksk}", - "set_class", interp->rare_events.set_class, - "set_bases", interp->rare_events.set_bases, - "set_eval_frame_func", interp->rare_events.set_eval_frame_func, - "builtin_dict", interp->rare_events.builtin_dict, - "func_modification", interp->rare_events.func_modification + "set_class", (unsigned long)interp->rare_events.set_class, + "set_bases", (unsigned long)interp->rare_events.set_bases, + "set_eval_frame_func", (unsigned long)interp->rare_events.set_eval_frame_func, + "builtin_dict", (unsigned long)interp->rare_events.builtin_dict, + "func_modification", (unsigned long)interp->rare_events.func_modification ); } From dcd28b5c35dda8e2cb7c5f66450f2aff0948c001 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Fri, 26 Jan 2024 11:11:35 +0100 Subject: [PATCH 101/160] gh-114569: Use PyMem_* APIs for most non-PyObject uses (#114574) Fix usage in Modules, Objects, and Parser subdirectories. 
--- Modules/_elementtree.c | 23 +++++++++++++---------- Modules/_sre/sre_lib.h | 4 ++-- Modules/mathmodule.c | 12 ++++++------ Modules/pyexpat.c | 2 +- Objects/bytearrayobject.c | 14 +++++++------- Objects/typeobject.c | 10 +++++----- Parser/lexer/lexer.c | 4 ++-- 7 files changed, 36 insertions(+), 33 deletions(-) diff --git a/Modules/_elementtree.c b/Modules/_elementtree.c index b574c96d3f9625..54451081211654 100644 --- a/Modules/_elementtree.c +++ b/Modules/_elementtree.c @@ -267,7 +267,7 @@ typedef struct { LOCAL(int) create_extra(ElementObject* self, PyObject* attrib) { - self->extra = PyObject_Malloc(sizeof(ElementObjectExtra)); + self->extra = PyMem_Malloc(sizeof(ElementObjectExtra)); if (!self->extra) { PyErr_NoMemory(); return -1; @@ -295,10 +295,11 @@ dealloc_extra(ElementObjectExtra *extra) for (i = 0; i < extra->length; i++) Py_DECREF(extra->children[i]); - if (extra->children != extra->_children) - PyObject_Free(extra->children); + if (extra->children != extra->_children) { + PyMem_Free(extra->children); + } - PyObject_Free(extra); + PyMem_Free(extra); } LOCAL(void) @@ -495,14 +496,16 @@ element_resize(ElementObject* self, Py_ssize_t extra) * "children", which needs at least 4 bytes. Although it's a * false alarm always assume at least one child to be safe. 
*/ - children = PyObject_Realloc(self->extra->children, - size * sizeof(PyObject*)); - if (!children) + children = PyMem_Realloc(self->extra->children, + size * sizeof(PyObject*)); + if (!children) { goto nomemory; + } } else { - children = PyObject_Malloc(size * sizeof(PyObject*)); - if (!children) + children = PyMem_Malloc(size * sizeof(PyObject*)); + if (!children) { goto nomemory; + } /* copy existing children from static area to malloc buffer */ memcpy(children, self->extra->children, self->extra->length * sizeof(PyObject*)); @@ -3044,7 +3047,7 @@ _elementtree_TreeBuilder_start_impl(TreeBuilderObject *self, PyObject *tag, #define EXPAT(st, func) ((st)->expat_capi->func) static XML_Memory_Handling_Suite ExpatMemoryHandler = { - PyObject_Malloc, PyObject_Realloc, PyObject_Free}; + PyMem_Malloc, PyMem_Realloc, PyMem_Free}; typedef struct { PyObject_HEAD diff --git a/Modules/_sre/sre_lib.h b/Modules/_sre/sre_lib.h index f5497d9ff2b93f..97fbb0a75e54b6 100644 --- a/Modules/_sre/sre_lib.h +++ b/Modules/_sre/sre_lib.h @@ -1122,7 +1122,7 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel) /* install new repeat context */ /* TODO(https://github.com/python/cpython/issues/67877): Fix this * potential memory leak. 
*/ - ctx->u.rep = (SRE_REPEAT*) PyObject_Malloc(sizeof(*ctx->u.rep)); + ctx->u.rep = (SRE_REPEAT*) PyMem_Malloc(sizeof(*ctx->u.rep)); if (!ctx->u.rep) { PyErr_NoMemory(); RETURN_FAILURE; @@ -1136,7 +1136,7 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel) state->ptr = ptr; DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]); state->repeat = ctx->u.rep->prev; - PyObject_Free(ctx->u.rep); + PyMem_Free(ctx->u.rep); if (ret) { RETURN_ON_ERROR(ret); diff --git a/Modules/mathmodule.c b/Modules/mathmodule.c index 2a796c1c55d2f0..0be46b1574c1fe 100644 --- a/Modules/mathmodule.c +++ b/Modules/mathmodule.c @@ -2570,7 +2570,7 @@ math_dist_impl(PyObject *module, PyObject *p, PyObject *q) goto error_exit; } if (n > NUM_STACK_ELEMS) { - diffs = (double *) PyObject_Malloc(n * sizeof(double)); + diffs = (double *) PyMem_Malloc(n * sizeof(double)); if (diffs == NULL) { PyErr_NoMemory(); goto error_exit; @@ -2590,7 +2590,7 @@ math_dist_impl(PyObject *module, PyObject *p, PyObject *q) } result = vector_norm(n, diffs, max, found_nan); if (diffs != diffs_on_stack) { - PyObject_Free(diffs); + PyMem_Free(diffs); } if (p_allocated) { Py_DECREF(p); @@ -2602,7 +2602,7 @@ math_dist_impl(PyObject *module, PyObject *p, PyObject *q) error_exit: if (diffs != diffs_on_stack) { - PyObject_Free(diffs); + PyMem_Free(diffs); } if (p_allocated) { Py_DECREF(p); @@ -2626,7 +2626,7 @@ math_hypot(PyObject *self, PyObject *const *args, Py_ssize_t nargs) double *coordinates = coord_on_stack; if (nargs > NUM_STACK_ELEMS) { - coordinates = (double *) PyObject_Malloc(nargs * sizeof(double)); + coordinates = (double *) PyMem_Malloc(nargs * sizeof(double)); if (coordinates == NULL) { return PyErr_NoMemory(); } @@ -2643,13 +2643,13 @@ math_hypot(PyObject *self, PyObject *const *args, Py_ssize_t nargs) } result = vector_norm(nargs, coordinates, max, found_nan); if (coordinates != coord_on_stack) { - PyObject_Free(coordinates); + PyMem_Free(coordinates); } return PyFloat_FromDouble(result); 
error_exit: if (coordinates != coord_on_stack) { - PyObject_Free(coordinates); + PyMem_Free(coordinates); } return NULL; } diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index ec44892d101e44..7c08eda83e66b2 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -21,7 +21,7 @@ module pyexpat #define XML_COMBINED_VERSION (10000*XML_MAJOR_VERSION+100*XML_MINOR_VERSION+XML_MICRO_VERSION) static XML_Memory_Handling_Suite ExpatMemoryHandler = { - PyObject_Malloc, PyObject_Realloc, PyObject_Free}; + PyMem_Malloc, PyMem_Realloc, PyMem_Free}; enum HandlerTypes { StartElement, diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 659de7d3dd5a99..acc59b926448ca 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -132,7 +132,7 @@ PyByteArray_FromStringAndSize(const char *bytes, Py_ssize_t size) } else { alloc = size + 1; - new->ob_bytes = PyObject_Malloc(alloc); + new->ob_bytes = PyMem_Malloc(alloc); if (new->ob_bytes == NULL) { Py_DECREF(new); return PyErr_NoMemory(); @@ -221,17 +221,17 @@ PyByteArray_Resize(PyObject *self, Py_ssize_t requested_size) } if (logical_offset > 0) { - sval = PyObject_Malloc(alloc); + sval = PyMem_Malloc(alloc); if (sval == NULL) { PyErr_NoMemory(); return -1; } memcpy(sval, PyByteArray_AS_STRING(self), Py_MIN((size_t)requested_size, (size_t)Py_SIZE(self))); - PyObject_Free(obj->ob_bytes); + PyMem_Free(obj->ob_bytes); } else { - sval = PyObject_Realloc(obj->ob_bytes, alloc); + sval = PyMem_Realloc(obj->ob_bytes, alloc); if (sval == NULL) { PyErr_NoMemory(); return -1; @@ -951,7 +951,7 @@ bytearray_repr(PyByteArrayObject *self) } newsize += 6 + length * 4; - buffer = PyObject_Malloc(newsize); + buffer = PyMem_Malloc(newsize); if (buffer == NULL) { PyErr_NoMemory(); return NULL; @@ -1008,7 +1008,7 @@ bytearray_repr(PyByteArrayObject *self) } v = PyUnicode_FromStringAndSize(buffer, p - buffer); - PyObject_Free(buffer); + PyMem_Free(buffer); return v; } @@ -1088,7 +1088,7 @@ 
bytearray_dealloc(PyByteArrayObject *self) PyErr_Print(); } if (self->ob_bytes != 0) { - PyObject_Free(self->ob_bytes); + PyMem_Free(self->ob_bytes); } Py_TYPE(self)->tp_free((PyObject *)self); } diff --git a/Objects/typeobject.c b/Objects/typeobject.c index a8c3b8896d36eb..114cf21f95e744 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -3493,7 +3493,7 @@ type_new_set_doc(PyTypeObject *type) // Silently truncate the docstring if it contains a null byte Py_ssize_t size = strlen(doc_str) + 1; - char *tp_doc = (char *)PyObject_Malloc(size); + char *tp_doc = (char *)PyMem_Malloc(size); if (tp_doc == NULL) { PyErr_NoMemory(); return -1; @@ -4166,12 +4166,12 @@ _PyType_FromMetaclass_impl( goto finally; } if (slot->pfunc == NULL) { - PyObject_Free(tp_doc); + PyMem_Free(tp_doc); tp_doc = NULL; } else { size_t len = strlen(slot->pfunc)+1; - tp_doc = PyObject_Malloc(len); + tp_doc = PyMem_Malloc(len); if (tp_doc == NULL) { PyErr_NoMemory(); goto finally; @@ -4501,7 +4501,7 @@ _PyType_FromMetaclass_impl( Py_CLEAR(res); } Py_XDECREF(bases); - PyObject_Free(tp_doc); + PyMem_Free(tp_doc); Py_XDECREF(ht_name); PyMem_Free(_ht_tpname); return (PyObject*)res; @@ -5099,7 +5099,7 @@ type_dealloc(PyObject *self) /* A type's tp_doc is heap allocated, unlike the tp_doc slots * of most other objects. It's okay to cast it to char *. 
*/ - PyObject_Free((char *)type->tp_doc); + PyMem_Free((char *)type->tp_doc); PyHeapTypeObject *et = (PyHeapTypeObject *)type; Py_XDECREF(et->ht_name); diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c index ebf7686773ff45..82b0e4ee352d62 100644 --- a/Parser/lexer/lexer.c +++ b/Parser/lexer/lexer.c @@ -129,7 +129,7 @@ set_fstring_expr(struct tok_state* tok, struct token *token, char c) { if (hash_detected) { Py_ssize_t input_length = tok_mode->last_expr_size - tok_mode->last_expr_end; - char *result = (char *)PyObject_Malloc((input_length + 1) * sizeof(char)); + char *result = (char *)PyMem_Malloc((input_length + 1) * sizeof(char)); if (!result) { return -1; } @@ -154,7 +154,7 @@ set_fstring_expr(struct tok_state* tok, struct token *token, char c) { result[j] = '\0'; // Null-terminate the result string res = PyUnicode_DecodeUTF8(result, j, NULL); - PyObject_Free(result); + PyMem_Free(result); } else { res = PyUnicode_DecodeUTF8( tok_mode->last_expr_buffer, From 65cf5dce11a38e327b9b0abfca279d650452b34f Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Fri, 26 Jan 2024 11:15:34 +0100 Subject: [PATCH 102/160] Docs: rework dbm introduction (#114551) - add refs to other parts of the docs (dict, bytes, etc.) - clarify whichdb() return value by using list markup - silence refs to example or generic submodule methods (keys, get, etc.) --- Doc/library/dbm.rst | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/Doc/library/dbm.rst b/Doc/library/dbm.rst index 0a9d28a41c6d7a..1a8e0158fcdbd5 100644 --- a/Doc/library/dbm.rst +++ b/Doc/library/dbm.rst @@ -28,10 +28,11 @@ the Oracle Berkeley DB. available --- :mod:`dbm.gnu`, :mod:`dbm.ndbm` or :mod:`dbm.dumb` --- should be used to open a given file. 
- Returns one of the following values: ``None`` if the file can't be opened - because it's unreadable or doesn't exist; the empty string (``''``) if the - file's format can't be guessed; or a string containing the required module - name, such as ``'dbm.ndbm'`` or ``'dbm.gnu'``. + Return one of the following values: + + * ``None`` if the file can't be opened because it's unreadable or doesn't exist + * the empty string (``''``) if the file's format can't be guessed + * a string containing the required module name, such as ``'dbm.ndbm'`` or ``'dbm.gnu'`` .. versionchanged:: 3.11 Accepts :term:`path-like object` for filename. @@ -74,13 +75,13 @@ the Oracle Berkeley DB. modified by the prevailing umask). -The object returned by :func:`.open` supports the same basic functionality as -dictionaries; keys and their corresponding values can be stored, retrieved, and -deleted, and the :keyword:`in` operator and the :meth:`keys` method are -available, as well as :meth:`get` and :meth:`setdefault`. +The object returned by :func:`open` supports the same basic functionality as a +:class:`dict`; keys and their corresponding values can be stored, retrieved, and +deleted, and the :keyword:`in` operator and the :meth:`!keys` method are +available, as well as :meth:`!get` and :meth:`!setdefault`. .. versionchanged:: 3.2 - :meth:`get` and :meth:`setdefault` are now available in all database modules. + :meth:`!get` and :meth:`!setdefault` are now available in all database modules. .. versionchanged:: 3.8 Deleting a key from a read-only database raises database module specific error @@ -89,7 +90,7 @@ available, as well as :meth:`get` and :meth:`setdefault`. .. versionchanged:: 3.11 Accepts :term:`path-like object` for file. -Key and values are always stored as bytes. This means that when +Key and values are always stored as :class:`bytes`. This means that when strings are used they are implicitly converted to the default encoding before being stored. 
From 4cf068ed0879cccf86a45f06fb274b350b89e911 Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Fri, 26 Jan 2024 13:35:56 +0100 Subject: [PATCH 103/160] Docs: reword dbm.ndbm introduction (#114549) - add abbreviation directives for NDBM and GDBM - consistently spell NDBM as NDBM - silence broken ndbm class refs - improve accuracy of dbm.ndbm.open() spec - use replacement text for NDBM/GDBM file format incompatibility note --- Doc/library/dbm.rst | 47 +++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/Doc/library/dbm.rst b/Doc/library/dbm.rst index 1a8e0158fcdbd5..bc5bb0cec0cef7 100644 --- a/Doc/library/dbm.rst +++ b/Doc/library/dbm.rst @@ -52,6 +52,10 @@ the Oracle Berkeley DB. .. |flag_n| replace:: Always create a new, empty database, open for reading and writing. +.. |incompat_note| replace:: + The file formats created by :mod:`dbm.gnu` and :mod:`dbm.ndbm` are incompatible + and can not be used interchangeably. + .. function:: open(file, flag='r', mode=0o666) Open the database file *file* and return a corresponding object. @@ -157,9 +161,7 @@ functionality like crash tolerance. except that keys and values are always converted to :class:`bytes` before storing, and the :meth:`!items` and :meth:`!values` methods are not supported. -.. note:: - The file formats created by :mod:`dbm.gnu` and :mod:`dbm.ndbm` are - incompatible and can not be used interchangeably. +.. note:: |incompat_note| .. exception:: error @@ -253,29 +255,31 @@ and the :meth:`!items` and :meth:`!values` methods are not supported. .. versionadded:: 3.13 -:mod:`dbm.ndbm` --- Interface based on ndbm -------------------------------------------- +:mod:`dbm.ndbm` --- New Database Manager +---------------------------------------- .. module:: dbm.ndbm :platform: Unix - :synopsis: The standard "database" interface, based on ndbm. 
+ :synopsis: The New Database Manager **Source code:** :source:`Lib/dbm/ndbm.py` -------------- -The :mod:`dbm.ndbm` module provides an interface to the Unix "(n)dbm" library. -Dbm objects behave like mappings (dictionaries), except that keys and values are -always stored as bytes. Printing a ``dbm`` object doesn't print the keys and -values, and the :meth:`items` and :meth:`values` methods are not supported. +The :mod:`dbm.ndbm` module provides an interface to the +:abbr:`NDBM (New Database Manager)` library. +:class:`!ndbm` objects behave similar to :term:`mappings <mapping>`, +except that keys and values are always stored as :class:`bytes`, +and the :meth:`!items` and :meth:`!values` methods are not supported. + +This module can be used with the "classic" NDBM interface or the +:abbr:`GDBM (GNU dbm)` compatibility interface. -This module can be used with the "classic" ndbm interface or the GNU GDBM -compatibility interface. On Unix, the :program:`configure` script will attempt -to locate the appropriate header file to simplify building this module. +.. note:: |incompat_note| .. warning:: - The ndbm library shipped as part of macOS has an undocumented limitation on the + The NDBM library shipped as part of macOS has an undocumented limitation on the size of values, which can result in corrupted database files when storing values larger than this limit. Reading such corrupted files can result in a hard crash (segmentation fault). @@ -288,13 +292,14 @@ to locate the appropriate header file to simplify building this module. .. data:: library - Name of the ``ndbm`` implementation library used. + Name of the NDBM implementation library used. -.. function:: open(filename[, flag[, mode]]) +.. function:: open(filename, flag="r", mode=0o666, /) - Open a dbm database and return a ``ndbm`` object. The *filename* argument is the - name of the database file (without the :file:`.dir` or :file:`.pag` extensions). + Open an NDBM database and return an :class:`!ndbm` object. 
+ The *filename* argument is the name of the database file + (without the :file:`.dir` or :file:`.pag` extensions). The optional *flag* argument must be one of these values: @@ -310,7 +315,7 @@ to locate the appropriate header file to simplify building this module. database has to be created. It defaults to octal ``0o666`` (and will be modified by the prevailing umask). - In addition to the dictionary-like methods, ``ndbm`` objects + In addition to the dictionary-like methods, :class:`!ndbm` objects provide the following method: .. versionchanged:: 3.11 @@ -318,11 +323,11 @@ to locate the appropriate header file to simplify building this module. .. method:: ndbm.close() - Close the ``ndbm`` database. + Close the NDBM database. .. method:: ndbm.clear() - Remove all items from the ``ndbm`` database. + Remove all items from the NDBM database. .. versionadded:: 3.13 From 8710faeac28e65c65862359413e8341492f529af Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Fri, 26 Jan 2024 13:36:37 +0100 Subject: [PATCH 104/160] Docs: fix versionchanged directives for dbm.open() and dbm.whichdb() (#114594) --- Doc/library/dbm.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Doc/library/dbm.rst b/Doc/library/dbm.rst index bc5bb0cec0cef7..55846e996b5d26 100644 --- a/Doc/library/dbm.rst +++ b/Doc/library/dbm.rst @@ -34,8 +34,8 @@ the Oracle Berkeley DB. * the empty string (``''``) if the file's format can't be guessed * a string containing the required module name, such as ``'dbm.ndbm'`` or ``'dbm.gnu'`` -.. versionchanged:: 3.11 - Accepts :term:`path-like object` for filename. + .. versionchanged:: 3.11 + *filename* accepts a :term:`path-like object`. .. Substitutions for the open() flag param docs; all submodules use the same text. @@ -78,6 +78,9 @@ the Oracle Berkeley DB. database has to be created. It defaults to octal ``0o666`` (and will be modified by the prevailing umask). + .. 
versionchanged:: 3.11 + *file* accepts a :term:`path-like object`. + The object returned by :func:`open` supports the same basic functionality as a :class:`dict`; keys and their corresponding values can be stored, retrieved, and @@ -91,9 +94,6 @@ available, as well as :meth:`!get` and :meth:`!setdefault`. Deleting a key from a read-only database raises database module specific error instead of :exc:`KeyError`. -.. versionchanged:: 3.11 - Accepts :term:`path-like object` for file. - Key and values are always stored as :class:`bytes`. This means that when strings are used they are implicitly converted to the default encoding before being stored. From 30b7b4f73cd876732244de06d079a2caf2a7b95c Mon Sep 17 00:00:00 2001 From: Ned Batchelder Date: Fri, 26 Jan 2024 08:42:49 -0500 Subject: [PATCH 105/160] Docs: 'still' is a better word than 'nonetheless' (#114598) --- Doc/library/dataclasses.rst | 2 +- Doc/library/imaplib.rst | 2 +- Doc/using/cmdline.rst | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Doc/library/dataclasses.rst b/Doc/library/dataclasses.rst index cde147d1cbb501..88f2e0251b1e51 100644 --- a/Doc/library/dataclasses.rst +++ b/Doc/library/dataclasses.rst @@ -141,7 +141,7 @@ Module contents then :func:`dataclass` *may* add an implicit :meth:`~object.__hash__` method. Although not recommended, you can force :func:`dataclass` to create a :meth:`~object.__hash__` method with ``unsafe_hash=True``. This might be the case - if your class is logically immutable but can nonetheless be mutated. + if your class is logically immutable but can still be mutated. This is a specialized use case and should be considered carefully. 
Here are the rules governing implicit creation of a :meth:`~object.__hash__` diff --git a/Doc/library/imaplib.rst b/Doc/library/imaplib.rst index 1f774e64b0eae3..d5c868def3b64f 100644 --- a/Doc/library/imaplib.rst +++ b/Doc/library/imaplib.rst @@ -531,7 +531,7 @@ An :class:`IMAP4` instance has the following methods: allowed creation of such tags, and popular IMAP servers, such as Gmail, accept and produce such flags. There are non-Python programs which also create such tags. Although it is an RFC violation and IMAP clients and - servers are supposed to be strict, imaplib nonetheless continues to allow + servers are supposed to be strict, imaplib still continues to allow such tags to be created for backward compatibility reasons, and as of Python 3.6, handles them if they are sent from the server, since this improves real-world compatibility. diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index df8b07c6118599..53c95ca1a05c9b 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -623,7 +623,7 @@ Setting the environment variable ``TERM`` to ``dumb`` will disable color. If the environment variable ``FORCE_COLOR`` is set, then color will be enabled regardless of the value of TERM. This is useful on CI systems which -aren’t terminals but can none-the-less display ANSI escape sequences. +aren’t terminals but can still display ANSI escape sequences. If the environment variable ``NO_COLOR`` is set, Python will disable all color in the output. This takes precedence over ``FORCE_COLOR``. 
From 442a299af06d0dfe89484a841451666503479e2e Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Fri, 26 Jan 2024 14:38:24 +0000 Subject: [PATCH 106/160] gh-114272: Allow _wmi audit test to succeed even if it times out (GH-114602) --- Lib/test/audit-tests.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Lib/test/audit-tests.py b/Lib/test/audit-tests.py index ce4a11b119c900..de7d0da560a1c7 100644 --- a/Lib/test/audit-tests.py +++ b/Lib/test/audit-tests.py @@ -487,7 +487,13 @@ def hook(event, args): print(event, args[0]) sys.addaudithook(hook) - _wmi.exec_query("SELECT * FROM Win32_OperatingSystem") + try: + _wmi.exec_query("SELECT * FROM Win32_OperatingSystem") + except WindowsError as e: + # gh-112278: WMI may be slow response when first called, but we still + # get the audit event, so just ignore the timeout + if e.winerror != 258: + raise def test_syslog(): import syslog From 0bd8297a2208125f76807cdf01f72abe5c94136b Mon Sep 17 00:00:00 2001 From: "Erlend E. Aasland" Date: Fri, 26 Jan 2024 16:11:45 +0100 Subject: [PATCH 107/160] Docs: mark up dbm.open() with param list (#114601) Also consolidate following paragraphs regarding database objects. --- Doc/library/dbm.rst | 52 ++++++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/Doc/library/dbm.rst b/Doc/library/dbm.rst index 55846e996b5d26..3a7dad1a0736a0 100644 --- a/Doc/library/dbm.rst +++ b/Doc/library/dbm.rst @@ -58,41 +58,33 @@ the Oracle Berkeley DB. .. function:: open(file, flag='r', mode=0o666) - Open the database file *file* and return a corresponding object. + Open a database and return the corresponding database object. - If the database file already exists, the :func:`whichdb` function is used to - determine its type and the appropriate module is used; if it does not exist, - the first module listed above that can be imported is used. + :param file: + The database file to open. 
- The optional *flag* argument can be: + If the database file already exists, the :func:`whichdb` function is used to + determine its type and the appropriate module is used; if it does not exist, + the first submodule listed above that can be imported is used. + :type file: :term:`path-like object` - .. csv-table:: - :header: "Value", "Meaning" + :param str flag: + * ``'r'`` (default), |flag_r| + * ``'w'``, |flag_w| + * ``'c'``, |flag_c| + * ``'n'``, |flag_n| - ``'r'`` (default), |flag_r| - ``'w'``, |flag_w| - ``'c'``, |flag_c| - ``'n'``, |flag_n| - - The optional *mode* argument is the Unix mode of the file, used only when the - database has to be created. It defaults to octal ``0o666`` (and will be - modified by the prevailing umask). + :param int mode: + The Unix file access mode of the file (default: octal ``0o666``), + used only when the database has to be created. .. versionchanged:: 3.11 *file* accepts a :term:`path-like object`. - -The object returned by :func:`open` supports the same basic functionality as a +The object returned by :func:`~dbm.open` supports the same basic functionality as a :class:`dict`; keys and their corresponding values can be stored, retrieved, and deleted, and the :keyword:`in` operator and the :meth:`!keys` method are -available, as well as :meth:`!get` and :meth:`!setdefault`. - -.. versionchanged:: 3.2 - :meth:`!get` and :meth:`!setdefault` are now available in all database modules. - -.. versionchanged:: 3.8 - Deleting a key from a read-only database raises database module specific error - instead of :exc:`KeyError`. +available, as well as :meth:`!get` and :meth:`!setdefault` methods. Key and values are always stored as :class:`bytes`. This means that when strings are used they are implicitly converted to the default encoding before @@ -101,9 +93,17 @@ being stored. These objects also support being used in a :keyword:`with` statement, which will automatically close them when done. +.. 
versionchanged:: 3.2 + :meth:`!get` and :meth:`!setdefault` methods are now available for all + :mod:`dbm` backends. + .. versionchanged:: 3.4 Added native support for the context management protocol to the objects - returned by :func:`.open`. + returned by :func:`~dbm.open`. + +.. versionchanged:: 3.8 + Deleting a key from a read-only database raises a database module specific exception + instead of :exc:`KeyError`. The following example records some hostnames and a corresponding title, and then prints out the contents of the database:: From 504334c7be5a56237df2598d338cd494a42fca4c Mon Sep 17 00:00:00 2001 From: Rito Takeuchi Date: Sat, 27 Jan 2024 00:19:41 +0900 Subject: [PATCH 108/160] gh-77749: Fix inconsistent behavior of non-ASCII handling in EmailPolicy.fold() (GH-6986) It now always encodes non-ASCII characters in headers if utf8 is false. Co-authored-by: Serhiy Storchaka --- Lib/email/policy.py | 9 ++++++++- Lib/test/test_email/test_policy.py | 17 +++++++++++++++++ ...024-01-26-16-46-21.gh-issue-77749.NY_7TS.rst | 2 ++ 3 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-01-26-16-46-21.gh-issue-77749.NY_7TS.rst diff --git a/Lib/email/policy.py b/Lib/email/policy.py index 611deb50bb5290..8816c84ed175a7 100644 --- a/Lib/email/policy.py +++ b/Lib/email/policy.py @@ -210,8 +210,15 @@ def _fold(self, name, value, refold_binary=False): self.refold_source == 'long' and (lines and len(lines[0])+len(name)+2 > maxlen or any(len(x) > maxlen for x in lines[1:]))) - if refold or refold_binary and _has_surrogates(value): + + if not refold: + if not self.utf8: + refold = not value.isascii() + elif refold_binary: + refold = _has_surrogates(value) + if refold: return self.header_factory(name, ''.join(lines)).fold(policy=self) + return name + ': ' + self.linesep.join(lines) + self.linesep diff --git a/Lib/test/test_email/test_policy.py b/Lib/test/test_email/test_policy.py index e87c275549406d..c6b9c80efe1b54 100644 --- 
a/Lib/test/test_email/test_policy.py +++ b/Lib/test/test_email/test_policy.py @@ -135,6 +135,23 @@ def test_policy_addition(self): for attr, value in expected.items(): self.assertEqual(getattr(added, attr), value) + def test_fold_utf8(self): + expected_ascii = 'Subject: =?utf-8?q?=C3=A1?=\n' + expected_utf8 = 'Subject: á\n' + + msg = email.message.EmailMessage() + s = 'á' + msg['Subject'] = s + + p_ascii = email.policy.default.clone() + p_utf8 = email.policy.default.clone(utf8=True) + + self.assertEqual(p_ascii.fold('Subject', msg['Subject']), expected_ascii) + self.assertEqual(p_utf8.fold('Subject', msg['Subject']), expected_utf8) + + self.assertEqual(p_ascii.fold('Subject', s), expected_ascii) + self.assertEqual(p_utf8.fold('Subject', s), expected_utf8) + def test_fold_zero_max_line_length(self): expected = 'Subject: =?utf-8?q?=C3=A1?=\n' diff --git a/Misc/NEWS.d/next/Library/2024-01-26-16-46-21.gh-issue-77749.NY_7TS.rst b/Misc/NEWS.d/next/Library/2024-01-26-16-46-21.gh-issue-77749.NY_7TS.rst new file mode 100644 index 00000000000000..f1c99c09d2dfe1 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-26-16-46-21.gh-issue-77749.NY_7TS.rst @@ -0,0 +1,2 @@ +:meth:`email.policy.EmailPolicy.fold` now always encodes non-ASCII characters +in headers if :attr:`~email.policy.EmailPolicy.utf8` is false. 
From 699779256ec4d4b8afb8211de08ef1382c78c370 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sat, 27 Jan 2024 00:25:16 +0900 Subject: [PATCH 109/160] gh-111968: Unify freelist naming schema to Eric's suggestion (gh-114581) --- Include/internal/pycore_freelist.h | 14 +++++++------- Objects/floatobject.c | 4 ++-- Objects/genobject.c | 4 ++-- Objects/listobject.c | 4 ++-- Objects/sliceobject.c | 14 +++++++------- Objects/tupleobject.c | 2 +- Python/context.c | 4 ++-- Python/object_stack.c | 4 ++-- 8 files changed, 25 insertions(+), 25 deletions(-) diff --git a/Include/internal/pycore_freelist.h b/Include/internal/pycore_freelist.h index dfb12839affedf..b91d2bc066b783 100644 --- a/Include/internal/pycore_freelist.h +++ b/Include/internal/pycore_freelist.h @@ -103,13 +103,13 @@ struct _Py_object_stack_state { }; typedef struct _Py_freelist_state { - struct _Py_float_state float_state; - struct _Py_tuple_state tuple_state; - struct _Py_list_state list_state; - struct _Py_slice_state slice_state; - struct _Py_context_state context_state; - struct _Py_async_gen_state async_gen_state; - struct _Py_object_stack_state object_stack_state; + struct _Py_float_state floats; + struct _Py_tuple_state tuples; + struct _Py_list_state lists; + struct _Py_slice_state slices; + struct _Py_context_state contexts; + struct _Py_async_gen_state async_gens; + struct _Py_object_stack_state object_stacks; } _PyFreeListState; #ifdef __cplusplus diff --git a/Objects/floatobject.c b/Objects/floatobject.c index 912c450a5e1055..b7611d5f96ac3b 100644 --- a/Objects/floatobject.c +++ b/Objects/floatobject.c @@ -32,7 +32,7 @@ get_float_state(void) { _PyFreeListState *state = _PyFreeListState_GET(); assert(state != NULL); - return &state->float_state; + return &state->floats; } #endif @@ -1993,7 +1993,7 @@ void _PyFloat_ClearFreeList(_PyFreeListState *freelist_state, int is_finalization) { #ifdef WITH_FREELISTS - struct _Py_float_state *state = &freelist_state->float_state; + struct _Py_float_state 
*state = &freelist_state->floats; PyFloatObject *f = state->free_list; while (f != NULL) { PyFloatObject *next = (PyFloatObject*) Py_TYPE(f); diff --git a/Objects/genobject.c b/Objects/genobject.c index e9aeb7ab9a9fa8..f47197330fdd80 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -1633,7 +1633,7 @@ static struct _Py_async_gen_state * get_async_gen_state(void) { _PyFreeListState *state = _PyFreeListState_GET(); - return &state->async_gen_state; + return &state->async_gens; } #endif @@ -1659,7 +1659,7 @@ void _PyAsyncGen_ClearFreeLists(_PyFreeListState *freelist_state, int is_finalization) { #ifdef WITH_FREELISTS - struct _Py_async_gen_state *state = &freelist_state->async_gen_state; + struct _Py_async_gen_state *state = &freelist_state->async_gens; while (state->value_numfree > 0) { _PyAsyncGenWrappedValue *o; diff --git a/Objects/listobject.c b/Objects/listobject.c index 401d1026133f4e..1e885f9cb80c4c 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -26,7 +26,7 @@ get_list_state(void) { _PyFreeListState *state = _PyFreeListState_GET(); assert(state != NULL); - return &state->list_state; + return &state->lists; } #endif @@ -124,7 +124,7 @@ void _PyList_ClearFreeList(_PyFreeListState *freelist_state, int is_finalization) { #ifdef WITH_FREELISTS - struct _Py_list_state *state = &freelist_state->list_state; + struct _Py_list_state *state = &freelist_state->lists; while (state->numfree > 0) { PyListObject *op = state->free_list[--state->numfree]; assert(PyList_CheckExact(op)); diff --git a/Objects/sliceobject.c b/Objects/sliceobject.c index 440c1da30620c3..8b9d6bbfd858b7 100644 --- a/Objects/sliceobject.c +++ b/Objects/sliceobject.c @@ -106,9 +106,9 @@ PyObject _Py_EllipsisObject = _PyObject_HEAD_INIT(&PyEllipsis_Type); void _PySlice_ClearCache(_PyFreeListState *state) { #ifdef WITH_FREELISTS - PySliceObject *obj = state->slice_state.slice_cache; + PySliceObject *obj = state->slices.slice_cache; if (obj != NULL) { - 
state->slice_state.slice_cache = NULL; + state->slices.slice_cache = NULL; PyObject_GC_Del(obj); } #endif @@ -132,9 +132,9 @@ _PyBuildSlice_Consume2(PyObject *start, PyObject *stop, PyObject *step) PySliceObject *obj; #ifdef WITH_FREELISTS _PyFreeListState *state = _PyFreeListState_GET(); - if (state->slice_state.slice_cache != NULL) { - obj = state->slice_state.slice_cache; - state->slice_state.slice_cache = NULL; + if (state->slices.slice_cache != NULL) { + obj = state->slices.slice_cache; + state->slices.slice_cache = NULL; _Py_NewReference((PyObject *)obj); } else @@ -370,8 +370,8 @@ slice_dealloc(PySliceObject *r) Py_DECREF(r->stop); #ifdef WITH_FREELISTS _PyFreeListState *state = _PyFreeListState_GET(); - if (state->slice_state.slice_cache == NULL) { - state->slice_state.slice_cache = r; + if (state->slices.slice_cache == NULL) { + state->slices.slice_cache = r; } else #endif diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index e1b8e4004c6163..b9bf6cd48f6129 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -1125,7 +1125,7 @@ tuple_iter(PyObject *seq) * freelists * *************/ -#define STATE (state->tuple_state) +#define STATE (state->tuples) #define FREELIST_FINALIZED (STATE.numfree[0] < 0) static inline PyTupleObject * diff --git a/Python/context.c b/Python/context.c index 1e90811c374ec6..294485e5b407df 100644 --- a/Python/context.c +++ b/Python/context.c @@ -69,7 +69,7 @@ static struct _Py_context_state * get_context_state(void) { _PyFreeListState *state = _PyFreeListState_GET(); - return &state->context_state; + return &state->contexts; } #endif @@ -1270,7 +1270,7 @@ void _PyContext_ClearFreeList(_PyFreeListState *freelist_state, int is_finalization) { #ifdef WITH_FREELISTS - struct _Py_context_state *state = &freelist_state->context_state; + struct _Py_context_state *state = &freelist_state->contexts; for (; state->numfree > 0; state->numfree--) { PyContext *ctx = state->freelist; state->freelist = (PyContext 
*)ctx->ctx_weakreflist; diff --git a/Python/object_stack.c b/Python/object_stack.c index 66b37bcbb44475..8544892eb71dcb 100644 --- a/Python/object_stack.c +++ b/Python/object_stack.c @@ -12,7 +12,7 @@ static struct _Py_object_stack_state * get_state(void) { _PyFreeListState *state = _PyFreeListState_GET(); - return &state->object_stack_state; + return &state->object_stacks; } _PyObjectStackChunk * @@ -76,7 +76,7 @@ _PyObjectStackChunk_ClearFreeList(_PyFreeListState *free_lists, int is_finalizat return; } - struct _Py_object_stack_state *state = &free_lists->object_stack_state; + struct _Py_object_stack_state *state = &free_lists->object_stacks; while (state->numfree > 0) { _PyObjectStackChunk *buf = state->free_list; state->free_list = buf->prev; From f9c505698a1ac27f5a380780767665ffd2fb8ebc Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Sat, 27 Jan 2024 01:20:21 +0900 Subject: [PATCH 110/160] gh-112087: Make list_repr and list_length to be thread-safe (gh-114582) --- Include/cpython/listobject.h | 4 ++++ Objects/listobject.c | 27 +++++++++++++++++---------- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/Include/cpython/listobject.h b/Include/cpython/listobject.h index 8ade1b164681f9..49f5e8d6d1a0d6 100644 --- a/Include/cpython/listobject.h +++ b/Include/cpython/listobject.h @@ -29,7 +29,11 @@ typedef struct { static inline Py_ssize_t PyList_GET_SIZE(PyObject *op) { PyListObject *list = _PyList_CAST(op); +#ifdef Py_GIL_DISABLED + return _Py_atomic_load_ssize_relaxed(&(_PyVarObject_CAST(list)->ob_size)); +#else return Py_SIZE(list); +#endif } #define PyList_GET_SIZE(op) PyList_GET_SIZE(_PyObject_CAST(op)) diff --git a/Objects/listobject.c b/Objects/listobject.c index 1e885f9cb80c4c..56785e5f37a450 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -383,18 +383,11 @@ list_dealloc(PyObject *self) } static PyObject * -list_repr(PyObject *self) +list_repr_impl(PyListObject *v) { - PyListObject *v = (PyListObject *)self; - Py_ssize_t i; 
PyObject *s; _PyUnicodeWriter writer; - - if (Py_SIZE(v) == 0) { - return PyUnicode_FromString("[]"); - } - - i = Py_ReprEnter((PyObject*)v); + Py_ssize_t i = Py_ReprEnter((PyObject*)v); if (i != 0) { return i > 0 ? PyUnicode_FromString("[...]") : NULL; } @@ -439,10 +432,24 @@ list_repr(PyObject *self) return NULL; } +static PyObject * +list_repr(PyObject *self) +{ + if (PyList_GET_SIZE(self) == 0) { + return PyUnicode_FromString("[]"); + } + PyListObject *v = (PyListObject *)self; + PyObject *ret = NULL; + Py_BEGIN_CRITICAL_SECTION(v); + ret = list_repr_impl(v); + Py_END_CRITICAL_SECTION(); + return ret; +} + static Py_ssize_t list_length(PyObject *a) { - return Py_SIZE(a); + return PyList_GET_SIZE(a); } static int From 102569d150b690efe94c13921e93da66081ba1cf Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Fri, 26 Jan 2024 17:27:29 +0000 Subject: [PATCH 111/160] Use Unicode unconditionally for _winapi.CreateFile (GH-114611) Currently it switches based on build settings, but argument clinic does not handle it correctly. 
--- Modules/_winapi.c | 17 +++++++++-------- Modules/clinic/_winapi.c.h | 13 ++++++++----- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/Modules/_winapi.c b/Modules/_winapi.c index 26302b559817b3..5e5eb123c4ccff 100644 --- a/Modules/_winapi.c +++ b/Modules/_winapi.c @@ -441,7 +441,7 @@ _winapi_ConnectNamedPipe_impl(PyObject *module, HANDLE handle, /*[clinic input] _winapi.CreateFile -> HANDLE - file_name: LPCTSTR + file_name: LPCWSTR desired_access: DWORD share_mode: DWORD security_attributes: LPSECURITY_ATTRIBUTES @@ -452,12 +452,12 @@ _winapi.CreateFile -> HANDLE [clinic start generated code]*/ static HANDLE -_winapi_CreateFile_impl(PyObject *module, LPCTSTR file_name, +_winapi_CreateFile_impl(PyObject *module, LPCWSTR file_name, DWORD desired_access, DWORD share_mode, LPSECURITY_ATTRIBUTES security_attributes, DWORD creation_disposition, DWORD flags_and_attributes, HANDLE template_file) -/*[clinic end generated code: output=417ddcebfc5a3d53 input=6423c3e40372dbd5]*/ +/*[clinic end generated code: output=818c811e5e04d550 input=1fa870ed1c2e3d69]*/ { HANDLE handle; @@ -468,14 +468,15 @@ _winapi_CreateFile_impl(PyObject *module, LPCTSTR file_name, } Py_BEGIN_ALLOW_THREADS - handle = CreateFile(file_name, desired_access, - share_mode, security_attributes, - creation_disposition, - flags_and_attributes, template_file); + handle = CreateFileW(file_name, desired_access, + share_mode, security_attributes, + creation_disposition, + flags_and_attributes, template_file); Py_END_ALLOW_THREADS - if (handle == INVALID_HANDLE_VALUE) + if (handle == INVALID_HANDLE_VALUE) { PyErr_SetFromWindowsErr(0); + } return handle; } diff --git a/Modules/clinic/_winapi.c.h b/Modules/clinic/_winapi.c.h index 3a3231c051ef71..d1052f38919dde 100644 --- a/Modules/clinic/_winapi.c.h +++ b/Modules/clinic/_winapi.c.h @@ -162,7 +162,7 @@ PyDoc_STRVAR(_winapi_CreateFile__doc__, {"CreateFile", _PyCFunction_CAST(_winapi_CreateFile), METH_FASTCALL, _winapi_CreateFile__doc__}, static 
HANDLE -_winapi_CreateFile_impl(PyObject *module, LPCTSTR file_name, +_winapi_CreateFile_impl(PyObject *module, LPCWSTR file_name, DWORD desired_access, DWORD share_mode, LPSECURITY_ATTRIBUTES security_attributes, DWORD creation_disposition, @@ -172,7 +172,7 @@ static PyObject * _winapi_CreateFile(PyObject *module, PyObject *const *args, Py_ssize_t nargs) { PyObject *return_value = NULL; - LPCTSTR file_name; + LPCWSTR file_name = NULL; DWORD desired_access; DWORD share_mode; LPSECURITY_ATTRIBUTES security_attributes; @@ -181,8 +181,8 @@ _winapi_CreateFile(PyObject *module, PyObject *const *args, Py_ssize_t nargs) HANDLE template_file; HANDLE _return_value; - if (!_PyArg_ParseStack(args, nargs, "skk" F_POINTER "kk" F_HANDLE ":CreateFile", - &file_name, &desired_access, &share_mode, &security_attributes, &creation_disposition, &flags_and_attributes, &template_file)) { + if (!_PyArg_ParseStack(args, nargs, "O&kk" F_POINTER "kk" F_HANDLE ":CreateFile", + _PyUnicode_WideCharString_Converter, &file_name, &desired_access, &share_mode, &security_attributes, &creation_disposition, &flags_and_attributes, &template_file)) { goto exit; } _return_value = _winapi_CreateFile_impl(module, file_name, desired_access, share_mode, security_attributes, creation_disposition, flags_and_attributes, template_file); @@ -195,6 +195,9 @@ _winapi_CreateFile(PyObject *module, PyObject *const *args, Py_ssize_t nargs) return_value = HANDLE_TO_PYNUM(_return_value); exit: + /* Cleanup for file_name */ + PyMem_Free((void *)file_name); + return return_value; } @@ -1479,4 +1482,4 @@ _winapi_CopyFile2(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyO return return_value; } -/*[clinic end generated code: output=e1a9908bb82a6379 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=2350d4f2275d3a6f input=a9049054013a1b77]*/ From d91ddff5de61447844f1dac575d2e670c8d7e26b Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Fri, 26 Jan 2024 17:33:44 +0000 Subject: [PATCH 112/160] 
gh-114435: Allow test_stat_inaccessible_file() to have matching ino/dev (GH-114571) This may occur if Windows allows reading stat information from a file even if the current user does not have access. --- Lib/test/test_os.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index ed1f304c6c8cac..e6f0dfde8cb4ae 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -3129,10 +3129,9 @@ def cleanup(): if support.verbose: print(" without access:", stat2) - # We cannot get st_dev/st_ino, so ensure those are 0 or else our test - # is not set up correctly - self.assertEqual(0, stat2.st_dev) - self.assertEqual(0, stat2.st_ino) + # We may not get st_dev/st_ino, so ensure those are 0 or match + self.assertIn(stat2.st_dev, (0, stat1.st_dev)) + self.assertIn(stat2.st_ino, (0, stat1.st_ino)) # st_mode and st_size should match (for a normal file, at least) self.assertEqual(stat1.st_mode, stat2.st_mode) From 3f62bf32caf04cedb2c59579a0ce835d1e793d4d Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Fri, 26 Jan 2024 20:44:45 +0300 Subject: [PATCH 113/160] Document PyOS_strtoul and PyOS_strtol (GH-114048) --- Doc/c-api/conversion.rst | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/Doc/c-api/conversion.rst b/Doc/c-api/conversion.rst index c5350123dfdfdc..4aaf3905e81c8a 100644 --- a/Doc/c-api/conversion.rst +++ b/Doc/c-api/conversion.rst @@ -48,6 +48,42 @@ The return value (*rv*) for these functions should be interpreted as follows: The following functions provide locale-independent string to number conversions. +.. c:function:: unsigned long PyOS_strtoul(const char *str, char **ptr, int base) + + Convert the initial part of the string in ``str`` to an :c:expr:`unsigned + long` value according to the given ``base``, which must be between ``2`` and + ``36`` inclusive, or be the special value ``0``. + + Leading white space and case of characters are ignored. 
If ``base`` is zero + it looks for a leading ``0b``, ``0o`` or ``0x`` to tell which base. If + these are absent it defaults to ``10``. Base must be 0 or between 2 and 36 + (inclusive). If ``ptr`` is non-``NULL`` it will contain a pointer to the + end of the scan. + + If the converted value falls out of range of the corresponding return type, + a range error occurs (:c:data:`errno` is set to :c:macro:`!ERANGE`) and + :c:macro:`!ULONG_MAX` is returned. If no conversion can be performed, ``0`` + is returned. + + See also the Unix man page :manpage:`strtoul(3)`. + + .. versionadded:: 3.2 + + +.. c:function:: long PyOS_strtol(const char *str, char **ptr, int base) + + Convert the initial part of the string in ``str`` to a :c:expr:`long` value + according to the given ``base``, which must be between ``2`` and ``36`` + inclusive, or be the special value ``0``. + + Same as :c:func:`PyOS_strtoul`, but return a :c:expr:`long` value instead + and :c:macro:`LONG_MAX` on overflows. + + See also the Unix man page :manpage:`strtol(3)`. + + .. versionadded:: 3.2 + + .. c:function:: double PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception) Convert a string ``s`` to a :c:expr:`double`, raising a Python From 6c2b419fb91c8d7daa769d39f73768114b5eb45a Mon Sep 17 00:00:00 2001 From: "Erlend E. 
Aasland" Date: Fri, 26 Jan 2024 19:12:48 +0100 Subject: [PATCH 114/160] Docs: rework the dbm.dumb introduction (#114550) - consistently use correct parameter markup - consistently use submodule name as database name - improve accuracy of the dbm.dumb.open() spec - remove dumbdbm class refs and replace them with generic "database object" - use parameter list for dbm.dumb.open() --- Doc/library/dbm.rst | 66 ++++++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 30 deletions(-) diff --git a/Doc/library/dbm.rst b/Doc/library/dbm.rst index 3a7dad1a0736a0..076e86143d06a6 100644 --- a/Doc/library/dbm.rst +++ b/Doc/library/dbm.rst @@ -351,13 +351,14 @@ This module can be used with the "classic" NDBM interface or the -------------- -The :mod:`dbm.dumb` module provides a persistent dictionary-like interface which -is written entirely in Python. Unlike other modules such as :mod:`dbm.gnu` no -external library is required. As with other persistent mappings, the keys and -values are always stored as bytes. - -The module defines the following: +The :mod:`dbm.dumb` module provides a persistent :class:`dict`-like +interface which is written entirely in Python. +Unlike other :mod:`dbm` backends, such as :mod:`dbm.gnu`, no +external library is required. +As with other :mod:`dbm` backends, +the keys and values are always stored as :class:`bytes`. +The :mod:`!dbm.dumb` module defines the following: .. exception:: error @@ -365,26 +366,33 @@ The module defines the following: raised for general mapping errors like specifying an incorrect key. -.. function:: open(filename[, flag[, mode]]) +.. function:: open(filename, flag="c", mode=0o666) - Open a ``dumbdbm`` database and return a dumbdbm object. The *filename* argument is - the basename of the database file (without any specific extensions). When a - dumbdbm database is created, files with :file:`.dat` and :file:`.dir` extensions - are created. + Open a :mod:`!dbm.dumb` database. 
+ The returned database object behaves similarly to a :term:`mapping`, + in addition to providing :meth:`~dumbdbm.sync` and :meth:`~dumbdbm.close` + methods. - The optional *flag* argument can be: + :param filename: + The basename of the database file (without extensions). + A new database creates the following files: - .. csv-table:: - :header: "Value", "Meaning" + - :file:`{filename}.dat` + - :file:`{filename}.dir` + :type filename: :term:`path-like object` - ``'r'``, |flag_r| - ``'w'``, |flag_w| - ``'c'`` (default), |flag_c| - ``'n'``, |flag_n| + :param str flag: + .. csv-table:: + :header: "Value", "Meaning" - The optional *mode* argument is the Unix mode of the file, used only when the - database has to be created. It defaults to octal ``0o666`` (and will be modified - by the prevailing umask). + ``'r'``, |flag_r| + ``'w'``, |flag_w| + ``'c'`` (default), |flag_c| + ``'n'``, |flag_n| + + :param int mode: + The Unix file access mode of the file (default: ``0o666``), + used only when the database has to be created. .. warning:: It is possible to crash the Python interpreter when loading a database @@ -392,20 +400,18 @@ The module defines the following: Python's AST compiler. .. versionchanged:: 3.5 - :func:`.open` always creates a new database when the flag has the value - ``'n'``. + :func:`open` always creates a new database when *flag* is ``'n'``. .. versionchanged:: 3.8 - A database opened with flags ``'r'`` is now read-only. Opening with - flags ``'r'`` and ``'w'`` no longer creates a database if it does not - exist. + A database is opened read-only if *flag* is ``'r'``. + A database is not created if it does not exist if *flag* is ``'r'`` or ``'w'``. .. versionchanged:: 3.11 - Accepts :term:`path-like object` for filename. + *filename* accepts a :term:`path-like object`. 
In addition to the methods provided by the - :class:`collections.abc.MutableMapping` class, :class:`dumbdbm` objects - provide the following methods: + :class:`collections.abc.MutableMapping` class, + the following methods are provided: .. method:: dumbdbm.sync() @@ -414,5 +420,5 @@ The module defines the following: .. method:: dumbdbm.close() - Close the ``dumbdbm`` database. + Close the database. From 7e31d6dea276ac91402aefb023c58d239dfd9246 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Fri, 26 Jan 2024 18:14:24 +0000 Subject: [PATCH 115/160] gh-88569: add `ntpath.isreserved()` (#95486) Add `ntpath.isreserved()`, which identifies reserved pathnames such as "NUL", "AUX" and "CON". Deprecate `pathlib.PurePath.is_reserved()`. --------- Co-authored-by: Eryk Sun Co-authored-by: Brett Cannon Co-authored-by: Steve Dower --- Doc/library/os.path.rst | 22 ++++++++ Doc/library/pathlib.rst | 13 ++--- Doc/whatsnew/3.13.rst | 15 +++++ Lib/ntpath.py | 40 ++++++++++++- Lib/pathlib/__init__.py | 28 +++------- Lib/test/test_ntpath.py | 56 +++++++++++++++++++ Lib/test/test_pathlib/test_pathlib.py | 48 ++-------------- ...2-07-31-01-24-40.gh-issue-88569.eU0--b.rst | 4 ++ 8 files changed, 154 insertions(+), 72 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-07-31-01-24-40.gh-issue-88569.eU0--b.rst diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst index 3cab7a260df008..34bc76b231de92 100644 --- a/Doc/library/os.path.rst +++ b/Doc/library/os.path.rst @@ -326,6 +326,28 @@ the :mod:`glob` module.) .. versionadded:: 3.12 +.. function:: isreserved(path) + + Return ``True`` if *path* is a reserved pathname on the current system. + + On Windows, reserved filenames include those that end with a space or dot; + those that contain colons (i.e. file streams such as "name:stream"), + wildcard characters (i.e. ``'*?"<>'``), pipe, or ASCII control characters; + as well as DOS device names such as "NUL", "CON", "CONIN$", "CONOUT$", + "AUX", "PRN", "COM1", and "LPT1". 
+ + .. note:: + + This function approximates rules for reserved paths on most Windows + systems. These rules change over time in various Windows releases. + This function may be updated in future Python releases as changes to + the rules become broadly available. + + .. availability:: Windows. + + .. versionadded:: 3.13 + + .. function:: join(path, *paths) Join one or more path segments intelligently. The return value is the diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index 2f4ff4efec47f8..f1aba793fda03e 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -535,14 +535,13 @@ Pure paths provide the following methods and properties: reserved under Windows, ``False`` otherwise. With :class:`PurePosixPath`, ``False`` is always returned. - >>> PureWindowsPath('nul').is_reserved() - True - >>> PurePosixPath('nul').is_reserved() - False - - File system calls on reserved paths can fail mysteriously or have - unintended effects. + .. versionchanged:: 3.13 + Windows path names that contain a colon, or end with a dot or a space, + are considered reserved. UNC paths may be reserved. + .. deprecated-removed:: 3.13 3.15 + This method is deprecated; use :func:`os.path.isreserved` to detect + reserved paths on Windows. .. method:: PurePath.joinpath(*pathsegments) diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 8c2bb05920d5b6..985e34b453f63a 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -321,6 +321,9 @@ os os.path ------- +* Add :func:`os.path.isreserved` to check if a path is reserved on the current + system. This function is only available on Windows. + (Contributed by Barney Gale in :gh:`88569`.) * On Windows, :func:`os.path.isabs` no longer considers paths starting with exactly one (back)slash to be absolute. (Contributed by Barney Gale and Jon Foster in :gh:`44626`.) @@ -498,6 +501,12 @@ Deprecated security and functionality bugs. 
This includes removal of the ``--cgi`` flag to the ``python -m http.server`` command line in 3.15. +* :mod:`pathlib`: + + * :meth:`pathlib.PurePath.is_reserved` is deprecated and scheduled for + removal in Python 3.15. Use :func:`os.path.isreserved` to detect reserved + paths on Windows. + * :mod:`sys`: :func:`sys._enablelegacywindowsfsencoding` function. Replace it with :envvar:`PYTHONLEGACYWINDOWSFSENCODING` environment variable. (Contributed by Inada Naoki in :gh:`73427`.) @@ -709,6 +718,12 @@ Pending Removal in Python 3.15 :func:`locale.getlocale()` instead. (Contributed by Hugo van Kemenade in :gh:`111187`.) +* :mod:`pathlib`: + + * :meth:`pathlib.PurePath.is_reserved` is deprecated and scheduled for + removal in Python 3.15. Use :func:`os.path.isreserved` to detect reserved + paths on Windows. + * :class:`typing.NamedTuple`: * The undocumented keyword argument syntax for creating NamedTuple classes diff --git a/Lib/ntpath.py b/Lib/ntpath.py index aa0e018eb668c2..e7cbfe17ecb3c8 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -26,8 +26,8 @@ __all__ = ["normcase","isabs","join","splitdrive","splitroot","split","splitext", "basename","dirname","commonprefix","getsize","getmtime", "getatime","getctime", "islink","exists","lexists","isdir","isfile", - "ismount", "expanduser","expandvars","normpath","abspath", - "curdir","pardir","sep","pathsep","defpath","altsep", + "ismount","isreserved","expanduser","expandvars","normpath", + "abspath","curdir","pardir","sep","pathsep","defpath","altsep", "extsep","devnull","realpath","supports_unicode_filenames","relpath", "samefile", "sameopenfile", "samestat", "commonpath", "isjunction"] @@ -330,6 +330,42 @@ def ismount(path): return False +_reserved_chars = frozenset( + {chr(i) for i in range(32)} | + {'"', '*', ':', '<', '>', '?', '|', '/', '\\'} +) + +_reserved_names = frozenset( + {'CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$'} | + {f'COM{c}' for c in '123456789\xb9\xb2\xb3'} | + {f'LPT{c}' for c in 
'123456789\xb9\xb2\xb3'} +) + +def isreserved(path): + """Return true if the pathname is reserved by the system.""" + # Refer to "Naming Files, Paths, and Namespaces": + # https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file + path = os.fsdecode(splitroot(path)[2]).replace(altsep, sep) + return any(_isreservedname(name) for name in reversed(path.split(sep))) + +def _isreservedname(name): + """Return true if the filename is reserved by the system.""" + # Trailing dots and spaces are reserved. + if name.endswith(('.', ' ')) and name not in ('.', '..'): + return True + # Wildcards, separators, colon, and pipe (*?"<>/\:|) are reserved. + # ASCII control characters (0-31) are reserved. + # Colon is reserved for file streams (e.g. "name:stream[:type]"). + if _reserved_chars.intersection(name): + return True + # DOS device names are reserved (e.g. "nul" or "nul .txt"). The rules + # are complex and vary across Windows versions. On the side of + # caution, return True for names that may not be reserved. + if name.partition('.')[0].rstrip(' ').upper() in _reserved_names: + return True + return False + + # Expand paths beginning with '~' or '~user'. # '~' means $HOME; '~user' means that user's home directory. # If the path doesn't begin with '~', or if the user or $HOME is unknown, diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py index eee82ef26bc7e7..cc159edab5796f 100644 --- a/Lib/pathlib/__init__.py +++ b/Lib/pathlib/__init__.py @@ -33,15 +33,6 @@ ] -# Reference for Windows paths can be found at -# https://learn.microsoft.com/en-gb/windows/win32/fileio/naming-a-file . -_WIN_RESERVED_NAMES = frozenset( - {'CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$'} | - {f'COM{c}' for c in '123456789\xb9\xb2\xb3'} | - {f'LPT{c}' for c in '123456789\xb9\xb2\xb3'} -) - - class _PathParents(Sequence): """This object provides sequence-like access to the logical ancestors of a path. 
Don't try to construct it yourself.""" @@ -433,18 +424,13 @@ def is_absolute(self): def is_reserved(self): """Return True if the path contains one of the special names reserved by the system, if any.""" - if self.pathmod is not ntpath or not self.name: - return False - - # NOTE: the rules for reserved names seem somewhat complicated - # (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not - # exist). We err on the side of caution and return True for paths - # which are not considered reserved by Windows. - if self.drive.startswith('\\\\'): - # UNC paths are never reserved. - return False - name = self.name.partition('.')[0].partition(':')[0].rstrip(' ') - return name.upper() in _WIN_RESERVED_NAMES + msg = ("pathlib.PurePath.is_reserved() is deprecated and scheduled " + "for removal in Python 3.15. Use os.path.isreserved() to " + "detect reserved paths on Windows.") + warnings.warn(msg, DeprecationWarning, stacklevel=2) + if self.pathmod is ntpath: + return self.pathmod.isreserved(self) + return False def as_uri(self): """Return the path as a URI.""" diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index aefcb98f1c30eb..9cb03e3cd5de8d 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -981,6 +981,62 @@ def test_ismount(self): self.assertTrue(ntpath.ismount(b"\\\\localhost\\c$")) self.assertTrue(ntpath.ismount(b"\\\\localhost\\c$\\")) + def test_isreserved(self): + self.assertFalse(ntpath.isreserved('')) + self.assertFalse(ntpath.isreserved('.')) + self.assertFalse(ntpath.isreserved('..')) + self.assertFalse(ntpath.isreserved('/')) + self.assertFalse(ntpath.isreserved('/foo/bar')) + # A name that ends with a space or dot is reserved. + self.assertTrue(ntpath.isreserved('foo.')) + self.assertTrue(ntpath.isreserved('foo ')) + # ASCII control characters are reserved. + self.assertTrue(ntpath.isreserved('\foo')) + # Wildcard characters, colon, and pipe are reserved. 
+ self.assertTrue(ntpath.isreserved('foo*bar')) + self.assertTrue(ntpath.isreserved('foo?bar')) + self.assertTrue(ntpath.isreserved('foo"bar')) + self.assertTrue(ntpath.isreserved('foo<bar')) + self.assertTrue(ntpath.isreserved('foo>bar')) + self.assertTrue(ntpath.isreserved('foo:bar')) + self.assertTrue(ntpath.isreserved('foo|bar')) + # Case-insensitive DOS-device names are reserved. + self.assertTrue(ntpath.isreserved('nul')) + self.assertTrue(ntpath.isreserved('aux')) + self.assertTrue(ntpath.isreserved('prn')) + self.assertTrue(ntpath.isreserved('con')) + self.assertTrue(ntpath.isreserved('conin$')) + self.assertTrue(ntpath.isreserved('conout$')) + # COM/LPT + 1-9 or + superscript 1-3 are reserved. + self.assertTrue(ntpath.isreserved('COM1')) + self.assertTrue(ntpath.isreserved('LPT9')) + self.assertTrue(ntpath.isreserved('com\xb9')) + self.assertTrue(ntpath.isreserved('com\xb2')) + self.assertTrue(ntpath.isreserved('lpt\xb3')) + # DOS-device name matching ignores characters after a dot or + # a colon and also ignores trailing spaces. + self.assertTrue(ntpath.isreserved('NUL.txt')) + self.assertTrue(ntpath.isreserved('PRN ')) + self.assertTrue(ntpath.isreserved('AUX .txt')) + self.assertTrue(ntpath.isreserved('COM1:bar')) + self.assertTrue(ntpath.isreserved('LPT9 :bar')) + # DOS-device names are only matched at the beginning + # of a path component. + self.assertFalse(ntpath.isreserved('bar.com9')) + self.assertFalse(ntpath.isreserved('bar.lpt9')) + # The entire path is checked, except for the drive. + self.assertTrue(ntpath.isreserved('c:/bar/baz/NUL')) + self.assertTrue(ntpath.isreserved('c:/NUL/bar/baz')) + self.assertFalse(ntpath.isreserved('//./NUL')) + # Bytes are supported. 
+ self.assertFalse(ntpath.isreserved(b'')) + self.assertFalse(ntpath.isreserved(b'.')) + self.assertFalse(ntpath.isreserved(b'..')) + self.assertFalse(ntpath.isreserved(b'/')) + self.assertFalse(ntpath.isreserved(b'/foo/bar')) + self.assertTrue(ntpath.isreserved(b'foo.')) + self.assertTrue(ntpath.isreserved(b'nul')) + def assertEqualCI(self, s1, s2): """Assert that two strings are equal ignoring case differences.""" self.assertEqual(s1.lower(), s2.lower()) diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index bdbe92369639ef..2da3afdd198015 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -349,6 +349,12 @@ def test_is_relative_to_several_args(self): with self.assertWarns(DeprecationWarning): p.is_relative_to('a', 'b') + def test_is_reserved_deprecated(self): + P = self.cls + p = P('a/b') + with self.assertWarns(DeprecationWarning): + p.is_reserved() + def test_match_empty(self): P = self.cls self.assertRaises(ValueError, P('a').match, '') @@ -414,13 +420,6 @@ def test_is_absolute(self): self.assertTrue(P('//a').is_absolute()) self.assertTrue(P('//a/b').is_absolute()) - def test_is_reserved(self): - P = self.cls - self.assertIs(False, P('').is_reserved()) - self.assertIs(False, P('/').is_reserved()) - self.assertIs(False, P('/foo/bar').is_reserved()) - self.assertIs(False, P('/dev/con/PRN/NUL').is_reserved()) - def test_join(self): P = self.cls p = P('//a') @@ -1082,41 +1081,6 @@ def test_div(self): self.assertEqual(p / P('./dd:s'), P('C:/a/b/dd:s')) self.assertEqual(p / P('E:d:s'), P('E:d:s')) - def test_is_reserved(self): - P = self.cls - self.assertIs(False, P('').is_reserved()) - self.assertIs(False, P('/').is_reserved()) - self.assertIs(False, P('/foo/bar').is_reserved()) - # UNC paths are never reserved. - self.assertIs(False, P('//my/share/nul/con/aux').is_reserved()) - # Case-insensitive DOS-device names are reserved. 
- self.assertIs(True, P('nul').is_reserved()) - self.assertIs(True, P('aux').is_reserved()) - self.assertIs(True, P('prn').is_reserved()) - self.assertIs(True, P('con').is_reserved()) - self.assertIs(True, P('conin$').is_reserved()) - self.assertIs(True, P('conout$').is_reserved()) - # COM/LPT + 1-9 or + superscript 1-3 are reserved. - self.assertIs(True, P('COM1').is_reserved()) - self.assertIs(True, P('LPT9').is_reserved()) - self.assertIs(True, P('com\xb9').is_reserved()) - self.assertIs(True, P('com\xb2').is_reserved()) - self.assertIs(True, P('lpt\xb3').is_reserved()) - # DOS-device name mataching ignores characters after a dot or - # a colon and also ignores trailing spaces. - self.assertIs(True, P('NUL.txt').is_reserved()) - self.assertIs(True, P('PRN ').is_reserved()) - self.assertIs(True, P('AUX .txt').is_reserved()) - self.assertIs(True, P('COM1:bar').is_reserved()) - self.assertIs(True, P('LPT9 :bar').is_reserved()) - # DOS-device names are only matched at the beginning - # of a path component. - self.assertIs(False, P('bar.com9').is_reserved()) - self.assertIs(False, P('bar.lpt9').is_reserved()) - # Only the last path component matters. - self.assertIs(True, P('c:/baz/con/NUL').is_reserved()) - self.assertIs(False, P('c:/NUL/con/baz').is_reserved()) - class PurePathSubclassTest(PurePathTest): class cls(pathlib.PurePath): diff --git a/Misc/NEWS.d/next/Library/2022-07-31-01-24-40.gh-issue-88569.eU0--b.rst b/Misc/NEWS.d/next/Library/2022-07-31-01-24-40.gh-issue-88569.eU0--b.rst new file mode 100644 index 00000000000000..31dd985bb5c3b6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-07-31-01-24-40.gh-issue-88569.eU0--b.rst @@ -0,0 +1,4 @@ +Add :func:`os.path.isreserved`, which identifies reserved pathnames such +as "NUL", "AUX" and "CON". This function is only available on Windows. + +Deprecate :meth:`pathlib.PurePath.is_reserved`. 
From df17b5264378f38f49b16343b5016a8882212a8a Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Fri, 26 Jan 2024 11:33:02 -0700 Subject: [PATCH 116/160] Add More Entries to CODEOWNERS (#114617) --- .github/CODEOWNERS | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 4984170f0d17ff..ae915423ece955 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -45,6 +45,30 @@ Tools/c-analyzer/ @ericsnowcurrently # dbm **/*dbm* @corona10 @erlend-aasland @serhiy-storchaka +# runtime state/lifecycle +**/*pylifecycle* @ericsnowcurrently +**/*pystate* @ericsnowcurrently +**/*preconfig* @ericsnowcurrently +**/*initconfig* @ericsnowcurrently +**/*pathconfig* @ericsnowcurrently +**/*sysmodule* @ericsnowcurrently +**/*bltinmodule* @ericsnowcurrently +**/*gil* @ericsnowcurrently +Include/internal/pycore_runtime.h @ericsnowcurrently +Include/internal/pycore_interp.h @ericsnowcurrently +Include/internal/pycore_tstate.h @ericsnowcurrently +Include/internal/pycore_*_state.h @ericsnowcurrently +Include/internal/pycore_*_init.h @ericsnowcurrently +Include/internal/pycore_atexit.h @ericsnowcurrently +Include/internal/pycore_freelist.h @ericsnowcurrently +Include/internal/pycore_global_objects.h @ericsnowcurrently +Include/internal/pycore_obmalloc.h @ericsnowcurrently +Include/internal/pycore_pymem.h @ericsnowcurrently +Modules/main.c @ericsnowcurrently +Programs/_bootstrap_python.c @ericsnowcurrently +Programs/python.c @ericsnowcurrently +Tools/build/generate_global_objects.py @ericsnowcurrently + # Exceptions Lib/traceback.py @iritkatriel Lib/test/test_except*.py @iritkatriel @@ -79,7 +103,20 @@ Modules/_hacl/** @gpshead # Import (including importlib). 
**/*import* @brettcannon @ericsnowcurrently @ncoghlan @warsaw /Python/import.c @kumaraditya303 -**/*importlib/resources/* @jaraco @warsaw @FFY00 +Python/dynload_*.c @ericsnowcurrently +**/*freeze* @ericsnowcurrently +**/*frozen* @ericsnowcurrently +**/*modsupport* @ericsnowcurrently +**/*modulefinder* @ericsnowcurrently +**/*moduleobject* @ericsnowcurrently +**/*multiphase* @ericsnowcurrently +**/*pkgutil* @ericsnowcurrently +**/*pythonrun* @ericsnowcurrently +**/*runpy* @ericsnowcurrently +**/*singlephase* @ericsnowcurrently +Lib/test/test_module/ @ericsnowcurrently +Doc/c-api/module.rst @ericsnowcurrently +**/*importlib/resources/* @jaraco @warsaw @FFY00 **/importlib/metadata/* @jaraco @warsaw # Dates and times @@ -198,6 +235,8 @@ Doc/c-api/stable.rst @encukou Doc/howto/clinic.rst @erlend-aasland # Subinterpreters +**/*interpreteridobject.* @ericsnowcurrently +**/*crossinterp* @ericsnowcurrently Lib/test/support/interpreters/ @ericsnowcurrently Modules/_xx*interp*module.c @ericsnowcurrently Lib/test/test_interpreters/ @ericsnowcurrently From 07236f5b39a2e534cf190cd4f7c73300d209520b Mon Sep 17 00:00:00 2001 From: Tristan Pank Date: Fri, 26 Jan 2024 14:48:22 -0500 Subject: [PATCH 117/160] gh-114494: Change logging docstring to bool for exec_info (GH=114558) --- Lib/logging/__init__.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Lib/logging/__init__.py b/Lib/logging/__init__.py index eb7e020d1edfc0..684b58d5548f91 100644 --- a/Lib/logging/__init__.py +++ b/Lib/logging/__init__.py @@ -1493,7 +1493,7 @@ def debug(self, msg, *args, **kwargs): To pass exception information, use the keyword argument exc_info with a true value, e.g. 
- logger.debug("Houston, we have a %s", "thorny problem", exc_info=1) + logger.debug("Houston, we have a %s", "thorny problem", exc_info=True) """ if self.isEnabledFor(DEBUG): self._log(DEBUG, msg, args, **kwargs) @@ -1505,7 +1505,7 @@ def info(self, msg, *args, **kwargs): To pass exception information, use the keyword argument exc_info with a true value, e.g. - logger.info("Houston, we have a %s", "notable problem", exc_info=1) + logger.info("Houston, we have a %s", "notable problem", exc_info=True) """ if self.isEnabledFor(INFO): self._log(INFO, msg, args, **kwargs) @@ -1517,7 +1517,7 @@ def warning(self, msg, *args, **kwargs): To pass exception information, use the keyword argument exc_info with a true value, e.g. - logger.warning("Houston, we have a %s", "bit of a problem", exc_info=1) + logger.warning("Houston, we have a %s", "bit of a problem", exc_info=True) """ if self.isEnabledFor(WARNING): self._log(WARNING, msg, args, **kwargs) @@ -1529,7 +1529,7 @@ def error(self, msg, *args, **kwargs): To pass exception information, use the keyword argument exc_info with a true value, e.g. - logger.error("Houston, we have a %s", "major problem", exc_info=1) + logger.error("Houston, we have a %s", "major problem", exc_info=True) """ if self.isEnabledFor(ERROR): self._log(ERROR, msg, args, **kwargs) @@ -1547,7 +1547,7 @@ def critical(self, msg, *args, **kwargs): To pass exception information, use the keyword argument exc_info with a true value, e.g. - logger.critical("Houston, we have a %s", "major disaster", exc_info=1) + logger.critical("Houston, we have a %s", "major disaster", exc_info=True) """ if self.isEnabledFor(CRITICAL): self._log(CRITICAL, msg, args, **kwargs) @@ -1565,7 +1565,7 @@ def log(self, level, msg, *args, **kwargs): To pass exception information, use the keyword argument exc_info with a true value, e.g. 
+ logger.log(level, "We have a %s", "mysterious problem", exc_info=True) """ if not isinstance(level, int): if raiseExceptions: From b5c7c84673b96bfdd7c877521a970f7a4beafece Mon Sep 17 00:00:00 2001 From: Aiden Fox Ivey Date: Fri, 26 Jan 2024 15:36:50 -0500 Subject: [PATCH 118/160] gh-114490: Add check for Mach-O linkage in Lib/platform.py (#114491) ``platform.architecture()`` now returns the format of binaries (e.g. Mach-O) instead of the default empty string. Co-authored-by: AN Long --- Lib/platform.py | 2 ++ .../next/macOS/2024-01-23-11-35-26.gh-issue-114490.FrQOQ0.rst | 1 + 2 files changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/macOS/2024-01-23-11-35-26.gh-issue-114490.FrQOQ0.rst diff --git a/Lib/platform.py b/Lib/platform.py index 75aa55510858fd..b56472235ee9e4 100755 --- a/Lib/platform.py +++ b/Lib/platform.py @@ -752,6 +752,8 @@ def architecture(executable=sys.executable, bits='', linkage=''): # Linkage if 'ELF' in fileout: linkage = 'ELF' + elif 'Mach-O' in fileout: + linkage = "Mach-O" elif 'PE' in fileout: # E.g. Windows uses this format if 'Windows' in fileout: diff --git a/Misc/NEWS.d/next/macOS/2024-01-23-11-35-26.gh-issue-114490.FrQOQ0.rst b/Misc/NEWS.d/next/macOS/2024-01-23-11-35-26.gh-issue-114490.FrQOQ0.rst new file mode 100644 index 00000000000000..abd296f8608518 --- /dev/null +++ b/Misc/NEWS.d/next/macOS/2024-01-23-11-35-26.gh-issue-114490.FrQOQ0.rst @@ -0,0 +1 @@ +Add Mach-O linkage support for :func:`platform.architecture()`. From 7a9727e10c14a82e8e20f5b85e368a6f937db203 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Fri, 26 Jan 2024 22:29:28 +0000 Subject: [PATCH 119/160] pathlib tests: annotate tests needing symlinks with decorator (#114625) Add `@needs_symlinks` decorator for tests that require symlink support in the path class. Also add `@needs_windows` and `@needs_posix` decorators for tests that require a specific path flavour. 
These aren't much used yet, but will be later. --- Lib/test/test_pathlib/test_pathlib.py | 17 ++--- Lib/test/test_pathlib/test_pathlib_abc.py | 86 +++++++++++++---------- 2 files changed, 55 insertions(+), 48 deletions(-) diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 2da3afdd198015..2cef3b295559d8 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -19,6 +19,7 @@ from test.support import os_helper from test.support.os_helper import TESTFN, FakePath from test.test_pathlib import test_pathlib_abc +from test.test_pathlib.test_pathlib_abc import needs_posix, needs_windows, needs_symlinks try: import grp, pwd @@ -26,11 +27,6 @@ grp = pwd = None -only_nt = unittest.skipIf(os.name != 'nt', - 'test requires a Windows-compatible system') -only_posix = unittest.skipIf(os.name == 'nt', - 'test requires a POSIX-compatible system') - root_in_posix = False if hasattr(os, 'geteuid'): root_in_posix = (os.geteuid() == 0) @@ -1268,7 +1264,7 @@ def test_chmod(self): self.assertEqual(p.stat().st_mode, new_mode) # On Windows, os.chmod does not follow symlinks (issue #15411) - @only_posix + @needs_posix @os_helper.skip_unless_working_chmod def test_chmod_follow_symlinks_true(self): p = self.cls(self.base) / 'linkA' @@ -1537,7 +1533,7 @@ def test_mkdir_exist_ok_root(self): self.cls('/').resolve().mkdir(exist_ok=True) self.cls('/').resolve().mkdir(parents=True, exist_ok=True) - @only_nt # XXX: not sure how to test this on POSIX. + @needs_windows # XXX: not sure how to test this on POSIX. 
def test_mkdir_with_unknown_drive(self): for d in 'ZYXWVUTSRQPONMLKJIHGFEDCBA': p = self.cls(d + ':\\') @@ -1602,9 +1598,8 @@ def my_mkdir(path, mode=0o777): self.assertNotIn(str(p12), concurrently_created) self.assertTrue(p.exists()) + @needs_symlinks def test_symlink_to(self): - if not self.can_symlink: - self.skipTest("symlinks required") P = self.cls(self.base) target = P / 'fileA' # Symlinking a path target. @@ -1848,7 +1843,7 @@ def test_rglob_pathlike(self): self.assertEqual(expect, set(p.rglob(FakePath(pattern)))) -@only_posix +@unittest.skipIf(os.name == 'nt', 'test requires a POSIX-compatible system') class PosixPathTest(PathTest, PurePosixPathTest): cls = pathlib.PosixPath @@ -2024,7 +2019,7 @@ def test_from_uri_pathname2url(self): self.assertEqual(P.from_uri('file:' + pathname2url('//foo/bar')), P('//foo/bar')) -@only_nt +@unittest.skipIf(os.name != 'nt', 'test requires a Windows-compatible system') class WindowsPathTest(PathTest, PureWindowsPathTest): cls = pathlib.WindowsPath diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 364f776dbb1413..b19e9b40419c7a 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -11,6 +11,27 @@ from test.support.os_helper import TESTFN +_tests_needing_posix = set() +_tests_needing_windows = set() +_tests_needing_symlinks = set() + + +def needs_posix(fn): + """Decorator that marks a test as requiring a POSIX-flavoured path class.""" + _tests_needing_posix.add(fn.__name__) + return fn + +def needs_windows(fn): + """Decorator that marks a test as requiring a Windows-flavoured path class.""" + _tests_needing_windows.add(fn.__name__) + return fn + +def needs_symlinks(fn): + """Decorator that marks a test as requiring a path class that supports symlinks.""" + _tests_needing_symlinks.add(fn.__name__) + return fn + + class UnsupportedOperationTest(unittest.TestCase): def test_is_notimplemented(self): 
self.assertTrue(issubclass(UnsupportedOperation, NotImplementedError)) @@ -115,6 +136,11 @@ class DummyPurePathTest(unittest.TestCase): base = f'/this/path/kills/fascists/{TESTFN}' def setUp(self): + name = self.id().split('.')[-1] + if name in _tests_needing_posix and self.cls.pathmod is not posixpath: + self.skipTest('requires POSIX-flavoured path class') + if name in _tests_needing_windows and self.cls.pathmod is posixpath: + self.skipTest('requires Windows-flavoured path class') p = self.cls('a') self.pathmod = p.pathmod self.sep = self.pathmod.sep @@ -888,6 +914,9 @@ class DummyPathTest(DummyPurePathTest): def setUp(self): super().setUp() + name = self.id().split('.')[-1] + if name in _tests_needing_symlinks and not self.can_symlink: + self.skipTest('requires symlinks') pathmod = self.cls.pathmod p = self.cls(self.base) p.mkdir(parents=True) @@ -1045,9 +1074,8 @@ def test_iterdir(self): expected += ['linkA', 'linkB', 'brokenLink', 'brokenLinkLoop'] self.assertEqual(paths, { P(self.base, q) for q in expected }) + @needs_symlinks def test_iterdir_symlink(self): - if not self.can_symlink: - self.skipTest("symlinks required") # __iter__ on a symlink to a directory. P = self.cls p = P(self.base, 'linkB') @@ -1116,9 +1144,8 @@ def _check(path, pattern, case_sensitive, expected): _check(path, "dirb/file*", True, []) _check(path, "dirb/file*", False, ["dirB/fileB"]) + @needs_symlinks def test_glob_follow_symlinks_common(self): - if not self.can_symlink: - self.skipTest("symlinks required") def _check(path, glob, expected): actual = {path for path in path.glob(glob, follow_symlinks=True) if path.parts.count("linkD") <= 1} # exclude symlink loop. 
@@ -1144,9 +1171,8 @@ def _check(path, glob, expected): _check(p, "dir*/*/../dirD/**/", ["dirC/dirD/../dirD/"]) _check(p, "*/dirD/**/", ["dirC/dirD/"]) + @needs_symlinks def test_glob_no_follow_symlinks_common(self): - if not self.can_symlink: - self.skipTest("symlinks required") def _check(path, glob, expected): actual = {path for path in path.glob(glob, follow_symlinks=False)} self.assertEqual(actual, { P(self.base, q) for q in expected }) @@ -1210,9 +1236,8 @@ def _check(glob, expected): _check(p.rglob("*.txt"), ["dirC/novel.txt"]) _check(p.rglob("*.*"), ["dirC/novel.txt"]) + @needs_symlinks def test_rglob_follow_symlinks_common(self): - if not self.can_symlink: - self.skipTest("symlinks required") def _check(path, glob, expected): actual = {path for path in path.rglob(glob, follow_symlinks=True) if path.parts.count("linkD") <= 1} # exclude symlink loop. @@ -1243,9 +1268,8 @@ def _check(path, glob, expected): _check(p, "*.txt", ["dirC/novel.txt"]) _check(p, "*.*", ["dirC/novel.txt"]) + @needs_symlinks def test_rglob_no_follow_symlinks_common(self): - if not self.can_symlink: - self.skipTest("symlinks required") def _check(path, glob, expected): actual = {path for path in path.rglob(glob, follow_symlinks=False)} self.assertEqual(actual, { P(self.base, q) for q in expected }) @@ -1269,10 +1293,9 @@ def _check(path, glob, expected): _check(p, "*.txt", ["dirC/novel.txt"]) _check(p, "*.*", ["dirC/novel.txt"]) + @needs_symlinks def test_rglob_symlink_loop(self): # Don't get fooled by symlink loops (Issue #26012). 
- if not self.can_symlink: - self.skipTest("symlinks required") P = self.cls p = P(self.base) given = set(p.rglob('*')) @@ -1302,10 +1325,9 @@ def test_glob_dotdot(self): self.assertEqual(set(p.glob("xyzzy/..")), set()) self.assertEqual(set(p.glob("/".join([".."] * 50))), { P(self.base, *[".."] * 50)}) + @needs_symlinks def test_glob_permissions(self): # See bpo-38894 - if not self.can_symlink: - self.skipTest("symlinks required") P = self.cls base = P(self.base) / 'permissions' base.mkdir() @@ -1322,19 +1344,17 @@ def test_glob_permissions(self): self.assertEqual(len(set(base.glob("*/fileC"))), 50) self.assertEqual(len(set(base.glob("*/file*"))), 50) + @needs_symlinks def test_glob_long_symlink(self): # See gh-87695 - if not self.can_symlink: - self.skipTest("symlinks required") base = self.cls(self.base) / 'long_symlink' base.mkdir() bad_link = base / 'bad_link' bad_link.symlink_to("bad" * 200) self.assertEqual(sorted(base.glob('**/*')), [bad_link]) + @needs_symlinks def test_readlink(self): - if not self.can_symlink: - self.skipTest("symlinks required") P = self.cls(self.base) self.assertEqual((P / 'linkA').readlink(), self.cls('fileA')) self.assertEqual((P / 'brokenLink').readlink(), @@ -1358,9 +1378,8 @@ def _check_resolve(self, p, expected, strict=True): # This can be used to check both relative and absolute resolutions. _check_resolve_relative = _check_resolve_absolute = _check_resolve + @needs_symlinks def test_resolve_common(self): - if not self.can_symlink: - self.skipTest("symlinks required") P = self.cls p = P(self.base, 'foo') with self.assertRaises(OSError) as cm: @@ -1419,10 +1438,9 @@ def test_resolve_common(self): # resolves to 'dirB/..' first before resolving to parent of dirB. 
self._check_resolve_relative(p, P(self.base, 'foo', 'in', 'spam'), False) + @needs_symlinks def test_resolve_dot(self): # See http://web.archive.org/web/20200623062557/https://bitbucket.org/pitrou/pathlib/issues/9/ - if not self.can_symlink: - self.skipTest("symlinks required") pathmod = self.pathmod p = self.cls(self.base) p.joinpath('0').symlink_to('.', target_is_directory=True) @@ -1441,11 +1459,9 @@ def _check_symlink_loop(self, *args): path.resolve(strict=True) self.assertEqual(cm.exception.errno, errno.ELOOP) + @needs_posix + @needs_symlinks def test_resolve_loop(self): - if not self.can_symlink: - self.skipTest("symlinks required") - if self.cls.pathmod is not posixpath: - self.skipTest("symlink loops work differently with concrete Windows paths") # Loops with relative symlinks. self.cls(self.base, 'linkX').symlink_to('linkX/inside') self._check_symlink_loop(self.base, 'linkX') @@ -1487,9 +1503,8 @@ def test_stat(self): self.assertEqual(statA.st_dev, statC.st_dev) # other attributes not used by pathlib. + @needs_symlinks def test_stat_no_follow_symlinks(self): - if not self.can_symlink: - self.skipTest("symlinks required") p = self.cls(self.base) / 'linkA' st = p.stat() self.assertNotEqual(st, p.stat(follow_symlinks=False)) @@ -1499,9 +1514,8 @@ def test_stat_no_follow_symlinks_nosymlink(self): st = p.stat() self.assertEqual(st, p.stat(follow_symlinks=False)) + @needs_symlinks def test_lstat(self): - if not self.can_symlink: - self.skipTest("symlinks required") p = self.cls(self.base)/ 'linkA' st = p.stat() self.assertNotEqual(st, p.lstat()) @@ -1634,9 +1648,6 @@ def test_is_char_device_false(self): self.assertIs((P / 'fileA\x00').is_char_device(), False) def _check_complex_symlinks(self, link0_target): - if not self.can_symlink: - self.skipTest("symlinks required") - # Test solving a non-looping chain of symlinks (issue #19887). 
pathmod = self.pathmod P = self.cls(self.base) @@ -1682,12 +1693,15 @@ def _check_complex_symlinks(self, link0_target): finally: os.chdir(old_path) + @needs_symlinks def test_complex_symlinks_absolute(self): self._check_complex_symlinks(self.base) + @needs_symlinks def test_complex_symlinks_relative(self): self._check_complex_symlinks('.') + @needs_symlinks def test_complex_symlinks_relative_dot_dot(self): self._check_complex_symlinks(self.pathmod.join('dirA', '..')) @@ -1803,9 +1817,8 @@ def test_walk_bottom_up(self): raise AssertionError(f"Unexpected path: {path}") self.assertTrue(seen_testfn) + @needs_symlinks def test_walk_follow_symlinks(self): - if not self.can_symlink: - self.skipTest("symlinks required") self.setUpWalk() walk_it = self.walk_path.walk(follow_symlinks=True) for root, dirs, files in walk_it: @@ -1816,9 +1829,8 @@ def test_walk_follow_symlinks(self): else: self.fail("Didn't follow symlink with follow_symlinks=True") + @needs_symlinks def test_walk_symlink_location(self): - if not self.can_symlink: - self.skipTest("symlinks required") self.setUpWalk() # Tests whether symlinks end up in filenames or dirnames depending # on the `follow_symlinks` argument. From fe5905e21ac90a2fb5ebd62779f56bcd87b5f7a0 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sat, 27 Jan 2024 01:30:25 +0000 Subject: [PATCH 120/160] Cover OS-specific behaviour in `PurePath` and `Path` tests (#114632) Test Posix- and Windows-specific behaviour from `PurePathTest` and `PathTest`. 
--- Lib/test/test_pathlib/test_pathlib.py | 198 +++++++++++++++++--------- 1 file changed, 128 insertions(+), 70 deletions(-) diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 2cef3b295559d8..b0067c25d208b9 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -234,8 +234,10 @@ def test_eq_common(self): self.assertNotEqual(P(), {}) self.assertNotEqual(P(), int) - def test_equivalences(self): - for k, tuples in self.equivalences.items(): + def test_equivalences(self, equivalences=None): + if equivalences is None: + equivalences = self.equivalences + for k, tuples in equivalences.items(): canon = k.replace('/', self.sep) posix = k.replace(self.sep, '/') if canon != posix: @@ -356,11 +358,8 @@ def test_match_empty(self): self.assertRaises(ValueError, P('a').match, '') self.assertRaises(ValueError, P('a').match, '.') - -class PurePosixPathTest(PurePathTest): - cls = pathlib.PurePosixPath - - def test_parse_path(self): + @needs_posix + def test_parse_path_posix(self): check = self._check_parse_path # Collapsing of excess leading slashes, except for the double-slash # special case. @@ -372,25 +371,29 @@ def test_parse_path(self): check('c:\\a', '', '', ['c:\\a',]) check('\\a', '', '', ['\\a',]) - def test_root(self): + @needs_posix + def test_root_posix(self): P = self.cls self.assertEqual(P('/a/b').root, '/') self.assertEqual(P('///a/b').root, '/') # POSIX special case for two leading slashes. 
self.assertEqual(P('//a/b').root, '//') - def test_eq(self): + @needs_posix + def test_eq_posix(self): P = self.cls self.assertNotEqual(P('a/b'), P('A/b')) self.assertEqual(P('/a'), P('///a')) self.assertNotEqual(P('/a'), P('//a')) - def test_as_uri(self): + @needs_posix + def test_as_uri_posix(self): P = self.cls self.assertEqual(P('/').as_uri(), 'file:///') self.assertEqual(P('/a/b.c').as_uri(), 'file:///a/b.c') self.assertEqual(P('/a/b%#c').as_uri(), 'file:///a/b%25%23c') + @needs_posix def test_as_uri_non_ascii(self): from urllib.parse import quote_from_bytes P = self.cls @@ -401,11 +404,13 @@ def test_as_uri_non_ascii(self): self.assertEqual(P('/a/b\xe9').as_uri(), 'file:///a/b' + quote_from_bytes(os.fsencode('\xe9'))) - def test_match(self): + @needs_posix + def test_match_posix(self): P = self.cls self.assertFalse(P('A.py').match('a.PY')) - def test_is_absolute(self): + @needs_posix + def test_is_absolute_posix(self): P = self.cls self.assertFalse(P().is_absolute()) self.assertFalse(P('a').is_absolute()) @@ -416,7 +421,8 @@ def test_is_absolute(self): self.assertTrue(P('//a').is_absolute()) self.assertTrue(P('//a/b').is_absolute()) - def test_join(self): + @needs_posix + def test_join_posix(self): P = self.cls p = P('//a') pp = p.joinpath('b') @@ -426,7 +432,8 @@ def test_join(self): pp = P('//a').joinpath('/c') self.assertEqual(pp, P('/c')) - def test_div(self): + @needs_posix + def test_div_posix(self): # Basically the same as joinpath(). 
P = self.cls p = P('//a') @@ -437,18 +444,14 @@ def test_div(self): pp = P('//a') / '/c' self.assertEqual(pp, P('/c')) + @needs_posix def test_parse_windows_path(self): P = self.cls p = P('c:', 'a', 'b') pp = P(pathlib.PureWindowsPath('c:\\a\\b')) self.assertEqual(p, pp) - -class PureWindowsPathTest(PurePathTest): - cls = pathlib.PureWindowsPath - - equivalences = PurePathTest.equivalences.copy() - equivalences.update({ + windows_equivalences = { './a:b': [ ('./a:b',) ], 'c:a': [ ('c:', 'a'), ('c:', 'a/'), ('.', 'c:', 'a') ], 'c:/a': [ @@ -459,9 +462,14 @@ class PureWindowsPathTest(PurePathTest): '//a/b/c': [ ('//a/b', 'c'), ('//a/b/', 'c'), ], - }) + } + + @needs_windows + def test_equivalences_windows(self): + self.test_equivalences(self.windows_equivalences) - def test_parse_path(self): + @needs_windows + def test_parse_path_windows(self): check = self._check_parse_path # First part is anchored. check('c:', 'c:', '', []) @@ -509,7 +517,8 @@ def test_parse_path(self): check('D:a/c:b', 'D:', '', ['a', 'c:b']) check('D:/a/c:b', 'D:', '\\', ['a', 'c:b']) - def test_str(self): + @needs_windows + def test_str_windows(self): p = self.cls('a/b/c') self.assertEqual(str(p), 'a\\b\\c') p = self.cls('c:/a/b/c') @@ -521,7 +530,8 @@ def test_str(self): p = self.cls('//a/b/c/d') self.assertEqual(str(p), '\\\\a\\b\\c\\d') - def test_str_subclass(self): + @needs_windows + def test_str_subclass_windows(self): self._check_str_subclass('.\\a:b') self._check_str_subclass('c:') self._check_str_subclass('c:a') @@ -533,7 +543,8 @@ def test_str_subclass(self): self._check_str_subclass('\\\\some\\share\\a') self._check_str_subclass('\\\\some\\share\\a\\b.txt') - def test_eq(self): + @needs_windows + def test_eq_windows(self): P = self.cls self.assertEqual(P('c:a/b'), P('c:a/b')) self.assertEqual(P('c:a/b'), P('c:', 'a', 'b')) @@ -546,7 +557,8 @@ def test_eq(self): self.assertEqual(P('//Some/SHARE/a/B'), P('//somE/share/A/b')) self.assertEqual(P('\u0130'), P('i\u0307')) - def 
test_as_uri(self): + @needs_windows + def test_as_uri_windows(self): P = self.cls with self.assertRaises(ValueError): P('/a/b').as_uri() @@ -562,7 +574,8 @@ def test_as_uri(self): self.assertEqual(P('//some/share/a/b%#c\xe9').as_uri(), 'file://some/share/a/b%25%23c%C3%A9') - def test_match(self): + @needs_windows + def test_match_windows(self): P = self.cls # Absolute patterns. self.assertTrue(P('c:/b.py').match('*:/*.py')) @@ -589,7 +602,8 @@ def test_match(self): self.assertFalse(P('c:/b.py').match('c:*.py')) # 'c:/' vs 'c:' self.assertFalse(P('//some/share/a.py').match('/*.py')) # '//some/share/' vs '/' - def test_ordering_common(self): + @needs_windows + def test_ordering_windows(self): # Case-insensitivity. def assertOrderedEqual(a, b): self.assertLessEqual(a, b) @@ -606,7 +620,8 @@ def assertOrderedEqual(a, b): self.assertFalse(p < q) self.assertFalse(p > q) - def test_parts(self): + @needs_windows + def test_parts_windows(self): P = self.cls p = P('c:a/b') parts = p.parts @@ -618,7 +633,8 @@ def test_parts(self): parts = p.parts self.assertEqual(parts, ('\\\\a\\b\\', 'c', 'd')) - def test_parent(self): + @needs_windows + def test_parent_windows(self): # Anchored P = self.cls p = P('z:a/b/c') @@ -636,7 +652,8 @@ def test_parent(self): self.assertEqual(p.parent.parent, P('//a/b')) self.assertEqual(p.parent.parent.parent, P('//a/b')) - def test_parents(self): + @needs_windows + def test_parents_windows(self): # Anchored P = self.cls p = P('z:a/b/') @@ -682,7 +699,8 @@ def test_parents(self): with self.assertRaises(IndexError): par[2] - def test_drive(self): + @needs_windows + def test_drive_windows(self): P = self.cls self.assertEqual(P('c:').drive, 'c:') self.assertEqual(P('c:a/b').drive, 'c:') @@ -693,7 +711,8 @@ def test_drive(self): self.assertEqual(P('//a/b/c/d').drive, '\\\\a\\b') self.assertEqual(P('./c:a').drive, '') - def test_root(self): + @needs_windows + def test_root_windows(self): P = self.cls self.assertEqual(P('c:').root, '') 
self.assertEqual(P('c:a/b').root, '') @@ -703,7 +722,8 @@ def test_root(self): self.assertEqual(P('//a/b/').root, '\\') self.assertEqual(P('//a/b/c/d').root, '\\') - def test_anchor(self): + @needs_windows + def test_anchor_windows(self): P = self.cls self.assertEqual(P('c:').anchor, 'c:') self.assertEqual(P('c:a/b').anchor, 'c:') @@ -713,7 +733,8 @@ def test_anchor(self): self.assertEqual(P('//a/b/').anchor, '\\\\a\\b\\') self.assertEqual(P('//a/b/c/d').anchor, '\\\\a\\b\\') - def test_name(self): + @needs_windows + def test_name_windows(self): P = self.cls self.assertEqual(P('c:').name, '') self.assertEqual(P('c:/').name, '') @@ -724,7 +745,8 @@ def test_name(self): self.assertEqual(P('//My.py/Share.php').name, '') self.assertEqual(P('//My.py/Share.php/a/b').name, 'b') - def test_suffix(self): + @needs_windows + def test_suffix_windows(self): P = self.cls self.assertEqual(P('c:').suffix, '') self.assertEqual(P('c:/').suffix, '') @@ -743,7 +765,8 @@ def test_suffix(self): self.assertEqual(P('//My.py/Share.php').suffix, '') self.assertEqual(P('//My.py/Share.php/a/b').suffix, '') - def test_suffixes(self): + @needs_windows + def test_suffixes_windows(self): P = self.cls self.assertEqual(P('c:').suffixes, []) self.assertEqual(P('c:/').suffixes, []) @@ -762,7 +785,8 @@ def test_suffixes(self): self.assertEqual(P('c:a/Some name. Ending with a dot.').suffixes, []) self.assertEqual(P('c:/a/Some name. Ending with a dot.').suffixes, []) - def test_stem(self): + @needs_windows + def test_stem_windows(self): P = self.cls self.assertEqual(P('c:').stem, '') self.assertEqual(P('c:.').stem, '') @@ -776,7 +800,8 @@ def test_stem(self): self.assertEqual(P('c:a/Some name. Ending with a dot.').stem, 'Some name. 
Ending with a dot.') - def test_with_name(self): + @needs_windows + def test_with_name_windows(self): P = self.cls self.assertEqual(P('c:a/b').with_name('d.xml'), P('c:a/d.xml')) self.assertEqual(P('c:/a/b').with_name('d.xml'), P('c:/a/d.xml')) @@ -792,7 +817,8 @@ def test_with_name(self): self.assertRaises(ValueError, P('c:a/b').with_name, 'd:/e') self.assertRaises(ValueError, P('c:a/b').with_name, '//My/Share') - def test_with_stem(self): + @needs_windows + def test_with_stem_windows(self): P = self.cls self.assertEqual(P('c:a/b').with_stem('d'), P('c:a/d')) self.assertEqual(P('c:/a/b').with_stem('d'), P('c:/a/d')) @@ -808,7 +834,8 @@ def test_with_stem(self): self.assertRaises(ValueError, P('c:a/b').with_stem, 'd:/e') self.assertRaises(ValueError, P('c:a/b').with_stem, '//My/Share') - def test_with_suffix(self): + @needs_windows + def test_with_suffix_windows(self): P = self.cls self.assertEqual(P('c:a/b').with_suffix('.gz'), P('c:a/b.gz')) self.assertEqual(P('c:/a/b').with_suffix('.gz'), P('c:/a/b.gz')) @@ -832,7 +859,8 @@ def test_with_suffix(self): self.assertRaises(ValueError, P('c:a/b').with_suffix, '.c/d') self.assertRaises(ValueError, P('c:a/b').with_suffix, '.c\\d') - def test_relative_to(self): + @needs_windows + def test_relative_to_windows(self): P = self.cls p = P('C:Foo/Bar') self.assertEqual(p.relative_to(P('c:')), P('Foo/Bar')) @@ -937,7 +965,8 @@ def test_relative_to(self): self.assertRaises(ValueError, p.relative_to, P('//z/Share/Foo'), walk_up=True) self.assertRaises(ValueError, p.relative_to, P('//Server/z/Foo'), walk_up=True) - def test_is_relative_to(self): + @needs_windows + def test_is_relative_to_windows(self): P = self.cls p = P('C:Foo/Bar') self.assertTrue(p.is_relative_to(P('c:'))) @@ -990,7 +1019,8 @@ def test_is_relative_to(self): self.assertFalse(p.is_relative_to(P('//z/Share/Foo'))) self.assertFalse(p.is_relative_to(P('//Server/z/Foo'))) - def test_is_absolute(self): + @needs_windows + def test_is_absolute_windows(self): P = 
self.cls # Under NT, only paths with both a drive and a root are absolute. self.assertFalse(P().is_absolute()) @@ -1015,7 +1045,8 @@ def test_is_absolute(self): self.assertTrue(P('//?/UNC/').is_absolute()) self.assertTrue(P('//?/UNC/spam').is_absolute()) - def test_join(self): + @needs_windows + def test_join_windows(self): P = self.cls p = P('C:/a/b') pp = p.joinpath('x/y') @@ -1052,7 +1083,8 @@ def test_join(self): pp = P('//./BootPartition').joinpath('Windows') self.assertEqual(pp, P('//./BootPartition/Windows')) - def test_div(self): + @needs_windows + def test_div_windows(self): # Basically the same as joinpath(). P = self.cls p = P('C:/a/b') @@ -1078,6 +1110,14 @@ def test_div(self): self.assertEqual(p / P('E:d:s'), P('E:d:s')) +class PurePosixPathTest(PurePathTest): + cls = pathlib.PurePosixPath + + +class PureWindowsPathTest(PurePathTest): + cls = pathlib.PureWindowsPath + + class PurePathSubclassTest(PurePathTest): class cls(pathlib.PurePath): pass @@ -1842,12 +1882,8 @@ def test_rglob_pathlike(self): self.assertEqual(expect, set(p.rglob(P(pattern)))) self.assertEqual(expect, set(p.rglob(FakePath(pattern)))) - -@unittest.skipIf(os.name == 'nt', 'test requires a POSIX-compatible system') -class PosixPathTest(PathTest, PurePosixPathTest): - cls = pathlib.PosixPath - - def test_absolute(self): + @needs_posix + def test_absolute_posix(self): P = self.cls self.assertEqual(str(P('/').absolute()), '/') self.assertEqual(str(P('/a').absolute()), '/a') @@ -1862,6 +1898,7 @@ def test_absolute(self): is_emscripten or is_wasi, "umask is not implemented on Emscripten/WASI." 
) + @needs_posix def test_open_mode(self): old_mask = os.umask(0) self.addCleanup(os.umask, old_mask) @@ -1876,6 +1913,7 @@ def test_open_mode(self): st = os.stat(self.pathmod.join(self.base, 'other_new_file')) self.assertEqual(stat.S_IMODE(st.st_mode), 0o644) + @needs_posix def test_resolve_root(self): current_directory = os.getcwd() try: @@ -1889,6 +1927,7 @@ def test_resolve_root(self): is_emscripten or is_wasi, "umask is not implemented on Emscripten/WASI." ) + @needs_posix def test_touch_mode(self): old_mask = os.umask(0) self.addCleanup(os.umask, old_mask) @@ -1904,7 +1943,8 @@ def test_touch_mode(self): st = os.stat(self.pathmod.join(self.base, 'masked_new_file')) self.assertEqual(stat.S_IMODE(st.st_mode), 0o750) - def test_glob(self): + @needs_posix + def test_glob_posix(self): P = self.cls p = P(self.base) given = set(p.glob("FILEa")) @@ -1912,7 +1952,8 @@ def test_glob(self): self.assertEqual(given, expect) self.assertEqual(set(p.glob("FILEa*")), set()) - def test_rglob(self): + @needs_posix + def test_rglob_posix(self): P = self.cls p = P(self.base, "dirC") given = set(p.rglob("FILEd")) @@ -1924,7 +1965,8 @@ def test_rglob(self): 'pwd module does not expose getpwall()') @unittest.skipIf(sys.platform == "vxworks", "no home directory on VxWorks") - def test_expanduser(self): + @needs_posix + def test_expanduser_posix(self): P = self.cls import_helper.import_module('pwd') import pwd @@ -1979,6 +2021,7 @@ def test_expanduser(self): @unittest.skipIf(sys.platform != "darwin", "Bad file descriptor in /dev/fd affects only macOS") + @needs_posix def test_handling_bad_descriptor(self): try: file_descriptors = list(pathlib.Path('/dev/fd').rglob("*"))[3:] @@ -2000,7 +2043,8 @@ def test_handling_bad_descriptor(self): self.fail("Bad file descriptor not handled.") raise - def test_from_uri(self): + @needs_posix + def test_from_uri_posix(self): P = self.cls self.assertEqual(P.from_uri('file:/foo/bar'), P('/foo/bar')) self.assertEqual(P.from_uri('file://foo/bar'), 
P('//foo/bar')) @@ -2013,17 +2057,14 @@ def test_from_uri(self): self.assertRaises(ValueError, P.from_uri, 'file:foo/bar') self.assertRaises(ValueError, P.from_uri, 'http://foo/bar') - def test_from_uri_pathname2url(self): + @needs_posix + def test_from_uri_pathname2url_posix(self): P = self.cls self.assertEqual(P.from_uri('file:' + pathname2url('/foo/bar')), P('/foo/bar')) self.assertEqual(P.from_uri('file:' + pathname2url('//foo/bar')), P('//foo/bar')) - -@unittest.skipIf(os.name != 'nt', 'test requires a Windows-compatible system') -class WindowsPathTest(PathTest, PureWindowsPathTest): - cls = pathlib.WindowsPath - - def test_absolute(self): + @needs_windows + def test_absolute_windows(self): P = self.cls # Simple absolute paths. @@ -2068,7 +2109,8 @@ def test_absolute(self): self.assertEqual(str(P(other_drive).absolute()), other_cwd) self.assertEqual(str(P(other_drive + 'foo').absolute()), other_cwd + '\\foo') - def test_glob(self): + @needs_windows + def test_glob_windows(self): P = self.cls p = P(self.base) self.assertEqual(set(p.glob("FILEa")), { P(self.base, "fileA") }) @@ -2077,14 +2119,16 @@ def test_glob(self): self.assertEqual(set(map(str, p.glob("FILEa"))), {f"{p}\\fileA"}) self.assertEqual(set(map(str, p.glob("F*a"))), {f"{p}\\fileA"}) - def test_rglob(self): + @needs_windows + def test_rglob_windows(self): P = self.cls p = P(self.base, "dirC") self.assertEqual(set(p.rglob("FILEd")), { P(self.base, "dirC/dirD/fileD") }) self.assertEqual(set(p.rglob("*\\")), { P(self.base, "dirC/dirD/") }) self.assertEqual(set(map(str, p.rglob("FILEd"))), {f"{p}\\dirD\\fileD"}) - def test_expanduser(self): + @needs_windows + def test_expanduser_windows(self): P = self.cls with os_helper.EnvironmentVarGuard() as env: env.pop('HOME', None) @@ -2137,7 +2181,8 @@ def check(): env['HOME'] = 'C:\\Users\\eve' check() - def test_from_uri(self): + @needs_windows + def test_from_uri_windows(self): P = self.cls # DOS drive paths 
self.assertEqual(P.from_uri('file:c:/path/to/file'), P('c:/path/to/file')) @@ -2158,22 +2203,35 @@ def test_from_uri(self): self.assertRaises(ValueError, P.from_uri, 'file:foo/bar') self.assertRaises(ValueError, P.from_uri, 'http://foo/bar') - def test_from_uri_pathname2url(self): + @needs_windows + def test_from_uri_pathname2url_windows(self): P = self.cls self.assertEqual(P.from_uri('file:' + pathname2url(r'c:\path\to\file')), P('c:/path/to/file')) self.assertEqual(P.from_uri('file:' + pathname2url(r'\\server\path\to\file')), P('//server/path/to/file')) - def test_owner(self): + @needs_windows + def test_owner_windows(self): P = self.cls with self.assertRaises(pathlib.UnsupportedOperation): P('c:/').owner() - def test_group(self): + @needs_windows + def test_group_windows(self): P = self.cls with self.assertRaises(pathlib.UnsupportedOperation): P('c:/').group() +@unittest.skipIf(os.name == 'nt', 'test requires a POSIX-compatible system') +class PosixPathTest(PathTest, PurePosixPathTest): + cls = pathlib.PosixPath + + +@unittest.skipIf(os.name != 'nt', 'test requires a Windows-compatible system') +class WindowsPathTest(PathTest, PureWindowsPathTest): + cls = pathlib.WindowsPath + + class PathSubclassTest(PathTest): class cls(pathlib.Path): pass From 2d08af34b873d5e6b4df5082dfc30a37ef59c346 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sat, 27 Jan 2024 02:16:17 +0000 Subject: [PATCH 121/160] Cover OS-specific behaviour in `PurePathBase` and `PathBase` tests. (#114633) Wherever possible, move tests for OS-specific behaviour from `PurePathTest` and `PathTest` to `DummyPurePathTest` and `DummyPathTest`. 
--- Lib/test/test_pathlib/test_pathlib.py | 627 ---------------------- Lib/test/test_pathlib/test_pathlib_abc.py | 625 +++++++++++++++++++++ 2 files changed, 625 insertions(+), 627 deletions(-) diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index b0067c25d208b9..9c2b26d41d73f8 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -371,14 +371,6 @@ def test_parse_path_posix(self): check('c:\\a', '', '', ['c:\\a',]) check('\\a', '', '', ['\\a',]) - @needs_posix - def test_root_posix(self): - P = self.cls - self.assertEqual(P('/a/b').root, '/') - self.assertEqual(P('///a/b').root, '/') - # POSIX special case for two leading slashes. - self.assertEqual(P('//a/b').root, '//') - @needs_posix def test_eq_posix(self): P = self.cls @@ -404,46 +396,6 @@ def test_as_uri_non_ascii(self): self.assertEqual(P('/a/b\xe9').as_uri(), 'file:///a/b' + quote_from_bytes(os.fsencode('\xe9'))) - @needs_posix - def test_match_posix(self): - P = self.cls - self.assertFalse(P('A.py').match('a.PY')) - - @needs_posix - def test_is_absolute_posix(self): - P = self.cls - self.assertFalse(P().is_absolute()) - self.assertFalse(P('a').is_absolute()) - self.assertFalse(P('a/b/').is_absolute()) - self.assertTrue(P('/').is_absolute()) - self.assertTrue(P('/a').is_absolute()) - self.assertTrue(P('/a/b/').is_absolute()) - self.assertTrue(P('//a').is_absolute()) - self.assertTrue(P('//a/b').is_absolute()) - - @needs_posix - def test_join_posix(self): - P = self.cls - p = P('//a') - pp = p.joinpath('b') - self.assertEqual(pp, P('//a/b')) - pp = P('/a').joinpath('//c') - self.assertEqual(pp, P('//c')) - pp = P('//a').joinpath('/c') - self.assertEqual(pp, P('/c')) - - @needs_posix - def test_div_posix(self): - # Basically the same as joinpath(). 
- P = self.cls - p = P('//a') - pp = p / 'b' - self.assertEqual(pp, P('//a/b')) - pp = P('/a') / '//c' - self.assertEqual(pp, P('//c')) - pp = P('//a') / '/c' - self.assertEqual(pp, P('/c')) - @needs_posix def test_parse_windows_path(self): P = self.cls @@ -517,32 +469,6 @@ def test_parse_path_windows(self): check('D:a/c:b', 'D:', '', ['a', 'c:b']) check('D:/a/c:b', 'D:', '\\', ['a', 'c:b']) - @needs_windows - def test_str_windows(self): - p = self.cls('a/b/c') - self.assertEqual(str(p), 'a\\b\\c') - p = self.cls('c:/a/b/c') - self.assertEqual(str(p), 'c:\\a\\b\\c') - p = self.cls('//a/b') - self.assertEqual(str(p), '\\\\a\\b\\') - p = self.cls('//a/b/c') - self.assertEqual(str(p), '\\\\a\\b\\c') - p = self.cls('//a/b/c/d') - self.assertEqual(str(p), '\\\\a\\b\\c\\d') - - @needs_windows - def test_str_subclass_windows(self): - self._check_str_subclass('.\\a:b') - self._check_str_subclass('c:') - self._check_str_subclass('c:a') - self._check_str_subclass('c:a\\b.txt') - self._check_str_subclass('c:\\') - self._check_str_subclass('c:\\a') - self._check_str_subclass('c:\\a\\b.txt') - self._check_str_subclass('\\\\some\\share') - self._check_str_subclass('\\\\some\\share\\a') - self._check_str_subclass('\\\\some\\share\\a\\b.txt') - @needs_windows def test_eq_windows(self): P = self.cls @@ -574,34 +500,6 @@ def test_as_uri_windows(self): self.assertEqual(P('//some/share/a/b%#c\xe9').as_uri(), 'file://some/share/a/b%25%23c%C3%A9') - @needs_windows - def test_match_windows(self): - P = self.cls - # Absolute patterns. 
- self.assertTrue(P('c:/b.py').match('*:/*.py')) - self.assertTrue(P('c:/b.py').match('c:/*.py')) - self.assertFalse(P('d:/b.py').match('c:/*.py')) # wrong drive - self.assertFalse(P('b.py').match('/*.py')) - self.assertFalse(P('b.py').match('c:*.py')) - self.assertFalse(P('b.py').match('c:/*.py')) - self.assertFalse(P('c:b.py').match('/*.py')) - self.assertFalse(P('c:b.py').match('c:/*.py')) - self.assertFalse(P('/b.py').match('c:*.py')) - self.assertFalse(P('/b.py').match('c:/*.py')) - # UNC patterns. - self.assertTrue(P('//some/share/a.py').match('//*/*/*.py')) - self.assertTrue(P('//some/share/a.py').match('//some/share/*.py')) - self.assertFalse(P('//other/share/a.py').match('//some/share/*.py')) - self.assertFalse(P('//some/share/a/b.py').match('//some/share/*.py')) - # Case-insensitivity. - self.assertTrue(P('B.py').match('b.PY')) - self.assertTrue(P('c:/a/B.Py').match('C:/A/*.pY')) - self.assertTrue(P('//Some/Share/B.Py').match('//somE/sharE/*.pY')) - # Path anchor doesn't match pattern anchor - self.assertFalse(P('c:/b.py').match('/*.py')) # 'c:/' vs '/' - self.assertFalse(P('c:/b.py').match('c:*.py')) # 'c:/' vs 'c:' - self.assertFalse(P('//some/share/a.py').match('/*.py')) # '//some/share/' vs '/' - @needs_windows def test_ordering_windows(self): # Case-insensitivity. 
@@ -620,495 +518,6 @@ def assertOrderedEqual(a, b): self.assertFalse(p < q) self.assertFalse(p > q) - @needs_windows - def test_parts_windows(self): - P = self.cls - p = P('c:a/b') - parts = p.parts - self.assertEqual(parts, ('c:', 'a', 'b')) - p = P('c:/a/b') - parts = p.parts - self.assertEqual(parts, ('c:\\', 'a', 'b')) - p = P('//a/b/c/d') - parts = p.parts - self.assertEqual(parts, ('\\\\a\\b\\', 'c', 'd')) - - @needs_windows - def test_parent_windows(self): - # Anchored - P = self.cls - p = P('z:a/b/c') - self.assertEqual(p.parent, P('z:a/b')) - self.assertEqual(p.parent.parent, P('z:a')) - self.assertEqual(p.parent.parent.parent, P('z:')) - self.assertEqual(p.parent.parent.parent.parent, P('z:')) - p = P('z:/a/b/c') - self.assertEqual(p.parent, P('z:/a/b')) - self.assertEqual(p.parent.parent, P('z:/a')) - self.assertEqual(p.parent.parent.parent, P('z:/')) - self.assertEqual(p.parent.parent.parent.parent, P('z:/')) - p = P('//a/b/c/d') - self.assertEqual(p.parent, P('//a/b/c')) - self.assertEqual(p.parent.parent, P('//a/b')) - self.assertEqual(p.parent.parent.parent, P('//a/b')) - - @needs_windows - def test_parents_windows(self): - # Anchored - P = self.cls - p = P('z:a/b/') - par = p.parents - self.assertEqual(len(par), 2) - self.assertEqual(par[0], P('z:a')) - self.assertEqual(par[1], P('z:')) - self.assertEqual(par[0:1], (P('z:a'),)) - self.assertEqual(par[:-1], (P('z:a'),)) - self.assertEqual(par[:2], (P('z:a'), P('z:'))) - self.assertEqual(par[1:], (P('z:'),)) - self.assertEqual(par[::2], (P('z:a'),)) - self.assertEqual(par[::-1], (P('z:'), P('z:a'))) - self.assertEqual(list(par), [P('z:a'), P('z:')]) - with self.assertRaises(IndexError): - par[2] - p = P('z:/a/b/') - par = p.parents - self.assertEqual(len(par), 2) - self.assertEqual(par[0], P('z:/a')) - self.assertEqual(par[1], P('z:/')) - self.assertEqual(par[0:1], (P('z:/a'),)) - self.assertEqual(par[0:-1], (P('z:/a'),)) - self.assertEqual(par[:2], (P('z:/a'), P('z:/'))) - self.assertEqual(par[1:], 
(P('z:/'),)) - self.assertEqual(par[::2], (P('z:/a'),)) - self.assertEqual(par[::-1], (P('z:/'), P('z:/a'),)) - self.assertEqual(list(par), [P('z:/a'), P('z:/')]) - with self.assertRaises(IndexError): - par[2] - p = P('//a/b/c/d') - par = p.parents - self.assertEqual(len(par), 2) - self.assertEqual(par[0], P('//a/b/c')) - self.assertEqual(par[1], P('//a/b')) - self.assertEqual(par[0:1], (P('//a/b/c'),)) - self.assertEqual(par[0:-1], (P('//a/b/c'),)) - self.assertEqual(par[:2], (P('//a/b/c'), P('//a/b'))) - self.assertEqual(par[1:], (P('//a/b'),)) - self.assertEqual(par[::2], (P('//a/b/c'),)) - self.assertEqual(par[::-1], (P('//a/b'), P('//a/b/c'))) - self.assertEqual(list(par), [P('//a/b/c'), P('//a/b')]) - with self.assertRaises(IndexError): - par[2] - - @needs_windows - def test_drive_windows(self): - P = self.cls - self.assertEqual(P('c:').drive, 'c:') - self.assertEqual(P('c:a/b').drive, 'c:') - self.assertEqual(P('c:/').drive, 'c:') - self.assertEqual(P('c:/a/b/').drive, 'c:') - self.assertEqual(P('//a/b').drive, '\\\\a\\b') - self.assertEqual(P('//a/b/').drive, '\\\\a\\b') - self.assertEqual(P('//a/b/c/d').drive, '\\\\a\\b') - self.assertEqual(P('./c:a').drive, '') - - @needs_windows - def test_root_windows(self): - P = self.cls - self.assertEqual(P('c:').root, '') - self.assertEqual(P('c:a/b').root, '') - self.assertEqual(P('c:/').root, '\\') - self.assertEqual(P('c:/a/b/').root, '\\') - self.assertEqual(P('//a/b').root, '\\') - self.assertEqual(P('//a/b/').root, '\\') - self.assertEqual(P('//a/b/c/d').root, '\\') - - @needs_windows - def test_anchor_windows(self): - P = self.cls - self.assertEqual(P('c:').anchor, 'c:') - self.assertEqual(P('c:a/b').anchor, 'c:') - self.assertEqual(P('c:/').anchor, 'c:\\') - self.assertEqual(P('c:/a/b/').anchor, 'c:\\') - self.assertEqual(P('//a/b').anchor, '\\\\a\\b\\') - self.assertEqual(P('//a/b/').anchor, '\\\\a\\b\\') - self.assertEqual(P('//a/b/c/d').anchor, '\\\\a\\b\\') - - @needs_windows - def 
test_name_windows(self): - P = self.cls - self.assertEqual(P('c:').name, '') - self.assertEqual(P('c:/').name, '') - self.assertEqual(P('c:a/b').name, 'b') - self.assertEqual(P('c:/a/b').name, 'b') - self.assertEqual(P('c:a/b.py').name, 'b.py') - self.assertEqual(P('c:/a/b.py').name, 'b.py') - self.assertEqual(P('//My.py/Share.php').name, '') - self.assertEqual(P('//My.py/Share.php/a/b').name, 'b') - - @needs_windows - def test_suffix_windows(self): - P = self.cls - self.assertEqual(P('c:').suffix, '') - self.assertEqual(P('c:/').suffix, '') - self.assertEqual(P('c:a/b').suffix, '') - self.assertEqual(P('c:/a/b').suffix, '') - self.assertEqual(P('c:a/b.py').suffix, '.py') - self.assertEqual(P('c:/a/b.py').suffix, '.py') - self.assertEqual(P('c:a/.hgrc').suffix, '') - self.assertEqual(P('c:/a/.hgrc').suffix, '') - self.assertEqual(P('c:a/.hg.rc').suffix, '.rc') - self.assertEqual(P('c:/a/.hg.rc').suffix, '.rc') - self.assertEqual(P('c:a/b.tar.gz').suffix, '.gz') - self.assertEqual(P('c:/a/b.tar.gz').suffix, '.gz') - self.assertEqual(P('c:a/Some name. Ending with a dot.').suffix, '') - self.assertEqual(P('c:/a/Some name. 
Ending with a dot.').suffix, '') - self.assertEqual(P('//My.py/Share.php').suffix, '') - self.assertEqual(P('//My.py/Share.php/a/b').suffix, '') - - @needs_windows - def test_suffixes_windows(self): - P = self.cls - self.assertEqual(P('c:').suffixes, []) - self.assertEqual(P('c:/').suffixes, []) - self.assertEqual(P('c:a/b').suffixes, []) - self.assertEqual(P('c:/a/b').suffixes, []) - self.assertEqual(P('c:a/b.py').suffixes, ['.py']) - self.assertEqual(P('c:/a/b.py').suffixes, ['.py']) - self.assertEqual(P('c:a/.hgrc').suffixes, []) - self.assertEqual(P('c:/a/.hgrc').suffixes, []) - self.assertEqual(P('c:a/.hg.rc').suffixes, ['.rc']) - self.assertEqual(P('c:/a/.hg.rc').suffixes, ['.rc']) - self.assertEqual(P('c:a/b.tar.gz').suffixes, ['.tar', '.gz']) - self.assertEqual(P('c:/a/b.tar.gz').suffixes, ['.tar', '.gz']) - self.assertEqual(P('//My.py/Share.php').suffixes, []) - self.assertEqual(P('//My.py/Share.php/a/b').suffixes, []) - self.assertEqual(P('c:a/Some name. Ending with a dot.').suffixes, []) - self.assertEqual(P('c:/a/Some name. Ending with a dot.').suffixes, []) - - @needs_windows - def test_stem_windows(self): - P = self.cls - self.assertEqual(P('c:').stem, '') - self.assertEqual(P('c:.').stem, '') - self.assertEqual(P('c:..').stem, '..') - self.assertEqual(P('c:/').stem, '') - self.assertEqual(P('c:a/b').stem, 'b') - self.assertEqual(P('c:a/b.py').stem, 'b') - self.assertEqual(P('c:a/.hgrc').stem, '.hgrc') - self.assertEqual(P('c:a/.hg.rc').stem, '.hg') - self.assertEqual(P('c:a/b.tar.gz').stem, 'b.tar') - self.assertEqual(P('c:a/Some name. Ending with a dot.').stem, - 'Some name. 
Ending with a dot.') - - @needs_windows - def test_with_name_windows(self): - P = self.cls - self.assertEqual(P('c:a/b').with_name('d.xml'), P('c:a/d.xml')) - self.assertEqual(P('c:/a/b').with_name('d.xml'), P('c:/a/d.xml')) - self.assertEqual(P('c:a/Dot ending.').with_name('d.xml'), P('c:a/d.xml')) - self.assertEqual(P('c:/a/Dot ending.').with_name('d.xml'), P('c:/a/d.xml')) - self.assertRaises(ValueError, P('c:').with_name, 'd.xml') - self.assertRaises(ValueError, P('c:/').with_name, 'd.xml') - self.assertRaises(ValueError, P('//My/Share').with_name, 'd.xml') - self.assertEqual(str(P('a').with_name('d:')), '.\\d:') - self.assertEqual(str(P('a').with_name('d:e')), '.\\d:e') - self.assertEqual(P('c:a/b').with_name('d:'), P('c:a/d:')) - self.assertEqual(P('c:a/b').with_name('d:e'), P('c:a/d:e')) - self.assertRaises(ValueError, P('c:a/b').with_name, 'd:/e') - self.assertRaises(ValueError, P('c:a/b').with_name, '//My/Share') - - @needs_windows - def test_with_stem_windows(self): - P = self.cls - self.assertEqual(P('c:a/b').with_stem('d'), P('c:a/d')) - self.assertEqual(P('c:/a/b').with_stem('d'), P('c:/a/d')) - self.assertEqual(P('c:a/Dot ending.').with_stem('d'), P('c:a/d')) - self.assertEqual(P('c:/a/Dot ending.').with_stem('d'), P('c:/a/d')) - self.assertRaises(ValueError, P('c:').with_stem, 'd') - self.assertRaises(ValueError, P('c:/').with_stem, 'd') - self.assertRaises(ValueError, P('//My/Share').with_stem, 'd') - self.assertEqual(str(P('a').with_stem('d:')), '.\\d:') - self.assertEqual(str(P('a').with_stem('d:e')), '.\\d:e') - self.assertEqual(P('c:a/b').with_stem('d:'), P('c:a/d:')) - self.assertEqual(P('c:a/b').with_stem('d:e'), P('c:a/d:e')) - self.assertRaises(ValueError, P('c:a/b').with_stem, 'd:/e') - self.assertRaises(ValueError, P('c:a/b').with_stem, '//My/Share') - - @needs_windows - def test_with_suffix_windows(self): - P = self.cls - self.assertEqual(P('c:a/b').with_suffix('.gz'), P('c:a/b.gz')) - self.assertEqual(P('c:/a/b').with_suffix('.gz'), 
P('c:/a/b.gz')) - self.assertEqual(P('c:a/b.py').with_suffix('.gz'), P('c:a/b.gz')) - self.assertEqual(P('c:/a/b.py').with_suffix('.gz'), P('c:/a/b.gz')) - # Path doesn't have a "filename" component. - self.assertRaises(ValueError, P('').with_suffix, '.gz') - self.assertRaises(ValueError, P('.').with_suffix, '.gz') - self.assertRaises(ValueError, P('/').with_suffix, '.gz') - self.assertRaises(ValueError, P('//My/Share').with_suffix, '.gz') - # Invalid suffix. - self.assertRaises(ValueError, P('c:a/b').with_suffix, 'gz') - self.assertRaises(ValueError, P('c:a/b').with_suffix, '/') - self.assertRaises(ValueError, P('c:a/b').with_suffix, '\\') - self.assertRaises(ValueError, P('c:a/b').with_suffix, 'c:') - self.assertRaises(ValueError, P('c:a/b').with_suffix, '/.gz') - self.assertRaises(ValueError, P('c:a/b').with_suffix, '\\.gz') - self.assertRaises(ValueError, P('c:a/b').with_suffix, 'c:.gz') - self.assertRaises(ValueError, P('c:a/b').with_suffix, 'c/d') - self.assertRaises(ValueError, P('c:a/b').with_suffix, 'c\\d') - self.assertRaises(ValueError, P('c:a/b').with_suffix, '.c/d') - self.assertRaises(ValueError, P('c:a/b').with_suffix, '.c\\d') - - @needs_windows - def test_relative_to_windows(self): - P = self.cls - p = P('C:Foo/Bar') - self.assertEqual(p.relative_to(P('c:')), P('Foo/Bar')) - self.assertEqual(p.relative_to('c:'), P('Foo/Bar')) - self.assertEqual(p.relative_to(P('c:foO')), P('Bar')) - self.assertEqual(p.relative_to('c:foO'), P('Bar')) - self.assertEqual(p.relative_to('c:foO/'), P('Bar')) - self.assertEqual(p.relative_to(P('c:foO/baR')), P()) - self.assertEqual(p.relative_to('c:foO/baR'), P()) - self.assertEqual(p.relative_to(P('c:'), walk_up=True), P('Foo/Bar')) - self.assertEqual(p.relative_to('c:', walk_up=True), P('Foo/Bar')) - self.assertEqual(p.relative_to(P('c:foO'), walk_up=True), P('Bar')) - self.assertEqual(p.relative_to('c:foO', walk_up=True), P('Bar')) - self.assertEqual(p.relative_to('c:foO/', walk_up=True), P('Bar')) - 
self.assertEqual(p.relative_to(P('c:foO/baR'), walk_up=True), P()) - self.assertEqual(p.relative_to('c:foO/baR', walk_up=True), P()) - self.assertEqual(p.relative_to(P('C:Foo/Bar/Baz'), walk_up=True), P('..')) - self.assertEqual(p.relative_to(P('C:Foo/Baz'), walk_up=True), P('../Bar')) - self.assertEqual(p.relative_to(P('C:Baz/Bar'), walk_up=True), P('../../Foo/Bar')) - # Unrelated paths. - self.assertRaises(ValueError, p.relative_to, P()) - self.assertRaises(ValueError, p.relative_to, '') - self.assertRaises(ValueError, p.relative_to, P('d:')) - self.assertRaises(ValueError, p.relative_to, P('/')) - self.assertRaises(ValueError, p.relative_to, P('Foo')) - self.assertRaises(ValueError, p.relative_to, P('/Foo')) - self.assertRaises(ValueError, p.relative_to, P('C:/Foo')) - self.assertRaises(ValueError, p.relative_to, P('C:Foo/Bar/Baz')) - self.assertRaises(ValueError, p.relative_to, P('C:Foo/Baz')) - self.assertRaises(ValueError, p.relative_to, P(), walk_up=True) - self.assertRaises(ValueError, p.relative_to, '', walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('d:'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('/'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('Foo'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('/Foo'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('C:/Foo'), walk_up=True) - p = P('C:/Foo/Bar') - self.assertEqual(p.relative_to(P('c:/')), P('Foo/Bar')) - self.assertEqual(p.relative_to('c:/'), P('Foo/Bar')) - self.assertEqual(p.relative_to(P('c:/foO')), P('Bar')) - self.assertEqual(p.relative_to('c:/foO'), P('Bar')) - self.assertEqual(p.relative_to('c:/foO/'), P('Bar')) - self.assertEqual(p.relative_to(P('c:/foO/baR')), P()) - self.assertEqual(p.relative_to('c:/foO/baR'), P()) - self.assertEqual(p.relative_to(P('c:/'), walk_up=True), P('Foo/Bar')) - self.assertEqual(p.relative_to('c:/', walk_up=True), P('Foo/Bar')) - self.assertEqual(p.relative_to(P('c:/foO'), 
walk_up=True), P('Bar')) - self.assertEqual(p.relative_to('c:/foO', walk_up=True), P('Bar')) - self.assertEqual(p.relative_to('c:/foO/', walk_up=True), P('Bar')) - self.assertEqual(p.relative_to(P('c:/foO/baR'), walk_up=True), P()) - self.assertEqual(p.relative_to('c:/foO/baR', walk_up=True), P()) - self.assertEqual(p.relative_to('C:/Baz', walk_up=True), P('../Foo/Bar')) - self.assertEqual(p.relative_to('C:/Foo/Bar/Baz', walk_up=True), P('..')) - self.assertEqual(p.relative_to('C:/Foo/Baz', walk_up=True), P('../Bar')) - # Unrelated paths. - self.assertRaises(ValueError, p.relative_to, 'c:') - self.assertRaises(ValueError, p.relative_to, P('c:')) - self.assertRaises(ValueError, p.relative_to, P('C:/Baz')) - self.assertRaises(ValueError, p.relative_to, P('C:/Foo/Bar/Baz')) - self.assertRaises(ValueError, p.relative_to, P('C:/Foo/Baz')) - self.assertRaises(ValueError, p.relative_to, P('C:Foo')) - self.assertRaises(ValueError, p.relative_to, P('d:')) - self.assertRaises(ValueError, p.relative_to, P('d:/')) - self.assertRaises(ValueError, p.relative_to, P('/')) - self.assertRaises(ValueError, p.relative_to, P('/Foo')) - self.assertRaises(ValueError, p.relative_to, P('//C/Foo')) - self.assertRaises(ValueError, p.relative_to, 'c:', walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('c:'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('C:Foo'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('d:'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('d:/'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('/'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('/Foo'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('//C/Foo'), walk_up=True) - # UNC paths. 
- p = P('//Server/Share/Foo/Bar') - self.assertEqual(p.relative_to(P('//sErver/sHare')), P('Foo/Bar')) - self.assertEqual(p.relative_to('//sErver/sHare'), P('Foo/Bar')) - self.assertEqual(p.relative_to('//sErver/sHare/'), P('Foo/Bar')) - self.assertEqual(p.relative_to(P('//sErver/sHare/Foo')), P('Bar')) - self.assertEqual(p.relative_to('//sErver/sHare/Foo'), P('Bar')) - self.assertEqual(p.relative_to('//sErver/sHare/Foo/'), P('Bar')) - self.assertEqual(p.relative_to(P('//sErver/sHare/Foo/Bar')), P()) - self.assertEqual(p.relative_to('//sErver/sHare/Foo/Bar'), P()) - self.assertEqual(p.relative_to(P('//sErver/sHare'), walk_up=True), P('Foo/Bar')) - self.assertEqual(p.relative_to('//sErver/sHare', walk_up=True), P('Foo/Bar')) - self.assertEqual(p.relative_to('//sErver/sHare/', walk_up=True), P('Foo/Bar')) - self.assertEqual(p.relative_to(P('//sErver/sHare/Foo'), walk_up=True), P('Bar')) - self.assertEqual(p.relative_to('//sErver/sHare/Foo', walk_up=True), P('Bar')) - self.assertEqual(p.relative_to('//sErver/sHare/Foo/', walk_up=True), P('Bar')) - self.assertEqual(p.relative_to(P('//sErver/sHare/Foo/Bar'), walk_up=True), P()) - self.assertEqual(p.relative_to('//sErver/sHare/Foo/Bar', walk_up=True), P()) - self.assertEqual(p.relative_to(P('//sErver/sHare/bar'), walk_up=True), P('../Foo/Bar')) - self.assertEqual(p.relative_to('//sErver/sHare/bar', walk_up=True), P('../Foo/Bar')) - # Unrelated paths. 
- self.assertRaises(ValueError, p.relative_to, P('/Server/Share/Foo')) - self.assertRaises(ValueError, p.relative_to, P('c:/Server/Share/Foo')) - self.assertRaises(ValueError, p.relative_to, P('//z/Share/Foo')) - self.assertRaises(ValueError, p.relative_to, P('//Server/z/Foo')) - self.assertRaises(ValueError, p.relative_to, P('/Server/Share/Foo'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('c:/Server/Share/Foo'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('//z/Share/Foo'), walk_up=True) - self.assertRaises(ValueError, p.relative_to, P('//Server/z/Foo'), walk_up=True) - - @needs_windows - def test_is_relative_to_windows(self): - P = self.cls - p = P('C:Foo/Bar') - self.assertTrue(p.is_relative_to(P('c:'))) - self.assertTrue(p.is_relative_to('c:')) - self.assertTrue(p.is_relative_to(P('c:foO'))) - self.assertTrue(p.is_relative_to('c:foO')) - self.assertTrue(p.is_relative_to('c:foO/')) - self.assertTrue(p.is_relative_to(P('c:foO/baR'))) - self.assertTrue(p.is_relative_to('c:foO/baR')) - # Unrelated paths. - self.assertFalse(p.is_relative_to(P())) - self.assertFalse(p.is_relative_to('')) - self.assertFalse(p.is_relative_to(P('d:'))) - self.assertFalse(p.is_relative_to(P('/'))) - self.assertFalse(p.is_relative_to(P('Foo'))) - self.assertFalse(p.is_relative_to(P('/Foo'))) - self.assertFalse(p.is_relative_to(P('C:/Foo'))) - self.assertFalse(p.is_relative_to(P('C:Foo/Bar/Baz'))) - self.assertFalse(p.is_relative_to(P('C:Foo/Baz'))) - p = P('C:/Foo/Bar') - self.assertTrue(p.is_relative_to(P('c:/'))) - self.assertTrue(p.is_relative_to(P('c:/foO'))) - self.assertTrue(p.is_relative_to('c:/foO/')) - self.assertTrue(p.is_relative_to(P('c:/foO/baR'))) - self.assertTrue(p.is_relative_to('c:/foO/baR')) - # Unrelated paths. 
- self.assertFalse(p.is_relative_to('c:')) - self.assertFalse(p.is_relative_to(P('C:/Baz'))) - self.assertFalse(p.is_relative_to(P('C:/Foo/Bar/Baz'))) - self.assertFalse(p.is_relative_to(P('C:/Foo/Baz'))) - self.assertFalse(p.is_relative_to(P('C:Foo'))) - self.assertFalse(p.is_relative_to(P('d:'))) - self.assertFalse(p.is_relative_to(P('d:/'))) - self.assertFalse(p.is_relative_to(P('/'))) - self.assertFalse(p.is_relative_to(P('/Foo'))) - self.assertFalse(p.is_relative_to(P('//C/Foo'))) - # UNC paths. - p = P('//Server/Share/Foo/Bar') - self.assertTrue(p.is_relative_to(P('//sErver/sHare'))) - self.assertTrue(p.is_relative_to('//sErver/sHare')) - self.assertTrue(p.is_relative_to('//sErver/sHare/')) - self.assertTrue(p.is_relative_to(P('//sErver/sHare/Foo'))) - self.assertTrue(p.is_relative_to('//sErver/sHare/Foo')) - self.assertTrue(p.is_relative_to('//sErver/sHare/Foo/')) - self.assertTrue(p.is_relative_to(P('//sErver/sHare/Foo/Bar'))) - self.assertTrue(p.is_relative_to('//sErver/sHare/Foo/Bar')) - # Unrelated paths. - self.assertFalse(p.is_relative_to(P('/Server/Share/Foo'))) - self.assertFalse(p.is_relative_to(P('c:/Server/Share/Foo'))) - self.assertFalse(p.is_relative_to(P('//z/Share/Foo'))) - self.assertFalse(p.is_relative_to(P('//Server/z/Foo'))) - - @needs_windows - def test_is_absolute_windows(self): - P = self.cls - # Under NT, only paths with both a drive and a root are absolute. - self.assertFalse(P().is_absolute()) - self.assertFalse(P('a').is_absolute()) - self.assertFalse(P('a/b/').is_absolute()) - self.assertFalse(P('/').is_absolute()) - self.assertFalse(P('/a').is_absolute()) - self.assertFalse(P('/a/b/').is_absolute()) - self.assertFalse(P('c:').is_absolute()) - self.assertFalse(P('c:a').is_absolute()) - self.assertFalse(P('c:a/b/').is_absolute()) - self.assertTrue(P('c:/').is_absolute()) - self.assertTrue(P('c:/a').is_absolute()) - self.assertTrue(P('c:/a/b/').is_absolute()) - # UNC paths are absolute by definition. 
- self.assertTrue(P('//').is_absolute()) - self.assertTrue(P('//a').is_absolute()) - self.assertTrue(P('//a/b').is_absolute()) - self.assertTrue(P('//a/b/').is_absolute()) - self.assertTrue(P('//a/b/c').is_absolute()) - self.assertTrue(P('//a/b/c/d').is_absolute()) - self.assertTrue(P('//?/UNC/').is_absolute()) - self.assertTrue(P('//?/UNC/spam').is_absolute()) - - @needs_windows - def test_join_windows(self): - P = self.cls - p = P('C:/a/b') - pp = p.joinpath('x/y') - self.assertEqual(pp, P('C:/a/b/x/y')) - pp = p.joinpath('/x/y') - self.assertEqual(pp, P('C:/x/y')) - # Joining with a different drive => the first path is ignored, even - # if the second path is relative. - pp = p.joinpath('D:x/y') - self.assertEqual(pp, P('D:x/y')) - pp = p.joinpath('D:/x/y') - self.assertEqual(pp, P('D:/x/y')) - pp = p.joinpath('//host/share/x/y') - self.assertEqual(pp, P('//host/share/x/y')) - # Joining with the same drive => the first path is appended to if - # the second path is relative. - pp = p.joinpath('c:x/y') - self.assertEqual(pp, P('C:/a/b/x/y')) - pp = p.joinpath('c:/x/y') - self.assertEqual(pp, P('C:/x/y')) - # Joining with files with NTFS data streams => the filename should - # not be parsed as a drive letter - pp = p.joinpath(P('./d:s')) - self.assertEqual(pp, P('C:/a/b/d:s')) - pp = p.joinpath(P('./dd:s')) - self.assertEqual(pp, P('C:/a/b/dd:s')) - pp = p.joinpath(P('E:d:s')) - self.assertEqual(pp, P('E:d:s')) - # Joining onto a UNC path with no root - pp = P('//').joinpath('server') - self.assertEqual(pp, P('//server')) - pp = P('//server').joinpath('share') - self.assertEqual(pp, P('//server/share')) - pp = P('//./BootPartition').joinpath('Windows') - self.assertEqual(pp, P('//./BootPartition/Windows')) - - @needs_windows - def test_div_windows(self): - # Basically the same as joinpath(). 
- P = self.cls - p = P('C:/a/b') - self.assertEqual(p / 'x/y', P('C:/a/b/x/y')) - self.assertEqual(p / 'x' / 'y', P('C:/a/b/x/y')) - self.assertEqual(p / '/x/y', P('C:/x/y')) - self.assertEqual(p / '/x' / 'y', P('C:/x/y')) - # Joining with a different drive => the first path is ignored, even - # if the second path is relative. - self.assertEqual(p / 'D:x/y', P('D:x/y')) - self.assertEqual(p / 'D:' / 'x/y', P('D:x/y')) - self.assertEqual(p / 'D:/x/y', P('D:/x/y')) - self.assertEqual(p / 'D:' / '/x/y', P('D:/x/y')) - self.assertEqual(p / '//host/share/x/y', P('//host/share/x/y')) - # Joining with the same drive => the first path is appended to if - # the second path is relative. - self.assertEqual(p / 'c:x/y', P('C:/a/b/x/y')) - self.assertEqual(p / 'c:/x/y', P('C:/x/y')) - # Joining with files with NTFS data streams => the filename should - # not be parsed as a drive letter - self.assertEqual(p / P('./d:s'), P('C:/a/b/d:s')) - self.assertEqual(p / P('./dd:s'), P('C:/a/b/dd:s')) - self.assertEqual(p / P('E:d:s'), P('E:d:s')) - class PurePosixPathTest(PurePathTest): cls = pathlib.PurePosixPath @@ -1943,24 +1352,6 @@ def test_touch_mode(self): st = os.stat(self.pathmod.join(self.base, 'masked_new_file')) self.assertEqual(stat.S_IMODE(st.st_mode), 0o750) - @needs_posix - def test_glob_posix(self): - P = self.cls - p = P(self.base) - given = set(p.glob("FILEa")) - expect = set() if not os_helper.fs_is_case_insensitive(self.base) else given - self.assertEqual(given, expect) - self.assertEqual(set(p.glob("FILEa*")), set()) - - @needs_posix - def test_rglob_posix(self): - P = self.cls - p = P(self.base, "dirC") - given = set(p.rglob("FILEd")) - expect = set() if not os_helper.fs_is_case_insensitive(self.base) else given - self.assertEqual(given, expect) - self.assertEqual(set(p.rglob("FILEd*")), set()) - @unittest.skipUnless(hasattr(pwd, 'getpwall'), 'pwd module does not expose getpwall()') @unittest.skipIf(sys.platform == "vxworks", @@ -2109,24 +1500,6 @@ def 
test_absolute_windows(self): self.assertEqual(str(P(other_drive).absolute()), other_cwd) self.assertEqual(str(P(other_drive + 'foo').absolute()), other_cwd + '\\foo') - @needs_windows - def test_glob_windows(self): - P = self.cls - p = P(self.base) - self.assertEqual(set(p.glob("FILEa")), { P(self.base, "fileA") }) - self.assertEqual(set(p.glob("*a\\")), { P(self.base, "dirA/") }) - self.assertEqual(set(p.glob("F*a")), { P(self.base, "fileA") }) - self.assertEqual(set(map(str, p.glob("FILEa"))), {f"{p}\\fileA"}) - self.assertEqual(set(map(str, p.glob("F*a"))), {f"{p}\\fileA"}) - - @needs_windows - def test_rglob_windows(self): - P = self.cls - p = P(self.base, "dirC") - self.assertEqual(set(p.rglob("FILEd")), { P(self.base, "dirC/dirD/fileD") }) - self.assertEqual(set(p.rglob("*\\")), { P(self.base, "dirC/dirD/") }) - self.assertEqual(set(map(str, p.rglob("FILEd"))), {f"{p}\\dirD\\fileD"}) - @needs_windows def test_expanduser_windows(self): P = self.cls diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index b19e9b40419c7a..ea70931eaa2c7e 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -174,6 +174,19 @@ def test_str_subclass_common(self): self._check_str_subclass('a/b.txt') self._check_str_subclass('/a/b.txt') + @needs_windows + def test_str_subclass_windows(self): + self._check_str_subclass('.\\a:b') + self._check_str_subclass('c:') + self._check_str_subclass('c:a') + self._check_str_subclass('c:a\\b.txt') + self._check_str_subclass('c:\\') + self._check_str_subclass('c:\\a') + self._check_str_subclass('c:\\a\\b.txt') + self._check_str_subclass('\\\\some\\share') + self._check_str_subclass('\\\\some\\share\\a') + self._check_str_subclass('\\\\some\\share\\a\\b.txt') + def test_with_segments_common(self): class P(self.cls): def __init__(self, *pathsegments, session_id): @@ -206,6 +219,55 @@ def test_join_common(self): pp = p.joinpath('/c') self.assertEqual(pp, 
P('/c')) + @needs_posix + def test_join_posix(self): + P = self.cls + p = P('//a') + pp = p.joinpath('b') + self.assertEqual(pp, P('//a/b')) + pp = P('/a').joinpath('//c') + self.assertEqual(pp, P('//c')) + pp = P('//a').joinpath('/c') + self.assertEqual(pp, P('/c')) + + @needs_windows + def test_join_windows(self): + P = self.cls + p = P('C:/a/b') + pp = p.joinpath('x/y') + self.assertEqual(pp, P('C:/a/b/x/y')) + pp = p.joinpath('/x/y') + self.assertEqual(pp, P('C:/x/y')) + # Joining with a different drive => the first path is ignored, even + # if the second path is relative. + pp = p.joinpath('D:x/y') + self.assertEqual(pp, P('D:x/y')) + pp = p.joinpath('D:/x/y') + self.assertEqual(pp, P('D:/x/y')) + pp = p.joinpath('//host/share/x/y') + self.assertEqual(pp, P('//host/share/x/y')) + # Joining with the same drive => the first path is appended to if + # the second path is relative. + pp = p.joinpath('c:x/y') + self.assertEqual(pp, P('C:/a/b/x/y')) + pp = p.joinpath('c:/x/y') + self.assertEqual(pp, P('C:/x/y')) + # Joining with files with NTFS data streams => the filename should + # not be parsed as a drive letter + pp = p.joinpath(P('./d:s')) + self.assertEqual(pp, P('C:/a/b/d:s')) + pp = p.joinpath(P('./dd:s')) + self.assertEqual(pp, P('C:/a/b/dd:s')) + pp = p.joinpath(P('E:d:s')) + self.assertEqual(pp, P('E:d:s')) + # Joining onto a UNC path with no root + pp = P('//').joinpath('server') + self.assertEqual(pp, P('//server')) + pp = P('//server').joinpath('share') + self.assertEqual(pp, P('//server/share')) + pp = P('//./BootPartition').joinpath('Windows') + self.assertEqual(pp, P('//./BootPartition/Windows')) + def test_div_common(self): # Basically the same as joinpath(). P = self.cls @@ -222,6 +284,44 @@ def test_div_common(self): pp = p/ '/c' self.assertEqual(pp, P('/c')) + @needs_posix + def test_div_posix(self): + # Basically the same as joinpath(). 
+ P = self.cls + p = P('//a') + pp = p / 'b' + self.assertEqual(pp, P('//a/b')) + pp = P('/a') / '//c' + self.assertEqual(pp, P('//c')) + pp = P('//a') / '/c' + self.assertEqual(pp, P('/c')) + + @needs_windows + def test_div_windows(self): + # Basically the same as joinpath(). + P = self.cls + p = P('C:/a/b') + self.assertEqual(p / 'x/y', P('C:/a/b/x/y')) + self.assertEqual(p / 'x' / 'y', P('C:/a/b/x/y')) + self.assertEqual(p / '/x/y', P('C:/x/y')) + self.assertEqual(p / '/x' / 'y', P('C:/x/y')) + # Joining with a different drive => the first path is ignored, even + # if the second path is relative. + self.assertEqual(p / 'D:x/y', P('D:x/y')) + self.assertEqual(p / 'D:' / 'x/y', P('D:x/y')) + self.assertEqual(p / 'D:/x/y', P('D:/x/y')) + self.assertEqual(p / 'D:' / '/x/y', P('D:/x/y')) + self.assertEqual(p / '//host/share/x/y', P('//host/share/x/y')) + # Joining with the same drive => the first path is appended to if + # the second path is relative. + self.assertEqual(p / 'c:x/y', P('C:/a/b/x/y')) + self.assertEqual(p / 'c:/x/y', P('C:/x/y')) + # Joining with files with NTFS data streams => the filename should + # not be parsed as a drive letter + self.assertEqual(p / P('./d:s'), P('C:/a/b/d:s')) + self.assertEqual(p / P('./dd:s'), P('C:/a/b/dd:s')) + self.assertEqual(p / P('E:d:s'), P('E:d:s')) + def _check_str(self, expected, args): p = self.cls(*args) self.assertEqual(str(p), expected.replace('/', self.sep)) @@ -232,6 +332,19 @@ def test_str_common(self): self._check_str(pathstr, (pathstr,)) # Other tests for str() are in test_equivalences(). 
+ @needs_windows + def test_str_windows(self): + p = self.cls('a/b/c') + self.assertEqual(str(p), 'a\\b\\c') + p = self.cls('c:/a/b/c') + self.assertEqual(str(p), 'c:\\a\\b\\c') + p = self.cls('//a/b') + self.assertEqual(str(p), '\\\\a\\b\\') + p = self.cls('//a/b/c') + self.assertEqual(str(p), '\\\\a\\b\\c') + p = self.cls('//a/b/c/d') + self.assertEqual(str(p), '\\\\a\\b\\c\\d') + def test_as_posix_common(self): P = self.cls for pathstr in ('a', 'a/b', 'a/b/c', '/', '/a/b', '/a/b/c'): @@ -287,6 +400,39 @@ def test_match_common(self): self.assertFalse(P('').match('**')) self.assertFalse(P('').match('**/*')) + @needs_posix + def test_match_posix(self): + P = self.cls + self.assertFalse(P('A.py').match('a.PY')) + + @needs_windows + def test_match_windows(self): + P = self.cls + # Absolute patterns. + self.assertTrue(P('c:/b.py').match('*:/*.py')) + self.assertTrue(P('c:/b.py').match('c:/*.py')) + self.assertFalse(P('d:/b.py').match('c:/*.py')) # wrong drive + self.assertFalse(P('b.py').match('/*.py')) + self.assertFalse(P('b.py').match('c:*.py')) + self.assertFalse(P('b.py').match('c:/*.py')) + self.assertFalse(P('c:b.py').match('/*.py')) + self.assertFalse(P('c:b.py').match('c:/*.py')) + self.assertFalse(P('/b.py').match('c:*.py')) + self.assertFalse(P('/b.py').match('c:/*.py')) + # UNC patterns. + self.assertTrue(P('//some/share/a.py').match('//*/*/*.py')) + self.assertTrue(P('//some/share/a.py').match('//some/share/*.py')) + self.assertFalse(P('//other/share/a.py').match('//some/share/*.py')) + self.assertFalse(P('//some/share/a/b.py').match('//some/share/*.py')) + # Case-insensitivity. 
+ self.assertTrue(P('B.py').match('b.PY')) + self.assertTrue(P('c:/a/B.Py').match('C:/A/*.pY')) + self.assertTrue(P('//Some/Share/B.Py').match('//somE/sharE/*.pY')) + # Path anchor doesn't match pattern anchor + self.assertFalse(P('c:/b.py').match('/*.py')) # 'c:/' vs '/' + self.assertFalse(P('c:/b.py').match('c:*.py')) # 'c:/' vs 'c:' + self.assertFalse(P('//some/share/a.py').match('/*.py')) # '//some/share/' vs '/' + def test_full_match_common(self): P = self.cls # Simple relative pattern. @@ -372,6 +518,19 @@ def test_parts_common(self): parts = p.parts self.assertEqual(parts, (sep, 'a', 'b')) + @needs_windows + def test_parts_windows(self): + P = self.cls + p = P('c:a/b') + parts = p.parts + self.assertEqual(parts, ('c:', 'a', 'b')) + p = P('c:/a/b') + parts = p.parts + self.assertEqual(parts, ('c:\\', 'a', 'b')) + p = P('//a/b/c/d') + parts = p.parts + self.assertEqual(parts, ('\\\\a\\b\\', 'c', 'd')) + def test_parent_common(self): # Relative P = self.cls @@ -387,6 +546,25 @@ def test_parent_common(self): self.assertEqual(p.parent.parent.parent, P('/')) self.assertEqual(p.parent.parent.parent.parent, P('/')) + @needs_windows + def test_parent_windows(self): + # Anchored + P = self.cls + p = P('z:a/b/c') + self.assertEqual(p.parent, P('z:a/b')) + self.assertEqual(p.parent.parent, P('z:a')) + self.assertEqual(p.parent.parent.parent, P('z:')) + self.assertEqual(p.parent.parent.parent.parent, P('z:')) + p = P('z:/a/b/c') + self.assertEqual(p.parent, P('z:/a/b')) + self.assertEqual(p.parent.parent, P('z:/a')) + self.assertEqual(p.parent.parent.parent, P('z:/')) + self.assertEqual(p.parent.parent.parent.parent, P('z:/')) + p = P('//a/b/c/d') + self.assertEqual(p.parent, P('//a/b/c')) + self.assertEqual(p.parent.parent, P('//a/b')) + self.assertEqual(p.parent.parent.parent, P('//a/b')) + def test_parents_common(self): # Relative P = self.cls @@ -434,12 +612,71 @@ def test_parents_common(self): with self.assertRaises(IndexError): par[3] + @needs_windows + def 
test_parents_windows(self): + # Anchored + P = self.cls + p = P('z:a/b/') + par = p.parents + self.assertEqual(len(par), 2) + self.assertEqual(par[0], P('z:a')) + self.assertEqual(par[1], P('z:')) + self.assertEqual(par[0:1], (P('z:a'),)) + self.assertEqual(par[:-1], (P('z:a'),)) + self.assertEqual(par[:2], (P('z:a'), P('z:'))) + self.assertEqual(par[1:], (P('z:'),)) + self.assertEqual(par[::2], (P('z:a'),)) + self.assertEqual(par[::-1], (P('z:'), P('z:a'))) + self.assertEqual(list(par), [P('z:a'), P('z:')]) + with self.assertRaises(IndexError): + par[2] + p = P('z:/a/b/') + par = p.parents + self.assertEqual(len(par), 2) + self.assertEqual(par[0], P('z:/a')) + self.assertEqual(par[1], P('z:/')) + self.assertEqual(par[0:1], (P('z:/a'),)) + self.assertEqual(par[0:-1], (P('z:/a'),)) + self.assertEqual(par[:2], (P('z:/a'), P('z:/'))) + self.assertEqual(par[1:], (P('z:/'),)) + self.assertEqual(par[::2], (P('z:/a'),)) + self.assertEqual(par[::-1], (P('z:/'), P('z:/a'),)) + self.assertEqual(list(par), [P('z:/a'), P('z:/')]) + with self.assertRaises(IndexError): + par[2] + p = P('//a/b/c/d') + par = p.parents + self.assertEqual(len(par), 2) + self.assertEqual(par[0], P('//a/b/c')) + self.assertEqual(par[1], P('//a/b')) + self.assertEqual(par[0:1], (P('//a/b/c'),)) + self.assertEqual(par[0:-1], (P('//a/b/c'),)) + self.assertEqual(par[:2], (P('//a/b/c'), P('//a/b'))) + self.assertEqual(par[1:], (P('//a/b'),)) + self.assertEqual(par[::2], (P('//a/b/c'),)) + self.assertEqual(par[::-1], (P('//a/b'), P('//a/b/c'))) + self.assertEqual(list(par), [P('//a/b/c'), P('//a/b')]) + with self.assertRaises(IndexError): + par[2] + def test_drive_common(self): P = self.cls self.assertEqual(P('a/b').drive, '') self.assertEqual(P('/a/b').drive, '') self.assertEqual(P('').drive, '') + @needs_windows + def test_drive_windows(self): + P = self.cls + self.assertEqual(P('c:').drive, 'c:') + self.assertEqual(P('c:a/b').drive, 'c:') + self.assertEqual(P('c:/').drive, 'c:') + 
self.assertEqual(P('c:/a/b/').drive, 'c:') + self.assertEqual(P('//a/b').drive, '\\\\a\\b') + self.assertEqual(P('//a/b/').drive, '\\\\a\\b') + self.assertEqual(P('//a/b/c/d').drive, '\\\\a\\b') + self.assertEqual(P('./c:a').drive, '') + def test_root_common(self): P = self.cls sep = self.sep @@ -448,6 +685,24 @@ def test_root_common(self): self.assertEqual(P('/').root, sep) self.assertEqual(P('/a/b').root, sep) + @needs_posix + def test_root_posix(self): + P = self.cls + self.assertEqual(P('/a/b').root, '/') + # POSIX special case for two leading slashes. + self.assertEqual(P('//a/b').root, '//') + + @needs_windows + def test_root_windows(self): + P = self.cls + self.assertEqual(P('c:').root, '') + self.assertEqual(P('c:a/b').root, '') + self.assertEqual(P('c:/').root, '\\') + self.assertEqual(P('c:/a/b/').root, '\\') + self.assertEqual(P('//a/b').root, '\\') + self.assertEqual(P('//a/b/').root, '\\') + self.assertEqual(P('//a/b/c/d').root, '\\') + def test_anchor_common(self): P = self.cls sep = self.sep @@ -456,6 +711,17 @@ def test_anchor_common(self): self.assertEqual(P('/').anchor, sep) self.assertEqual(P('/a/b').anchor, sep) + @needs_windows + def test_anchor_windows(self): + P = self.cls + self.assertEqual(P('c:').anchor, 'c:') + self.assertEqual(P('c:a/b').anchor, 'c:') + self.assertEqual(P('c:/').anchor, 'c:\\') + self.assertEqual(P('c:/a/b/').anchor, 'c:\\') + self.assertEqual(P('//a/b').anchor, '\\\\a\\b\\') + self.assertEqual(P('//a/b/').anchor, '\\\\a\\b\\') + self.assertEqual(P('//a/b/c/d').anchor, '\\\\a\\b\\') + def test_name_empty(self): P = self.cls self.assertEqual(P('').name, '') @@ -470,6 +736,18 @@ def test_name_common(self): self.assertEqual(P('a/b.py').name, 'b.py') self.assertEqual(P('/a/b.py').name, 'b.py') + @needs_windows + def test_name_windows(self): + P = self.cls + self.assertEqual(P('c:').name, '') + self.assertEqual(P('c:/').name, '') + self.assertEqual(P('c:a/b').name, 'b') + self.assertEqual(P('c:/a/b').name, 'b') + 
self.assertEqual(P('c:a/b.py').name, 'b.py') + self.assertEqual(P('c:/a/b.py').name, 'b.py') + self.assertEqual(P('//My.py/Share.php').name, '') + self.assertEqual(P('//My.py/Share.php/a/b').name, 'b') + def test_suffix_common(self): P = self.cls self.assertEqual(P('').suffix, '') @@ -490,6 +768,26 @@ def test_suffix_common(self): self.assertEqual(P('a/Some name. Ending with a dot.').suffix, '') self.assertEqual(P('/a/Some name. Ending with a dot.').suffix, '') + @needs_windows + def test_suffix_windows(self): + P = self.cls + self.assertEqual(P('c:').suffix, '') + self.assertEqual(P('c:/').suffix, '') + self.assertEqual(P('c:a/b').suffix, '') + self.assertEqual(P('c:/a/b').suffix, '') + self.assertEqual(P('c:a/b.py').suffix, '.py') + self.assertEqual(P('c:/a/b.py').suffix, '.py') + self.assertEqual(P('c:a/.hgrc').suffix, '') + self.assertEqual(P('c:/a/.hgrc').suffix, '') + self.assertEqual(P('c:a/.hg.rc').suffix, '.rc') + self.assertEqual(P('c:/a/.hg.rc').suffix, '.rc') + self.assertEqual(P('c:a/b.tar.gz').suffix, '.gz') + self.assertEqual(P('c:/a/b.tar.gz').suffix, '.gz') + self.assertEqual(P('c:a/Some name. Ending with a dot.').suffix, '') + self.assertEqual(P('c:/a/Some name. Ending with a dot.').suffix, '') + self.assertEqual(P('//My.py/Share.php').suffix, '') + self.assertEqual(P('//My.py/Share.php/a/b').suffix, '') + def test_suffixes_common(self): P = self.cls self.assertEqual(P('').suffixes, []) @@ -509,6 +807,26 @@ def test_suffixes_common(self): self.assertEqual(P('a/Some name. Ending with a dot.').suffixes, []) self.assertEqual(P('/a/Some name. 
Ending with a dot.').suffixes, []) + @needs_windows + def test_suffixes_windows(self): + P = self.cls + self.assertEqual(P('c:').suffixes, []) + self.assertEqual(P('c:/').suffixes, []) + self.assertEqual(P('c:a/b').suffixes, []) + self.assertEqual(P('c:/a/b').suffixes, []) + self.assertEqual(P('c:a/b.py').suffixes, ['.py']) + self.assertEqual(P('c:/a/b.py').suffixes, ['.py']) + self.assertEqual(P('c:a/.hgrc').suffixes, []) + self.assertEqual(P('c:/a/.hgrc').suffixes, []) + self.assertEqual(P('c:a/.hg.rc').suffixes, ['.rc']) + self.assertEqual(P('c:/a/.hg.rc').suffixes, ['.rc']) + self.assertEqual(P('c:a/b.tar.gz').suffixes, ['.tar', '.gz']) + self.assertEqual(P('c:/a/b.tar.gz').suffixes, ['.tar', '.gz']) + self.assertEqual(P('//My.py/Share.php').suffixes, []) + self.assertEqual(P('//My.py/Share.php/a/b').suffixes, []) + self.assertEqual(P('c:a/Some name. Ending with a dot.').suffixes, []) + self.assertEqual(P('c:/a/Some name. Ending with a dot.').suffixes, []) + def test_stem_empty(self): P = self.cls self.assertEqual(P('').stem, '') @@ -526,6 +844,20 @@ def test_stem_common(self): self.assertEqual(P('a/Some name. Ending with a dot.').stem, 'Some name. Ending with a dot.') + @needs_windows + def test_stem_windows(self): + P = self.cls + self.assertEqual(P('c:').stem, '') + self.assertEqual(P('c:.').stem, '') + self.assertEqual(P('c:..').stem, '..') + self.assertEqual(P('c:/').stem, '') + self.assertEqual(P('c:a/b').stem, 'b') + self.assertEqual(P('c:a/b.py').stem, 'b') + self.assertEqual(P('c:a/.hgrc').stem, '.hgrc') + self.assertEqual(P('c:a/.hg.rc').stem, '.hg') + self.assertEqual(P('c:a/b.tar.gz').stem, 'b.tar') + self.assertEqual(P('c:a/Some name. Ending with a dot.').stem, + 'Some name. 
Ending with a dot.') def test_with_name_common(self): P = self.cls self.assertEqual(P('a/b').with_name('d.xml'), P('a/d.xml')) @@ -535,6 +867,23 @@ def test_with_name_common(self): self.assertEqual(P('a/Dot ending.').with_name('d.xml'), P('a/d.xml')) self.assertEqual(P('/a/Dot ending.').with_name('d.xml'), P('/a/d.xml')) + @needs_windows + def test_with_name_windows(self): + P = self.cls + self.assertEqual(P('c:a/b').with_name('d.xml'), P('c:a/d.xml')) + self.assertEqual(P('c:/a/b').with_name('d.xml'), P('c:/a/d.xml')) + self.assertEqual(P('c:a/Dot ending.').with_name('d.xml'), P('c:a/d.xml')) + self.assertEqual(P('c:/a/Dot ending.').with_name('d.xml'), P('c:/a/d.xml')) + self.assertRaises(ValueError, P('c:').with_name, 'd.xml') + self.assertRaises(ValueError, P('c:/').with_name, 'd.xml') + self.assertRaises(ValueError, P('//My/Share').with_name, 'd.xml') + self.assertEqual(str(P('a').with_name('d:')), '.\\d:') + self.assertEqual(str(P('a').with_name('d:e')), '.\\d:e') + self.assertEqual(P('c:a/b').with_name('d:'), P('c:a/d:')) + self.assertEqual(P('c:a/b').with_name('d:e'), P('c:a/d:e')) + self.assertRaises(ValueError, P('c:a/b').with_name, 'd:/e') + self.assertRaises(ValueError, P('c:a/b').with_name, '//My/Share') + def test_with_name_empty(self): P = self.cls self.assertEqual(P('').with_name('d.xml'), P('d.xml')) @@ -559,6 +908,23 @@ def test_with_stem_common(self): self.assertEqual(P('a/Dot ending.').with_stem('d'), P('a/d')) self.assertEqual(P('/a/Dot ending.').with_stem('d'), P('/a/d')) + @needs_windows + def test_with_stem_windows(self): + P = self.cls + self.assertEqual(P('c:a/b').with_stem('d'), P('c:a/d')) + self.assertEqual(P('c:/a/b').with_stem('d'), P('c:/a/d')) + self.assertEqual(P('c:a/Dot ending.').with_stem('d'), P('c:a/d')) + self.assertEqual(P('c:/a/Dot ending.').with_stem('d'), P('c:/a/d')) + self.assertRaises(ValueError, P('c:').with_stem, 'd') + self.assertRaises(ValueError, P('c:/').with_stem, 'd') + self.assertRaises(ValueError, 
P('//My/Share').with_stem, 'd') + self.assertEqual(str(P('a').with_stem('d:')), '.\\d:') + self.assertEqual(str(P('a').with_stem('d:e')), '.\\d:e') + self.assertEqual(P('c:a/b').with_stem('d:'), P('c:a/d:')) + self.assertEqual(P('c:a/b').with_stem('d:e'), P('c:a/d:e')) + self.assertRaises(ValueError, P('c:a/b').with_stem, 'd:/e') + self.assertRaises(ValueError, P('c:a/b').with_stem, '//My/Share') + def test_with_stem_empty(self): P = self.cls self.assertEqual(P('').with_stem('d'), P('d')) @@ -583,6 +949,31 @@ def test_with_suffix_common(self): self.assertEqual(P('a/b.py').with_suffix(''), P('a/b')) self.assertEqual(P('/a/b').with_suffix(''), P('/a/b')) + @needs_windows + def test_with_suffix_windows(self): + P = self.cls + self.assertEqual(P('c:a/b').with_suffix('.gz'), P('c:a/b.gz')) + self.assertEqual(P('c:/a/b').with_suffix('.gz'), P('c:/a/b.gz')) + self.assertEqual(P('c:a/b.py').with_suffix('.gz'), P('c:a/b.gz')) + self.assertEqual(P('c:/a/b.py').with_suffix('.gz'), P('c:/a/b.gz')) + # Path doesn't have a "filename" component. + self.assertRaises(ValueError, P('').with_suffix, '.gz') + self.assertRaises(ValueError, P('.').with_suffix, '.gz') + self.assertRaises(ValueError, P('/').with_suffix, '.gz') + self.assertRaises(ValueError, P('//My/Share').with_suffix, '.gz') + # Invalid suffix. 
+ self.assertRaises(ValueError, P('c:a/b').with_suffix, 'gz') + self.assertRaises(ValueError, P('c:a/b').with_suffix, '/') + self.assertRaises(ValueError, P('c:a/b').with_suffix, '\\') + self.assertRaises(ValueError, P('c:a/b').with_suffix, 'c:') + self.assertRaises(ValueError, P('c:a/b').with_suffix, '/.gz') + self.assertRaises(ValueError, P('c:a/b').with_suffix, '\\.gz') + self.assertRaises(ValueError, P('c:a/b').with_suffix, 'c:.gz') + self.assertRaises(ValueError, P('c:a/b').with_suffix, 'c/d') + self.assertRaises(ValueError, P('c:a/b').with_suffix, 'c\\d') + self.assertRaises(ValueError, P('c:a/b').with_suffix, '.c/d') + self.assertRaises(ValueError, P('c:a/b').with_suffix, '.c\\d') + def test_with_suffix_empty(self): P = self.cls # Path doesn't have a "filename" component. @@ -677,6 +1068,112 @@ def test_relative_to_common(self): self.assertRaises(ValueError, p.relative_to, P("a/.."), walk_up=True) self.assertRaises(ValueError, p.relative_to, P("/a/.."), walk_up=True) + @needs_windows + def test_relative_to_windows(self): + P = self.cls + p = P('C:Foo/Bar') + self.assertEqual(p.relative_to(P('c:')), P('Foo/Bar')) + self.assertEqual(p.relative_to('c:'), P('Foo/Bar')) + self.assertEqual(p.relative_to(P('c:foO')), P('Bar')) + self.assertEqual(p.relative_to('c:foO'), P('Bar')) + self.assertEqual(p.relative_to('c:foO/'), P('Bar')) + self.assertEqual(p.relative_to(P('c:foO/baR')), P()) + self.assertEqual(p.relative_to('c:foO/baR'), P()) + self.assertEqual(p.relative_to(P('c:'), walk_up=True), P('Foo/Bar')) + self.assertEqual(p.relative_to('c:', walk_up=True), P('Foo/Bar')) + self.assertEqual(p.relative_to(P('c:foO'), walk_up=True), P('Bar')) + self.assertEqual(p.relative_to('c:foO', walk_up=True), P('Bar')) + self.assertEqual(p.relative_to('c:foO/', walk_up=True), P('Bar')) + self.assertEqual(p.relative_to(P('c:foO/baR'), walk_up=True), P()) + self.assertEqual(p.relative_to('c:foO/baR', walk_up=True), P()) + self.assertEqual(p.relative_to(P('C:Foo/Bar/Baz'), 
walk_up=True), P('..')) + self.assertEqual(p.relative_to(P('C:Foo/Baz'), walk_up=True), P('../Bar')) + self.assertEqual(p.relative_to(P('C:Baz/Bar'), walk_up=True), P('../../Foo/Bar')) + # Unrelated paths. + self.assertRaises(ValueError, p.relative_to, P()) + self.assertRaises(ValueError, p.relative_to, '') + self.assertRaises(ValueError, p.relative_to, P('d:')) + self.assertRaises(ValueError, p.relative_to, P('/')) + self.assertRaises(ValueError, p.relative_to, P('Foo')) + self.assertRaises(ValueError, p.relative_to, P('/Foo')) + self.assertRaises(ValueError, p.relative_to, P('C:/Foo')) + self.assertRaises(ValueError, p.relative_to, P('C:Foo/Bar/Baz')) + self.assertRaises(ValueError, p.relative_to, P('C:Foo/Baz')) + self.assertRaises(ValueError, p.relative_to, P(), walk_up=True) + self.assertRaises(ValueError, p.relative_to, '', walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('d:'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('/'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('Foo'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('/Foo'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('C:/Foo'), walk_up=True) + p = P('C:/Foo/Bar') + self.assertEqual(p.relative_to(P('c:/')), P('Foo/Bar')) + self.assertEqual(p.relative_to('c:/'), P('Foo/Bar')) + self.assertEqual(p.relative_to(P('c:/foO')), P('Bar')) + self.assertEqual(p.relative_to('c:/foO'), P('Bar')) + self.assertEqual(p.relative_to('c:/foO/'), P('Bar')) + self.assertEqual(p.relative_to(P('c:/foO/baR')), P()) + self.assertEqual(p.relative_to('c:/foO/baR'), P()) + self.assertEqual(p.relative_to(P('c:/'), walk_up=True), P('Foo/Bar')) + self.assertEqual(p.relative_to('c:/', walk_up=True), P('Foo/Bar')) + self.assertEqual(p.relative_to(P('c:/foO'), walk_up=True), P('Bar')) + self.assertEqual(p.relative_to('c:/foO', walk_up=True), P('Bar')) + self.assertEqual(p.relative_to('c:/foO/', walk_up=True), P('Bar')) + 
self.assertEqual(p.relative_to(P('c:/foO/baR'), walk_up=True), P()) + self.assertEqual(p.relative_to('c:/foO/baR', walk_up=True), P()) + self.assertEqual(p.relative_to('C:/Baz', walk_up=True), P('../Foo/Bar')) + self.assertEqual(p.relative_to('C:/Foo/Bar/Baz', walk_up=True), P('..')) + self.assertEqual(p.relative_to('C:/Foo/Baz', walk_up=True), P('../Bar')) + # Unrelated paths. + self.assertRaises(ValueError, p.relative_to, 'c:') + self.assertRaises(ValueError, p.relative_to, P('c:')) + self.assertRaises(ValueError, p.relative_to, P('C:/Baz')) + self.assertRaises(ValueError, p.relative_to, P('C:/Foo/Bar/Baz')) + self.assertRaises(ValueError, p.relative_to, P('C:/Foo/Baz')) + self.assertRaises(ValueError, p.relative_to, P('C:Foo')) + self.assertRaises(ValueError, p.relative_to, P('d:')) + self.assertRaises(ValueError, p.relative_to, P('d:/')) + self.assertRaises(ValueError, p.relative_to, P('/')) + self.assertRaises(ValueError, p.relative_to, P('/Foo')) + self.assertRaises(ValueError, p.relative_to, P('//C/Foo')) + self.assertRaises(ValueError, p.relative_to, 'c:', walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('c:'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('C:Foo'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('d:'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('d:/'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('/'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('/Foo'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('//C/Foo'), walk_up=True) + # UNC paths. 
+ p = P('//Server/Share/Foo/Bar') + self.assertEqual(p.relative_to(P('//sErver/sHare')), P('Foo/Bar')) + self.assertEqual(p.relative_to('//sErver/sHare'), P('Foo/Bar')) + self.assertEqual(p.relative_to('//sErver/sHare/'), P('Foo/Bar')) + self.assertEqual(p.relative_to(P('//sErver/sHare/Foo')), P('Bar')) + self.assertEqual(p.relative_to('//sErver/sHare/Foo'), P('Bar')) + self.assertEqual(p.relative_to('//sErver/sHare/Foo/'), P('Bar')) + self.assertEqual(p.relative_to(P('//sErver/sHare/Foo/Bar')), P()) + self.assertEqual(p.relative_to('//sErver/sHare/Foo/Bar'), P()) + self.assertEqual(p.relative_to(P('//sErver/sHare'), walk_up=True), P('Foo/Bar')) + self.assertEqual(p.relative_to('//sErver/sHare', walk_up=True), P('Foo/Bar')) + self.assertEqual(p.relative_to('//sErver/sHare/', walk_up=True), P('Foo/Bar')) + self.assertEqual(p.relative_to(P('//sErver/sHare/Foo'), walk_up=True), P('Bar')) + self.assertEqual(p.relative_to('//sErver/sHare/Foo', walk_up=True), P('Bar')) + self.assertEqual(p.relative_to('//sErver/sHare/Foo/', walk_up=True), P('Bar')) + self.assertEqual(p.relative_to(P('//sErver/sHare/Foo/Bar'), walk_up=True), P()) + self.assertEqual(p.relative_to('//sErver/sHare/Foo/Bar', walk_up=True), P()) + self.assertEqual(p.relative_to(P('//sErver/sHare/bar'), walk_up=True), P('../Foo/Bar')) + self.assertEqual(p.relative_to('//sErver/sHare/bar', walk_up=True), P('../Foo/Bar')) + # Unrelated paths. 
+ self.assertRaises(ValueError, p.relative_to, P('/Server/Share/Foo')) + self.assertRaises(ValueError, p.relative_to, P('c:/Server/Share/Foo')) + self.assertRaises(ValueError, p.relative_to, P('//z/Share/Foo')) + self.assertRaises(ValueError, p.relative_to, P('//Server/z/Foo')) + self.assertRaises(ValueError, p.relative_to, P('/Server/Share/Foo'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('c:/Server/Share/Foo'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('//z/Share/Foo'), walk_up=True) + self.assertRaises(ValueError, p.relative_to, P('//Server/z/Foo'), walk_up=True) + def test_is_relative_to_common(self): P = self.cls p = P('a/b') @@ -709,6 +1206,98 @@ def test_is_relative_to_common(self): self.assertFalse(p.is_relative_to('')) self.assertFalse(p.is_relative_to(P('a'))) + @needs_windows + def test_is_relative_to_windows(self): + P = self.cls + p = P('C:Foo/Bar') + self.assertTrue(p.is_relative_to(P('c:'))) + self.assertTrue(p.is_relative_to('c:')) + self.assertTrue(p.is_relative_to(P('c:foO'))) + self.assertTrue(p.is_relative_to('c:foO')) + self.assertTrue(p.is_relative_to('c:foO/')) + self.assertTrue(p.is_relative_to(P('c:foO/baR'))) + self.assertTrue(p.is_relative_to('c:foO/baR')) + # Unrelated paths. + self.assertFalse(p.is_relative_to(P())) + self.assertFalse(p.is_relative_to('')) + self.assertFalse(p.is_relative_to(P('d:'))) + self.assertFalse(p.is_relative_to(P('/'))) + self.assertFalse(p.is_relative_to(P('Foo'))) + self.assertFalse(p.is_relative_to(P('/Foo'))) + self.assertFalse(p.is_relative_to(P('C:/Foo'))) + self.assertFalse(p.is_relative_to(P('C:Foo/Bar/Baz'))) + self.assertFalse(p.is_relative_to(P('C:Foo/Baz'))) + p = P('C:/Foo/Bar') + self.assertTrue(p.is_relative_to(P('c:/'))) + self.assertTrue(p.is_relative_to(P('c:/foO'))) + self.assertTrue(p.is_relative_to('c:/foO/')) + self.assertTrue(p.is_relative_to(P('c:/foO/baR'))) + self.assertTrue(p.is_relative_to('c:/foO/baR')) + # Unrelated paths. 
+ self.assertFalse(p.is_relative_to('c:')) + self.assertFalse(p.is_relative_to(P('C:/Baz'))) + self.assertFalse(p.is_relative_to(P('C:/Foo/Bar/Baz'))) + self.assertFalse(p.is_relative_to(P('C:/Foo/Baz'))) + self.assertFalse(p.is_relative_to(P('C:Foo'))) + self.assertFalse(p.is_relative_to(P('d:'))) + self.assertFalse(p.is_relative_to(P('d:/'))) + self.assertFalse(p.is_relative_to(P('/'))) + self.assertFalse(p.is_relative_to(P('/Foo'))) + self.assertFalse(p.is_relative_to(P('//C/Foo'))) + # UNC paths. + p = P('//Server/Share/Foo/Bar') + self.assertTrue(p.is_relative_to(P('//sErver/sHare'))) + self.assertTrue(p.is_relative_to('//sErver/sHare')) + self.assertTrue(p.is_relative_to('//sErver/sHare/')) + self.assertTrue(p.is_relative_to(P('//sErver/sHare/Foo'))) + self.assertTrue(p.is_relative_to('//sErver/sHare/Foo')) + self.assertTrue(p.is_relative_to('//sErver/sHare/Foo/')) + self.assertTrue(p.is_relative_to(P('//sErver/sHare/Foo/Bar'))) + self.assertTrue(p.is_relative_to('//sErver/sHare/Foo/Bar')) + # Unrelated paths. + self.assertFalse(p.is_relative_to(P('/Server/Share/Foo'))) + self.assertFalse(p.is_relative_to(P('c:/Server/Share/Foo'))) + self.assertFalse(p.is_relative_to(P('//z/Share/Foo'))) + self.assertFalse(p.is_relative_to(P('//Server/z/Foo'))) + + @needs_posix + def test_is_absolute_posix(self): + P = self.cls + self.assertFalse(P('').is_absolute()) + self.assertFalse(P('a').is_absolute()) + self.assertFalse(P('a/b/').is_absolute()) + self.assertTrue(P('/').is_absolute()) + self.assertTrue(P('/a').is_absolute()) + self.assertTrue(P('/a/b/').is_absolute()) + self.assertTrue(P('//a').is_absolute()) + self.assertTrue(P('//a/b').is_absolute()) + + @needs_windows + def test_is_absolute_windows(self): + P = self.cls + # Under NT, only paths with both a drive and a root are absolute. 
+ self.assertFalse(P().is_absolute()) + self.assertFalse(P('a').is_absolute()) + self.assertFalse(P('a/b/').is_absolute()) + self.assertFalse(P('/').is_absolute()) + self.assertFalse(P('/a').is_absolute()) + self.assertFalse(P('/a/b/').is_absolute()) + self.assertFalse(P('c:').is_absolute()) + self.assertFalse(P('c:a').is_absolute()) + self.assertFalse(P('c:a/b/').is_absolute()) + self.assertTrue(P('c:/').is_absolute()) + self.assertTrue(P('c:/a').is_absolute()) + self.assertTrue(P('c:/a/b/').is_absolute()) + # UNC paths are absolute by definition. + self.assertTrue(P('//').is_absolute()) + self.assertTrue(P('//a').is_absolute()) + self.assertTrue(P('//a/b').is_absolute()) + self.assertTrue(P('//a/b/').is_absolute()) + self.assertTrue(P('//a/b/c').is_absolute()) + self.assertTrue(P('//a/b/c/d').is_absolute()) + self.assertTrue(P('//?/UNC/').is_absolute()) + self.assertTrue(P('//?/UNC/spam').is_absolute()) + # # Tests for the virtual classes. @@ -1124,6 +1713,25 @@ def _check(glob, expected): else: _check(p.glob("*/"), ["dirA/", "dirB/", "dirC/", "dirE/", "linkB/"]) + @needs_posix + def test_glob_posix(self): + P = self.cls + p = P(self.base) + given = set(p.glob("FILEa")) + expect = set() + self.assertEqual(given, expect) + self.assertEqual(set(p.glob("FILEa*")), set()) + + @needs_windows + def test_glob_windows(self): + P = self.cls + p = P(self.base) + self.assertEqual(set(p.glob("FILEa")), { P(self.base, "fileA") }) + self.assertEqual(set(p.glob("*a\\")), { P(self.base, "dirA/") }) + self.assertEqual(set(p.glob("F*a")), { P(self.base, "fileA") }) + self.assertEqual(set(map(str, p.glob("FILEa"))), {f"{p}\\fileA"}) + self.assertEqual(set(map(str, p.glob("F*a"))), {f"{p}\\fileA"}) + def test_glob_empty_pattern(self): def _check(glob, expected): self.assertEqual(set(glob), { P(self.base, q) for q in expected }) @@ -1236,6 +1844,23 @@ def _check(glob, expected): _check(p.rglob("*.txt"), ["dirC/novel.txt"]) _check(p.rglob("*.*"), ["dirC/novel.txt"]) + @needs_posix + 
def test_rglob_posix(self): + P = self.cls + p = P(self.base, "dirC") + given = set(p.rglob("FILEd")) + expect = set() + self.assertEqual(given, expect) + self.assertEqual(set(p.rglob("FILEd*")), set()) + + @needs_windows + def test_rglob_windows(self): + P = self.cls + p = P(self.base, "dirC") + self.assertEqual(set(p.rglob("FILEd")), { P(self.base, "dirC/dirD/fileD") }) + self.assertEqual(set(p.rglob("*\\")), { P(self.base, "dirC/dirD/") }) + self.assertEqual(set(map(str, p.rglob("FILEd"))), {f"{p}\\dirD\\fileD"}) + @needs_symlinks def test_rglob_follow_symlinks_common(self): def _check(path, glob, expected): From 7a7bce5a0ab249407e866a1e955d21fa2b0c8506 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Fri, 26 Jan 2024 19:38:14 -0800 Subject: [PATCH 122/160] gh-113055: Use pointer for interp->obmalloc state (gh-113412) For interpreters that share state with the main interpreter, this points to the same static memory structure. For interpreters with their own obmalloc state, it is heap allocated. Add free_obmalloc_arenas() which will free the obmalloc arenas and radix tree structures for interpreters with their own obmalloc state. 
Co-authored-by: Eric Snow --- Include/internal/pycore_interp.h | 12 +- Include/internal/pycore_obmalloc.h | 2 + Include/internal/pycore_obmalloc_init.h | 7 - Include/internal/pycore_runtime_init.h | 1 - ...-12-22-13-21-39.gh-issue-113055.47xBMF.rst | 5 + Objects/obmalloc.c | 121 +++++++++++++++++- Python/pylifecycle.c | 16 +++ Python/pystate.c | 14 +- Tools/c-analyzer/cpython/ignored.tsv | 3 +- 9 files changed, 157 insertions(+), 24 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-12-22-13-21-39.gh-issue-113055.47xBMF.rst diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h index 662a18d93f329d..04e75940dcb573 100644 --- a/Include/internal/pycore_interp.h +++ b/Include/internal/pycore_interp.h @@ -203,7 +203,17 @@ struct _is { struct _mimalloc_interp_state mimalloc; #endif - struct _obmalloc_state obmalloc; + // Per-interpreter state for the obmalloc allocator. For the main + // interpreter and for all interpreters that don't have their + // own obmalloc state, this points to the static structure in + // obmalloc.c obmalloc_state_main. For other interpreters, it is + // heap allocated by _PyMem_init_obmalloc() and freed when the + // interpreter structure is freed. In the case of a heap allocated + // obmalloc state, it is not safe to hold on to or use memory after + // the interpreter is freed. The obmalloc state corresponding to + // that allocated memory is gone. See free_obmalloc_arenas() for + // more comments. 
+ struct _obmalloc_state *obmalloc; PyObject *audit_hooks; PyType_WatchCallback type_watchers[TYPE_MAX_WATCHERS]; diff --git a/Include/internal/pycore_obmalloc.h b/Include/internal/pycore_obmalloc.h index 17572dba65487d..9140d8f08f0af1 100644 --- a/Include/internal/pycore_obmalloc.h +++ b/Include/internal/pycore_obmalloc.h @@ -686,6 +686,8 @@ extern Py_ssize_t _Py_GetGlobalAllocatedBlocks(void); _Py_GetGlobalAllocatedBlocks() extern Py_ssize_t _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *); extern void _PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *); +extern int _PyMem_init_obmalloc(PyInterpreterState *interp); +extern bool _PyMem_obmalloc_state_on_heap(PyInterpreterState *interp); #ifdef WITH_PYMALLOC diff --git a/Include/internal/pycore_obmalloc_init.h b/Include/internal/pycore_obmalloc_init.h index 8ee72ff2d4126f..e6811b7aeca73c 100644 --- a/Include/internal/pycore_obmalloc_init.h +++ b/Include/internal/pycore_obmalloc_init.h @@ -59,13 +59,6 @@ extern "C" { .dump_debug_stats = -1, \ } -#define _obmalloc_state_INIT(obmalloc) \ - { \ - .pools = { \ - .used = _obmalloc_pools_INIT(obmalloc.pools), \ - }, \ - } - #ifdef __cplusplus } diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index b4806ab09fd145..0a5c92bb84b524 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -155,7 +155,6 @@ extern PyTypeObject _PyExc_MemoryError; { \ .id_refcount = -1, \ .imports = IMPORTS_INIT, \ - .obmalloc = _obmalloc_state_INIT(INTERP.obmalloc), \ .ceval = { \ .recursion_limit = Py_DEFAULT_RECURSION_LIMIT, \ }, \ diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-12-22-13-21-39.gh-issue-113055.47xBMF.rst b/Misc/NEWS.d/next/Core and Builtins/2023-12-22-13-21-39.gh-issue-113055.47xBMF.rst new file mode 100644 index 00000000000000..90f49272218c96 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-12-22-13-21-39.gh-issue-113055.47xBMF.rst @@ -0,0 +1,5 @@ 
+Make interp->obmalloc a pointer. For interpreters that share state with the +main interpreter, this points to the same static memory structure. For +interpreters with their own obmalloc state, it is heap allocated. Add +free_obmalloc_arenas() which will free the obmalloc arenas and radix tree +structures for interpreters with their own obmalloc state. diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 16d5bcb53e7eb7..bea4ea85332bdd 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -7,6 +7,7 @@ #include "pycore_pyerrors.h" // _Py_FatalErrorFormat() #include "pycore_pymem.h" #include "pycore_pystate.h" // _PyInterpreterState_GET +#include "pycore_obmalloc_init.h" #include // malloc() #include @@ -1016,6 +1017,13 @@ static int running_on_valgrind = -1; typedef struct _obmalloc_state OMState; +/* obmalloc state for main interpreter and shared by all interpreters without + * their own obmalloc state. By not explicitly initalizing this structure, it + * will be allocated in the BSS which is a small performance win. The radix + * tree arrays are fairly large but are sparsely used. */ +static struct _obmalloc_state obmalloc_state_main; +static bool obmalloc_state_initialized; + static inline int has_own_state(PyInterpreterState *interp) { @@ -1028,10 +1036,8 @@ static inline OMState * get_state(void) { PyInterpreterState *interp = _PyInterpreterState_GET(); - if (!has_own_state(interp)) { - interp = _PyInterpreterState_Main(); - } - return &interp->obmalloc; + assert(interp->obmalloc != NULL); // otherwise not initialized or freed + return interp->obmalloc; } // These macros all rely on a local "state" variable. 
@@ -1094,7 +1100,11 @@ _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp) "the interpreter doesn't have its own allocator"); } #endif - OMState *state = &interp->obmalloc; + OMState *state = interp->obmalloc; + + if (state == NULL) { + return 0; + } Py_ssize_t n = raw_allocated_blocks; /* add up allocated blocks for used pools */ @@ -1116,6 +1126,8 @@ _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp) return n; } +static void free_obmalloc_arenas(PyInterpreterState *interp); + void _PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp) { @@ -1124,10 +1136,20 @@ _PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp) return; } #endif - if (has_own_state(interp)) { + if (has_own_state(interp) && interp->obmalloc != NULL) { Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp); assert(has_own_state(interp) || leaked == 0); interp->runtime->obmalloc.interpreter_leaks += leaked; + if (_PyMem_obmalloc_state_on_heap(interp) && leaked == 0) { + // free the obmalloc arenas and radix tree nodes. If leaked > 0 + // then some of the memory allocated by obmalloc has not been + // freed. It might be safe to free the arenas in that case but + // it's possible that extension modules are still using that + // memory. So, it is safer to not free and to leak. Perhaps there + // should be a warning when this happens. It should be possible to + // use a tool like "-fsanitize=address" to track down these leaks. + free_obmalloc_arenas(interp); + } } } @@ -2717,9 +2739,96 @@ _PyDebugAllocatorStats(FILE *out, (void)printone(out, buf2, num_blocks * sizeof_block); } +// Return true if the obmalloc state structure is heap allocated, +// by PyMem_RawCalloc(). For the main interpreter, this structure +// is allocated in the BSS. Allocating that way gives some memory savings +// and a small performance win (at least on a demand paged OS). On +// 64-bit platforms, the obmalloc structure is 256 kB.
Most of that +// memory is for the arena_map_top array. Since normally only one entry +// of that array is used, only one page of resident memory is actually +// used, rather than the full 256 kB. +bool _PyMem_obmalloc_state_on_heap(PyInterpreterState *interp) +{ +#if WITH_PYMALLOC + return interp->obmalloc && interp->obmalloc != &obmalloc_state_main; +#else + return false; +#endif +} + +#ifdef WITH_PYMALLOC +static void +init_obmalloc_pools(PyInterpreterState *interp) +{ + // initialize the obmalloc->pools structure. This must be done + // before the obmalloc alloc/free functions can be called. + poolp temp[OBMALLOC_USED_POOLS_SIZE] = + _obmalloc_pools_INIT(interp->obmalloc->pools); + memcpy(&interp->obmalloc->pools.used, temp, sizeof(temp)); +} +#endif /* WITH_PYMALLOC */ + +int _PyMem_init_obmalloc(PyInterpreterState *interp) +{ +#ifdef WITH_PYMALLOC + /* Initialize obmalloc, but only for subinterpreters, + since the main interpreter is initialized statically. */ + if (_Py_IsMainInterpreter(interp) + || _PyInterpreterState_HasFeature(interp, + Py_RTFLAGS_USE_MAIN_OBMALLOC)) { + interp->obmalloc = &obmalloc_state_main; + if (!obmalloc_state_initialized) { + init_obmalloc_pools(interp); + obmalloc_state_initialized = true; + } + } else { + interp->obmalloc = PyMem_RawCalloc(1, sizeof(struct _obmalloc_state)); + if (interp->obmalloc == NULL) { + return -1; + } + init_obmalloc_pools(interp); + } +#endif /* WITH_PYMALLOC */ + return 0; // success +} + #ifdef WITH_PYMALLOC +static void +free_obmalloc_arenas(PyInterpreterState *interp) +{ + OMState *state = interp->obmalloc; + for (uint i = 0; i < maxarenas; ++i) { + // free each obmalloc memory arena + struct arena_object *ao = &allarenas[i]; + _PyObject_Arena.free(_PyObject_Arena.ctx, + (void *)ao->address, ARENA_SIZE); + } + // free the array containing pointers to all arenas + PyMem_RawFree(allarenas); +#if WITH_PYMALLOC_RADIX_TREE +#ifdef USE_INTERIOR_NODES + // Free the middle and bottom nodes of the radix tree. 
These are allocated + // by arena_map_mark_used() but not freed when arenas are freed. + for (int i1 = 0; i1 < MAP_TOP_LENGTH; i1++) { + arena_map_mid_t *mid = arena_map_root.ptrs[i1]; + if (mid == NULL) { + continue; + } + for (int i2 = 0; i2 < MAP_MID_LENGTH; i2++) { + arena_map_bot_t *bot = arena_map_root.ptrs[i1]->ptrs[i2]; + if (bot == NULL) { + continue; + } + PyMem_RawFree(bot); + } + PyMem_RawFree(mid); + } +#endif +#endif +} + #ifdef Py_DEBUG /* Is target in the list? The list is traversed via the nextpool pointers. * The list may be NULL-terminated, or circular. Return 1 if target is in diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index 261622adc4cc77..fff64dd63d6b21 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -32,6 +32,7 @@ #include "pycore_typevarobject.h" // _Py_clear_generic_types() #include "pycore_unicodeobject.h" // _PyUnicode_InitTypes() #include "pycore_weakref.h" // _PyWeakref_GET_REF() +#include "pycore_obmalloc.h" // _PyMem_init_obmalloc() #include "opcode.h" @@ -645,6 +646,13 @@ pycore_create_interpreter(_PyRuntimeState *runtime, return status; } + // initialize the interp->obmalloc state. This must be done after + // the settings are loaded (so that feature_flags are set) but before + // any calls are made to obmalloc functions. + if (_PyMem_init_obmalloc(interp) < 0) { + return _PyStatus_NO_MEMORY(); + } + PyThreadState *tstate = _PyThreadState_New(interp, _PyThreadState_WHENCE_INTERP); if (tstate == NULL) { @@ -2144,6 +2152,14 @@ new_interpreter(PyThreadState **tstate_p, const PyInterpreterConfig *config) goto error; } + // initialize the interp->obmalloc state. This must be done after + // the settings are loaded (so that feature_flags are set) but before + // any calls are made to obmalloc functions. 
+ if (_PyMem_init_obmalloc(interp) < 0) { + status = _PyStatus_NO_MEMORY(); + goto error; + } + tstate = _PyThreadState_New(interp, _PyThreadState_WHENCE_INTERP); if (tstate == NULL) { status = _PyStatus_NO_MEMORY(); diff --git a/Python/pystate.c b/Python/pystate.c index 5ad5b6f3fcc634..8e097c848cf4a1 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -18,6 +18,7 @@ #include "pycore_pystate.h" #include "pycore_runtime_init.h" // _PyRuntimeState_INIT #include "pycore_sysmodule.h" // _PySys_Audit() +#include "pycore_obmalloc.h" // _PyMem_obmalloc_state_on_heap() /* -------------------------------------------------------------------------- CAUTION @@ -553,6 +554,11 @@ free_interpreter(PyInterpreterState *interp) // The main interpreter is statically allocated so // should not be freed. if (interp != &_PyRuntime._main_interpreter) { + if (_PyMem_obmalloc_state_on_heap(interp)) { + // interpreter has its own obmalloc state, free it + PyMem_RawFree(interp->obmalloc); + interp->obmalloc = NULL; + } PyMem_RawFree(interp); } } @@ -595,14 +601,6 @@ init_interpreter(PyInterpreterState *interp, assert(next != NULL || (interp == runtime->interpreters.main)); interp->next = next; - /* Initialize obmalloc, but only for subinterpreters, - since the main interpreter is initialized statically. 
*/ - if (interp != &runtime->_main_interpreter) { - poolp temp[OBMALLOC_USED_POOLS_SIZE] = \ - _obmalloc_pools_INIT(interp->obmalloc.pools); - memcpy(&interp->obmalloc.pools.used, temp, sizeof(temp)); - } - PyStatus status = _PyObject_InitState(interp); if (_PyStatus_EXCEPTION(status)) { return status; diff --git a/Tools/c-analyzer/cpython/ignored.tsv b/Tools/c-analyzer/cpython/ignored.tsv index 2f9e80d6ab6737..c75aff8c1723c1 100644 --- a/Tools/c-analyzer/cpython/ignored.tsv +++ b/Tools/c-analyzer/cpython/ignored.tsv @@ -325,7 +325,8 @@ Objects/obmalloc.c - _PyMem_Debug - Objects/obmalloc.c - _PyMem_Raw - Objects/obmalloc.c - _PyObject - Objects/obmalloc.c - last_final_leaks - -Objects/obmalloc.c - usedpools - +Objects/obmalloc.c - obmalloc_state_main - +Objects/obmalloc.c - obmalloc_state_initialized - Objects/typeobject.c - name_op - Objects/typeobject.c - slotdefs - Objects/unicodeobject.c - stripfuncnames - From 926881dc10ebf77069e02e66eea3e0d3ba500fe5 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Sat, 27 Jan 2024 10:55:33 +0300 Subject: [PATCH 123/160] gh-113445: Amend PyObject_RichCompareBool() docs (GH-113891) --- Doc/c-api/object.rst | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/Doc/c-api/object.rst b/Doc/c-api/object.rst index 4f656779c80b1a..12476412799a4f 100644 --- a/Doc/c-api/object.rst +++ b/Doc/c-api/object.rst @@ -229,12 +229,8 @@ Object Protocol .. c:function:: int PyObject_RichCompareBool(PyObject *o1, PyObject *o2, int opid) Compare the values of *o1* and *o2* using the operation specified by *opid*, - which must be one of :c:macro:`Py_LT`, :c:macro:`Py_LE`, :c:macro:`Py_EQ`, - :c:macro:`Py_NE`, :c:macro:`Py_GT`, or :c:macro:`Py_GE`, corresponding to ``<``, - ``<=``, ``==``, ``!=``, ``>``, or ``>=`` respectively. Returns ``-1`` on error, - ``0`` if the result is false, ``1`` otherwise. This is the equivalent of the - Python expression ``o1 op o2``, where ``op`` is the operator corresponding to - *opid*. 
+ like :c:func:`PyObject_RichCompare`, but returns ``-1`` on error, ``0`` if + the result is false, ``1`` otherwise. .. note:: If *o1* and *o2* are the same object, :c:func:`PyObject_RichCompareBool` From 547c135d70760f974ed0476a32a6809e708bfe4d Mon Sep 17 00:00:00 2001 From: NewUserHa <32261870+NewUserHa@users.noreply.github.com> Date: Sat, 27 Jan 2024 16:29:38 +0800 Subject: [PATCH 124/160] Simplify concurrent.futures.process code by using itertools.batched() (GH-114221) --- Lib/concurrent/futures/process.py | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/Lib/concurrent/futures/process.py b/Lib/concurrent/futures/process.py index ffaffdb8b3d0aa..ca843e11eeb83d 100644 --- a/Lib/concurrent/futures/process.py +++ b/Lib/concurrent/futures/process.py @@ -190,16 +190,6 @@ def _on_queue_feeder_error(self, e, obj): super()._on_queue_feeder_error(e, obj) -def _get_chunks(*iterables, chunksize): - """ Iterates over zip()ed iterables in chunks. """ - it = zip(*iterables) - while True: - chunk = tuple(itertools.islice(it, chunksize)) - if not chunk: - return - yield chunk - - def _process_chunk(fn, chunk): """ Processes a chunk of an iterable passed to map. 
@@ -847,7 +837,7 @@ def map(self, fn, *iterables, timeout=None, chunksize=1): raise ValueError("chunksize must be >= 1.") results = super().map(partial(_process_chunk, fn), - _get_chunks(*iterables, chunksize=chunksize), + itertools.batched(zip(*iterables), chunksize), timeout=timeout) return _chain_from_iterable_of_lists(results) From 23fb9f0777b054526b3b32f58e60b2a03132bf45 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Sat, 27 Jan 2024 11:45:07 +0300 Subject: [PATCH 125/160] Fix `c-api/file.rst` indexes (GH-114608) --- Doc/c-api/file.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Doc/c-api/file.rst b/Doc/c-api/file.rst index 0a03841e467cad..d3a78c588454e8 100644 --- a/Doc/c-api/file.rst +++ b/Doc/c-api/file.rst @@ -65,9 +65,10 @@ the :mod:`io` APIs instead. Overrides the normal behavior of :func:`io.open_code` to pass its parameter through the provided handler. - The handler is a function of type: + The *handler* is a function of type: - .. c:type:: Py_OpenCodeHookFunction + .. c:namespace:: NULL + .. c:type:: PyObject * (*Py_OpenCodeHookFunction)(PyObject *, void *) Equivalent of :c:expr:`PyObject *(\*)(PyObject *path, void *userData)`, where *path* is guaranteed to be From 6a8944acb61d0a2c210ab8066cdcec8602110e2f Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Sat, 27 Jan 2024 11:45:40 +0300 Subject: [PATCH 126/160] gh-101100: Fix sphinx warnings in `library/email.mime.rst` (GH-114635) --- Doc/library/email.mime.rst | 16 ++++++++-------- Doc/tools/.nitignore | 1 - 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/Doc/library/email.mime.rst b/Doc/library/email.mime.rst index d7c0d203d191f8..dc0dd3b9eebde6 100644 --- a/Doc/library/email.mime.rst +++ b/Doc/library/email.mime.rst @@ -28,7 +28,7 @@ make things easier. Here are the classes: -.. currentmodule:: email.mime.base +.. module:: email.mime.base .. 
class:: MIMEBase(_maintype, _subtype, *, policy=compat32, **_params) @@ -58,7 +58,7 @@ Here are the classes: Added *policy* keyword-only parameter. -.. currentmodule:: email.mime.nonmultipart +.. module:: email.mime.nonmultipart .. class:: MIMENonMultipart() @@ -72,7 +72,7 @@ Here are the classes: is called, a :exc:`~email.errors.MultipartConversionError` exception is raised. -.. currentmodule:: email.mime.multipart +.. module:: email.mime.multipart .. class:: MIMEMultipart(_subtype='mixed', boundary=None, _subparts=None, \ *, policy=compat32, **_params) @@ -104,7 +104,7 @@ Here are the classes: .. versionchanged:: 3.6 Added *policy* keyword-only parameter. -.. currentmodule:: email.mime.application +.. module:: email.mime.application .. class:: MIMEApplication(_data, _subtype='octet-stream', \ _encoder=email.encoders.encode_base64, \ @@ -135,7 +135,7 @@ Here are the classes: .. versionchanged:: 3.6 Added *policy* keyword-only parameter. -.. currentmodule:: email.mime.audio +.. module:: email.mime.audio .. class:: MIMEAudio(_audiodata, _subtype=None, \ _encoder=email.encoders.encode_base64, \ @@ -169,7 +169,7 @@ Here are the classes: .. versionchanged:: 3.6 Added *policy* keyword-only parameter. -.. currentmodule:: email.mime.image +.. module:: email.mime.image .. class:: MIMEImage(_imagedata, _subtype=None, \ _encoder=email.encoders.encode_base64, \ @@ -205,7 +205,7 @@ Here are the classes: .. versionchanged:: 3.6 Added *policy* keyword-only parameter. -.. currentmodule:: email.mime.message +.. module:: email.mime.message .. class:: MIMEMessage(_msg, _subtype='rfc822', *, policy=compat32) @@ -225,7 +225,7 @@ Here are the classes: .. versionchanged:: 3.6 Added *policy* keyword-only parameter. -.. currentmodule:: email.mime.text +.. module:: email.mime.text .. 
class:: MIMEText(_text, _subtype='plain', _charset=None, *, policy=compat32) diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index d56a44ad09a6f8..f48506e3a21df5 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -33,7 +33,6 @@ Doc/library/decimal.rst Doc/library/email.charset.rst Doc/library/email.compat32-message.rst Doc/library/email.errors.rst -Doc/library/email.mime.rst Doc/library/email.parser.rst Doc/library/email.policy.rst Doc/library/enum.rst From 11c582235d86b6020710eff282eeb381a7bf7bb7 Mon Sep 17 00:00:00 2001 From: Charlie Zhao Date: Sat, 27 Jan 2024 17:53:01 +0800 Subject: [PATCH 127/160] gh-113560: Improve docstrings for set.issubset() and set.issuperset() (GH-113562) --- Objects/setobject.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/Objects/setobject.c b/Objects/setobject.c index 88d20019bfb4a7..93de8e84f2ddf9 100644 --- a/Objects/setobject.c +++ b/Objects/setobject.c @@ -1770,7 +1770,11 @@ set_issubset(PySetObject *so, PyObject *other) Py_RETURN_TRUE; } -PyDoc_STRVAR(issubset_doc, "Report whether another set contains this set."); +PyDoc_STRVAR(issubset_doc, +"issubset($self, other, /)\n\ +--\n\ +\n\ +Test whether every element in the set is in other."); static PyObject * set_issuperset(PySetObject *so, PyObject *other) @@ -1802,7 +1806,11 @@ set_issuperset(PySetObject *so, PyObject *other) Py_RETURN_TRUE; } -PyDoc_STRVAR(issuperset_doc, "Report whether this set contains another set."); +PyDoc_STRVAR(issuperset_doc, +"issuperset($self, other, /)\n\ +--\n\ +\n\ +Test whether every element in other is in the set."); static PyObject * set_richcompare(PySetObject *v, PyObject *w, int op) From b6623d61d4e07aefd15d910ef67837c66bceaf32 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Sat, 27 Jan 2024 15:06:59 +0200 Subject: [PATCH 128/160] gh-101100: Fix Sphinx warnings in `whatsnew/3.11.rst` and related (#114531) --- Doc/whatsnew/2.7.rst | 2 +- 
Doc/whatsnew/3.11.rst | 96 ++++++++++++++++++++-------------------- Doc/whatsnew/3.8.rst | 4 +- Misc/NEWS.d/3.11.0a2.rst | 2 +- Misc/NEWS.d/3.11.0a4.rst | 6 +-- Misc/NEWS.d/3.11.0a6.rst | 2 +- Misc/NEWS.d/3.11.0a7.rst | 4 +- Misc/NEWS.d/3.11.0b1.rst | 4 +- Misc/NEWS.d/3.12.0a4.rst | 4 +- Misc/NEWS.d/3.12.0a6.rst | 2 +- Misc/NEWS.d/3.12.0a7.rst | 2 +- Misc/NEWS.d/3.12.0b1.rst | 2 +- Misc/NEWS.d/3.8.0a1.rst | 4 +- Misc/NEWS.d/3.9.0a1.rst | 4 +- Misc/NEWS.d/3.9.0a5.rst | 2 +- 15 files changed, 70 insertions(+), 70 deletions(-) diff --git a/Doc/whatsnew/2.7.rst b/Doc/whatsnew/2.7.rst index 241d58720399af..524967b4524234 100644 --- a/Doc/whatsnew/2.7.rst +++ b/Doc/whatsnew/2.7.rst @@ -2130,7 +2130,7 @@ Changes to Python's build process and to the C API include: only the filename, function name, and first line number are required. This is useful for extension modules that are attempting to construct a more useful traceback stack. Previously such - extensions needed to call :c:func:`PyCode_New`, which had many + extensions needed to call :c:func:`!PyCode_New`, which had many more arguments. (Added by Jeffrey Yasskin.) * New function: :c:func:`PyErr_NewExceptionWithDoc` creates a new diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst index cb646a54df3607..a8a7dfda0c5309 100644 --- a/Doc/whatsnew/3.11.rst +++ b/Doc/whatsnew/3.11.rst @@ -672,7 +672,7 @@ enum * Changed :meth:`Enum.__format__() ` (the default for :func:`format`, :meth:`str.format` and :term:`f-string`\s) to always produce - the same result as :meth:`Enum.__str__()`: for enums inheriting from + the same result as :meth:`Enum.__str__() `: for enums inheriting from :class:`~enum.ReprEnum` it will be the member's value; for all other enums it will be the enum and member name (e.g. ``Color.RED``). @@ -1604,7 +1604,7 @@ raw, adaptive bytecode containing quickened data. 
New opcodes ----------- -* :opcode:`ASYNC_GEN_WRAP`, :opcode:`RETURN_GENERATOR` and :opcode:`SEND`, +* :opcode:`!ASYNC_GEN_WRAP`, :opcode:`RETURN_GENERATOR` and :opcode:`SEND`, used in generators and co-routines. * :opcode:`COPY_FREE_VARS`, @@ -1615,7 +1615,7 @@ New opcodes * :opcode:`MAKE_CELL`, to create :ref:`cell-objects`. -* :opcode:`CHECK_EG_MATCH` and :opcode:`PREP_RERAISE_STAR`, +* :opcode:`CHECK_EG_MATCH` and :opcode:`!PREP_RERAISE_STAR`, to handle the :ref:`new exception groups and except* ` added in :pep:`654`. @@ -1630,38 +1630,38 @@ New opcodes Replaced opcodes ---------------- -+------------------------------------+-----------------------------------+-----------------------------------------+ -| Replaced Opcode(s) | New Opcode(s) | Notes | -+====================================+===================================+=========================================+ -| | :opcode:`!BINARY_*` | :opcode:`BINARY_OP` | Replaced all numeric binary/in-place | -| | :opcode:`!INPLACE_*` | | opcodes with a single opcode | -+------------------------------------+-----------------------------------+-----------------------------------------+ -| | :opcode:`!CALL_FUNCTION` | | :opcode:`CALL` | Decouples argument shifting for methods | -| | :opcode:`!CALL_FUNCTION_KW` | | :opcode:`KW_NAMES` | from handling of keyword arguments; | -| | :opcode:`!CALL_METHOD` | | :opcode:`PRECALL` | allows better specialization of calls | -| | | :opcode:`PUSH_NULL` | | -+------------------------------------+-----------------------------------+-----------------------------------------+ -| | :opcode:`!DUP_TOP` | | :opcode:`COPY` | Stack manipulation instructions | -| | :opcode:`!DUP_TOP_TWO` | | :opcode:`SWAP` | | -| | :opcode:`!ROT_TWO` | | | -| | :opcode:`!ROT_THREE` | | | -| | :opcode:`!ROT_FOUR` | | | -| | :opcode:`!ROT_N` | | | -+------------------------------------+-----------------------------------+-----------------------------------------+ -| | :opcode:`!JUMP_IF_NOT_EXC_MATCH` | | 
:opcode:`CHECK_EXC_MATCH` | Now performs check but doesn't jump | -+------------------------------------+-----------------------------------+-----------------------------------------+ -| | :opcode:`!JUMP_ABSOLUTE` | | :opcode:`JUMP_BACKWARD` | See [#bytecode-jump]_; | -| | :opcode:`!POP_JUMP_IF_FALSE` | | :opcode:`POP_JUMP_BACKWARD_IF_* | ``TRUE``, ``FALSE``, | -| | :opcode:`!POP_JUMP_IF_TRUE` | ` | ``NONE`` and ``NOT_NONE`` variants | -| | | :opcode:`POP_JUMP_FORWARD_IF_* | for each direction | -| | ` | | -+------------------------------------+-----------------------------------+-----------------------------------------+ -| | :opcode:`!SETUP_WITH` | :opcode:`BEFORE_WITH` | :keyword:`with` block setup | -| | :opcode:`!SETUP_ASYNC_WITH` | | | -+------------------------------------+-----------------------------------+-----------------------------------------+ ++------------------------------------+------------------------------------+-----------------------------------------+ +| Replaced Opcode(s) | New Opcode(s) | Notes | ++====================================+====================================+=========================================+ +| | :opcode:`!BINARY_*` | :opcode:`BINARY_OP` | Replaced all numeric binary/in-place | +| | :opcode:`!INPLACE_*` | | opcodes with a single opcode | ++------------------------------------+------------------------------------+-----------------------------------------+ +| | :opcode:`!CALL_FUNCTION` | | :opcode:`CALL` | Decouples argument shifting for methods | +| | :opcode:`!CALL_FUNCTION_KW` | | :opcode:`!KW_NAMES` | from handling of keyword arguments; | +| | :opcode:`!CALL_METHOD` | | :opcode:`!PRECALL` | allows better specialization of calls | +| | | :opcode:`PUSH_NULL` | | ++------------------------------------+------------------------------------+-----------------------------------------+ +| | :opcode:`!DUP_TOP` | | :opcode:`COPY` | Stack manipulation instructions | +| | :opcode:`!DUP_TOP_TWO` | | :opcode:`SWAP` | | +| | 
:opcode:`!ROT_TWO` | | | +| | :opcode:`!ROT_THREE` | | | +| | :opcode:`!ROT_FOUR` | | | +| | :opcode:`!ROT_N` | | | ++------------------------------------+------------------------------------+-----------------------------------------+ +| | :opcode:`!JUMP_IF_NOT_EXC_MATCH` | | :opcode:`CHECK_EXC_MATCH` | Now performs check but doesn't jump | ++------------------------------------+------------------------------------+-----------------------------------------+ +| | :opcode:`!JUMP_ABSOLUTE` | | :opcode:`JUMP_BACKWARD` | See [#bytecode-jump]_; | +| | :opcode:`!POP_JUMP_IF_FALSE` | | :opcode:`!POP_JUMP_BACKWARD_IF_*`| ``TRUE``, ``FALSE``, | +| | :opcode:`!POP_JUMP_IF_TRUE` | | :opcode:`!POP_JUMP_FORWARD_IF_*` | ``NONE`` and ``NOT_NONE`` variants | +| | | for each direction | +| | | | ++------------------------------------+------------------------------------+-----------------------------------------+ +| | :opcode:`!SETUP_WITH` | :opcode:`BEFORE_WITH` | :keyword:`with` block setup | +| | :opcode:`!SETUP_ASYNC_WITH` | | | ++------------------------------------+------------------------------------+-----------------------------------------+ .. [#bytecode-jump] All jump opcodes are now relative, including the - existing :opcode:`JUMP_IF_TRUE_OR_POP` and :opcode:`JUMP_IF_FALSE_OR_POP`. + existing :opcode:`!JUMP_IF_TRUE_OR_POP` and :opcode:`!JUMP_IF_FALSE_OR_POP`. The argument is now an offset from the current instruction rather than an absolute location. 
@@ -1789,13 +1789,13 @@ Standard Library and will be removed in a future Python version, due to not supporting resources located within package subdirectories: - * :func:`importlib.resources.contents` - * :func:`importlib.resources.is_resource` - * :func:`importlib.resources.open_binary` - * :func:`importlib.resources.open_text` - * :func:`importlib.resources.read_binary` - * :func:`importlib.resources.read_text` - * :func:`importlib.resources.path` + * :func:`!importlib.resources.contents` + * :func:`!importlib.resources.is_resource` + * :func:`!importlib.resources.open_binary` + * :func:`!importlib.resources.open_text` + * :func:`!importlib.resources.read_binary` + * :func:`!importlib.resources.read_text` + * :func:`!importlib.resources.path` * The :func:`locale.getdefaultlocale` function is deprecated and will be removed in Python 3.15. Use :func:`locale.setlocale`, @@ -1803,7 +1803,7 @@ Standard Library :func:`locale.getlocale` functions instead. (Contributed by Victor Stinner in :gh:`90817`.) -* The :func:`locale.resetlocale` function is deprecated and will be +* The :func:`!locale.resetlocale` function is deprecated and will be removed in Python 3.13. Use ``locale.setlocale(locale.LC_ALL, "")`` instead. (Contributed by Victor Stinner in :gh:`90817`.) @@ -1967,7 +1967,7 @@ Removed C APIs are :ref:`listed separately `. (Contributed by Victor Stinner in :issue:`45085`.) -* Removed the :mod:`distutils` ``bdist_msi`` command deprecated in Python 3.9. +* Removed the :mod:`!distutils` ``bdist_msi`` command deprecated in Python 3.9. Use ``bdist_wheel`` (wheel packages) instead. (Contributed by Hugo van Kemenade in :issue:`45124`.) @@ -2295,7 +2295,7 @@ Porting to Python 3.11 as its second parameter, instead of ``PyFrameObject*``. See :pep:`523` for more details of how to use this function pointer type. 
-* :c:func:`PyCode_New` and :c:func:`PyCode_NewWithPosOnlyArgs` now take +* :c:func:`!PyCode_New` and :c:func:`!PyCode_NewWithPosOnlyArgs` now take an additional ``exception_table`` argument. Using these functions should be avoided, if at all possible. To get a custom code object: create a code object using the compiler, @@ -2402,7 +2402,7 @@ Porting to Python 3.11 been included directly, consider including ``Python.h`` instead. (Contributed by Victor Stinner in :issue:`35134`.) -* The :c:func:`PyUnicode_CHECK_INTERNED` macro has been excluded from the +* The :c:func:`!PyUnicode_CHECK_INTERNED` macro has been excluded from the limited C API. It was never usable there, because it used internal structures which are not available in the limited C API. (Contributed by Victor Stinner in :issue:`46007`.) @@ -2465,7 +2465,7 @@ Porting to Python 3.11 Debuggers that accessed the :attr:`~frame.f_locals` directly *must* call :c:func:`PyFrame_GetLocals` instead. They no longer need to call - :c:func:`PyFrame_FastToLocalsWithError` or :c:func:`PyFrame_LocalsToFast`, + :c:func:`!PyFrame_FastToLocalsWithError` or :c:func:`!PyFrame_LocalsToFast`, in fact they should not call those functions. The necessary updating of the frame is now managed by the virtual machine. @@ -2604,8 +2604,8 @@ and will be removed in Python 3.12. * :c:func:`!PyUnicode_GET_DATA_SIZE` * :c:func:`!PyUnicode_GET_SIZE` * :c:func:`!PyUnicode_GetSize` -* :c:func:`PyUnicode_IS_COMPACT` -* :c:func:`PyUnicode_IS_READY` +* :c:func:`!PyUnicode_IS_COMPACT` +* :c:func:`!PyUnicode_IS_READY` * :c:func:`PyUnicode_READY` * :c:func:`!PyUnicode_WSTR_LENGTH` * :c:func:`!_PyUnicode_AsUnicode` @@ -2660,7 +2660,7 @@ Removed (Contributed by Victor Stinner in :issue:`45474`.) * Exclude :c:func:`PyWeakref_GET_OBJECT` from the limited C API. It never - worked since the :c:type:`PyWeakReference` structure is opaque in the + worked since the :c:type:`!PyWeakReference` structure is opaque in the limited C API. 
(Contributed by Victor Stinner in :issue:`35134`.) diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst index d373fa163ff737..304d1b4ef4efe8 100644 --- a/Doc/whatsnew/3.8.rst +++ b/Doc/whatsnew/3.8.rst @@ -1623,8 +1623,8 @@ Build and C API Changes allocation or deallocation may need to be adjusted. (Contributed by Eddie Elizondo in :issue:`35810`.) -* The new function :c:func:`PyCode_NewWithPosOnlyArgs` allows to create - code objects like :c:func:`PyCode_New`, but with an extra *posonlyargcount* +* The new function :c:func:`!PyCode_NewWithPosOnlyArgs` allows to create + code objects like :c:func:`!PyCode_New`, but with an extra *posonlyargcount* parameter for indicating the number of positional-only arguments. (Contributed by Pablo Galindo in :issue:`37221`.) diff --git a/Misc/NEWS.d/3.11.0a2.rst b/Misc/NEWS.d/3.11.0a2.rst index eb1456f1bcf353..a6b5fe54b391c5 100644 --- a/Misc/NEWS.d/3.11.0a2.rst +++ b/Misc/NEWS.d/3.11.0a2.rst @@ -1189,7 +1189,7 @@ context objects can now be disabled. .. section: C API Exclude :c:func:`PyWeakref_GET_OBJECT` from the limited C API. It never -worked since the :c:type:`PyWeakReference` structure is opaque in the +worked since the :c:type:`!PyWeakReference` structure is opaque in the limited C API. .. diff --git a/Misc/NEWS.d/3.11.0a4.rst b/Misc/NEWS.d/3.11.0a4.rst index 5abacd8473f394..78b682f7a22cc6 100644 --- a/Misc/NEWS.d/3.11.0a4.rst +++ b/Misc/NEWS.d/3.11.0a4.rst @@ -161,7 +161,7 @@ faster due to reference-counting optimizations. Patch by Dennis Sweeney. .. nonce: 7oGp-I .. section: Core and Builtins -:opcode:`PREP_RERAISE_STAR` no longer pushes ``lasti`` to the stack. +:opcode:`!PREP_RERAISE_STAR` no longer pushes ``lasti`` to the stack. .. @@ -170,7 +170,7 @@ faster due to reference-counting optimizations. Patch by Dennis Sweeney. .. nonce: IKx4v6 .. 
section: Core and Builtins -Remove :opcode:`POP_EXCEPT_AND_RERAISE` and replace it by an equivalent +Remove :opcode:`!POP_EXCEPT_AND_RERAISE` and replace it by an equivalent sequence of other opcodes. .. @@ -1171,7 +1171,7 @@ Replaced deprecated usage of :c:func:`PyImport_ImportModuleNoBlock` with .. nonce: sMgDLz .. section: C API -The :c:func:`PyUnicode_CHECK_INTERNED` macro has been excluded from the +The :c:func:`!PyUnicode_CHECK_INTERNED` macro has been excluded from the limited C API. It was never usable there, because it used internal structures which are not available in the limited C API. Patch by Victor Stinner. diff --git a/Misc/NEWS.d/3.11.0a6.rst b/Misc/NEWS.d/3.11.0a6.rst index 2b50b7773492cb..2fdceef7746d4e 100644 --- a/Misc/NEWS.d/3.11.0a6.rst +++ b/Misc/NEWS.d/3.11.0a6.rst @@ -248,7 +248,7 @@ Don't un-adapt :opcode:`COMPARE_OP` when collecting specialization stats. .. nonce: RX_AzJ .. section: Core and Builtins -Fix specialization stats gathering for :opcode:`PRECALL` instructions. +Fix specialization stats gathering for :opcode:`!PRECALL` instructions. .. diff --git a/Misc/NEWS.d/3.11.0a7.rst b/Misc/NEWS.d/3.11.0a7.rst index 76699632db223a..ec99bd0294ceca 100644 --- a/Misc/NEWS.d/3.11.0a7.rst +++ b/Misc/NEWS.d/3.11.0a7.rst @@ -138,7 +138,7 @@ Replaced :opcode:`JUMP_ABSOLUTE` by the relative jump .. nonce: SwrrFO .. section: Core and Builtins -:c:func:`PyFrame_FastToLocalsWithError` and :c:func:`PyFrame_LocalsToFast` +:c:func:`!PyFrame_FastToLocalsWithError` and :c:func:`!PyFrame_LocalsToFast` are no longer called during profiling nor tracing. C code can access the ``f_locals`` attribute of :c:type:`PyFrameObject` by calling :c:func:`PyFrame_GetLocals`. @@ -295,7 +295,7 @@ oparg) as an adaptive counter. .. nonce: O12Pba .. 
section: Core and Builtins -Use inline caching for :opcode:`PRECALL` and :opcode:`CALL`, and remove the +Use inline caching for :opcode:`!PRECALL` and :opcode:`CALL`, and remove the internal machinery for managing the (now unused) non-inline caches. .. diff --git a/Misc/NEWS.d/3.11.0b1.rst b/Misc/NEWS.d/3.11.0b1.rst index 2c30dc6e084bfb..f9296679655573 100644 --- a/Misc/NEWS.d/3.11.0b1.rst +++ b/Misc/NEWS.d/3.11.0b1.rst @@ -403,8 +403,8 @@ so this led to crashes. The problem is now fixed. .. nonce: 6S_uoU .. section: Core and Builtins -Make opcodes :opcode:`JUMP_IF_TRUE_OR_POP` and -:opcode:`JUMP_IF_FALSE_OR_POP` relative rather than absolute. +Make opcodes :opcode:`!JUMP_IF_TRUE_OR_POP` and +:opcode:`!JUMP_IF_FALSE_OR_POP` relative rather than absolute. .. diff --git a/Misc/NEWS.d/3.12.0a4.rst b/Misc/NEWS.d/3.12.0a4.rst index ce2814bbe2e5ab..82faa5ad0b2031 100644 --- a/Misc/NEWS.d/3.12.0a4.rst +++ b/Misc/NEWS.d/3.12.0a4.rst @@ -13,8 +13,8 @@ Fix misleading default value in :func:`input`'s ``__text_signature__``. .. nonce: cmGwxv .. section: Core and Builtins -Remove :opcode:`UNARY_POSITIVE`, :opcode:`ASYNC_GEN_WRAP` and -:opcode:`LIST_TO_TUPLE`, replacing them with intrinsics. +Remove :opcode:`!UNARY_POSITIVE`, :opcode:`!ASYNC_GEN_WRAP` and +:opcode:`!LIST_TO_TUPLE`, replacing them with intrinsics. .. diff --git a/Misc/NEWS.d/3.12.0a6.rst b/Misc/NEWS.d/3.12.0a6.rst index 5bd600cd8b6fc0..cf28bdb9258820 100644 --- a/Misc/NEWS.d/3.12.0a6.rst +++ b/Misc/NEWS.d/3.12.0a6.rst @@ -170,7 +170,7 @@ all as not all platform C libraries generate an error. .. section: Core and Builtins Add :opcode:`CALL_INTRINSIC_2` and use it instead of -:opcode:`PREP_RERAISE_STAR`. +:opcode:`!PREP_RERAISE_STAR`. .. diff --git a/Misc/NEWS.d/3.12.0a7.rst b/Misc/NEWS.d/3.12.0a7.rst index f22050b0dc377b..a859be8a047456 100644 --- a/Misc/NEWS.d/3.12.0a7.rst +++ b/Misc/NEWS.d/3.12.0a7.rst @@ -24,7 +24,7 @@ Reduce the number of inline :opcode:`CACHE` entries for .. nonce: PRkGca .. 
section: Core and Builtins -Removed :opcode:`JUMP_IF_FALSE_OR_POP` and :opcode:`JUMP_IF_TRUE_OR_POP` +Removed :opcode:`!JUMP_IF_FALSE_OR_POP` and :opcode:`!JUMP_IF_TRUE_OR_POP` instructions. .. diff --git a/Misc/NEWS.d/3.12.0b1.rst b/Misc/NEWS.d/3.12.0b1.rst index 007a6ad4ffd4d4..211513d05d0040 100644 --- a/Misc/NEWS.d/3.12.0b1.rst +++ b/Misc/NEWS.d/3.12.0b1.rst @@ -1008,7 +1008,7 @@ Update the bundled copy of pip to version 23.1.2. .. nonce: pst8iT .. section: Library -Make :mod:`dis` display the value of oparg of :opcode:`KW_NAMES`. +Make :mod:`dis` display the value of oparg of :opcode:`!KW_NAMES`. .. diff --git a/Misc/NEWS.d/3.8.0a1.rst b/Misc/NEWS.d/3.8.0a1.rst index 11b303e89ad04f..bd9061601fe190 100644 --- a/Misc/NEWS.d/3.8.0a1.rst +++ b/Misc/NEWS.d/3.8.0a1.rst @@ -3395,8 +3395,8 @@ Zackery Spytz. .. nonce: S0Irst .. section: Library -Fix parsing non-ASCII identifiers in :mod:`lib2to3.pgen2.tokenize` (PEP -3131). +Fix parsing non-ASCII identifiers in :mod:`!lib2to3.pgen2.tokenize` +(:pep:`3131`). .. diff --git a/Misc/NEWS.d/3.9.0a1.rst b/Misc/NEWS.d/3.9.0a1.rst index 0772a0fed20652..66d7fc1f32e705 100644 --- a/Misc/NEWS.d/3.9.0a1.rst +++ b/Misc/NEWS.d/3.9.0a1.rst @@ -5715,8 +5715,8 @@ The :c:macro:`METH_FASTCALL` calling convention has been documented. .. nonce: 4tClQT .. section: C API -The new function :c:func:`PyCode_NewWithPosOnlyArgs` allows to create code -objects like :c:func:`PyCode_New`, but with an extra *posonlyargcount* +The new function :c:func:`!PyCode_NewWithPosOnlyArgs` allows to create code +objects like :c:func:`!PyCode_New`, but with an extra *posonlyargcount* parameter for indicating the number of positonal-only arguments. .. diff --git a/Misc/NEWS.d/3.9.0a5.rst b/Misc/NEWS.d/3.9.0a5.rst index b4594aade3b3ed..f0015ac54df307 100644 --- a/Misc/NEWS.d/3.9.0a5.rst +++ b/Misc/NEWS.d/3.9.0a5.rst @@ -1122,7 +1122,7 @@ a different condition than the GIL. .. nonce: Nbl7lF .. 
section: Tools/Demos -Added support to fix ``getproxies`` in the :mod:`lib2to3.fixes.fix_urllib` +Added support to fix ``getproxies`` in the :mod:`!lib2to3.fixes.fix_urllib` module. Patch by José Roberto Meza Cabrera. .. From a384b20c0ce5aa520fa91ae0233d53642925525b Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Sat, 27 Jan 2024 17:30:21 +0300 Subject: [PATCH 129/160] gh-101100: Fix sphinx warnings in `reference/import.rst` (#114646) --- Doc/reference/import.rst | 7 ++++--- Doc/tools/.nitignore | 1 - 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Doc/reference/import.rst b/Doc/reference/import.rst index a7beeea29b4556..f8c9724114da9e 100644 --- a/Doc/reference/import.rst +++ b/Doc/reference/import.rst @@ -327,14 +327,15 @@ modules, and one that knows how to import modules from an :term:`import path` finders replaced :meth:`!find_module`, which is now deprecated. While it will continue to work without change, the import machinery will try it only if the finder does not implement - ``find_spec()``. + :meth:`~importlib.abc.MetaPathFinder.find_spec`. .. versionchanged:: 3.10 Use of :meth:`!find_module` by the import system now raises :exc:`ImportWarning`. .. versionchanged:: 3.12 - ``find_module()`` has been removed. Use :meth:`find_spec` instead. + :meth:`!find_module` has been removed. + Use :meth:`~importlib.abc.MetaPathFinder.find_spec` instead. Loading @@ -812,7 +813,7 @@ attributes on package objects are also used. These provide additional ways that the import machinery can be customized. :data:`sys.path` contains a list of strings providing search locations for -modules and packages. It is initialized from the :data:`PYTHONPATH` +modules and packages. It is initialized from the :envvar:`PYTHONPATH` environment variable and various other installation- and implementation-specific defaults. 
Entries in :data:`sys.path` can name directories on the file system, zip files, and potentially other "locations" diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index f48506e3a21df5..8b6847ef2a7d76 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -87,7 +87,6 @@ Doc/library/xmlrpc.server.rst Doc/library/zlib.rst Doc/reference/compound_stmts.rst Doc/reference/datamodel.rst -Doc/reference/import.rst Doc/tutorial/datastructures.rst Doc/using/windows.rst Doc/whatsnew/2.0.rst From 7a470541e2bbc6f3e87a6d813e2ec42cf726de7a Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 27 Jan 2024 18:38:17 +0200 Subject: [PATCH 130/160] gh-114100: Remove superfluous writing to fd 1 in test_pty (GH-114647) --- Lib/test/test_pty.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_pty.py b/Lib/test/test_pty.py index f31a68c5d84e03..51e3a46d0df178 100644 --- a/Lib/test/test_pty.py +++ b/Lib/test/test_pty.py @@ -1,4 +1,5 @@ from test.support import verbose, reap_children +from test.support.os_helper import TESTFN, unlink from test.support.import_helper import import_module # Skip these tests if termios or fcntl are not available @@ -292,7 +293,26 @@ def test_master_read(self): self.assertEqual(data, b"") def test_spawn_doesnt_hang(self): - pty.spawn([sys.executable, '-c', 'print("hi there")']) + self.addCleanup(unlink, TESTFN) + with open(TESTFN, 'wb') as f: + STDOUT_FILENO = 1 + dup_stdout = os.dup(STDOUT_FILENO) + os.dup2(f.fileno(), STDOUT_FILENO) + buf = b'' + def master_read(fd): + nonlocal buf + data = os.read(fd, 1024) + buf += data + return data + try: + pty.spawn([sys.executable, '-c', 'print("hi there")'], + master_read) + finally: + os.dup2(dup_stdout, STDOUT_FILENO) + os.close(dup_stdout) + self.assertEqual(buf, b'hi there\r\n') + with open(TESTFN, 'rb') as f: + self.assertEqual(f.read(), b'hi there\r\n') class SmallPtyTests(unittest.TestCase): """These tests don't spawn children or hang.""" 
From 823a38a960c245cbf309ef29120d3690ba1bcd2c Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sat, 27 Jan 2024 19:59:51 +0000 Subject: [PATCH 131/160] GH-79634: Speed up pathlib globbing by removing `joinpath()` call. (#114623) Remove `self.joinpath('')` call that should have been removed in 6313cdde. This makes `PathBase.glob('')` yield itself *without* adding a trailing slash. It's hard to say whether this is more or less correct, but at least everything else is faster, and there's no behaviour change in the public classes where empty glob patterns are disallowed. --- Lib/pathlib/_abc.py | 2 +- Lib/test/test_pathlib/test_pathlib.py | 2 ++ Lib/test/test_pathlib/test_pathlib_abc.py | 7 +++---- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 6303a18680befc..ad5684829ebc80 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -771,7 +771,7 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None): filter_paths = False deduplicate_paths = False sep = self.pathmod.sep - paths = iter([self.joinpath('')] if self.is_dir() else []) + paths = iter([self] if self.is_dir() else []) while stack: part = stack.pop() if part in specials: diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 9c2b26d41d73f8..5ce3b605c58e63 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -1232,6 +1232,8 @@ def test_glob_empty_pattern(self): list(p.glob('')) with self.assertRaisesRegex(ValueError, 'Unacceptable pattern'): list(p.glob('.')) + with self.assertRaisesRegex(ValueError, 'Unacceptable pattern'): + list(p.glob('./')) def test_glob_many_open_files(self): depth = 30 diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index ea70931eaa2c7e..ab989cb5503f99 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1733,12 
+1733,11 @@ def test_glob_windows(self): self.assertEqual(set(map(str, p.glob("F*a"))), {f"{p}\\fileA"}) def test_glob_empty_pattern(self): - def _check(glob, expected): - self.assertEqual(set(glob), { P(self.base, q) for q in expected }) P = self.cls p = P(self.base) - _check(p.glob(""), [""]) - _check(p.glob("."), ["."]) + self.assertEqual(list(p.glob("")), [p]) + self.assertEqual(list(p.glob(".")), [p / "."]) + self.assertEqual(list(p.glob("./")), [p / "./"]) def test_glob_case_sensitive(self): P = self.cls From a768e12f094a9b14a9a1680fb50330e1050716c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Skytt=C3=A4?= Date: Sat, 27 Jan 2024 23:47:55 +0200 Subject: [PATCH 132/160] Use bool in fileinput.input() docstring and tests for the inplace argument (GH-111998) The `.rst` docs, most tests, and typeshed already use bool for it. --- Lib/fileinput.py | 2 +- Lib/test/test_fileinput.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/fileinput.py b/Lib/fileinput.py index 1b25f28f3d3432..3dba3d2fbfa967 100644 --- a/Lib/fileinput.py +++ b/Lib/fileinput.py @@ -53,7 +53,7 @@ sequence must be accessed in strictly sequential order; sequence access and readline() cannot be mixed. -Optional in-place filtering: if the keyword argument inplace=1 is +Optional in-place filtering: if the keyword argument inplace=True is passed to input() or to the FileInput constructor, the file is moved to a backup file and standard output is directed to the input file. This makes it possible to write a filter that rewrites its input file diff --git a/Lib/test/test_fileinput.py b/Lib/test/test_fileinput.py index 786d9186634305..b3ad41d2588c4c 100644 --- a/Lib/test/test_fileinput.py +++ b/Lib/test/test_fileinput.py @@ -151,7 +151,7 @@ def test_buffer_sizes(self): print('6. 
Inplace') savestdout = sys.stdout try: - fi = FileInput(files=(t1, t2, t3, t4), inplace=1, encoding="utf-8") + fi = FileInput(files=(t1, t2, t3, t4), inplace=True, encoding="utf-8") for line in fi: line = line[:-1].upper() print(line) @@ -256,7 +256,7 @@ def test_detached_stdin_binary_mode(self): def test_file_opening_hook(self): try: # cannot use openhook and inplace mode - fi = FileInput(inplace=1, openhook=lambda f, m: None) + fi = FileInput(inplace=True, openhook=lambda f, m: None) self.fail("FileInput should raise if both inplace " "and openhook arguments are given") except ValueError: From 5ecfd750b4f511f270c38f0d748da9cffa279295 Mon Sep 17 00:00:00 2001 From: Skip Montanaro Date: Sun, 28 Jan 2024 08:51:25 -0600 Subject: [PATCH 133/160] Correction Skip Montanaro's email address (#114677) --- Doc/library/atexit.rst | 4 ++-- Doc/library/csv.rst | 2 +- Doc/library/readline.rst | 2 +- Doc/library/urllib.robotparser.rst | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Doc/library/atexit.rst b/Doc/library/atexit.rst index 3dbef69580d9b3..43a8bd2d7cd133 100644 --- a/Doc/library/atexit.rst +++ b/Doc/library/atexit.rst @@ -4,8 +4,8 @@ .. module:: atexit :synopsis: Register and execute cleanup functions. -.. moduleauthor:: Skip Montanaro -.. sectionauthor:: Skip Montanaro +.. moduleauthor:: Skip Montanaro +.. sectionauthor:: Skip Montanaro -------------- diff --git a/Doc/library/csv.rst b/Doc/library/csv.rst index 7a5589e68b3052..07f38f5690bb54 100644 --- a/Doc/library/csv.rst +++ b/Doc/library/csv.rst @@ -4,7 +4,7 @@ .. module:: csv :synopsis: Write and read tabular data to and from delimited files. -.. sectionauthor:: Skip Montanaro +.. sectionauthor:: Skip Montanaro **Source code:** :source:`Lib/csv.py` diff --git a/Doc/library/readline.rst b/Doc/library/readline.rst index 1adafcaa02eab9..54c6d9f3b32b1a 100644 --- a/Doc/library/readline.rst +++ b/Doc/library/readline.rst @@ -5,7 +5,7 @@ :platform: Unix :synopsis: GNU readline support for Python. 
-.. sectionauthor:: Skip Montanaro +.. sectionauthor:: Skip Montanaro -------------- diff --git a/Doc/library/urllib.robotparser.rst b/Doc/library/urllib.robotparser.rst index f063e463753e0b..b5a49d9c592387 100644 --- a/Doc/library/urllib.robotparser.rst +++ b/Doc/library/urllib.robotparser.rst @@ -5,7 +5,7 @@ :synopsis: Load a robots.txt file and answer questions about fetchability of other URLs. -.. sectionauthor:: Skip Montanaro +.. sectionauthor:: Skip Montanaro **Source code:** :source:`Lib/urllib/robotparser.py` From d00fbed68ffcd5823acbb32a0e47e2e5f9732ff7 Mon Sep 17 00:00:00 2001 From: Bhushan Mohanraj <50306448+bhushan-mohanraj@users.noreply.github.com> Date: Sun, 28 Jan 2024 15:10:32 -0500 Subject: [PATCH 134/160] Fix indentation in `__post_init__` documentation. (gh-114666) --- Doc/library/dataclasses.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/dataclasses.rst b/Doc/library/dataclasses.rst index 88f2e0251b1e51..4ada69d63abada 100644 --- a/Doc/library/dataclasses.rst +++ b/Doc/library/dataclasses.rst @@ -538,8 +538,8 @@ that has to be called, it is common to call this method in a class Rectangle: def __init__(self, height, width): - self.height = height - self.width = width + self.height = height + self.width = width @dataclass class Square(Rectangle): From 3bb6912d8832e6e0a98c74de360dc1b23906c4b3 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Sun, 28 Jan 2024 22:28:25 +0200 Subject: [PATCH 135/160] gh-100734: Add 'Notable change in 3.11.x' to `whatsnew/3.11.rst` (#114657) Co-authored-by: Serhiy Storchaka --- Doc/whatsnew/3.11.rst | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/Doc/whatsnew/3.11.rst b/Doc/whatsnew/3.11.rst index a8a7dfda0c5309..4f4c1de8d8d596 100644 --- a/Doc/whatsnew/3.11.rst +++ b/Doc/whatsnew/3.11.rst @@ -2701,4 +2701,30 @@ Removed (Contributed by Inada Naoki in :issue:`44029`.) 
+Notable changes in 3.11.4 +========================= + +tarfile +------- + +* The extraction methods in :mod:`tarfile`, and :func:`shutil.unpack_archive`, + have a new *filter* argument that allows limiting tar features that may be + surprising or dangerous, such as creating files outside the destination + directory. + See :ref:`tarfile-extraction-filter` for details. + In Python 3.12, use without the *filter* argument will show a + :exc:`DeprecationWarning`. + In Python 3.14, the default will switch to ``'data'``. + (Contributed by Petr Viktorin in :pep:`706`.) + + +Notable changes in 3.11.5 +========================= + +OpenSSL +------- + +* Windows builds and macOS installers from python.org now use OpenSSL 3.0. + + .. _libb2: https://www.blake2.net/ From f7c05d7ad3075a1dbeed86b6b12903032e4afba6 Mon Sep 17 00:00:00 2001 From: Furkan Onder Date: Mon, 29 Jan 2024 02:05:29 +0300 Subject: [PATCH 136/160] gh-55664: Add warning when creating a type using a namespace dictionary with non-string keys.
(GH-105338) Co-authored-by: Daniel Urban --- Lib/test/test_descr.py | 17 ++++++++++++++++- ...023-06-06-19-09-00.gh-issue-55664.vYYl0V.rst | 1 + Objects/typeobject.c | 11 +++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-06-06-19-09-00.gh-issue-55664.vYYl0V.rst diff --git a/Lib/test/test_descr.py b/Lib/test/test_descr.py index fd0af9b30a0a71..beeab6cb7f254c 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -4734,6 +4734,20 @@ class X(object): with self.assertRaises(AttributeError): del X.__abstractmethods__ + def test_gh55664(self): + # gh-55664: issue a warning when the + # __dict__ of a class contains non-string keys + with self.assertWarnsRegex(RuntimeWarning, 'MyClass'): + MyClass = type('MyClass', (), {1: 2}) + + class meta(type): + def __new__(mcls, name, bases, ns): + ns[1] = 2 + return super().__new__(mcls, name, bases, ns) + + with self.assertWarnsRegex(RuntimeWarning, 'MyClass'): + MyClass = meta('MyClass', (), {}) + def test_proxy_call(self): class FakeStr: __class__ = str @@ -5151,7 +5165,8 @@ class Base2(object): mykey = 'from Base2' mykey2 = 'from Base2' - X = type('X', (Base,), {MyKey(): 5}) + with self.assertWarnsRegex(RuntimeWarning, 'X'): + X = type('X', (Base,), {MyKey(): 5}) # mykey is read from Base self.assertEqual(X.mykey, 'from Base') # mykey2 is read from Base2 because MyKey.__eq__ has set __bases__ diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-06-06-19-09-00.gh-issue-55664.vYYl0V.rst b/Misc/NEWS.d/next/Core and Builtins/2023-06-06-19-09-00.gh-issue-55664.vYYl0V.rst new file mode 100644 index 00000000000000..438be985496650 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-06-06-19-09-00.gh-issue-55664.vYYl0V.rst @@ -0,0 +1 @@ +Add warning when creating :class:`type` using a namespace dictionary with non-string keys. Patched by Daniel Urban and Furkan Onder. 
diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 114cf21f95e744..a850473cad813d 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -3828,6 +3828,17 @@ type_new_impl(type_new_ctx *ctx) // Put the proper slots in place fixup_slot_dispatchers(type); + if (!_PyDict_HasOnlyStringKeys(type->tp_dict)) { + if (PyErr_WarnFormat( + PyExc_RuntimeWarning, + 1, + "non-string key in the __dict__ of class %.200s", + type->tp_name) == -1) + { + goto error; + } + } + if (type_new_set_names(type) < 0) { goto error; } From f6d9e5926b6138994eaa60d1c36462e36105733d Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Sun, 28 Jan 2024 18:48:48 -0800 Subject: [PATCH 137/160] GH-113464: Add a JIT backend for tier 2 (GH-113465) Add an option (--enable-experimental-jit for configure-based builds or --experimental-jit for PCbuild-based ones) to build an *experimental* just-in-time compiler, based on copy-and-patch (https://fredrikbk.com/publications/copy-and-patch.pdf). See Tools/jit/README.md for more information on how to install the required build-time tooling. 
--- .github/workflows/jit.yml | 112 +++++ .github/workflows/mypy.yml | 2 + .gitignore | 1 + Include/cpython/optimizer.h | 2 + Include/internal/pycore_jit.h | 25 ++ Include/internal/pycore_object.h | 4 +- Makefile.pre.in | 11 +- ...-12-24-03-25-28.gh-issue-113464.dvjQmA.rst | 4 + PCbuild/_freeze_module.vcxproj | 1 + PCbuild/_freeze_module.vcxproj.filters | 3 + PCbuild/build.bat | 3 + PCbuild/pythoncore.vcxproj | 3 + PCbuild/pythoncore.vcxproj.filters | 6 + PCbuild/regen.targets | 23 +- Python/ceval.c | 20 +- Python/jit.c | 369 ++++++++++++++++ Python/optimizer.c | 12 + Python/pylifecycle.c | 7 + Tools/jit/README.md | 46 ++ Tools/jit/_llvm.py | 99 +++++ Tools/jit/_schema.py | 99 +++++ Tools/jit/_stencils.py | 220 ++++++++++ Tools/jit/_targets.py | 394 ++++++++++++++++++ Tools/jit/_writer.py | 95 +++++ Tools/jit/build.py | 28 ++ Tools/jit/mypy.ini | 5 + Tools/jit/template.c | 98 +++++ configure | 31 ++ configure.ac | 20 + 29 files changed, 1738 insertions(+), 5 deletions(-) create mode 100644 .github/workflows/jit.yml create mode 100644 Include/internal/pycore_jit.h create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-12-24-03-25-28.gh-issue-113464.dvjQmA.rst create mode 100644 Python/jit.c create mode 100644 Tools/jit/README.md create mode 100644 Tools/jit/_llvm.py create mode 100644 Tools/jit/_schema.py create mode 100644 Tools/jit/_stencils.py create mode 100644 Tools/jit/_targets.py create mode 100644 Tools/jit/_writer.py create mode 100644 Tools/jit/build.py create mode 100644 Tools/jit/mypy.ini create mode 100644 Tools/jit/template.c diff --git a/.github/workflows/jit.yml b/.github/workflows/jit.yml new file mode 100644 index 00000000000000..e137fd21b0a0dd --- /dev/null +++ b/.github/workflows/jit.yml @@ -0,0 +1,112 @@ +name: JIT +on: + pull_request: + paths: '**jit**' + push: + paths: '**jit**' + workflow_dispatch: +jobs: + jit: + name: ${{ matrix.target }} (${{ matrix.debug && 'Debug' || 'Release' }}) + runs-on: ${{ matrix.runner }} + strategy: + 
fail-fast: false + matrix: + target: + - i686-pc-windows-msvc/msvc + - x86_64-pc-windows-msvc/msvc + - x86_64-apple-darwin/clang + - x86_64-unknown-linux-gnu/gcc + - x86_64-unknown-linux-gnu/clang + - aarch64-unknown-linux-gnu/gcc + - aarch64-unknown-linux-gnu/clang + debug: + - true + - false + llvm: + - 16 + include: + - target: i686-pc-windows-msvc/msvc + architecture: Win32 + runner: windows-latest + compiler: msvc + - target: x86_64-pc-windows-msvc/msvc + architecture: x64 + runner: windows-latest + compiler: msvc + - target: x86_64-apple-darwin/clang + architecture: x86_64 + runner: macos-latest + compiler: clang + exclude: test_embed + - target: x86_64-unknown-linux-gnu/gcc + architecture: x86_64 + runner: ubuntu-latest + compiler: gcc + - target: x86_64-unknown-linux-gnu/clang + architecture: x86_64 + runner: ubuntu-latest + compiler: clang + - target: aarch64-unknown-linux-gnu/gcc + architecture: aarch64 + runner: ubuntu-latest + compiler: gcc + # These fail because of emulation, not because of the JIT: + exclude: test_unix_events test_init test_process_pool test_shutdown test_multiprocessing_fork test_cmd_line test_faulthandler test_os test_perf_profiler test_posix test_signal test_socket test_subprocess test_threading test_venv + - target: aarch64-unknown-linux-gnu/clang + architecture: aarch64 + runner: ubuntu-latest + compiler: clang + # These fail because of emulation, not because of the JIT: + exclude: test_unix_events test_init test_process_pool test_shutdown test_multiprocessing_fork test_cmd_line test_faulthandler test_os test_perf_profiler test_posix test_signal test_socket test_subprocess test_threading test_venv + env: + CC: ${{ matrix.compiler }} + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Windows + if: runner.os == 'Windows' + run: | + choco install llvm --allow-downgrade --no-progress --version ${{ matrix.llvm }} + ./PCbuild/build.bat --experimental-jit ${{ 
matrix.debug && '-d' || '--pgo' }} -p ${{ matrix.architecture }} + ./PCbuild/rt.bat ${{ matrix.debug && '-d' }} -p ${{ matrix.architecture }} -q --exclude ${{ matrix.exclude }} --multiprocess 0 --timeout 3600 --verbose2 --verbose3 + + - name: macOS + if: runner.os == 'macOS' + run: | + brew install llvm@${{ matrix.llvm }} + export SDKROOT="$(xcrun --show-sdk-path)" + ./configure --enable-experimental-jit ${{ matrix.debug && '--with-pydebug' || '--enable-optimizations --with-lto' }} + make all --jobs 3 + ./python.exe -m test --exclude ${{ matrix.exclude }} --multiprocess 0 --timeout 3600 --verbose2 --verbose3 + + - name: Native Linux + if: runner.os == 'Linux' && matrix.architecture == 'x86_64' + run: | + sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ./llvm.sh ${{ matrix.llvm }} + export PATH="$(llvm-config-${{ matrix.llvm }} --bindir):$PATH" + ./configure --enable-experimental-jit ${{ matrix.debug && '--with-pydebug' || '--enable-optimizations --with-lto' }} + make all --jobs 4 + ./python -m test --exclude ${{ matrix.exclude }} --multiprocess 0 --timeout 3600 --verbose2 --verbose3 + - name: Emulated Linux + if: runner.os == 'Linux' && matrix.architecture != 'x86_64' + run: | + sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ./llvm.sh ${{ matrix.llvm }} + export PATH="$(llvm-config-${{ matrix.llvm }} --bindir):$PATH" + ./configure --prefix="$(pwd)/../build" + make install --jobs 4 + make clean --jobs 4 + export HOST=${{ matrix.architecture }}-linux-gnu + sudo apt install --yes "gcc-$HOST" qemu-user + ${{ !matrix.debug && matrix.compiler == 'clang' && './configure --enable-optimizations' || '' }} + ${{ !matrix.debug && matrix.compiler == 'clang' && 'make profile-run-stamp --jobs 4' || '' }} + export CC="${{ matrix.compiler == 'clang' && 'clang --target=$HOST' || '$HOST-gcc' }}" + export CPP="$CC --preprocess" + export HOSTRUNNER=qemu-${{ matrix.architecture }} + export QEMU_LD_PREFIX="/usr/$HOST" + ./configure --enable-experimental-jit ${{ 
matrix.debug && '--with-pydebug' || '--enable-optimizations --with-lto' }} --build=x86_64-linux-gnu --host="$HOST" --with-build-python=../build/bin/python3 --with-pkg-config=no ac_cv_buggy_getaddrinfo=no ac_cv_file__dev_ptc=no ac_cv_file__dev_ptmx=yes + make all --jobs 4 + ./python -m test --exclude ${{ matrix.exclude }} --multiprocess 0 --timeout 3600 --verbose2 --verbose3 diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index 11928e72b9b43a..b766785de405d2 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -12,6 +12,7 @@ on: - "Tools/build/generate_sbom.py" - "Tools/cases_generator/**" - "Tools/clinic/**" + - "Tools/jit/**" - "Tools/peg_generator/**" - "Tools/requirements-dev.txt" - "Tools/wasm/**" @@ -38,6 +39,7 @@ jobs: "Tools/build/", "Tools/cases_generator", "Tools/clinic", + "Tools/jit", "Tools/peg_generator", "Tools/wasm", ] diff --git a/.gitignore b/.gitignore index c424a894c2a6e0..18eb2a9f0632ce 100644 --- a/.gitignore +++ b/.gitignore @@ -126,6 +126,7 @@ Tools/unicode/data/ # hendrikmuhs/ccache-action@v1 /.ccache /cross-build/ +/jit_stencils.h /platform /profile-clean-stamp /profile-run-stamp diff --git a/Include/cpython/optimizer.h b/Include/cpython/optimizer.h index 96e829f8fbe97d..ecf3cae4cbc3f1 100644 --- a/Include/cpython/optimizer.h +++ b/Include/cpython/optimizer.h @@ -39,6 +39,8 @@ typedef struct { typedef struct _PyExecutorObject { PyObject_VAR_HEAD _PyVMData vm_data; /* Used by the VM, but opaque to the optimizer */ + void *jit_code; + size_t jit_size; _PyUOpInstruction trace[1]; } _PyExecutorObject; diff --git a/Include/internal/pycore_jit.h b/Include/internal/pycore_jit.h new file mode 100644 index 00000000000000..0b71eb6f758ac6 --- /dev/null +++ b/Include/internal/pycore_jit.h @@ -0,0 +1,25 @@ +#ifndef Py_INTERNAL_JIT_H +#define Py_INTERNAL_JIT_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#ifdef _Py_JIT + 
+typedef _Py_CODEUNIT *(*jit_func)(_PyInterpreterFrame *frame, PyObject **stack_pointer, PyThreadState *tstate); + +int _PyJIT_Compile(_PyExecutorObject *executor, _PyUOpInstruction *trace, size_t length); +void _PyJIT_Free(_PyExecutorObject *executor); + +#endif // _Py_JIT + +#ifdef __cplusplus +} +#endif + +#endif // !Py_INTERNAL_JIT_H diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 4e52ffc77c5956..e32ea2f528940a 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -178,7 +178,7 @@ _Py_DECREF_SPECIALIZED(PyObject *op, const destructor destruct) } _Py_DECREF_STAT_INC(); #ifdef Py_REF_DEBUG - _Py_DEC_REFTOTAL(_PyInterpreterState_GET()); + _Py_DEC_REFTOTAL(PyInterpreterState_Get()); #endif if (--op->ob_refcnt != 0) { assert(op->ob_refcnt > 0); @@ -199,7 +199,7 @@ _Py_DECREF_NO_DEALLOC(PyObject *op) } _Py_DECREF_STAT_INC(); #ifdef Py_REF_DEBUG - _Py_DEC_REFTOTAL(_PyInterpreterState_GET()); + _Py_DEC_REFTOTAL(PyInterpreterState_Get()); #endif op->ob_refcnt--; #ifdef Py_DEBUG diff --git a/Makefile.pre.in b/Makefile.pre.in index 37a8b06987c710..fff3d3c4914e7a 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -433,6 +433,7 @@ PYTHON_OBJS= \ Python/initconfig.o \ Python/instrumentation.o \ Python/intrinsics.o \ + Python/jit.o \ Python/legacy_tracing.o \ Python/lock.o \ Python/marshal.o \ @@ -1365,7 +1366,7 @@ regen-unicodedata: regen-all: regen-cases regen-typeslots \ regen-token regen-ast regen-keyword regen-sre regen-frozen \ regen-pegen-metaparser regen-pegen regen-test-frozenmain \ - regen-test-levenshtein regen-global-objects regen-sbom + regen-test-levenshtein regen-global-objects regen-sbom regen-jit @echo @echo "Note: make regen-stdlib-module-names, make regen-limited-abi, " @echo "make regen-configure and make regen-unicodedata should be run manually" @@ -1846,6 +1847,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/internal/pycore_initconfig.h \ $(srcdir)/Include/internal/pycore_interp.h \ 
$(srcdir)/Include/internal/pycore_intrinsics.h \ + $(srcdir)/Include/internal/pycore_jit.h \ $(srcdir)/Include/internal/pycore_list.h \ $(srcdir)/Include/internal/pycore_llist.h \ $(srcdir)/Include/internal/pycore_lock.h \ @@ -2641,6 +2643,12 @@ config.status: $(srcdir)/configure Python/asm_trampoline.o: $(srcdir)/Python/asm_trampoline.S $(CC) -c $(PY_CORE_CFLAGS) -o $@ $< +Python/jit.o: regen-jit + +.PHONY: regen-jit +regen-jit: + @REGEN_JIT_COMMAND@ + # Some make's put the object file in the current directory .c.o: $(CC) -c $(PY_CORE_CFLAGS) -o $@ $< @@ -2733,6 +2741,7 @@ clean-retain-profile: pycremoval -rm -f Python/deepfreeze/*.[co] -rm -f Python/frozen_modules/*.h -rm -f Python/frozen_modules/MANIFEST + -rm -f jit_stencils.h -find build -type f -a ! -name '*.gc??' -exec rm -f {} ';' -rm -f Include/pydtrace_probes.h -rm -f profile-gen-stamp diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-12-24-03-25-28.gh-issue-113464.dvjQmA.rst b/Misc/NEWS.d/next/Core and Builtins/2023-12-24-03-25-28.gh-issue-113464.dvjQmA.rst new file mode 100644 index 00000000000000..bdee4d645f61c8 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-12-24-03-25-28.gh-issue-113464.dvjQmA.rst @@ -0,0 +1,4 @@ +Add an option (``--enable-experimental-jit`` for ``configure``-based builds +or ``--experimental-jit`` for ``PCbuild``-based ones) to build an +*experimental* just-in-time compiler, based on `copy-and-patch +<https://fredrikbk.com/publications/copy-and-patch.pdf>`_ diff --git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj index dde801fc0fd525..35788ec4503e8f 100644 --- a/PCbuild/_freeze_module.vcxproj +++ b/PCbuild/_freeze_module.vcxproj @@ -224,6 +224,7 @@ + diff --git a/PCbuild/_freeze_module.vcxproj.filters b/PCbuild/_freeze_module.vcxproj.filters index 90ccb954b424bc..7a44179e356105 100644 --- a/PCbuild/_freeze_module.vcxproj.filters +++ b/PCbuild/_freeze_module.vcxproj.filters @@ -250,6 +250,9 @@ Source Files + + Source Files + Source Files diff --git a/PCbuild/build.bat b/PCbuild/build.bat index
e61267b5852a8f..83b50db4467033 100644 --- a/PCbuild/build.bat +++ b/PCbuild/build.bat @@ -36,6 +36,7 @@ echo. overrides -c and -d echo. --disable-gil Enable experimental support for running without the GIL. echo. --test-marker Enable the test marker within the build. echo. --regen Regenerate all opcodes, grammar and tokens. +echo. --experimental-jit Enable the experimental just-in-time compiler. echo. echo.Available flags to avoid building certain modules. echo.These flags have no effect if '-e' is not given: @@ -85,6 +86,7 @@ if "%~1"=="--disable-gil" (set UseDisableGil=true) & shift & goto CheckOpts if "%~1"=="--test-marker" (set UseTestMarker=true) & shift & goto CheckOpts if "%~1"=="-V" shift & goto Version if "%~1"=="--regen" (set Regen=true) & shift & goto CheckOpts +if "%~1"=="--experimental-jit" (set UseJIT=true) & shift & goto CheckOpts rem These use the actual property names used by MSBuild. We could just let rem them in through the environment, but we specify them on the command line rem anyway for visibility so set defaults after this @@ -176,6 +178,7 @@ echo on /p:IncludeSSL=%IncludeSSL% /p:IncludeTkinter=%IncludeTkinter%^ /p:DisableGil=%UseDisableGil%^ /p:UseTestMarker=%UseTestMarker% %GITProperty%^ + /p:UseJIT=%UseJIT%^ %1 %2 %3 %4 %5 %6 %7 %8 %9 @echo off diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index e0b9fc137457a0..e1ff97659659ee 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -104,6 +104,7 @@ $(zlibDir);%(AdditionalIncludeDirectories) _USRDLL;Py_BUILD_CORE;Py_BUILD_CORE_BUILTIN;Py_ENABLE_SHARED;MS_DLL_ID="$(SysWinVer)";%(PreprocessorDefinitions) _Py_HAVE_ZLIB;%(PreprocessorDefinitions) + _Py_JIT;%(PreprocessorDefinitions) version.lib;ws2_32.lib;pathcch.lib;bcrypt.lib;%(AdditionalDependencies) @@ -247,6 +248,7 @@ + @@ -585,6 +587,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index fd79436f5add97..4c55f23006b2f0 100644 --- 
a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -669,6 +669,9 @@ Include\cpython + + Include\internal + Include\internal @@ -1337,6 +1340,9 @@ Source Files + + Python + Source Files diff --git a/PCbuild/regen.targets b/PCbuild/regen.targets index cc9469c7ddd726..a90620d6ca8b7d 100644 --- a/PCbuild/regen.targets +++ b/PCbuild/regen.targets @@ -28,6 +28,9 @@ <_KeywordSources Include="$(PySourcePath)Grammar\python.gram;$(PySourcePath)Grammar\Tokens" /> <_KeywordOutputs Include="$(PySourcePath)Lib\keyword.py" /> + + <_JITSources Include="$(PySourcePath)Python\executor_cases.c.h;$(GeneratedPyConfigDir)pyconfig.h;$(PySourcePath)Tools\jit\**"/> + <_JITOutputs Include="$(GeneratedPyConfigDir)jit_stencils.h"/> @@ -76,10 +79,28 @@ + + + + aarch64-pc-windows-msvc + i686-pc-windows-msvc + x86_64-pc-windows-msvc + $(JITArgs) --debug + + + - + + + diff --git a/Python/ceval.c b/Python/ceval.c index 49388cd20377c0..4f208009086191 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -11,6 +11,7 @@ #include "pycore_function.h" #include "pycore_instruments.h" #include "pycore_intrinsics.h" +#include "pycore_jit.h" #include "pycore_long.h" // _PyLong_GetZero() #include "pycore_moduleobject.h" // PyModuleObject #include "pycore_object.h" // _PyObject_GC_TRACK() @@ -955,9 +956,24 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int -// The Tier 2 interpreter is also here! +// Tier 2 is also here! 
enter_tier_two: +#ifdef _Py_JIT + + ; // ;) + jit_func jitted = current_executor->jit_code; + next_instr = jitted(frame, stack_pointer, tstate); + frame = tstate->current_frame; + Py_DECREF(current_executor); + if (next_instr == NULL) { + goto resume_with_error; + } + stack_pointer = _PyFrame_GetStackPointer(frame); + DISPATCH(); + +#else + #undef LOAD_IP #define LOAD_IP(UNUSED) (void)0 @@ -1073,6 +1089,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int Py_DECREF(current_executor); DISPATCH(); +#endif // _Py_JIT + } #if defined(__GNUC__) # pragma GCC diagnostic pop diff --git a/Python/jit.c b/Python/jit.c new file mode 100644 index 00000000000000..22949c082da05a --- /dev/null +++ b/Python/jit.c @@ -0,0 +1,369 @@ +#ifdef _Py_JIT + +#include "Python.h" + +#include "pycore_abstract.h" +#include "pycore_call.h" +#include "pycore_ceval.h" +#include "pycore_dict.h" +#include "pycore_intrinsics.h" +#include "pycore_long.h" +#include "pycore_opcode_metadata.h" +#include "pycore_opcode_utils.h" +#include "pycore_optimizer.h" +#include "pycore_pyerrors.h" +#include "pycore_setobject.h" +#include "pycore_sliceobject.h" +#include "pycore_jit.h" + +#include "jit_stencils.h" + +// Memory management stuff: //////////////////////////////////////////////////// + +#ifndef MS_WINDOWS + #include +#endif + +static size_t +get_page_size(void) +{ +#ifdef MS_WINDOWS + SYSTEM_INFO si; + GetSystemInfo(&si); + return si.dwPageSize; +#else + return sysconf(_SC_PAGESIZE); +#endif +} + +static void +jit_error(const char *message) +{ +#ifdef MS_WINDOWS + int hint = GetLastError(); +#else + int hint = errno; +#endif + PyErr_Format(PyExc_RuntimeWarning, "JIT %s (%d)", message, hint); +} + +static char * +jit_alloc(size_t size) +{ + assert(size); + assert(size % get_page_size() == 0); +#ifdef MS_WINDOWS + int flags = MEM_COMMIT | MEM_RESERVE; + char *memory = VirtualAlloc(NULL, size, flags, PAGE_READWRITE); + int failed = memory == NULL; +#else + int flags = 
MAP_ANONYMOUS | MAP_PRIVATE; + char *memory = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0); + int failed = memory == MAP_FAILED; +#endif + if (failed) { + jit_error("unable to allocate memory"); + return NULL; + } + return memory; +} + +static int +jit_free(char *memory, size_t size) +{ + assert(size); + assert(size % get_page_size() == 0); +#ifdef MS_WINDOWS + int failed = !VirtualFree(memory, 0, MEM_RELEASE); +#else + int failed = munmap(memory, size); +#endif + if (failed) { + jit_error("unable to free memory"); + return -1; + } + return 0; +} + +static int +mark_executable(char *memory, size_t size) +{ + if (size == 0) { + return 0; + } + assert(size % get_page_size() == 0); + // Do NOT ever leave the memory writable! Also, don't forget to flush the + // i-cache (I cannot begin to tell you how horrible that is to debug): +#ifdef MS_WINDOWS + if (!FlushInstructionCache(GetCurrentProcess(), memory, size)) { + jit_error("unable to flush instruction cache"); + return -1; + } + int old; + int failed = !VirtualProtect(memory, size, PAGE_EXECUTE_READ, &old); +#else + __builtin___clear_cache((char *)memory, (char *)memory + size); + int failed = mprotect(memory, size, PROT_EXEC | PROT_READ); +#endif + if (failed) { + jit_error("unable to protect executable memory"); + return -1; + } + return 0; +} + +static int +mark_readable(char *memory, size_t size) +{ + if (size == 0) { + return 0; + } + assert(size % get_page_size() == 0); +#ifdef MS_WINDOWS + DWORD old; + int failed = !VirtualProtect(memory, size, PAGE_READONLY, &old); +#else + int failed = mprotect(memory, size, PROT_READ); +#endif + if (failed) { + jit_error("unable to protect readable memory"); + return -1; + } + return 0; +} + +// JIT compiler stuff: ///////////////////////////////////////////////////////// + +// Warning! AArch64 requires you to get your hands dirty. 
These are your gloves: + +// value[value_start : value_start + width] +static uint32_t +get_bits(uint64_t value, uint8_t value_start, uint8_t width) +{ + assert(width <= 32); + return (value >> value_start) & ((1ULL << width) - 1); +} + +// *loc[loc_start : loc_start + width] = value[value_start : value_start + width] +static void +set_bits(uint32_t *loc, uint8_t loc_start, uint64_t value, uint8_t value_start, + uint8_t width) +{ + assert(loc_start + width <= 32); + // Clear the bits we're about to patch: + *loc &= ~(((1ULL << width) - 1) << loc_start); + assert(get_bits(*loc, loc_start, width) == 0); + // Patch the bits: + *loc |= get_bits(value, value_start, width) << loc_start; + assert(get_bits(*loc, loc_start, width) == get_bits(value, value_start, width)); +} + +// See https://developer.arm.com/documentation/ddi0602/2023-09/Base-Instructions +// for instruction encodings: +#define IS_AARCH64_ADD_OR_SUB(I) (((I) & 0x11C00000) == 0x11000000) +#define IS_AARCH64_ADRP(I) (((I) & 0x9F000000) == 0x90000000) +#define IS_AARCH64_BRANCH(I) (((I) & 0x7C000000) == 0x14000000) +#define IS_AARCH64_LDR_OR_STR(I) (((I) & 0x3B000000) == 0x39000000) +#define IS_AARCH64_MOV(I) (((I) & 0x9F800000) == 0x92800000) + +// Fill all of stencil's holes in the memory pointed to by base, using the +// values in patches. +static void +patch(char *base, const Stencil *stencil, uint64_t *patches) +{ + for (uint64_t i = 0; i < stencil->holes_size; i++) { + const Hole *hole = &stencil->holes[i]; + void *location = base + hole->offset; + uint64_t value = patches[hole->value] + (uint64_t)hole->symbol + hole->addend; + uint32_t *loc32 = (uint32_t *)location; + uint64_t *loc64 = (uint64_t *)location; + // LLD is a great reference for performing relocations... just keep in + // mind that Tools/jit/build.py does filtering and preprocessing for us!
+ // Here's a good place to start for each platform: + // - aarch64-apple-darwin: + // - https://github.com/llvm/llvm-project/blob/main/lld/MachO/Arch/ARM64Common.cpp + // - https://github.com/llvm/llvm-project/blob/main/lld/MachO/Arch/ARM64Common.h + // - aarch64-unknown-linux-gnu: + // - https://github.com/llvm/llvm-project/blob/main/lld/ELF/Arch/AArch64.cpp + // - i686-pc-windows-msvc: + // - https://github.com/llvm/llvm-project/blob/main/lld/COFF/Chunks.cpp + // - x86_64-apple-darwin: + // - https://github.com/llvm/llvm-project/blob/main/lld/MachO/Arch/X86_64.cpp + // - x86_64-pc-windows-msvc: + // - https://github.com/llvm/llvm-project/blob/main/lld/COFF/Chunks.cpp + // - x86_64-unknown-linux-gnu: + // - https://github.com/llvm/llvm-project/blob/main/lld/ELF/Arch/X86_64.cpp + switch (hole->kind) { + case HoleKind_IMAGE_REL_I386_DIR32: + // 32-bit absolute address. + // Check that we're not out of range of 32 unsigned bits: + assert(value < (1ULL << 32)); + *loc32 = (uint32_t)value; + continue; + case HoleKind_ARM64_RELOC_UNSIGNED: + case HoleKind_IMAGE_REL_AMD64_ADDR64: + case HoleKind_R_AARCH64_ABS64: + case HoleKind_X86_64_RELOC_UNSIGNED: + case HoleKind_R_X86_64_64: + // 64-bit absolute address. + *loc64 = value; + continue; + case HoleKind_R_AARCH64_CALL26: + case HoleKind_R_AARCH64_JUMP26: + // 28-bit relative branch. + assert(IS_AARCH64_BRANCH(*loc32)); + value -= (uint64_t)location; + // Check that we're not out of range of 28 signed bits: + assert((int64_t)value >= -(1 << 27)); + assert((int64_t)value < (1 << 27)); + // Since instructions are 4-byte aligned, only use 26 bits: + assert(get_bits(value, 0, 2) == 0); + set_bits(loc32, 0, value, 2, 26); + continue; + case HoleKind_R_AARCH64_MOVW_UABS_G0_NC: + // 16-bit low part of an absolute address. 
+ assert(IS_AARCH64_MOV(*loc32)); + // Check the implicit shift (this is "part 0 of 3"): + assert(get_bits(*loc32, 21, 2) == 0); + set_bits(loc32, 5, value, 0, 16); + continue; + case HoleKind_R_AARCH64_MOVW_UABS_G1_NC: + // 16-bit middle-low part of an absolute address. + assert(IS_AARCH64_MOV(*loc32)); + // Check the implicit shift (this is "part 1 of 3"): + assert(get_bits(*loc32, 21, 2) == 1); + set_bits(loc32, 5, value, 16, 16); + continue; + case HoleKind_R_AARCH64_MOVW_UABS_G2_NC: + // 16-bit middle-high part of an absolute address. + assert(IS_AARCH64_MOV(*loc32)); + // Check the implicit shift (this is "part 2 of 3"): + assert(get_bits(*loc32, 21, 2) == 2); + set_bits(loc32, 5, value, 32, 16); + continue; + case HoleKind_R_AARCH64_MOVW_UABS_G3: + // 16-bit high part of an absolute address. + assert(IS_AARCH64_MOV(*loc32)); + // Check the implicit shift (this is "part 3 of 3"): + assert(get_bits(*loc32, 21, 2) == 3); + set_bits(loc32, 5, value, 48, 16); + continue; + case HoleKind_ARM64_RELOC_GOT_LOAD_PAGE21: + // 21-bit count of pages between this page and an absolute address's + // page... I know, I know, it's weird. Pairs nicely with + // ARM64_RELOC_GOT_LOAD_PAGEOFF12 (below). + assert(IS_AARCH64_ADRP(*loc32)); + // Number of pages between this page and the value's page: + value = (value >> 12) - ((uint64_t)location >> 12); + // Check that we're not out of range of 21 signed bits: + assert((int64_t)value >= -(1 << 20)); + assert((int64_t)value < (1 << 20)); + // value[0:2] goes in loc[29:31]: + set_bits(loc32, 29, value, 0, 2); + // value[2:21] goes in loc[5:26]: + set_bits(loc32, 5, value, 2, 19); + continue; + case HoleKind_ARM64_RELOC_GOT_LOAD_PAGEOFF12: + // 12-bit low part of an absolute address. Pairs nicely with + // ARM64_RELOC_GOT_LOAD_PAGE21 (above). 
+ assert(IS_AARCH64_LDR_OR_STR(*loc32) || IS_AARCH64_ADD_OR_SUB(*loc32)); + // There might be an implicit shift encoded in the instruction: + uint8_t shift = 0; + if (IS_AARCH64_LDR_OR_STR(*loc32)) { + shift = (uint8_t)get_bits(*loc32, 30, 2); + // If both of these are set, the shift is supposed to be 4. + // That's pretty weird, and it's never actually been observed... + assert(get_bits(*loc32, 23, 1) == 0 || get_bits(*loc32, 26, 1) == 0); + } + value = get_bits(value, 0, 12); + assert(get_bits(value, 0, shift) == 0); + set_bits(loc32, 10, value, shift, 12); + continue; + } + Py_UNREACHABLE(); + } +} + +static void +copy_and_patch(char *base, const Stencil *stencil, uint64_t *patches) +{ + memcpy(base, stencil->body, stencil->body_size); + patch(base, stencil, patches); +} + +static void +emit(const StencilGroup *group, uint64_t patches[]) +{ + copy_and_patch((char *)patches[HoleValue_CODE], &group->code, patches); + copy_and_patch((char *)patches[HoleValue_DATA], &group->data, patches); +} + +// Compiles executor in-place. Don't forget to call _PyJIT_Free later! 
+int +_PyJIT_Compile(_PyExecutorObject *executor, _PyUOpInstruction *trace, size_t length) +{ + // Loop once to find the total compiled size: + size_t code_size = 0; + size_t data_size = 0; + for (size_t i = 0; i < length; i++) { + _PyUOpInstruction *instruction = &trace[i]; + const StencilGroup *group = &stencil_groups[instruction->opcode]; + code_size += group->code.body_size; + data_size += group->data.body_size; + } + // Round up to the nearest page (code and data need separate pages): + size_t page_size = get_page_size(); + assert((page_size & (page_size - 1)) == 0); + code_size += page_size - (code_size & (page_size - 1)); + data_size += page_size - (data_size & (page_size - 1)); + char *memory = jit_alloc(code_size + data_size); + if (memory == NULL) { + return -1; + } + // Loop again to emit the code: + char *code = memory; + char *data = memory + code_size; + for (size_t i = 0; i < length; i++) { + _PyUOpInstruction *instruction = &trace[i]; + const StencilGroup *group = &stencil_groups[instruction->opcode]; + // Think of patches as a dictionary mapping HoleValue to uint64_t: + uint64_t patches[] = GET_PATCHES(); + patches[HoleValue_CODE] = (uint64_t)code; + patches[HoleValue_CONTINUE] = (uint64_t)code + group->code.body_size; + patches[HoleValue_DATA] = (uint64_t)data; + patches[HoleValue_EXECUTOR] = (uint64_t)executor; + patches[HoleValue_OPARG] = instruction->oparg; + patches[HoleValue_OPERAND] = instruction->operand; + patches[HoleValue_TARGET] = instruction->target; + patches[HoleValue_TOP] = (uint64_t)memory; + patches[HoleValue_ZERO] = 0; + emit(group, patches); + code += group->code.body_size; + data += group->data.body_size; + } + if (mark_executable(memory, code_size) || + mark_readable(memory + code_size, data_size)) + { + jit_free(memory, code_size + data_size); + return -1; + } + executor->jit_code = memory; + executor->jit_size = code_size + data_size; + return 0; +} + +void +_PyJIT_Free(_PyExecutorObject *executor) +{ + char *memory = (char 
*)executor->jit_code; + size_t size = executor->jit_size; + if (memory) { + executor->jit_code = NULL; + executor->jit_size = 0; + if (jit_free(memory, size)) { + PyErr_WriteUnraisable(NULL); + } + } +} + +#endif // _Py_JIT diff --git a/Python/optimizer.c b/Python/optimizer.c index db615068ff517f..0d04b09fef1e84 100644 --- a/Python/optimizer.c +++ b/Python/optimizer.c @@ -7,6 +7,7 @@ #include "pycore_optimizer.h" // _Py_uop_analyze_and_optimize() #include "pycore_pystate.h" // _PyInterpreterState_GET() #include "pycore_uop_ids.h" +#include "pycore_jit.h" #include "cpython/optimizer.h" #include #include @@ -227,6 +228,9 @@ static PyMethodDef executor_methods[] = { static void uop_dealloc(_PyExecutorObject *self) { _Py_ExecutorClear(self); +#ifdef _Py_JIT + _PyJIT_Free(self); +#endif PyObject_Free(self); } @@ -789,6 +793,14 @@ make_executor_from_uops(_PyUOpInstruction *buffer, _PyBloomFilter *dependencies) executor->trace[i].operand); } } +#endif +#ifdef _Py_JIT + executor->jit_code = NULL; + executor->jit_size = 0; + if (_PyJIT_Compile(executor, executor->trace, Py_SIZE(executor))) { + Py_DECREF(executor); + return NULL; + } #endif return executor; } diff --git a/Python/pylifecycle.c b/Python/pylifecycle.c index fff64dd63d6b21..372f60602375b6 100644 --- a/Python/pylifecycle.c +++ b/Python/pylifecycle.c @@ -1240,12 +1240,19 @@ init_interp_main(PyThreadState *tstate) // Turn on experimental tier 2 (uops-based) optimizer if (is_main_interp) { +#ifndef _Py_JIT + // No JIT, maybe use the tier two interpreter: char *envvar = Py_GETENV("PYTHON_UOPS"); int enabled = envvar != NULL && *envvar > '0'; if (_Py_get_xoption(&config->xoptions, L"uops") != NULL) { enabled = 1; } if (enabled) { +#else + // Always enable tier two for JIT builds (ignoring the environment + // variable and command-line option above): + if (true) { +#endif PyObject *opt = PyUnstable_Optimizer_NewUOpOptimizer(); if (opt == NULL) { return _PyStatus_ERR("can't initialize optimizer"); diff --git 
a/Tools/jit/README.md b/Tools/jit/README.md new file mode 100644 index 00000000000000..04a6c0780bf972 --- /dev/null +++ b/Tools/jit/README.md @@ -0,0 +1,46 @@ +The JIT Compiler +================ + +This version of CPython can be built with an experimental just-in-time compiler. While most everything you already know about building and using CPython is unchanged, you will probably need to install a compatible version of LLVM first. + +## Installing LLVM + +The JIT compiler does not require end users to install any third-party dependencies, but part of it must be *built* using LLVM[^why-llvm]. You are *not* required to build the rest of CPython using LLVM, or even the same version of LLVM (in fact, this is uncommon). + +LLVM version 16 is required. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-16`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code. + +It's easy to install all of the required tools: + +### Linux + +Install LLVM 16 on Ubuntu/Debian: + +```sh +wget https://apt.llvm.org/llvm.sh +chmod +x llvm.sh +sudo ./llvm.sh 16 +``` + +### macOS + +Install LLVM 16 with [Homebrew](https://brew.sh): + +```sh +brew install llvm@16 +``` + +Homebrew won't add any of the tools to your `$PATH`. That's okay; the build script knows how to find them. + +### Windows + +Install LLVM 16 [by searching for it on LLVM's GitHub releases page](https://github.com/llvm/llvm-project/releases?q=16), clicking on "Assets", downloading the appropriate Windows installer for your platform (likely the file ending with `-win64.exe`), and running it. **When installing, be sure to select the option labeled "Add LLVM to the system PATH".** + +## Building + +For `PCbuild`-based builds, pass the new `--experimental-jit` option to `build.bat`. + +For all other builds, pass the new `--enable-experimental-jit` option to `configure`. 
+ +Otherwise, just configure and build as you normally would. Cross-compiling "just works", since the JIT is built for the host platform. + +[^why-llvm]: Clang is specifically needed because it's the only C compiler with support for guaranteed tail calls (`musttail`), which are required by CPython's continuation-passing-style approach to JIT compilation. Since LLVM also includes other functionalities we need (namely, object file parsing and disassembly), it's convenient to only support one toolchain at this time. diff --git a/Tools/jit/_llvm.py b/Tools/jit/_llvm.py new file mode 100644 index 00000000000000..603bbef59ba2e6 --- /dev/null +++ b/Tools/jit/_llvm.py @@ -0,0 +1,99 @@ +"""Utilities for invoking LLVM tools.""" +import asyncio +import functools +import os +import re +import shlex +import subprocess +import typing + +_LLVM_VERSION = 16 +_LLVM_VERSION_PATTERN = re.compile(rf"version\s+{_LLVM_VERSION}\.\d+\.\d+\s+") + +_P = typing.ParamSpec("_P") +_R = typing.TypeVar("_R") +_C = typing.Callable[_P, typing.Awaitable[_R]] + + +def _async_cache(f: _C[_P, _R]) -> _C[_P, _R]: + cache = {} + lock = asyncio.Lock() + + @functools.wraps(f) + async def wrapper( + *args: _P.args, **kwargs: _P.kwargs # pylint: disable = no-member + ) -> _R: + async with lock: + if args not in cache: + cache[args] = await f(*args, **kwargs) + return cache[args] + + return wrapper + + +_CORES = asyncio.BoundedSemaphore(os.cpu_count() or 1) + + +async def _run(tool: str, args: typing.Iterable[str], echo: bool = False) -> str | None: + command = [tool, *args] + async with _CORES: + if echo: + print(shlex.join(command)) + try: + process = await asyncio.create_subprocess_exec( + *command, stdout=subprocess.PIPE + ) + except FileNotFoundError: + return None + out, _ = await process.communicate() + if process.returncode: + raise RuntimeError(f"{tool} exited with return code {process.returncode}") + return out.decode() + + +@_async_cache +async def _check_tool_version(name: str, *, echo: bool = 
False) -> bool: + output = await _run(name, ["--version"], echo=echo) + return bool(output and _LLVM_VERSION_PATTERN.search(output)) + + +@_async_cache +async def _get_brew_llvm_prefix(*, echo: bool = False) -> str | None: + output = await _run("brew", ["--prefix", f"llvm@{_LLVM_VERSION}"], echo=echo) + return output and output.removesuffix("\n") + + +@_async_cache +async def _find_tool(tool: str, *, echo: bool = False) -> str | None: + # Unversioned executables: + path = tool + if await _check_tool_version(path, echo=echo): + return path + # Versioned executables: + path = f"{tool}-{_LLVM_VERSION}" + if await _check_tool_version(path, echo=echo): + return path + # Homebrew-installed executables: + prefix = await _get_brew_llvm_prefix(echo=echo) + if prefix is not None: + path = os.path.join(prefix, "bin", tool) + if await _check_tool_version(path, echo=echo): + return path + # Nothing found: + return None + + +async def maybe_run( + tool: str, args: typing.Iterable[str], echo: bool = False +) -> str | None: + """Run an LLVM tool if it can be found. Otherwise, return None.""" + path = await _find_tool(tool, echo=echo) + return path and await _run(path, args, echo=echo) + + +async def run(tool: str, args: typing.Iterable[str], echo: bool = False) -> str: + """Run an LLVM tool if it can be found. 
Otherwise, raise RuntimeError.""" + output = await maybe_run(tool, args, echo=echo) + if output is None: + raise RuntimeError(f"Can't find {tool}-{_LLVM_VERSION}!") + return output diff --git a/Tools/jit/_schema.py b/Tools/jit/_schema.py new file mode 100644 index 00000000000000..8eeb78e6cd69ee --- /dev/null +++ b/Tools/jit/_schema.py @@ -0,0 +1,99 @@ +"""Schema for the JSON produced by llvm-readobj --elf-output-style=JSON.""" +import typing + +HoleKind: typing.TypeAlias = typing.Literal[ + "ARM64_RELOC_GOT_LOAD_PAGE21", + "ARM64_RELOC_GOT_LOAD_PAGEOFF12", + "ARM64_RELOC_UNSIGNED", + "IMAGE_REL_AMD64_ADDR64", + "IMAGE_REL_I386_DIR32", + "R_AARCH64_ABS64", + "R_AARCH64_CALL26", + "R_AARCH64_JUMP26", + "R_AARCH64_MOVW_UABS_G0_NC", + "R_AARCH64_MOVW_UABS_G1_NC", + "R_AARCH64_MOVW_UABS_G2_NC", + "R_AARCH64_MOVW_UABS_G3", + "R_X86_64_64", + "X86_64_RELOC_UNSIGNED", +] + + +class COFFRelocation(typing.TypedDict): + """A COFF object file relocation record.""" + + Type: dict[typing.Literal["Value"], HoleKind] + Symbol: str + Offset: int + + +class ELFRelocation(typing.TypedDict): + """An ELF object file relocation record.""" + + Addend: int + Offset: int + Symbol: dict[typing.Literal["Value"], str] + Type: dict[typing.Literal["Value"], HoleKind] + + +class MachORelocation(typing.TypedDict): + """A Mach-O object file relocation record.""" + + Offset: int + Section: typing.NotRequired[dict[typing.Literal["Value"], str]] + Symbol: typing.NotRequired[dict[typing.Literal["Value"], str]] + Type: dict[typing.Literal["Value"], HoleKind] + + +class _COFFSymbol(typing.TypedDict): + Name: str + Value: int + + +class _ELFSymbol(typing.TypedDict): + Name: dict[typing.Literal["Value"], str] + Value: int + + +class _MachOSymbol(typing.TypedDict): + Name: dict[typing.Literal["Value"], str] + Value: int + + +class COFFSection(typing.TypedDict): + """A COFF object file section.""" + + Characteristics: dict[ + typing.Literal["Flags"], list[dict[typing.Literal["Name"], str]] + ] + Number: int 
+ RawDataSize: int + Relocations: list[dict[typing.Literal["Relocation"], COFFRelocation]] + SectionData: typing.NotRequired[dict[typing.Literal["Bytes"], list[int]]] + Symbols: list[dict[typing.Literal["Symbol"], _COFFSymbol]] + + +class ELFSection(typing.TypedDict): + """An ELF object file section.""" + + Flags: dict[typing.Literal["Flags"], list[dict[typing.Literal["Name"], str]]] + Index: int + Info: int + Relocations: list[dict[typing.Literal["Relocation"], ELFRelocation]] + SectionData: dict[typing.Literal["Bytes"], list[int]] + Symbols: list[dict[typing.Literal["Symbol"], _ELFSymbol]] + Type: dict[typing.Literal["Value"], str] + + +class MachOSection(typing.TypedDict): + """A Mach-O object file section.""" + + Address: int + Attributes: dict[typing.Literal["Flags"], list[dict[typing.Literal["Name"], str]]] + Index: int + Name: dict[typing.Literal["Value"], str] + Relocations: typing.NotRequired[ + list[dict[typing.Literal["Relocation"], MachORelocation]] + ] + SectionData: typing.NotRequired[dict[typing.Literal["Bytes"], list[int]]] + Symbols: typing.NotRequired[list[dict[typing.Literal["Symbol"], _MachOSymbol]]] diff --git a/Tools/jit/_stencils.py b/Tools/jit/_stencils.py new file mode 100644 index 00000000000000..71c678e04fbfd5 --- /dev/null +++ b/Tools/jit/_stencils.py @@ -0,0 +1,220 @@ +"""Core data structures for compiled code templates.""" +import dataclasses +import enum +import sys + +import _schema + + +@enum.unique +class HoleValue(enum.Enum): + """ + Different "base" values that can be patched into holes (usually combined with the + address of a symbol and/or an addend). 
+ """ + + # The base address of the machine code for the current uop (exposed as _JIT_ENTRY): + CODE = enum.auto() + # The base address of the machine code for the next uop (exposed as _JIT_CONTINUE): + CONTINUE = enum.auto() + # The base address of the read-only data for this uop: + DATA = enum.auto() + # The address of the current executor (exposed as _JIT_EXECUTOR): + EXECUTOR = enum.auto() + # The base address of the "global" offset table located in the read-only data. + # Shouldn't be present in the final stencils, since these are all replaced with + # equivalent DATA values: + GOT = enum.auto() + # The current uop's oparg (exposed as _JIT_OPARG): + OPARG = enum.auto() + # The current uop's operand (exposed as _JIT_OPERAND): + OPERAND = enum.auto() + # The current uop's target (exposed as _JIT_TARGET): + TARGET = enum.auto() + # The base address of the machine code for the first uop (exposed as _JIT_TOP): + TOP = enum.auto() + # A hardcoded value of zero (used for symbol lookups): + ZERO = enum.auto() + + +@dataclasses.dataclass +class Hole: + """ + A "hole" in the stencil to be patched with a computed runtime value. + + Analogous to relocation records in an object file. + """ + + offset: int + kind: _schema.HoleKind + # Patch with this base value: + value: HoleValue + # ...plus the address of this symbol: + symbol: str | None + # ...plus this addend: + addend: int + # Convenience method: + replace = dataclasses.replace + + def as_c(self) -> str: + """Dump this hole as an initialization of a C Hole struct.""" + parts = [ + f"{self.offset:#x}", + f"HoleKind_{self.kind}", + f"HoleValue_{self.value.name}", + f"&{self.symbol}" if self.symbol else "NULL", + _format_addend(self.addend), + ] + return f"{{{', '.join(parts)}}}" + + +@dataclasses.dataclass +class Stencil: + """ + A contiguous block of machine code or data to be copied-and-patched. + + Analogous to a section or segment in an object file. 
+ """ + + body: bytearray = dataclasses.field(default_factory=bytearray, init=False) + holes: list[Hole] = dataclasses.field(default_factory=list, init=False) + disassembly: list[str] = dataclasses.field(default_factory=list, init=False) + + def pad(self, alignment: int) -> None: + """Pad the stencil to the given alignment.""" + offset = len(self.body) + padding = -offset % alignment + self.disassembly.append(f"{offset:x}: {' '.join(['00'] * padding)}") + self.body.extend([0] * padding) + + def emit_aarch64_trampoline(self, hole: Hole) -> None: + """Even with the large code model, AArch64 Linux insists on 28-bit jumps.""" + base = len(self.body) + where = slice(hole.offset, hole.offset + 4) + instruction = int.from_bytes(self.body[where], sys.byteorder) + instruction &= 0xFC000000 + instruction |= ((base - hole.offset) >> 2) & 0x03FFFFFF + self.body[where] = instruction.to_bytes(4, sys.byteorder) + self.disassembly += [ + f"{base + 4 * 0: x}: d2800008 mov x8, #0x0", + f"{base + 4 * 0:016x}: R_AARCH64_MOVW_UABS_G0_NC {hole.symbol}", + f"{base + 4 * 1:x}: f2a00008 movk x8, #0x0, lsl #16", + f"{base + 4 * 1:016x}: R_AARCH64_MOVW_UABS_G1_NC {hole.symbol}", + f"{base + 4 * 2:x}: f2c00008 movk x8, #0x0, lsl #32", + f"{base + 4 * 2:016x}: R_AARCH64_MOVW_UABS_G2_NC {hole.symbol}", + f"{base + 4 * 3:x}: f2e00008 movk x8, #0x0, lsl #48", + f"{base + 4 * 3:016x}: R_AARCH64_MOVW_UABS_G3 {hole.symbol}", + f"{base + 4 * 4:x}: d61f0100 br x8", + ] + for code in [ + 0xD2800008.to_bytes(4, sys.byteorder), + 0xF2A00008.to_bytes(4, sys.byteorder), + 0xF2C00008.to_bytes(4, sys.byteorder), + 0xF2E00008.to_bytes(4, sys.byteorder), + 0xD61F0100.to_bytes(4, sys.byteorder), + ]: + self.body.extend(code) + for i, kind in enumerate( + [ + "R_AARCH64_MOVW_UABS_G0_NC", + "R_AARCH64_MOVW_UABS_G1_NC", + "R_AARCH64_MOVW_UABS_G2_NC", + "R_AARCH64_MOVW_UABS_G3", + ] + ): + self.holes.append(hole.replace(offset=base + 4 * i, kind=kind)) + + +@dataclasses.dataclass +class StencilGroup: + """ + Code 
and data corresponding to a given micro-opcode. + + Analogous to an entire object file. + """ + + code: Stencil = dataclasses.field(default_factory=Stencil, init=False) + data: Stencil = dataclasses.field(default_factory=Stencil, init=False) + symbols: dict[int | str, tuple[HoleValue, int]] = dataclasses.field( + default_factory=dict, init=False + ) + _got: dict[str, int] = dataclasses.field(default_factory=dict, init=False) + + def process_relocations(self, *, alignment: int = 1) -> None: + """Fix up all GOT and internal relocations for this stencil group.""" + self.code.pad(alignment) + self.data.pad(8) + for stencil in [self.code, self.data]: + holes = [] + for hole in stencil.holes: + if hole.value is HoleValue.GOT: + assert hole.symbol is not None + hole.value = HoleValue.DATA + hole.addend += self._global_offset_table_lookup(hole.symbol) + hole.symbol = None + elif hole.symbol in self.symbols: + hole.value, addend = self.symbols[hole.symbol] + hole.addend += addend + hole.symbol = None + elif ( + hole.kind in {"R_AARCH64_CALL26", "R_AARCH64_JUMP26"} + and hole.value is HoleValue.ZERO + ): + self.code.emit_aarch64_trampoline(hole) + continue + holes.append(hole) + stencil.holes[:] = holes + self.code.pad(alignment) + self._emit_global_offset_table() + self.code.holes.sort(key=lambda hole: hole.offset) + self.data.holes.sort(key=lambda hole: hole.offset) + + def _global_offset_table_lookup(self, symbol: str) -> int: + return len(self.data.body) + self._got.setdefault(symbol, 8 * len(self._got)) + + def _emit_global_offset_table(self) -> None: + got = len(self.data.body) + for s, offset in self._got.items(): + if s in self.symbols: + value, addend = self.symbols[s] + symbol = None + else: + value, symbol = symbol_to_value(s) + addend = 0 + self.data.holes.append( + Hole(got + offset, "R_X86_64_64", value, symbol, addend) + ) + value_part = value.name if value is not HoleValue.ZERO else "" + if value_part and not symbol and not addend: + addend_part = "" + else: 
+ addend_part = f"&{symbol}" if symbol else "" + addend_part += _format_addend(addend, signed=symbol is not None) + if value_part: + value_part += "+" + self.data.disassembly.append( + f"{len(self.data.body):x}: {value_part}{addend_part}" + ) + self.data.body.extend([0] * 8) + + +def symbol_to_value(symbol: str) -> tuple[HoleValue, str | None]: + """ + Convert a symbol name to a HoleValue and a symbol name. + + Some symbols (starting with "_JIT_") are special and are converted to their + own HoleValues. + """ + if symbol.startswith("_JIT_"): + try: + return HoleValue[symbol.removeprefix("_JIT_")], None + except KeyError: + pass + return HoleValue.ZERO, symbol + + +def _format_addend(addend: int, signed: bool = False) -> str: + addend %= 1 << 64 + if addend & (1 << 63): + addend -= 1 << 64 + return f"{addend:{'+#x' if signed else '#x'}}" diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py new file mode 100644 index 00000000000000..51b091eb246413 --- /dev/null +++ b/Tools/jit/_targets.py @@ -0,0 +1,394 @@ +"""Target-specific code generation, parsing, and processing.""" +import asyncio +import dataclasses +import hashlib +import json +import os +import pathlib +import re +import sys +import tempfile +import typing + +import _llvm +import _schema +import _stencils +import _writer + +if sys.version_info < (3, 11): + raise RuntimeError("Building the JIT compiler requires Python 3.11 or newer!") + +TOOLS_JIT_BUILD = pathlib.Path(__file__).resolve() +TOOLS_JIT = TOOLS_JIT_BUILD.parent +TOOLS = TOOLS_JIT.parent +CPYTHON = TOOLS.parent +PYTHON_EXECUTOR_CASES_C_H = CPYTHON / "Python" / "executor_cases.c.h" +TOOLS_JIT_TEMPLATE_C = TOOLS_JIT / "template.c" + + +_S = typing.TypeVar("_S", _schema.COFFSection, _schema.ELFSection, _schema.MachOSection) +_R = typing.TypeVar( + "_R", _schema.COFFRelocation, _schema.ELFRelocation, _schema.MachORelocation +) + + +@dataclasses.dataclass +class _Target(typing.Generic[_S, _R]): + triple: str + _: dataclasses.KW_ONLY + alignment: 
int = 1 + prefix: str = "" + debug: bool = False + force: bool = False + verbose: bool = False + + def _compute_digest(self, out: pathlib.Path) -> str: + hasher = hashlib.sha256() + hasher.update(self.triple.encode()) + hasher.update(self.alignment.to_bytes()) + hasher.update(self.prefix.encode()) + # These dependencies are also reflected in _JITSources in regen.targets: + hasher.update(PYTHON_EXECUTOR_CASES_C_H.read_bytes()) + hasher.update((out / "pyconfig.h").read_bytes()) + for dirpath, _, filenames in sorted(os.walk(TOOLS_JIT)): + for filename in filenames: + hasher.update(pathlib.Path(dirpath, filename).read_bytes()) + return hasher.hexdigest() + + async def _parse(self, path: pathlib.Path) -> _stencils.StencilGroup: + group = _stencils.StencilGroup() + args = ["--disassemble", "--reloc", f"{path}"] + output = await _llvm.maybe_run("llvm-objdump", args, echo=self.verbose) + if output is not None: + group.code.disassembly.extend( + line.expandtabs().strip() + for line in output.splitlines() + if not line.isspace() + ) + args = [ + "--elf-output-style=JSON", + "--expand-relocs", + # "--pretty-print", + "--section-data", + "--section-relocations", + "--section-symbols", + "--sections", + f"{path}", + ] + output = await _llvm.run("llvm-readobj", args, echo=self.verbose) + # --elf-output-style=JSON is only *slightly* broken on Mach-O... 
+ output = output.replace("PrivateExtern\n", "\n") + output = output.replace("Extern\n", "\n") + # ...and also COFF: + output = output[output.index("[", 1, None) :] + output = output[: output.rindex("]", None, -1) + 1] + sections: list[dict[typing.Literal["Section"], _S]] = json.loads(output) + for wrapped_section in sections: + self._handle_section(wrapped_section["Section"], group) + assert group.symbols["_JIT_ENTRY"] == (_stencils.HoleValue.CODE, 0) + if group.data.body: + line = f"0: {str(bytes(group.data.body)).removeprefix('b')}" + group.data.disassembly.append(line) + group.process_relocations() + return group + + def _handle_section(self, section: _S, group: _stencils.StencilGroup) -> None: + raise NotImplementedError(type(self)) + + def _handle_relocation( + self, base: int, relocation: _R, raw: bytes + ) -> _stencils.Hole: + raise NotImplementedError(type(self)) + + async def _compile( + self, opname: str, c: pathlib.Path, tempdir: pathlib.Path + ) -> _stencils.StencilGroup: + o = tempdir / f"{opname}.o" + args = [ + f"--target={self.triple}", + "-DPy_BUILD_CORE", + "-D_DEBUG" if self.debug else "-DNDEBUG", + f"-D_JIT_OPCODE={opname}", + "-D_PyJIT_ACTIVE", + "-D_Py_JIT", + "-I.", + f"-I{CPYTHON / 'Include'}", + f"-I{CPYTHON / 'Include' / 'internal'}", + f"-I{CPYTHON / 'Include' / 'internal' / 'mimalloc'}", + f"-I{CPYTHON / 'Python'}", + "-O3", + "-c", + "-fno-asynchronous-unwind-tables", + # SET_FUNCTION_ATTRIBUTE on 32-bit Windows debug builds: + "-fno-jump-tables", + # Position-independent code adds indirection to every load and jump: + "-fno-pic", + # Don't make calls to weird stack-smashing canaries: + "-fno-stack-protector", + # We have three options for code model: + # - "small": the default, assumes that code and data reside in the + # lowest 2GB of memory (128MB on aarch64) + # - "medium": assumes that code resides in the lowest 2GB of memory, + # and makes no assumptions about data (not available on aarch64) + # - "large": makes no assumptions 
about either code or data + "-mcmodel=large", + "-o", + f"{o}", + "-std=c11", + f"{c}", + ] + await _llvm.run("clang", args, echo=self.verbose) + return await self._parse(o) + + async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]: + generated_cases = PYTHON_EXECUTOR_CASES_C_H.read_text() + opnames = sorted(re.findall(r"\n {8}case (\w+): \{\n", generated_cases)) + tasks = [] + with tempfile.TemporaryDirectory() as tempdir: + work = pathlib.Path(tempdir).resolve() + async with asyncio.TaskGroup() as group: + for opname in opnames: + coro = self._compile(opname, TOOLS_JIT_TEMPLATE_C, work) + tasks.append(group.create_task(coro, name=opname)) + return {task.get_name(): task.result() for task in tasks} + + def build(self, out: pathlib.Path, *, comment: str = "") -> None: + """Build jit_stencils.h in the given directory.""" + digest = f"// {self._compute_digest(out)}\n" + jit_stencils = out / "jit_stencils.h" + if ( + not self.force + and jit_stencils.exists() + and jit_stencils.read_text().startswith(digest) + ): + return + stencil_groups = asyncio.run(self._build_stencils()) + with jit_stencils.open("w") as file: + file.write(digest) + if comment: + file.write(f"// {comment}\n") + file.write("") + for line in _writer.dump(stencil_groups): + file.write(f"{line}\n") + + +class _COFF( + _Target[_schema.COFFSection, _schema.COFFRelocation] +): # pylint: disable = too-few-public-methods + def _handle_section( + self, section: _schema.COFFSection, group: _stencils.StencilGroup + ) -> None: + flags = {flag["Name"] for flag in section["Characteristics"]["Flags"]} + if "SectionData" in section: + section_data_bytes = section["SectionData"]["Bytes"] + else: + # Zeroed BSS data, seen with printf debugging calls: + section_data_bytes = [0] * section["RawDataSize"] + if "IMAGE_SCN_MEM_EXECUTE" in flags: + value = _stencils.HoleValue.CODE + stencil = group.code + elif "IMAGE_SCN_MEM_READ" in flags: + value = _stencils.HoleValue.DATA + stencil = group.data + else: + 
return + base = len(stencil.body) + group.symbols[section["Number"]] = value, base + stencil.body.extend(section_data_bytes) + for wrapped_symbol in section["Symbols"]: + symbol = wrapped_symbol["Symbol"] + offset = base + symbol["Value"] + name = symbol["Name"] + name = name.removeprefix(self.prefix) + group.symbols[name] = value, offset + for wrapped_relocation in section["Relocations"]: + relocation = wrapped_relocation["Relocation"] + hole = self._handle_relocation(base, relocation, stencil.body) + stencil.holes.append(hole) + + def _handle_relocation( + self, base: int, relocation: _schema.COFFRelocation, raw: bytes + ) -> _stencils.Hole: + match relocation: + case { + "Offset": offset, + "Symbol": s, + "Type": {"Value": "IMAGE_REL_AMD64_ADDR64" as kind}, + }: + offset += base + s = s.removeprefix(self.prefix) + value, symbol = _stencils.symbol_to_value(s) + addend = int.from_bytes(raw[offset : offset + 8], "little") + case { + "Offset": offset, + "Symbol": s, + "Type": {"Value": "IMAGE_REL_I386_DIR32" as kind}, + }: + offset += base + s = s.removeprefix(self.prefix) + value, symbol = _stencils.symbol_to_value(s) + addend = int.from_bytes(raw[offset : offset + 4], "little") + case _: + raise NotImplementedError(relocation) + return _stencils.Hole(offset, kind, value, symbol, addend) + + +class _ELF( + _Target[_schema.ELFSection, _schema.ELFRelocation] +): # pylint: disable = too-few-public-methods + def _handle_section( + self, section: _schema.ELFSection, group: _stencils.StencilGroup + ) -> None: + section_type = section["Type"]["Value"] + flags = {flag["Name"] for flag in section["Flags"]["Flags"]} + if section_type == "SHT_RELA": + assert "SHF_INFO_LINK" in flags, flags + assert not section["Symbols"] + value, base = group.symbols[section["Info"]] + if value is _stencils.HoleValue.CODE: + stencil = group.code + else: + assert value is _stencils.HoleValue.DATA + stencil = group.data + for wrapped_relocation in section["Relocations"]: + relocation = 
wrapped_relocation["Relocation"] + hole = self._handle_relocation(base, relocation, stencil.body) + stencil.holes.append(hole) + elif section_type == "SHT_PROGBITS": + if "SHF_ALLOC" not in flags: + return + if "SHF_EXECINSTR" in flags: + value = _stencils.HoleValue.CODE + stencil = group.code + else: + value = _stencils.HoleValue.DATA + stencil = group.data + group.symbols[section["Index"]] = value, len(stencil.body) + for wrapped_symbol in section["Symbols"]: + symbol = wrapped_symbol["Symbol"] + offset = len(stencil.body) + symbol["Value"] + name = symbol["Name"]["Value"] + name = name.removeprefix(self.prefix) + group.symbols[name] = value, offset + stencil.body.extend(section["SectionData"]["Bytes"]) + assert not section["Relocations"] + else: + assert section_type in { + "SHT_GROUP", + "SHT_LLVM_ADDRSIG", + "SHT_NULL", + "SHT_STRTAB", + "SHT_SYMTAB", + }, section_type + + def _handle_relocation( + self, base: int, relocation: _schema.ELFRelocation, raw: bytes + ) -> _stencils.Hole: + match relocation: + case { + "Addend": addend, + "Offset": offset, + "Symbol": {"Value": s}, + "Type": {"Value": kind}, + }: + offset += base + s = s.removeprefix(self.prefix) + value, symbol = _stencils.symbol_to_value(s) + case _: + raise NotImplementedError(relocation) + return _stencils.Hole(offset, kind, value, symbol, addend) + + +class _MachO( + _Target[_schema.MachOSection, _schema.MachORelocation] +): # pylint: disable = too-few-public-methods + def _handle_section( + self, section: _schema.MachOSection, group: _stencils.StencilGroup + ) -> None: + assert section["Address"] >= len(group.code.body) + assert "SectionData" in section + flags = {flag["Name"] for flag in section["Attributes"]["Flags"]} + name = section["Name"]["Value"] + name = name.removeprefix(self.prefix) + if "SomeInstructions" in flags: + value = _stencils.HoleValue.CODE + stencil = group.code + start_address = 0 + group.symbols[name] = value, section["Address"] - start_address + else: + value = 
_stencils.HoleValue.DATA + stencil = group.data + start_address = len(group.code.body) + group.symbols[name] = value, len(group.code.body) + base = section["Address"] - start_address + group.symbols[section["Index"]] = value, base + stencil.body.extend( + [0] * (section["Address"] - len(group.code.body) - len(group.data.body)) + ) + stencil.body.extend(section["SectionData"]["Bytes"]) + assert "Symbols" in section + for wrapped_symbol in section["Symbols"]: + symbol = wrapped_symbol["Symbol"] + offset = symbol["Value"] - start_address + name = symbol["Name"]["Value"] + name = name.removeprefix(self.prefix) + group.symbols[name] = value, offset + assert "Relocations" in section + for wrapped_relocation in section["Relocations"]: + relocation = wrapped_relocation["Relocation"] + hole = self._handle_relocation(base, relocation, stencil.body) + stencil.holes.append(hole) + + def _handle_relocation( + self, base: int, relocation: _schema.MachORelocation, raw: bytes + ) -> _stencils.Hole: + symbol: str | None + match relocation: + case { + "Offset": offset, + "Symbol": {"Value": s}, + "Type": { + "Value": "ARM64_RELOC_GOT_LOAD_PAGE21" + | "ARM64_RELOC_GOT_LOAD_PAGEOFF12" as kind + }, + }: + offset += base + s = s.removeprefix(self.prefix) + value, symbol = _stencils.HoleValue.GOT, s + addend = 0 + case { + "Offset": offset, + "Section": {"Value": s}, + "Type": {"Value": kind}, + } | { + "Offset": offset, + "Symbol": {"Value": s}, + "Type": {"Value": kind}, + }: + offset += base + s = s.removeprefix(self.prefix) + value, symbol = _stencils.symbol_to_value(s) + addend = 0 + case _: + raise NotImplementedError(relocation) + # Turn Clang's weird __bzero calls into normal bzero calls: + if symbol == "__bzero": + symbol = "bzero" + return _stencils.Hole(offset, kind, value, symbol, addend) + + +def get_target(host: str) -> _COFF | _ELF | _MachO: + """Build a _Target for the given host "triple" and options.""" + if re.fullmatch(r"aarch64-apple-darwin.*", host): + return 
_MachO(host, alignment=8, prefix="_") + if re.fullmatch(r"aarch64-.*-linux-gnu", host): + return _ELF(host, alignment=8) + if re.fullmatch(r"i686-pc-windows-msvc", host): + return _COFF(host, prefix="_") + if re.fullmatch(r"x86_64-apple-darwin.*", host): + return _MachO(host, prefix="_") + if re.fullmatch(r"x86_64-pc-windows-msvc", host): + return _COFF(host) + if re.fullmatch(r"x86_64-.*-linux-gnu", host): + return _ELF(host) + raise ValueError(host) diff --git a/Tools/jit/_writer.py b/Tools/jit/_writer.py new file mode 100644 index 00000000000000..8a2a42e75cfb9b --- /dev/null +++ b/Tools/jit/_writer.py @@ -0,0 +1,95 @@ +"""Utilities for writing StencilGroups out to a C header file.""" +import typing + +import _schema +import _stencils + + +def _dump_header() -> typing.Iterator[str]: + yield "typedef enum {" + for kind in typing.get_args(_schema.HoleKind): + yield f" HoleKind_{kind}," + yield "} HoleKind;" + yield "" + yield "typedef enum {" + for value in _stencils.HoleValue: + yield f" HoleValue_{value.name}," + yield "} HoleValue;" + yield "" + yield "typedef struct {" + yield " const uint64_t offset;" + yield " const HoleKind kind;" + yield " const HoleValue value;" + yield " const void *symbol;" + yield " const uint64_t addend;" + yield "} Hole;" + yield "" + yield "typedef struct {" + yield " const size_t body_size;" + yield " const unsigned char * const body;" + yield " const size_t holes_size;" + yield " const Hole * const holes;" + yield "} Stencil;" + yield "" + yield "typedef struct {" + yield " const Stencil code;" + yield " const Stencil data;" + yield "} StencilGroup;" + yield "" + + +def _dump_footer(opnames: typing.Iterable[str]) -> typing.Iterator[str]: + yield "#define INIT_STENCIL(STENCIL) { \\" + yield " .body_size = Py_ARRAY_LENGTH(STENCIL##_body) - 1, \\" + yield " .body = STENCIL##_body, \\" + yield " .holes_size = Py_ARRAY_LENGTH(STENCIL##_holes) - 1, \\" + yield " .holes = STENCIL##_holes, \\" + yield "}" + yield "" + yield "#define 
INIT_STENCIL_GROUP(OP) { \\" + yield " .code = INIT_STENCIL(OP##_code), \\" + yield " .data = INIT_STENCIL(OP##_data), \\" + yield "}" + yield "" + yield "static const StencilGroup stencil_groups[512] = {" + for opname in opnames: + yield f" [{opname}] = INIT_STENCIL_GROUP({opname})," + yield "};" + yield "" + yield "#define GET_PATCHES() { \\" + for value in _stencils.HoleValue: + yield f" [HoleValue_{value.name}] = (uint64_t)0xBADBADBADBADBADB, \\" + yield "}" + + +def _dump_stencil(opname: str, group: _stencils.StencilGroup) -> typing.Iterator[str]: + yield f"// {opname}" + for part, stencil in [("code", group.code), ("data", group.data)]: + for line in stencil.disassembly: + yield f"// {line}" + if stencil.body: + size = len(stencil.body) + 1 + yield f"static const unsigned char {opname}_{part}_body[{size}] = {{" + for i in range(0, len(stencil.body), 8): + row = " ".join(f"{byte:#04x}," for byte in stencil.body[i : i + 8]) + yield f" {row}" + yield "};" + else: + yield f"static const unsigned char {opname}_{part}_body[1];" + if stencil.holes: + size = len(stencil.holes) + 1 + yield f"static const Hole {opname}_{part}_holes[{size}] = {{" + for hole in stencil.holes: + yield f" {hole.as_c()}," + yield "};" + else: + yield f"static const Hole {opname}_{part}_holes[1];" + yield "" + + +def dump(groups: dict[str, _stencils.StencilGroup]) -> typing.Iterator[str]: + """Yield a JIT compiler line-by-line as a C header file.""" + yield from _dump_header() + for opname, group in groups.items(): + yield from _dump_stencil(opname, group) + yield from _dump_footer(groups) diff --git a/Tools/jit/build.py b/Tools/jit/build.py new file mode 100644 index 00000000000000..4d4ace14ebf26c --- /dev/null +++ b/Tools/jit/build.py @@ -0,0 +1,28 @@ +"""Build an experimental just-in-time compiler for CPython.""" +import argparse +import pathlib +import shlex +import sys + +import _targets + +if __name__ == "__main__": + comment = f"$ {shlex.join([sys.executable] + sys.argv)}" + parser = 
argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "target", type=_targets.get_target, help="a PEP 11 target triple to compile for" + ) + parser.add_argument( + "-d", "--debug", action="store_true", help="compile for a debug build of Python" + ) + parser.add_argument( + "-f", "--force", action="store_true", help="force the entire JIT to be rebuilt" + ) + parser.add_argument( + "-v", "--verbose", action="store_true", help="echo commands as they are run" + ) + args = parser.parse_args() + args.target.debug = args.debug + args.target.force = args.force + args.target.verbose = args.verbose + args.target.build(pathlib.Path.cwd(), comment=comment) diff --git a/Tools/jit/mypy.ini b/Tools/jit/mypy.ini new file mode 100644 index 00000000000000..768d0028516abd --- /dev/null +++ b/Tools/jit/mypy.ini @@ -0,0 +1,5 @@ +[mypy] +files = Tools/jit +pretty = True +python_version = 3.11 +strict = True diff --git a/Tools/jit/template.c b/Tools/jit/template.c new file mode 100644 index 00000000000000..12303a550d8879 --- /dev/null +++ b/Tools/jit/template.c @@ -0,0 +1,98 @@ +#include "Python.h" + +#include "pycore_call.h" +#include "pycore_ceval.h" +#include "pycore_dict.h" +#include "pycore_emscripten_signal.h" +#include "pycore_intrinsics.h" +#include "pycore_jit.h" +#include "pycore_long.h" +#include "pycore_opcode_metadata.h" +#include "pycore_opcode_utils.h" +#include "pycore_range.h" +#include "pycore_setobject.h" +#include "pycore_sliceobject.h" + +#include "ceval_macros.h" + +#undef CURRENT_OPARG +#define CURRENT_OPARG() (_oparg) + +#undef CURRENT_OPERAND +#define CURRENT_OPERAND() (_operand) + +#undef DEOPT_IF +#define DEOPT_IF(COND, INSTNAME) \ + do { \ + if ((COND)) { \ + goto deoptimize; \ + } \ + } while (0) + +#undef ENABLE_SPECIALIZATION +#define ENABLE_SPECIALIZATION (0) + +#undef GOTO_ERROR +#define GOTO_ERROR(LABEL) \ + do { \ + goto LABEL ## _tier_two; \ + } while (0) + +#undef LOAD_IP +#define LOAD_IP(UNUSED) \ + do { \ + } while (0) + +#define 
PATCH_VALUE(TYPE, NAME, ALIAS) \ + extern void ALIAS; \ + TYPE NAME = (TYPE)(uint64_t)&ALIAS; + +#define PATCH_JUMP(ALIAS) \ + extern void ALIAS; \ + __attribute__((musttail)) \ + return ((jit_func)&ALIAS)(frame, stack_pointer, tstate); + +_Py_CODEUNIT * +_JIT_ENTRY(_PyInterpreterFrame *frame, PyObject **stack_pointer, PyThreadState *tstate) +{ + // Locals that the instruction implementations expect to exist: + PATCH_VALUE(_PyExecutorObject *, current_executor, _JIT_EXECUTOR) + int oparg; + int opcode = _JIT_OPCODE; + _PyUOpInstruction *next_uop; + // Other stuff we need handy: + PATCH_VALUE(uint16_t, _oparg, _JIT_OPARG) + PATCH_VALUE(uint64_t, _operand, _JIT_OPERAND) + PATCH_VALUE(uint32_t, _target, _JIT_TARGET) + // The actual instruction definitions (only one will be used): + if (opcode == _JUMP_TO_TOP) { + CHECK_EVAL_BREAKER(); + PATCH_JUMP(_JIT_TOP); + } + switch (opcode) { +#include "executor_cases.c.h" + default: + Py_UNREACHABLE(); + } + PATCH_JUMP(_JIT_CONTINUE); + // Labels that the instruction implementations expect to exist: +unbound_local_error_tier_two: + _PyEval_FormatExcCheckArg( + tstate, PyExc_UnboundLocalError, UNBOUNDLOCAL_ERROR_MSG, + PyTuple_GetItem(_PyFrame_GetCode(frame)->co_localsplusnames, oparg)); + goto error_tier_two; +pop_4_error_tier_two: + STACK_SHRINK(1); +pop_3_error_tier_two: + STACK_SHRINK(1); +pop_2_error_tier_two: + STACK_SHRINK(1); +pop_1_error_tier_two: + STACK_SHRINK(1); +error_tier_two: + _PyFrame_SetStackPointer(frame, stack_pointer); + return NULL; +deoptimize: + _PyFrame_SetStackPointer(frame, stack_pointer); + return _PyCode_CODE(_PyFrame_GetCode(frame)) + _target; +} diff --git a/configure b/configure index b1153df4d7ec52..c563c3f5d3c7e6 100755 --- a/configure +++ b/configure @@ -920,6 +920,7 @@ LLVM_AR PROFILE_TASK DEF_MAKE_RULE DEF_MAKE_ALL_RULE +REGEN_JIT_COMMAND ABIFLAGS LN MKDIR_P @@ -1074,6 +1075,7 @@ with_pydebug with_trace_refs enable_pystats with_assertions +enable_experimental_jit enable_optimizations 
with_lto enable_bolt @@ -1801,6 +1803,9 @@ Optional Features: --disable-gil enable experimental support for running without the GIL (default is no) --enable-pystats enable internal statistics gathering (default is no) + --enable-experimental-jit + build the experimental just-in-time compiler + (default is no) --enable-optimizations enable expensive, stable optimizations (PGO, etc.) (default is no) --enable-bolt enable usage of the llvm-bolt post-link optimizer @@ -7997,6 +8002,32 @@ else printf "%s\n" "no" >&6; } fi +# Check for --enable-experimental-jit: +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for --enable-experimental-jit" >&5 +printf %s "checking for --enable-experimental-jit... " >&6; } +# Check whether --enable-experimental-jit was given. +if test ${enable_experimental_jit+y} +then : + enableval=$enable_experimental_jit; +else $as_nop + enable_experimental_jit=no +fi + +if test "x$enable_experimental_jit" = xno +then : + +else $as_nop + as_fn_append CFLAGS_NODIST " -D_Py_JIT" + REGEN_JIT_COMMAND="\$(PYTHON_FOR_REGEN) \$(srcdir)/Tools/jit/build.py $host" + if test "x$Py_DEBUG" = xtrue +then : + as_fn_append REGEN_JIT_COMMAND " --debug" +fi +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_experimental_jit" >&5 +printf "%s\n" "$enable_experimental_jit" >&6; } + # Enable optimization flags diff --git a/configure.ac b/configure.ac index 9587e6d63499aa..13c46b3e80151d 100644 --- a/configure.ac +++ b/configure.ac @@ -1579,6 +1579,26 @@ else AC_MSG_RESULT([no]) fi +# Check for --enable-experimental-jit: +AC_MSG_CHECKING([for --enable-experimental-jit]) +AC_ARG_ENABLE([experimental-jit], + [AS_HELP_STRING([--enable-experimental-jit], + [build the experimental just-in-time compiler (default is no)])], + [], + [enable_experimental_jit=no]) +AS_VAR_IF([enable_experimental_jit], + [no], + [], + [AS_VAR_APPEND([CFLAGS_NODIST], [" -D_Py_JIT"]) + AS_VAR_SET([REGEN_JIT_COMMAND], + ["\$(PYTHON_FOR_REGEN) \$(srcdir)/Tools/jit/build.py $host"]) 
+ AS_VAR_IF([Py_DEBUG], + [true], + [AS_VAR_APPEND([REGEN_JIT_COMMAND], [" --debug"])], + [])]) +AC_SUBST([REGEN_JIT_COMMAND]) +AC_MSG_RESULT([$enable_experimental_jit]) + # Enable optimization flags AC_SUBST([DEF_MAKE_ALL_RULE]) AC_SUBST([DEF_MAKE_RULE]) From a16a9f978f42b8a09297c1efbf33877f6388c403 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Sun, 28 Jan 2024 18:52:58 -0800 Subject: [PATCH 138/160] GH-113464: A JIT backend for tier 2 (GH-113465) Add an option (--enable-experimental-jit for configure-based builds or --experimental-jit for PCbuild-based ones) to build an *experimental* just-in-time compiler, based on copy-and-patch (https://fredrikbk.com/publications/copy-and-patch.pdf). See Tools/jit/README.md for more information, including how to install the required build-time tooling. Merry JIT-mas! ;) From d7d0d13cd37651990586d31d8974c59bd25e1045 Mon Sep 17 00:00:00 2001 From: Stanley <46876382+slateny@users.noreply.github.com> Date: Mon, 29 Jan 2024 01:19:22 -0800 Subject: [PATCH 139/160] gh-89159: Add some TarFile attribute types (GH-114520) Co-authored-by: Stanley <46876382+slateny@users.noreply.github.com> --- Doc/library/tarfile.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst index 34a738a7f1c41f..2134293a0bb0de 100644 --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -673,6 +673,7 @@ be finalized; only the internally used file object will be closed. See the .. attribute:: TarFile.pax_headers + :type: dict A dictionary containing key-value pairs of pax global headers. @@ -838,26 +839,31 @@ A ``TarInfo`` object has the following public data attributes: attribute. .. attribute:: TarInfo.chksum + :type: int Header checksum. .. attribute:: TarInfo.devmajor + :type: int Device major number. .. attribute:: TarInfo.devminor + :type: int Device minor number. .. attribute:: TarInfo.offset + :type: int The tar header starts here. .. 
attribute:: TarInfo.offset_data + :type: int The file's data starts here. From 2124a3ddcc0e274521f74d239f0e94060e17dd7f Mon Sep 17 00:00:00 2001 From: Shantanu <12621235+hauntsaninja@users.noreply.github.com> Date: Mon, 29 Jan 2024 01:30:22 -0800 Subject: [PATCH 140/160] gh-109653: Improve import time of importlib.metadata / email.utils (#114664) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit My criterion for delayed imports is that they're only worth it if the majority of users of the module would benefit from it, otherwise you're just moving latency around unpredictably. mktime_tz is not used anywhere in the standard library and grep.app indicates it's not got much use in the ecosystem either. Distribution.files is not nearly as widely used as other importlib.metadata APIs, so we defer the csv import. Before: ``` λ hyperfine -w 8 './python -c "import importlib.metadata"' Benchmark 1: ./python -c "import importlib.metadata" Time (mean ± σ): 65.1 ms ± 0.5 ms [User: 55.3 ms, System: 9.8 ms] Range (min … max): 64.4 ms … 66.4 ms 44 runs ``` After: ``` λ hyperfine -w 8 './python -c "import importlib.metadata"' Benchmark 1: ./python -c "import importlib.metadata" Time (mean ± σ): 62.0 ms ± 0.3 ms [User: 52.5 ms, System: 9.6 ms] Range (min … max): 61.3 ms … 62.8 ms 46 runs ``` for about a 3ms saving with warm disk cache, maybe 7-11ms with cold disk cache. 
--- Lib/email/_parseaddr.py | 5 ++++- Lib/importlib/metadata/__init__.py | 5 ++++- .../Library/2024-01-28-00-48-12.gh-issue-109653.vF4exe.rst | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-01-28-00-48-12.gh-issue-109653.vF4exe.rst diff --git a/Lib/email/_parseaddr.py b/Lib/email/_parseaddr.py index febe411355d6be..0f1bf8e4253ec4 100644 --- a/Lib/email/_parseaddr.py +++ b/Lib/email/_parseaddr.py @@ -13,7 +13,7 @@ 'quote', ] -import time, calendar +import time SPACE = ' ' EMPTYSTRING = '' @@ -194,6 +194,9 @@ def mktime_tz(data): # No zone info, so localtime is better assumption than GMT return time.mktime(data[:8] + (-1,)) else: + # Delay the import, since mktime_tz is rarely used + import calendar + t = calendar.timegm(data) return t - data[9] diff --git a/Lib/importlib/metadata/__init__.py b/Lib/importlib/metadata/__init__.py index 7b142e786e829e..c612fbefee2e80 100644 --- a/Lib/importlib/metadata/__init__.py +++ b/Lib/importlib/metadata/__init__.py @@ -1,7 +1,6 @@ import os import re import abc -import csv import sys import json import email @@ -478,6 +477,10 @@ def make_file(name, hash=None, size_str=None): @pass_none def make_files(lines): + # Delay csv import, since Distribution.files is not as widely used + # as other parts of importlib.metadata + import csv + return starmap(make_file, csv.reader(lines)) @pass_none diff --git a/Misc/NEWS.d/next/Library/2024-01-28-00-48-12.gh-issue-109653.vF4exe.rst b/Misc/NEWS.d/next/Library/2024-01-28-00-48-12.gh-issue-109653.vF4exe.rst new file mode 100644 index 00000000000000..fb3382098853b3 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-28-00-48-12.gh-issue-109653.vF4exe.rst @@ -0,0 +1 @@ +Improve import time of :mod:`importlib.metadata` and :mod:`email.utils`. 
From 1ac1b2f9536a581f1656f0ac9330a7382420cda1 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Mon, 29 Jan 2024 12:37:06 +0300 Subject: [PATCH 141/160] gh-114685: Fix incorrect use of PyBUF_READ in import.c (GH-114686) --- Python/import.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/import.c b/Python/import.c index 2dd95d8364a0be..2fd0c08a6bb5ae 100644 --- a/Python/import.c +++ b/Python/import.c @@ -3544,7 +3544,7 @@ _imp_get_frozen_object_impl(PyObject *module, PyObject *name, struct frozen_info info = {0}; Py_buffer buf = {0}; if (PyObject_CheckBuffer(dataobj)) { - if (PyObject_GetBuffer(dataobj, &buf, PyBUF_READ) != 0) { + if (PyObject_GetBuffer(dataobj, &buf, PyBUF_SIMPLE) != 0) { return NULL; } info.data = (const char *)buf.buf; From 3b86891fd69093b60141300862f278614ba80613 Mon Sep 17 00:00:00 2001 From: Shantanu <12621235+hauntsaninja@users.noreply.github.com> Date: Mon, 29 Jan 2024 01:37:28 -0800 Subject: [PATCH 142/160] gh-110893: Improve the documentation for __future__ module (#114642) nedbat took issue with the phrasing "real module". I'm actually fine with that phrasing, but I do think the `__future__` page should be clear about the way in which the `__future__` module is special. (Yes, there was a footnote linking to the future statements part of the reference, but there should be upfront discussion). I'm sympathetic to nedbat's claim that no one really cares about `__future__._Feature`, so I've moved the interesting table up to the top. 
--- Doc/library/__future__.rst | 98 ++++++++++++++++++++++---------------- 1 file changed, 56 insertions(+), 42 deletions(-) diff --git a/Doc/library/__future__.rst b/Doc/library/__future__.rst index d261e4a4f338a5..762f8b4695b3dd 100644 --- a/Doc/library/__future__.rst +++ b/Doc/library/__future__.rst @@ -8,20 +8,68 @@ -------------- -:mod:`__future__` is a real module, and serves three purposes: +Imports of the form ``from __future__ import feature`` are called +:ref:`future statements `. These are special-cased by the Python compiler +to allow the use of new Python features in modules containing the future statement +before the release in which the feature becomes standard. + +While these future statements are given additional special meaning by the +Python compiler, they are still executed like any other import statement and +the :mod:`__future__` exists and is handled by the import system the same way +any other Python module would be. This design serves three purposes: * To avoid confusing existing tools that analyze import statements and expect to find the modules they're importing. -* To ensure that :ref:`future statements ` run under releases prior to - 2.1 at least yield runtime exceptions (the import of :mod:`__future__` will - fail, because there was no module of that name prior to 2.1). - * To document when incompatible changes were introduced, and when they will be --- or were --- made mandatory. This is a form of executable documentation, and can be inspected programmatically via importing :mod:`__future__` and examining its contents. +* To ensure that :ref:`future statements ` run under releases prior to + Python 2.1 at least yield runtime exceptions (the import of :mod:`__future__` + will fail, because there was no module of that name prior to 2.1). + +Module Contents +--------------- + +No feature description will ever be deleted from :mod:`__future__`. 
Since its +introduction in Python 2.1 the following features have found their way into the +language using this mechanism: + ++------------------+-------------+--------------+---------------------------------------------+ +| feature | optional in | mandatory in | effect | ++==================+=============+==============+=============================================+ +| nested_scopes | 2.1.0b1 | 2.2 | :pep:`227`: | +| | | | *Statically Nested Scopes* | ++------------------+-------------+--------------+---------------------------------------------+ +| generators | 2.2.0a1 | 2.3 | :pep:`255`: | +| | | | *Simple Generators* | ++------------------+-------------+--------------+---------------------------------------------+ +| division | 2.2.0a2 | 3.0 | :pep:`238`: | +| | | | *Changing the Division Operator* | ++------------------+-------------+--------------+---------------------------------------------+ +| absolute_import | 2.5.0a1 | 3.0 | :pep:`328`: | +| | | | *Imports: Multi-Line and Absolute/Relative* | ++------------------+-------------+--------------+---------------------------------------------+ +| with_statement | 2.5.0a1 | 2.6 | :pep:`343`: | +| | | | *The "with" Statement* | ++------------------+-------------+--------------+---------------------------------------------+ +| print_function | 2.6.0a2 | 3.0 | :pep:`3105`: | +| | | | *Make print a function* | ++------------------+-------------+--------------+---------------------------------------------+ +| unicode_literals | 2.6.0a2 | 3.0 | :pep:`3112`: | +| | | | *Bytes literals in Python 3000* | ++------------------+-------------+--------------+---------------------------------------------+ +| generator_stop | 3.5.0b1 | 3.7 | :pep:`479`: | +| | | | *StopIteration handling inside generators* | ++------------------+-------------+--------------+---------------------------------------------+ +| annotations | 3.7.0b1 | TBD [1]_ | :pep:`563`: | +| | | | *Postponed evaluation of annotations* | 
++------------------+-------------+--------------+---------------------------------------------+ + +.. XXX Adding a new entry? Remember to update simple_stmts.rst, too. + .. _future-classes: .. class:: _Feature @@ -65,43 +113,6 @@ dynamically compiled code. This flag is stored in the :attr:`_Feature.compiler_flag` attribute on :class:`_Feature` instances. -No feature description will ever be deleted from :mod:`__future__`. Since its -introduction in Python 2.1 the following features have found their way into the -language using this mechanism: - -+------------------+-------------+--------------+---------------------------------------------+ -| feature | optional in | mandatory in | effect | -+==================+=============+==============+=============================================+ -| nested_scopes | 2.1.0b1 | 2.2 | :pep:`227`: | -| | | | *Statically Nested Scopes* | -+------------------+-------------+--------------+---------------------------------------------+ -| generators | 2.2.0a1 | 2.3 | :pep:`255`: | -| | | | *Simple Generators* | -+------------------+-------------+--------------+---------------------------------------------+ -| division | 2.2.0a2 | 3.0 | :pep:`238`: | -| | | | *Changing the Division Operator* | -+------------------+-------------+--------------+---------------------------------------------+ -| absolute_import | 2.5.0a1 | 3.0 | :pep:`328`: | -| | | | *Imports: Multi-Line and Absolute/Relative* | -+------------------+-------------+--------------+---------------------------------------------+ -| with_statement | 2.5.0a1 | 2.6 | :pep:`343`: | -| | | | *The "with" Statement* | -+------------------+-------------+--------------+---------------------------------------------+ -| print_function | 2.6.0a2 | 3.0 | :pep:`3105`: | -| | | | *Make print a function* | -+------------------+-------------+--------------+---------------------------------------------+ -| unicode_literals | 2.6.0a2 | 3.0 | :pep:`3112`: | -| | | | *Bytes literals in Python 
3000* | -+------------------+-------------+--------------+---------------------------------------------+ -| generator_stop | 3.5.0b1 | 3.7 | :pep:`479`: | -| | | | *StopIteration handling inside generators* | -+------------------+-------------+--------------+---------------------------------------------+ -| annotations | 3.7.0b1 | TBD [1]_ | :pep:`563`: | -| | | | *Postponed evaluation of annotations* | -+------------------+-------------+--------------+---------------------------------------------+ - -.. XXX Adding a new entry? Remember to update simple_stmts.rst, too. - .. [1] ``from __future__ import annotations`` was previously scheduled to become mandatory in Python 3.10, but the Python Steering Council @@ -115,3 +126,6 @@ language using this mechanism: :ref:`future` How the compiler treats future imports. + + :pep:`236` - Back to the __future__ + The original proposal for the __future__ mechanism. From 97fb2480e4807a34b8197243ad57566ed7769e24 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Mon, 29 Jan 2024 12:56:11 +0300 Subject: [PATCH 143/160] gh-101100: Fix sphinx warnings in `Doc/c-api/memoryview.rst` (GH-114669) --- Doc/c-api/memoryview.rst | 13 +++++++++++++ Doc/tools/.nitignore | 1 - 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/Doc/c-api/memoryview.rst b/Doc/c-api/memoryview.rst index 2aa43318e7a455..f6038032805259 100644 --- a/Doc/c-api/memoryview.rst +++ b/Doc/c-api/memoryview.rst @@ -20,6 +20,17 @@ any other object. read/write, otherwise it may be either read-only or read/write at the discretion of the exporter. + +.. c:macro:: PyBUF_READ + + Flag to request a readonly buffer. + + +.. c:macro:: PyBUF_WRITE + + Flag to request a writable buffer. + + .. c:function:: PyObject *PyMemoryView_FromMemory(char *mem, Py_ssize_t size, int flags) Create a memoryview object using *mem* as the underlying buffer. @@ -41,6 +52,8 @@ any other object. original memory. Otherwise, a copy is made and the memoryview points to a new bytes object. 
+ *buffertype* can be one of :c:macro:`PyBUF_READ` or :c:macro:`PyBUF_WRITE`. + .. c:function:: int PyMemoryView_Check(PyObject *obj) diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 8b6847ef2a7d76..b8b7c2299ca9f4 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -9,7 +9,6 @@ Doc/c-api/gcsupport.rst Doc/c-api/init.rst Doc/c-api/init_config.rst Doc/c-api/intro.rst -Doc/c-api/memoryview.rst Doc/c-api/module.rst Doc/c-api/stable.rst Doc/c-api/sys.rst From b7a12ab2146f946ae57e2d8019372cafe94d8375 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Mon, 29 Jan 2024 15:12:19 +0200 Subject: [PATCH 144/160] gh-101100: Fix Sphinx warnings in `whatsnew/2.2.rst` (#112366) Co-authored-by: Hugo van Kemenade --- Doc/tools/.nitignore | 1 - Doc/whatsnew/2.2.rst | 140 +++++++++++++++++++++---------------------- 2 files changed, 70 insertions(+), 71 deletions(-) diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index b8b7c2299ca9f4..763503205e1670 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -90,7 +90,6 @@ Doc/tutorial/datastructures.rst Doc/using/windows.rst Doc/whatsnew/2.0.rst Doc/whatsnew/2.1.rst -Doc/whatsnew/2.2.rst Doc/whatsnew/2.4.rst Doc/whatsnew/2.5.rst Doc/whatsnew/2.6.rst diff --git a/Doc/whatsnew/2.2.rst b/Doc/whatsnew/2.2.rst index 6efc23a82de923..968bd7a126bdf0 100644 --- a/Doc/whatsnew/2.2.rst +++ b/Doc/whatsnew/2.2.rst @@ -53,9 +53,9 @@ A long time ago I wrote a web page listing flaws in Python's design. One of the most significant flaws was that it's impossible to subclass Python types implemented in C. In particular, it's not possible to subclass built-in types, so you can't just subclass, say, lists in order to add a single useful method to -them. The :mod:`UserList` module provides a class that supports all of the +them. 
The :mod:`!UserList` module provides a class that supports all of the methods of lists and that can be subclassed further, but there's lots of C code -that expects a regular Python list and won't accept a :class:`UserList` +that expects a regular Python list and won't accept a :class:`!UserList` instance. Python 2.2 fixes this, and in the process adds some exciting new capabilities. @@ -69,7 +69,7 @@ A brief summary: * It's also possible to automatically call methods on accessing or setting an instance attribute by using a new mechanism called :dfn:`properties`. Many uses - of :meth:`__getattr__` can be rewritten to use properties instead, making the + of :meth:`!__getattr__` can be rewritten to use properties instead, making the resulting code simpler and faster. As a small side benefit, attributes can now have docstrings, too. @@ -120,7 +120,7 @@ added so if no built-in type is suitable, you can just subclass This means that :keyword:`class` statements that don't have any base classes are always classic classes in Python 2.2. (Actually you can also change this by -setting a module-level variable named :attr:`__metaclass__` --- see :pep:`253` +setting a module-level variable named :attr:`!__metaclass__` --- see :pep:`253` for the details --- but it's easier to just subclass :class:`object`.) The type objects for the built-in types are available as built-ins, named using @@ -134,8 +134,8 @@ type objects that behave as factories when called. :: 123 To make the set of types complete, new type objects such as :func:`dict` and -:func:`file` have been added. Here's a more interesting example, adding a -:meth:`lock` method to file objects:: +:func:`!file` have been added. 
Here's a more interesting example, adding a +:meth:`!lock` method to file objects:: class LockableFile(file): def lock (self, operation, length=0, start=0, whence=0): @@ -146,7 +146,7 @@ To make the set of types complete, new type objects such as :func:`dict` and The now-obsolete :mod:`!posixfile` module contained a class that emulated all of a file object's methods and also added a :meth:`!lock` method, but this class couldn't be passed to internal functions that expected a built-in file, -something which is possible with our new :class:`LockableFile`. +something which is possible with our new :class:`!LockableFile`. Descriptors @@ -154,11 +154,11 @@ Descriptors In previous versions of Python, there was no consistent way to discover what attributes and methods were supported by an object. There were some informal -conventions, such as defining :attr:`__members__` and :attr:`__methods__` +conventions, such as defining :attr:`!__members__` and :attr:`!__methods__` attributes that were lists of names, but often the author of an extension type or a class wouldn't bother to define them. You could fall back on inspecting the :attr:`~object.__dict__` of an object, but when class inheritance or an arbitrary -:meth:`__getattr__` hook were in use this could still be inaccurate. +:meth:`!__getattr__` hook were in use this could still be inaccurate. The one big idea underlying the new class model is that an API for describing the attributes of an object using :dfn:`descriptors` has been formalized. @@ -171,7 +171,7 @@ attributes of their own: * :attr:`~definition.__name__` is the attribute's name. -* :attr:`__doc__` is the attribute's docstring. +* :attr:`!__doc__` is the attribute's docstring. * ``__get__(object)`` is a method that retrieves the attribute value from *object*. 
@@ -186,7 +186,7 @@ are:: descriptor = obj.__class__.x descriptor.__get__(obj) -For methods, :meth:`descriptor.__get__` returns a temporary object that's +For methods, :meth:`!descriptor.__get__` returns a temporary object that's callable, and wraps up the instance and the method to be called on it. This is also why static methods and class methods are now possible; they have descriptors that wrap up just the method, or the method and the class. As a @@ -204,7 +204,7 @@ methods are defined like this:: ... g = classmethod(g) -The :func:`staticmethod` function takes the function :func:`f`, and returns it +The :func:`staticmethod` function takes the function :func:`!f`, and returns it wrapped up in a descriptor so it can be stored in the class object. You might expect there to be special syntax for creating such methods (``def static f``, ``defstatic f()``, or something like that) but no such syntax has been defined @@ -232,10 +232,10 @@ like this:: f = eiffelmethod(f, pre_f, post_f) -Note that a person using the new :func:`eiffelmethod` doesn't have to understand +Note that a person using the new :func:`!eiffelmethod` doesn't have to understand anything about descriptors. This is why I think the new features don't increase the basic complexity of the language. There will be a few wizards who need to -know about it in order to write :func:`eiffelmethod` or the ZODB or whatever, +know about it in order to write :func:`!eiffelmethod` or the ZODB or whatever, but most users will just write code on top of the resulting libraries and ignore the implementation details. @@ -263,10 +263,10 @@ from :pep:`253` by Guido van Rossum):: The lookup rule for classic classes is simple but not very smart; the base classes are searched depth-first, going from left to right. A reference to -:meth:`D.save` will search the classes :class:`D`, :class:`B`, and then -:class:`A`, where :meth:`save` would be found and returned. :meth:`C.save` -would never be found at all. 
This is bad, because if :class:`C`'s :meth:`save` -method is saving some internal state specific to :class:`C`, not calling it will +:meth:`!D.save` will search the classes :class:`!D`, :class:`!B`, and then +:class:`!A`, where :meth:`!save` would be found and returned. :meth:`!C.save` +would never be found at all. This is bad, because if :class:`!C`'s :meth:`!save` +method is saving some internal state specific to :class:`!C`, not calling it will result in that state never getting saved. New-style classes follow a different algorithm that's a bit more complicated to @@ -276,22 +276,22 @@ produces more useful results for really complicated inheritance graphs.) #. List all the base classes, following the classic lookup rule and include a class multiple times if it's visited repeatedly. In the above example, the list - of visited classes is [:class:`D`, :class:`B`, :class:`A`, :class:`C`, - :class:`A`]. + of visited classes is [:class:`!D`, :class:`!B`, :class:`!A`, :class:`!C`, + :class:`!A`]. #. Scan the list for duplicated classes. If any are found, remove all but one occurrence, leaving the *last* one in the list. In the above example, the list - becomes [:class:`D`, :class:`B`, :class:`C`, :class:`A`] after dropping + becomes [:class:`!D`, :class:`!B`, :class:`!C`, :class:`!A`] after dropping duplicates. -Following this rule, referring to :meth:`D.save` will return :meth:`C.save`, +Following this rule, referring to :meth:`!D.save` will return :meth:`!C.save`, which is the behaviour we're after. This lookup rule is the same as the one followed by Common Lisp. A new built-in function, :func:`super`, provides a way to get at a class's superclasses without having to reimplement Python's algorithm. The most commonly used form will be ``super(class, obj)``, which returns a bound superclass object (not the actual class object). 
This form will be used in methods to call a method in the superclass; for example, -:class:`D`'s :meth:`save` method would look like this:: +:class:`!D`'s :meth:`!save` method would look like this:: class D (B,C): def save (self): @@ -309,7 +309,7 @@ Attribute Access ---------------- A fair number of sophisticated Python classes define hooks for attribute access -using :meth:`__getattr__`; most commonly this is done for convenience, to make +using :meth:`~object.__getattr__`; most commonly this is done for convenience, to make code more readable by automatically mapping an attribute access such as ``obj.parent`` into a method call such as ``obj.get_parent``. Python 2.2 adds some new ways of controlling attribute access. @@ -321,22 +321,22 @@ instance's dictionary. New-style classes also support a new method, ``__getattribute__(attr_name)``. The difference between the two methods is -that :meth:`__getattribute__` is *always* called whenever any attribute is -accessed, while the old :meth:`__getattr__` is only called if ``foo`` isn't +that :meth:`~object.__getattribute__` is *always* called whenever any attribute is +accessed, while the old :meth:`~object.__getattr__` is only called if ``foo`` isn't found in the instance's dictionary. However, Python 2.2's support for :dfn:`properties` will often be a simpler way -to trap attribute references. Writing a :meth:`__getattr__` method is +to trap attribute references. Writing a :meth:`!__getattr__` method is complicated because to avoid recursion you can't use regular attribute accesses inside them, and instead have to mess around with the contents of -:attr:`~object.__dict__`. :meth:`__getattr__` methods also end up being called by Python -when it checks for other methods such as :meth:`__repr__` or :meth:`__coerce__`, +:attr:`~object.__dict__`. 
:meth:`~object.__getattr__` methods also end up being called by Python +when it checks for other methods such as :meth:`~object.__repr__` or :meth:`!__coerce__`, and so have to be written with this in mind. Finally, calling a function on every attribute access results in a sizable performance loss. :class:`property` is a new built-in type that packages up three functions that get, set, or delete an attribute, and a docstring. For example, if you want to -define a :attr:`size` attribute that's computed, but also settable, you could +define a :attr:`!size` attribute that's computed, but also settable, you could write:: class C(object): @@ -355,9 +355,9 @@ write:: "Storage size of this instance") That is certainly clearer and easier to write than a pair of -:meth:`__getattr__`/:meth:`__setattr__` methods that check for the :attr:`size` +:meth:`!__getattr__`/:meth:`!__setattr__` methods that check for the :attr:`!size` attribute and handle it specially while retrieving all other attributes from the -instance's :attr:`~object.__dict__`. Accesses to :attr:`size` are also the only ones +instance's :attr:`~object.__dict__`. Accesses to :attr:`!size` are also the only ones which have to perform the work of calling a function, so references to other attributes run at their usual speed. @@ -447,7 +447,7 @@ an iterator for the object *obj*, while ``iter(C, sentinel)`` returns an iterator that will invoke the callable object *C* until it returns *sentinel* to signal that the iterator is done. -Python classes can define an :meth:`__iter__` method, which should create and +Python classes can define an :meth:`!__iter__` method, which should create and return a new iterator for the object; if the object is its own iterator, this method can just return ``self``. In particular, iterators will usually be their own iterators. 
Extension types implemented in C can implement a :c:member:`~PyTypeObject.tp_iter` @@ -478,7 +478,7 @@ there are no more values to be returned, calling :meth:`next` should raise the In 2.2, Python's :keyword:`for` statement no longer expects a sequence; it expects something for which :func:`iter` will return an iterator. For backward compatibility and convenience, an iterator is automatically constructed for -sequences that don't implement :meth:`__iter__` or a :c:member:`~PyTypeObject.tp_iter` slot, so +sequences that don't implement :meth:`!__iter__` or a :c:member:`~PyTypeObject.tp_iter` slot, so ``for i in [1,2,3]`` will still work. Wherever the Python interpreter loops over a sequence, it's been changed to use the iterator protocol. This means you can do things like this:: @@ -510,8 +510,8 @@ Iterator support has been added to some of Python's basic types. Calling Oct 10 That's just the default behaviour. If you want to iterate over keys, values, or -key/value pairs, you can explicitly call the :meth:`iterkeys`, -:meth:`itervalues`, or :meth:`iteritems` methods to get an appropriate iterator. +key/value pairs, you can explicitly call the :meth:`!iterkeys`, +:meth:`!itervalues`, or :meth:`!iteritems` methods to get an appropriate iterator. In a minor related change, the :keyword:`in` operator now works on dictionaries, so ``key in dict`` is now equivalent to ``dict.has_key(key)``. @@ -580,7 +580,7 @@ allowed inside the :keyword:`!try` block of a :keyword:`try`...\ :keyword:`finally` statement; read :pep:`255` for a full explanation of the interaction between :keyword:`!yield` and exceptions.) 
-Here's a sample usage of the :func:`generate_ints` generator:: +Here's a sample usage of the :func:`!generate_ints` generator:: >>> gen = generate_ints(3) >>> gen @@ -641,7 +641,7 @@ like:: sentence := "Store it in the neighboring harbor" if (i := find("or", sentence)) > 5 then write(i) -In Icon the :func:`find` function returns the indexes at which the substring +In Icon the :func:`!find` function returns the indexes at which the substring "or" is found: 3, 23, 33. In the :keyword:`if` statement, ``i`` is first assigned a value of 3, but 3 is less than 5, so the comparison fails, and Icon retries it with the second value of 23. 23 is greater than 5, so the comparison @@ -671,7 +671,7 @@ PEP 237: Unifying Long Integers and Integers In recent versions, the distinction between regular integers, which are 32-bit values on most machines, and long integers, which can be of arbitrary size, was becoming an annoyance. For example, on platforms that support files larger than -``2**32`` bytes, the :meth:`tell` method of file objects has to return a long +``2**32`` bytes, the :meth:`!tell` method of file objects has to return a long integer. However, there were various bits of Python that expected plain integers and would raise an error if a long integer was provided instead. For example, in Python 1.5, only regular integers could be used as a slice index, and @@ -752,7 +752,7 @@ Here are the changes 2.2 introduces: 0.5. Without the ``__future__`` statement, ``/`` still means classic division. The default meaning of ``/`` will not change until Python 3.0. -* Classes can define methods called :meth:`__truediv__` and :meth:`__floordiv__` +* Classes can define methods called :meth:`~object.__truediv__` and :meth:`~object.__floordiv__` to overload the two division operators. At the C level, there are also slots in the :c:type:`PyNumberMethods` structure so extension types can define the two operators. @@ -785,17 +785,17 @@ support.) 
When built to use UCS-4 (a "wide Python"), the interpreter can natively handle Unicode characters from U+000000 to U+110000, so the range of legal values for -the :func:`unichr` function is expanded accordingly. Using an interpreter +the :func:`!unichr` function is expanded accordingly. Using an interpreter compiled to use UCS-2 (a "narrow Python"), values greater than 65535 will still -cause :func:`unichr` to raise a :exc:`ValueError` exception. This is all +cause :func:`!unichr` to raise a :exc:`ValueError` exception. This is all described in :pep:`261`, "Support for 'wide' Unicode characters"; consult it for further details. Another change is simpler to explain. Since their introduction, Unicode strings -have supported an :meth:`encode` method to convert the string to a selected +have supported an :meth:`!encode` method to convert the string to a selected encoding such as UTF-8 or Latin-1. A symmetric ``decode([*encoding*])`` method has been added to 8-bit strings (though not to Unicode strings) in 2.2. -:meth:`decode` assumes that the string is in the specified encoding and decodes +:meth:`!decode` assumes that the string is in the specified encoding and decodes it, returning whatever is returned by the codec. Using this new feature, codecs have been added for tasks not directly related to @@ -819,10 +819,10 @@ encoding, and compression with the :mod:`zlib` module:: >>> "sheesh".encode('rot-13') 'furrfu' -To convert a class instance to Unicode, a :meth:`__unicode__` method can be -defined by a class, analogous to :meth:`__str__`. +To convert a class instance to Unicode, a :meth:`!__unicode__` method can be +defined by a class, analogous to :meth:`!__str__`. -:meth:`encode`, :meth:`decode`, and :meth:`__unicode__` were implemented by +:meth:`!encode`, :meth:`!decode`, and :meth:`!__unicode__` were implemented by Marc-André Lemburg. The changes to support using UCS-4 internally were implemented by Fredrik Lundh and Martin von Löwis. 
@@ -859,7 +859,7 @@ doesn't work:: return g(value-1) + 1 ... -The function :func:`g` will always raise a :exc:`NameError` exception, because +The function :func:`!g` will always raise a :exc:`NameError` exception, because the binding of the name ``g`` isn't in either its local namespace or in the module-level namespace. This isn't much of a problem in practice (how often do you recursively define interior functions like this?), but this also made using @@ -915,7 +915,7 @@ To make the preceding explanation a bit clearer, here's an example:: Line 4 containing the ``exec`` statement is a syntax error, since ``exec`` would define a new local variable named ``x`` whose value should -be accessed by :func:`g`. +be accessed by :func:`!g`. This shouldn't be much of a limitation, since ``exec`` is rarely used in most Python code (and when it is used, it's often a sign of a poor design @@ -933,7 +933,7 @@ anyway). New and Improved Modules ======================== -* The :mod:`xmlrpclib` module was contributed to the standard library by Fredrik +* The :mod:`!xmlrpclib` module was contributed to the standard library by Fredrik Lundh, providing support for writing XML-RPC clients. XML-RPC is a simple remote procedure call protocol built on top of HTTP and XML. For example, the following snippet retrieves a list of RSS channels from the O'Reilly Network, @@ -956,7 +956,7 @@ New and Improved Modules # 'description': 'A utility which converts HTML to XSL FO.', # 'title': 'html2fo 0.3 (Default)'}, ... ] - The :mod:`SimpleXMLRPCServer` module makes it easy to create straightforward + The :mod:`!SimpleXMLRPCServer` module makes it easy to create straightforward XML-RPC servers. See http://xmlrpc.scripting.com/ for more information about XML-RPC. 
* The new :mod:`hmac` module implements the HMAC algorithm described by @@ -964,9 +964,9 @@ New and Improved Modules * Several functions that originally returned lengthy tuples now return pseudo-sequences that still behave like tuples but also have mnemonic attributes such - as memberst_mtime or :attr:`tm_year`. The enhanced functions include - :func:`stat`, :func:`fstat`, :func:`statvfs`, and :func:`fstatvfs` in the - :mod:`os` module, and :func:`localtime`, :func:`gmtime`, and :func:`strptime` in + as :attr:`!st_mtime` or :attr:`!tm_year`. The enhanced functions include + :func:`~os.stat`, :func:`~os.fstat`, :func:`~os.statvfs`, and :func:`~os.fstatvfs` in the + :mod:`os` module, and :func:`~time.localtime`, :func:`~time.gmtime`, and :func:`~time.strptime` in the :mod:`time` module. For example, to obtain a file's size using the old tuples, you'd end up writing @@ -999,7 +999,7 @@ New and Improved Modules underlying the :mod:`re` module. For example, the :func:`re.sub` and :func:`re.split` functions have been rewritten in C. Another contributed patch speeds up certain Unicode character ranges by a factor of two, and a new - :meth:`finditer` method that returns an iterator over all the non-overlapping + :meth:`~re.finditer` method that returns an iterator over all the non-overlapping matches in a given string. (SRE is maintained by Fredrik Lundh. The BIGCHARSET patch was contributed by Martin von Löwis.) @@ -1012,33 +1012,33 @@ New and Improved Modules new extensions: the NAMESPACE extension defined in :rfc:`2342`, SORT, GETACL and SETACL. (Contributed by Anthony Baxter and Michel Pelletier.) -* The :mod:`rfc822` module's parsing of email addresses is now compliant with +* The :mod:`!rfc822` module's parsing of email addresses is now compliant with :rfc:`2822`, an update to :rfc:`822`. (The module's name is *not* going to be changed to ``rfc2822``.) A new package, :mod:`email`, has also been added for parsing and generating e-mail messages.
(Contributed by Barry Warsaw, and arising out of his work on Mailman.) -* The :mod:`difflib` module now contains a new :class:`Differ` class for +* The :mod:`difflib` module now contains a new :class:`!Differ` class for producing human-readable lists of changes (a "delta") between two sequences of - lines of text. There are also two generator functions, :func:`ndiff` and - :func:`restore`, which respectively return a delta from two sequences, or one of + lines of text. There are also two generator functions, :func:`!ndiff` and + :func:`!restore`, which respectively return a delta from two sequences, or one of the original sequences from a delta. (Grunt work contributed by David Goodger, from ndiff.py code by Tim Peters who then did the generatorization.) -* New constants :const:`ascii_letters`, :const:`ascii_lowercase`, and - :const:`ascii_uppercase` were added to the :mod:`string` module. There were - several modules in the standard library that used :const:`string.letters` to +* New constants :const:`!ascii_letters`, :const:`!ascii_lowercase`, and + :const:`!ascii_uppercase` were added to the :mod:`string` module. There were + several modules in the standard library that used :const:`!string.letters` to mean the ranges A-Za-z, but that assumption is incorrect when locales are in - use, because :const:`string.letters` varies depending on the set of legal + use, because :const:`!string.letters` varies depending on the set of legal characters defined by the current locale. The buggy modules have all been fixed - to use :const:`ascii_letters` instead. (Reported by an unknown person; fixed by + to use :const:`!ascii_letters` instead. (Reported by an unknown person; fixed by Fred L. Drake, Jr.) * The :mod:`mimetypes` module now makes it easier to use alternative MIME-type - databases by the addition of a :class:`MimeTypes` class, which takes a list of + databases by the addition of a :class:`~mimetypes.MimeTypes` class, which takes a list of filenames to be parsed. 
(Contributed by Fred L. Drake, Jr.) -* A :class:`Timer` class was added to the :mod:`threading` module that allows +* A :class:`~threading.Timer` class was added to the :mod:`threading` module that allows scheduling an activity to happen at some future time. (Contributed by Itamar Shtull-Trauring.) @@ -1114,7 +1114,7 @@ code, none of the changes described here will affect you very much. * Two new wrapper functions, :c:func:`PyOS_snprintf` and :c:func:`PyOS_vsnprintf` were added to provide cross-platform implementations for the relatively new :c:func:`snprintf` and :c:func:`vsnprintf` C lib APIs. In contrast to the standard - :c:func:`sprintf` and :c:func:`vsprintf` functions, the Python versions check the + :c:func:`sprintf` and :c:func:`!vsprintf` functions, the Python versions check the bounds of the buffer used to protect against buffer overruns. (Contributed by M.-A. Lemburg.) @@ -1212,12 +1212,12 @@ Some of the more notable changes are: * The :file:`Tools/scripts/ftpmirror.py` script now parses a :file:`.netrc` file, if you have one. (Contributed by Mike Romberg.) -* Some features of the object returned by the :func:`xrange` function are now +* Some features of the object returned by the :func:`!xrange` function are now deprecated, and trigger warnings when they're accessed; they'll disappear in - Python 2.3. :class:`xrange` objects tried to pretend they were full sequence + Python 2.3. :class:`!xrange` objects tried to pretend they were full sequence types by supporting slicing, sequence multiplication, and the :keyword:`in` operator, but these features were rarely used and therefore buggy. The - :meth:`tolist` method and the :attr:`start`, :attr:`stop`, and :attr:`step` + :meth:`!tolist` method and the :attr:`!start`, :attr:`!stop`, and :attr:`!step` attributes are also being deprecated. At the C level, the fourth argument to the :c:func:`!PyRange_New` function, ``repeat``, has also been deprecated. 
From e8b8f5e9c2da6a436360ce648061c90bdfcba863 Mon Sep 17 00:00:00 2001 From: Skip Montanaro Date: Mon, 29 Jan 2024 08:43:44 -0600 Subject: [PATCH 145/160] gh-101100: Fix datetime reference warnings (GH-114661) Co-authored-by: Serhiy Storchaka --- Doc/conf.py | 5 ++ Doc/library/datetime.rst | 106 +++++++++++++++++++-------------------- Doc/tools/.nitignore | 1 - 3 files changed, 58 insertions(+), 54 deletions(-) diff --git a/Doc/conf.py b/Doc/conf.py index 458954370debe2..a96e7787d167a3 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -89,20 +89,25 @@ nitpick_ignore = [ # Standard C functions ('c:func', 'calloc'), + ('c:func', 'ctime'), ('c:func', 'dlopen'), ('c:func', 'exec'), ('c:func', 'fcntl'), ('c:func', 'fork'), ('c:func', 'free'), + ('c:func', 'gettimeofday'), ('c:func', 'gmtime'), + ('c:func', 'localeconv'), ('c:func', 'localtime'), ('c:func', 'main'), ('c:func', 'malloc'), + ('c:func', 'mktime'), ('c:func', 'printf'), ('c:func', 'realloc'), ('c:func', 'snprintf'), ('c:func', 'sprintf'), ('c:func', 'stat'), + ('c:func', 'strftime'), ('c:func', 'system'), ('c:func', 'time'), ('c:func', 'vsnprintf'), diff --git a/Doc/library/datetime.rst b/Doc/library/datetime.rst index b36f8c19cd6040..47ecb0ba331bdc 100644 --- a/Doc/library/datetime.rst +++ b/Doc/library/datetime.rst @@ -14,7 +14,7 @@ .. XXX what order should the types be discussed in? -The :mod:`datetime` module supplies classes for manipulating dates and times. +The :mod:`!datetime` module supplies classes for manipulating dates and times. While date and time arithmetic is supported, the focus of the implementation is on efficient attribute extraction for output formatting and manipulation. @@ -70,7 +70,7 @@ These :class:`tzinfo` objects capture information about the offset from UTC time, the time zone name, and whether daylight saving time is in effect. Only one concrete :class:`tzinfo` class, the :class:`timezone` class, is -supplied by the :mod:`datetime` module. 
The :class:`timezone` class can +supplied by the :mod:`!datetime` module. The :class:`timezone` class can represent simple timezones with fixed offsets from UTC, such as UTC itself or North American EST and EDT timezones. Supporting timezones at deeper levels of detail is up to the application. The rules for time adjustment across the @@ -80,7 +80,7 @@ standard suitable for every application aside from UTC. Constants --------- -The :mod:`datetime` module exports the following constants: +The :mod:`!datetime` module exports the following constants: .. data:: MINYEAR @@ -631,7 +631,7 @@ Notes: date2.toordinal()``. Date comparison raises :exc:`TypeError` if the other comparand isn't also a :class:`date` object. However, ``NotImplemented`` is returned instead if the other comparand has a - :meth:`timetuple` attribute. This hook gives other kinds of date objects a + :attr:`~date.timetuple` attribute. This hook gives other kinds of date objects a chance at implementing mixed-type comparison. If not, when a :class:`date` object is compared to an object of a different type, :exc:`TypeError` is raised unless the comparison is ``==`` or ``!=``. The latter cases return @@ -1215,7 +1215,7 @@ Supported operations: object addresses, datetime comparison normally raises :exc:`TypeError` if the other comparand isn't also a :class:`.datetime` object. However, ``NotImplemented`` is returned instead if the other comparand has a - :meth:`timetuple` attribute. This hook gives other kinds of date objects a + :attr:`~.datetime.timetuple` attribute. This hook gives other kinds of date objects a chance at implementing mixed-type comparison. If not, when a :class:`.datetime` object is compared to an object of a different type, :exc:`TypeError` is raised unless the comparison is ``==`` or ``!=``. 
The latter cases return @@ -1347,22 +1347,22 @@ Instance methods: where ``yday = d.toordinal() - date(d.year, 1, 1).toordinal() + 1`` is the day number within the current year starting with ``1`` for January - 1st. The :attr:`tm_isdst` flag of the result is set according to the + 1st. The :attr:`~time.struct_time.tm_isdst` flag of the result is set according to the :meth:`dst` method: :attr:`.tzinfo` is ``None`` or :meth:`dst` returns - ``None``, :attr:`tm_isdst` is set to ``-1``; else if :meth:`dst` returns a - non-zero value, :attr:`tm_isdst` is set to ``1``; else :attr:`tm_isdst` is + ``None``, :attr:`!tm_isdst` is set to ``-1``; else if :meth:`dst` returns a + non-zero value, :attr:`!tm_isdst` is set to ``1``; else :attr:`!tm_isdst` is set to ``0``. .. method:: datetime.utctimetuple() If :class:`.datetime` instance *d* is naive, this is the same as - ``d.timetuple()`` except that :attr:`tm_isdst` is forced to 0 regardless of what + ``d.timetuple()`` except that :attr:`~.time.struct_time.tm_isdst` is forced to 0 regardless of what ``d.dst()`` returns. DST is never in effect for a UTC time. If *d* is aware, *d* is normalized to UTC time, by subtracting ``d.utcoffset()``, and a :class:`time.struct_time` for the - normalized time is returned. :attr:`tm_isdst` is forced to 0. Note + normalized time is returned. :attr:`!tm_isdst` is forced to 0. Note that an :exc:`OverflowError` may be raised if *d*.year was ``MINYEAR`` or ``MAXYEAR`` and UTC adjustment spills over a year boundary. @@ -1550,7 +1550,7 @@ Instance methods: Examples of Usage: :class:`.datetime` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Examples of working with :class:`~datetime.datetime` objects: +Examples of working with :class:`.datetime` objects: .. doctest:: @@ -1761,9 +1761,9 @@ is aware, :exc:`TypeError` is raised if an order comparison is attempted. For eq comparisons, naive instances are never equal to aware instances. 
If both comparands are aware, and have -the same :attr:`~time.tzinfo` attribute, the common :attr:`~time.tzinfo` attribute is +the same :attr:`~.time.tzinfo` attribute, the common :attr:`!tzinfo` attribute is ignored and the base times are compared. If both comparands are aware and -have different :attr:`~time.tzinfo` attributes, the comparands are first adjusted by +have different :attr:`!tzinfo` attributes, the comparands are first adjusted by subtracting their UTC offsets (obtained from ``self.utcoffset()``). In order to stop mixed-type comparisons from falling back to the default comparison by object address, when a :class:`.time` object is compared to an object of a @@ -1771,7 +1771,7 @@ different type, :exc:`TypeError` is raised unless the comparison is ``==`` or ``!=``. The latter cases return :const:`False` or :const:`True`, respectively. .. versionchanged:: 3.3 - Equality comparisons between aware and naive :class:`~datetime.time` instances + Equality comparisons between aware and naive :class:`.time` instances don't raise :exc:`TypeError`. In Boolean contexts, a :class:`.time` object is always considered to be true. @@ -1981,7 +1981,7 @@ Examples of working with a :class:`.time` object:: You need to derive a concrete subclass, and (at least) supply implementations of the standard :class:`tzinfo` methods needed by the - :class:`.datetime` methods you use. The :mod:`datetime` module provides + :class:`.datetime` methods you use. The :mod:`!datetime` module provides :class:`timezone`, a simple concrete subclass of :class:`tzinfo` which can represent timezones with fixed offset from UTC such as UTC itself or North American EST and EDT. @@ -1994,7 +1994,7 @@ Examples of working with a :class:`.time` object:: A concrete subclass of :class:`tzinfo` may need to implement the following methods. Exactly which methods are needed depends on the uses made of aware - :mod:`datetime` objects. If in doubt, simply implement all of them. + :mod:`!datetime` objects. 
If in doubt, simply implement all of them. .. method:: tzinfo.utcoffset(dt) @@ -2035,7 +2035,7 @@ Examples of working with a :class:`.time` object:: already been added to the UTC offset returned by :meth:`utcoffset`, so there's no need to consult :meth:`dst` unless you're interested in obtaining DST info separately. For example, :meth:`datetime.timetuple` calls its :attr:`~.datetime.tzinfo` - attribute's :meth:`dst` method to determine how the :attr:`tm_isdst` flag + attribute's :meth:`dst` method to determine how the :attr:`~time.struct_time.tm_isdst` flag should be set, and :meth:`tzinfo.fromutc` calls :meth:`dst` to account for DST changes when crossing time zones. @@ -2051,7 +2051,7 @@ Examples of working with a :class:`.time` object:: relies on this, but cannot detect violations; it's the programmer's responsibility to ensure it. If a :class:`tzinfo` subclass cannot guarantee this, it may be able to override the default implementation of - :meth:`tzinfo.fromutc` to work correctly with :meth:`astimezone` regardless. + :meth:`tzinfo.fromutc` to work correctly with :meth:`~.datetime.astimezone` regardless. Most implementations of :meth:`dst` will probably look like one of these two:: @@ -2080,7 +2080,7 @@ Examples of working with a :class:`.time` object:: .. method:: tzinfo.tzname(dt) Return the time zone name corresponding to the :class:`.datetime` object *dt*, as - a string. Nothing about string names is defined by the :mod:`datetime` module, + a string. Nothing about string names is defined by the :mod:`!datetime` module, and there's no requirement that it mean anything in particular. For example, "GMT", "UTC", "-500", "-5:00", "EDT", "US/Eastern", "America/New York" are all valid replies. Return ``None`` if a string name isn't known. Note that this is @@ -2128,7 +2128,7 @@ There is one more :class:`tzinfo` method that a subclass may wish to override: different years. 
An example of a time zone the default :meth:`fromutc` implementation may not handle correctly in all cases is one where the standard offset (from UTC) depends on the specific date and time passed, which can happen - for political reasons. The default implementations of :meth:`astimezone` and + for political reasons. The default implementations of :meth:`~.datetime.astimezone` and :meth:`fromutc` may not produce the result you want if the result is one of the hours straddling the moment the standard offset changes. @@ -2194,10 +2194,10 @@ hour that can't be spelled unambiguously in local wall time: the last hour of daylight time. In Eastern, that's times of the form 5:MM UTC on the day daylight time ends. The local wall clock leaps from 1:59 (daylight time) back to 1:00 (standard time) again. Local times of the form 1:MM are ambiguous. -:meth:`astimezone` mimics the local clock's behavior by mapping two adjacent UTC +:meth:`~.datetime.astimezone` mimics the local clock's behavior by mapping two adjacent UTC hours into the same local hour then. In the Eastern example, UTC times of the form 5:MM and 6:MM both map to 1:MM when converted to Eastern, but earlier times -have the :attr:`~datetime.fold` attribute set to 0 and the later times have it set to 1. +have the :attr:`~.datetime.fold` attribute set to 0 and the later times have it set to 1. For example, at the Fall back transition of 2016, we get:: >>> u0 = datetime(2016, 11, 6, 4, tzinfo=timezone.utc) @@ -2212,10 +2212,10 @@ For example, at the Fall back transition of 2016, we get:: 07:00:00 UTC = 02:00:00 EST 0 Note that the :class:`.datetime` instances that differ only by the value of the -:attr:`~datetime.fold` attribute are considered equal in comparisons. +:attr:`~.datetime.fold` attribute are considered equal in comparisons. 
Applications that can't bear wall-time ambiguities should explicitly check the -value of the :attr:`~datetime.fold` attribute or avoid using hybrid +value of the :attr:`~.datetime.fold` attribute or avoid using hybrid :class:`tzinfo` subclasses; there are no ambiguities when using :class:`timezone`, or any other fixed-offset :class:`tzinfo` subclass (such as a class representing only EST (fixed offset -5 hours), or only EDT (fixed offset -4 hours)). @@ -2223,7 +2223,7 @@ only EST (fixed offset -5 hours), or only EDT (fixed offset -4 hours)). .. seealso:: :mod:`zoneinfo` - The :mod:`datetime` module has a basic :class:`timezone` class (for + The :mod:`!datetime` module has a basic :class:`timezone` class (for handling arbitrary fixed offsets from UTC) and its :attr:`timezone.utc` attribute (a UTC timezone instance). @@ -2241,7 +2241,7 @@ only EST (fixed offset -5 hours), or only EDT (fixed offset -4 hours)). .. _datetime-timezone: :class:`timezone` Objects --------------------------- +------------------------- The :class:`timezone` class is a subclass of :class:`tzinfo`, each instance of which represents a timezone defined by a fixed offset from @@ -2316,8 +2316,8 @@ Class attributes: .. _strftime-strptime-behavior: -:meth:`strftime` and :meth:`strptime` Behavior ----------------------------------------------- +:meth:`~.datetime.strftime` and :meth:`~.datetime.strptime` Behavior +-------------------------------------------------------------------- :class:`date`, :class:`.datetime`, and :class:`.time` objects all support a ``strftime(format)`` method, to create a string representing the time under the @@ -2327,8 +2327,8 @@ Conversely, the :meth:`datetime.strptime` class method creates a :class:`.datetime` object from a string representing a date and time and a corresponding format string. 
-The table below provides a high-level comparison of :meth:`strftime` -versus :meth:`strptime`: +The table below provides a high-level comparison of :meth:`~.datetime.strftime` +versus :meth:`~.datetime.strptime`: +----------------+--------------------------------------------------------+------------------------------------------------------------------------------+ | | ``strftime`` | ``strptime`` | @@ -2345,8 +2345,8 @@ versus :meth:`strptime`: .. _format-codes: -:meth:`strftime` and :meth:`strptime` Format Codes -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +:meth:`~.datetime.strftime` and :meth:`~.datetime.strptime` Format Codes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ These methods accept format codes that can be used to parse and format dates:: @@ -2485,13 +2485,13 @@ convenience. These parameters all correspond to ISO 8601 date values. | | naive). | -03:07:12.345216 | | +-----------+--------------------------------+------------------------+-------+ -These may not be available on all platforms when used with the :meth:`strftime` +These may not be available on all platforms when used with the :meth:`~.datetime.strftime` method. The ISO 8601 year and ISO 8601 week directives are not interchangeable -with the year and week number directives above. Calling :meth:`strptime` with +with the year and week number directives above. Calling :meth:`~.datetime.strptime` with incomplete or ambiguous ISO 8601 directives will raise a :exc:`ValueError`. The full set of format codes supported varies across platforms, because Python -calls the platform C library's :func:`strftime` function, and platform +calls the platform C library's :c:func:`strftime` function, and platform variations are common. To see the full set of format codes supported on your platform, consult the :manpage:`strftime(3)` documentation. There are also differences between platforms in handling of unsupported format specifiers. 
@@ -2507,9 +2507,9 @@ Technical Detail Broadly speaking, ``d.strftime(fmt)`` acts like the :mod:`time` module's ``time.strftime(fmt, d.timetuple())`` although not all objects support a -:meth:`timetuple` method. +:meth:`~date.timetuple` method. -For the :meth:`datetime.strptime` class method, the default value is +For the :meth:`.datetime.strptime` class method, the default value is ``1900-01-01T00:00:00.000``: any components not specified in the format string will be pulled from the default value. [#]_ @@ -2544,27 +2544,27 @@ Notes: contain non-ASCII characters. (2) - The :meth:`strptime` method can parse years in the full [1, 9999] range, but + The :meth:`~.datetime.strptime` method can parse years in the full [1, 9999] range, but years < 1000 must be zero-filled to 4-digit width. .. versionchanged:: 3.2 - In previous versions, :meth:`strftime` method was restricted to + In previous versions, :meth:`~.datetime.strftime` method was restricted to years >= 1900. .. versionchanged:: 3.3 - In version 3.2, :meth:`strftime` method was restricted to + In version 3.2, :meth:`~.datetime.strftime` method was restricted to years >= 1000. (3) - When used with the :meth:`strptime` method, the ``%p`` directive only affects + When used with the :meth:`~.datetime.strptime` method, the ``%p`` directive only affects the output hour field if the ``%I`` directive is used to parse the hour. (4) - Unlike the :mod:`time` module, the :mod:`datetime` module does not support + Unlike the :mod:`time` module, the :mod:`!datetime` module does not support leap seconds. (5) - When used with the :meth:`strptime` method, the ``%f`` directive + When used with the :meth:`~.datetime.strptime` method, the ``%f`` directive accepts from one to six digits and zero pads on the right. 
``%f`` is an extension to the set of format characters in the C standard (but implemented separately in datetime objects, and therefore always @@ -2577,7 +2577,7 @@ Notes: For an aware object: ``%z`` - :meth:`utcoffset` is transformed into a string of the form + :meth:`~.datetime.utcoffset` is transformed into a string of the form ``±HHMM[SS[.ffffff]]``, where ``HH`` is a 2-digit string giving the number of UTC offset hours, ``MM`` is a 2-digit string giving the number of UTC offset minutes, ``SS`` is a 2-digit string giving the number of UTC offset @@ -2585,14 +2585,14 @@ Notes: offset microseconds. The ``ffffff`` part is omitted when the offset is a whole number of seconds and both the ``ffffff`` and the ``SS`` part is omitted when the offset is a whole number of minutes. For example, if - :meth:`utcoffset` returns ``timedelta(hours=-3, minutes=-30)``, ``%z`` is + :meth:`~.datetime.utcoffset` returns ``timedelta(hours=-3, minutes=-30)``, ``%z`` is replaced with the string ``'-0330'``. .. versionchanged:: 3.7 The UTC offset is not restricted to a whole number of minutes. .. versionchanged:: 3.7 - When the ``%z`` directive is provided to the :meth:`strptime` method, + When the ``%z`` directive is provided to the :meth:`~.datetime.strptime` method, the UTC offsets can have a colon as a separator between hours, minutes and seconds. For example, ``'+01:00:00'`` will be parsed as an offset of one hour. @@ -2603,11 +2603,11 @@ Notes: hours, minutes and seconds. ``%Z`` - In :meth:`strftime`, ``%Z`` is replaced by an empty string if - :meth:`tzname` returns ``None``; otherwise ``%Z`` is replaced by the + In :meth:`~.datetime.strftime`, ``%Z`` is replaced by an empty string if + :meth:`~.datetime.tzname` returns ``None``; otherwise ``%Z`` is replaced by the returned value, which must be a string. - :meth:`strptime` only accepts certain values for ``%Z``: + :meth:`~.datetime.strptime` only accepts certain values for ``%Z``: 1. 
any value in ``time.tzname`` for your machine's locale 2. the hard-coded values ``UTC`` and ``GMT`` @@ -2617,23 +2617,23 @@ Notes: invalid values. .. versionchanged:: 3.2 - When the ``%z`` directive is provided to the :meth:`strptime` method, an + When the ``%z`` directive is provided to the :meth:`~.datetime.strptime` method, an aware :class:`.datetime` object will be produced. The ``tzinfo`` of the result will be set to a :class:`timezone` instance. (7) - When used with the :meth:`strptime` method, ``%U`` and ``%W`` are only used + When used with the :meth:`~.datetime.strptime` method, ``%U`` and ``%W`` are only used in calculations when the day of the week and the calendar year (``%Y``) are specified. (8) Similar to ``%U`` and ``%W``, ``%V`` is only used in calculations when the day of the week and the ISO year (``%G``) are specified in a - :meth:`strptime` format string. Also note that ``%G`` and ``%Y`` are not + :meth:`~.datetime.strptime` format string. Also note that ``%G`` and ``%Y`` are not interchangeable. (9) - When used with the :meth:`strptime` method, the leading zero is optional + When used with the :meth:`~.datetime.strptime` method, the leading zero is optional for formats ``%d``, ``%m``, ``%H``, ``%I``, ``%M``, ``%S``, ``%j``, ``%U``, ``%W``, and ``%V``. Format ``%y`` does require a leading zero. 
diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 763503205e1670..bba4fe0d5f2425 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -26,7 +26,6 @@ Doc/library/asyncio-subprocess.rst Doc/library/bdb.rst Doc/library/collections.rst Doc/library/csv.rst -Doc/library/datetime.rst Doc/library/dbm.rst Doc/library/decimal.rst Doc/library/email.charset.rst From c87233fd3fa77067013c35328f8c4884f0567a59 Mon Sep 17 00:00:00 2001 From: mpage Date: Mon, 29 Jan 2024 07:08:23 -0800 Subject: [PATCH 146/160] gh-112050: Adapt collections.deque to Argument Clinic (#113963) --- .../pycore_global_objects_fini_generated.h | 1 + Include/internal/pycore_global_strings.h | 1 + .../internal/pycore_runtime_init_generated.h | 1 + .../internal/pycore_unicodeobject_generated.h | 3 + ...-01-11-22-58-45.gh-issue-112050.hDuvDW.rst | 1 + Modules/_collectionsmodule.c | 412 ++++++++++------- Modules/clinic/_collectionsmodule.c.h | 418 +++++++++++++++++- 7 files changed, 685 insertions(+), 152 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-01-11-22-58-45.gh-issue-112050.hDuvDW.rst diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index e92707051c12b7..57505b5388fd6c 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -1049,6 +1049,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(max_length)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(maxdigits)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(maxevents)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(maxlen)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(maxmem)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(maxsplit)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(maxvalue)); diff --git a/Include/internal/pycore_global_strings.h 
b/Include/internal/pycore_global_strings.h index eb60b80c964d42..0f4f3b61910241 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -538,6 +538,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(max_length) STRUCT_FOR_ID(maxdigits) STRUCT_FOR_ID(maxevents) + STRUCT_FOR_ID(maxlen) STRUCT_FOR_ID(maxmem) STRUCT_FOR_ID(maxsplit) STRUCT_FOR_ID(maxvalue) diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 9b39de1d69c6c7..63a2b54c839a4b 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -1047,6 +1047,7 @@ extern "C" { INIT_ID(max_length), \ INIT_ID(maxdigits), \ INIT_ID(maxevents), \ + INIT_ID(maxlen), \ INIT_ID(maxmem), \ INIT_ID(maxsplit), \ INIT_ID(maxvalue), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index 898d386f4cfd05..bf8cdd85e4be5c 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -1455,6 +1455,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(maxevents); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(maxlen); + assert(_PyUnicode_CheckConsistency(string, 1)); + _PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(maxmem); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-01-11-22-58-45.gh-issue-112050.hDuvDW.rst b/Misc/NEWS.d/next/Core and Builtins/2024-01-11-22-58-45.gh-issue-112050.hDuvDW.rst new file mode 100644 index 00000000000000..e5f3d5ea0cea25 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-01-11-22-58-45.gh-issue-112050.hDuvDW.rst @@ -0,0 +1 @@ +Convert :class:`collections.deque` to use Argument Clinic. 
diff --git a/Modules/_collectionsmodule.c b/Modules/_collectionsmodule.c index c8cd53de5e2262..ef77d34b10e47b 100644 --- a/Modules/_collectionsmodule.c +++ b/Modules/_collectionsmodule.c @@ -44,8 +44,11 @@ find_module_state_by_def(PyTypeObject *type) /*[clinic input] module _collections class _tuplegetter "_tuplegetterobject *" "clinic_state()->tuplegetter_type" +class _collections.deque "dequeobject *" "clinic_state()->deque_type" [clinic start generated code]*/ -/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7356042a89862e0e]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=a033cc2a8476b3f1]*/ + +typedef struct dequeobject dequeobject; /* We can safely assume type to be the defining class, * since tuplegetter is not a base type */ @@ -53,6 +56,12 @@ class _tuplegetter "_tuplegetterobject *" "clinic_state()->tuplegetter_type" #include "clinic/_collectionsmodule.c.h" #undef clinic_state +/*[python input] +class dequeobject_converter(self_converter): + type = "dequeobject *" +[python start generated code]*/ +/*[python end generated code: output=da39a3ee5e6b4b0d input=b6ae4a3ff852be2f]*/ + /* collections module implementation of a deque() datatype Written and maintained by Raymond D. Hettinger */ @@ -121,7 +130,7 @@ typedef struct BLOCK { struct BLOCK *rightlink; } block; -typedef struct { +struct dequeobject { PyObject_VAR_HEAD block *leftblock; block *rightblock; @@ -132,7 +141,7 @@ typedef struct { Py_ssize_t numfreeblocks; block *freeblocks[MAXFREEBLOCKS]; PyObject *weakreflist; -} dequeobject; +}; /* For debug builds, add error checking to track the endpoints * in the chain of links. The goal is to make sure that link @@ -219,8 +228,17 @@ deque_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return (PyObject *)deque; } +/*[clinic input] +_collections.deque.pop as deque_pop + + deque: dequeobject + +Remove and return the rightmost element. 
+[clinic start generated code]*/ + static PyObject * -deque_pop(dequeobject *deque, PyObject *unused) +deque_pop_impl(dequeobject *deque) +/*[clinic end generated code: output=2e5f7890c4251f07 input=eb6e6d020f877dec]*/ { PyObject *item; block *prevblock; @@ -254,10 +272,17 @@ deque_pop(dequeobject *deque, PyObject *unused) return item; } -PyDoc_STRVAR(pop_doc, "Remove and return the rightmost element."); +/*[clinic input] +_collections.deque.popleft as deque_popleft + + deque: dequeobject + +Remove and return the leftmost element. +[clinic start generated code]*/ static PyObject * -deque_popleft(dequeobject *deque, PyObject *unused) +deque_popleft_impl(dequeobject *deque) +/*[clinic end generated code: output=62b154897097ff68 input=acb41b9af50a9d9b]*/ { PyObject *item; block *prevblock; @@ -292,8 +317,6 @@ deque_popleft(dequeobject *deque, PyObject *unused) return item; } -PyDoc_STRVAR(popleft_doc, "Remove and return the leftmost element."); - /* The deque's size limit is d.maxlen. The limit can be zero or positive. * If there is no limit, then d.maxlen == -1. * @@ -326,7 +349,7 @@ deque_append_internal(dequeobject *deque, PyObject *item, Py_ssize_t maxlen) deque->rightindex++; deque->rightblock->data[deque->rightindex] = item; if (NEEDS_TRIM(deque, maxlen)) { - PyObject *olditem = deque_popleft(deque, NULL); + PyObject *olditem = deque_popleft_impl(deque); Py_DECREF(olditem); } else { deque->state++; @@ -334,16 +357,25 @@ deque_append_internal(dequeobject *deque, PyObject *item, Py_ssize_t maxlen) return 0; } +/*[clinic input] +_collections.deque.append as deque_append + + deque: dequeobject + item: object + / + +Add an element to the right side of the deque. 
+[clinic start generated code]*/ + static PyObject * deque_append(dequeobject *deque, PyObject *item) +/*[clinic end generated code: output=507b13efc4853ecc input=f112b83c380528e3]*/ { if (deque_append_internal(deque, Py_NewRef(item), deque->maxlen) < 0) return NULL; Py_RETURN_NONE; } -PyDoc_STRVAR(append_doc, "Add an element to the right side of the deque."); - static inline int deque_appendleft_internal(dequeobject *deque, PyObject *item, Py_ssize_t maxlen) { @@ -362,7 +394,7 @@ deque_appendleft_internal(dequeobject *deque, PyObject *item, Py_ssize_t maxlen) deque->leftindex--; deque->leftblock->data[deque->leftindex] = item; if (NEEDS_TRIM(deque, deque->maxlen)) { - PyObject *olditem = deque_pop(deque, NULL); + PyObject *olditem = deque_pop_impl(deque); Py_DECREF(olditem); } else { deque->state++; @@ -370,16 +402,25 @@ deque_appendleft_internal(dequeobject *deque, PyObject *item, Py_ssize_t maxlen) return 0; } +/*[clinic input] +_collections.deque.appendleft as deque_appendleft + + deque: dequeobject + item: object + / + +Add an element to the left side of the deque. +[clinic start generated code]*/ + static PyObject * deque_appendleft(dequeobject *deque, PyObject *item) +/*[clinic end generated code: output=de0335a64800ffd8 input=bbdaa60a3e956062]*/ { if (deque_appendleft_internal(deque, Py_NewRef(item), deque->maxlen) < 0) return NULL; Py_RETURN_NONE; } -PyDoc_STRVAR(appendleft_doc, "Add an element to the left side of the deque."); - static PyObject* finalize_iterator(PyObject *it) { @@ -410,8 +451,19 @@ consume_iterator(PyObject *it) return finalize_iterator(it); } +/*[clinic input] +_collections.deque.extend as deque_extend + + deque: dequeobject + iterable: object + / + +Extend the right side of the deque with elements from the iterable. 
+[clinic start generated code]*/ + static PyObject * deque_extend(dequeobject *deque, PyObject *iterable) +/*[clinic end generated code: output=a3a6e74d17063f8d input=cfebfd34d5383339]*/ { PyObject *it, *item; PyObject *(*iternext)(PyObject *); @@ -454,11 +506,19 @@ deque_extend(dequeobject *deque, PyObject *iterable) return finalize_iterator(it); } -PyDoc_STRVAR(extend_doc, -"Extend the right side of the deque with elements from the iterable"); +/*[clinic input] +_collections.deque.extendleft as deque_extendleft + + deque: dequeobject + iterable: object + / + +Extend the left side of the deque with elements from the iterable. +[clinic start generated code]*/ static PyObject * deque_extendleft(dequeobject *deque, PyObject *iterable) +/*[clinic end generated code: output=2dba946c50498c67 input=f4820e695a6f9416]*/ { PyObject *it, *item; PyObject *(*iternext)(PyObject *); @@ -501,9 +561,6 @@ deque_extendleft(dequeobject *deque, PyObject *iterable) return finalize_iterator(it); } -PyDoc_STRVAR(extendleft_doc, -"Extend the left side of the deque with elements from the iterable"); - static PyObject * deque_inplace_concat(dequeobject *deque, PyObject *other) { @@ -517,8 +574,17 @@ deque_inplace_concat(dequeobject *deque, PyObject *other) return (PyObject *)deque; } +/*[clinic input] +_collections.deque.copy as deque_copy + + deque: dequeobject + +Return a shallow copy of a deque. 
+[clinic start generated code]*/ + static PyObject * -deque_copy(PyObject *deque, PyObject *Py_UNUSED(ignored)) +deque_copy_impl(dequeobject *deque) +/*[clinic end generated code: output=6409b3d1ad2898b5 input=0e22f138bc1fcbee]*/ { PyObject *result; dequeobject *old_deque = (dequeobject *)deque; @@ -537,7 +603,7 @@ deque_copy(PyObject *deque, PyObject *Py_UNUSED(ignored)) PyObject *item = old_deque->leftblock->data[old_deque->leftindex]; rv = deque_append(new_deque, item); } else { - rv = deque_extend(new_deque, deque); + rv = deque_extend(new_deque, (PyObject *)deque); } if (rv != NULL) { Py_DECREF(rv); @@ -547,7 +613,8 @@ deque_copy(PyObject *deque, PyObject *Py_UNUSED(ignored)) return NULL; } if (old_deque->maxlen < 0) - result = PyObject_CallOneArg((PyObject *)(Py_TYPE(deque)), deque); + result = PyObject_CallOneArg((PyObject *)(Py_TYPE(deque)), + (PyObject *)deque); else result = PyObject_CallFunction((PyObject *)(Py_TYPE(deque)), "Oi", deque, old_deque->maxlen, NULL); @@ -561,7 +628,18 @@ deque_copy(PyObject *deque, PyObject *Py_UNUSED(ignored)) return result; } -PyDoc_STRVAR(copy_doc, "Return a shallow copy of a deque."); +/*[clinic input] +_collections.deque.__copy__ as deque___copy__ = _collections.deque.copy + +Return a shallow copy of a deque. 
+[clinic start generated code]*/ + +static PyObject * +deque___copy___impl(dequeobject *deque) +/*[clinic end generated code: output=7c5821504342bf23 input=fce05df783e7912b]*/ +{ + return deque_copy_impl(deque); +} static PyObject * deque_concat(dequeobject *deque, PyObject *other) @@ -580,7 +658,7 @@ deque_concat(dequeobject *deque, PyObject *other) return NULL; } - new_deque = deque_copy((PyObject *)deque, NULL); + new_deque = deque_copy_impl(deque); if (new_deque == NULL) return NULL; result = deque_extend((dequeobject *)new_deque, other); @@ -669,22 +747,29 @@ deque_clear(dequeobject *deque) alternate_method: while (Py_SIZE(deque)) { - item = deque_pop(deque, NULL); + item = deque_pop_impl(deque); assert (item != NULL); Py_DECREF(item); } return 0; } +/*[clinic input] +_collections.deque.clear as deque_clearmethod + + deque: dequeobject + +Remove all elements from the deque. +[clinic start generated code]*/ + static PyObject * -deque_clearmethod(dequeobject *deque, PyObject *Py_UNUSED(ignored)) +deque_clearmethod_impl(dequeobject *deque) +/*[clinic end generated code: output=79b2513e097615c1 input=20488eb932f89f9e]*/ { deque_clear(deque); Py_RETURN_NONE; } -PyDoc_STRVAR(clear_doc, "Remove all elements from the deque."); - static PyObject * deque_inplace_repeat(dequeobject *deque, Py_ssize_t n) { @@ -768,7 +853,7 @@ deque_repeat(dequeobject *deque, Py_ssize_t n) dequeobject *new_deque; PyObject *rv; - new_deque = (dequeobject *)deque_copy((PyObject *) deque, NULL); + new_deque = (dequeobject *)deque_copy_impl(deque); if (new_deque == NULL) return NULL; rv = deque_inplace_repeat(new_deque, n); @@ -925,36 +1010,36 @@ _deque_rotate(dequeobject *deque, Py_ssize_t n) return rv; } -static PyObject * -deque_rotate(dequeobject *deque, PyObject *const *args, Py_ssize_t nargs) -{ - Py_ssize_t n=1; +/*[clinic input] +_collections.deque.rotate as deque_rotate - if (!_PyArg_CheckPositional("deque.rotate", nargs, 0, 1)) { - return NULL; - } - if (nargs) { - PyObject *index = 
_PyNumber_Index(args[0]); - if (index == NULL) { - return NULL; - } - n = PyLong_AsSsize_t(index); - Py_DECREF(index); - if (n == -1 && PyErr_Occurred()) { - return NULL; - } - } + deque: dequeobject + n: Py_ssize_t = 1 + / +Rotate the deque n steps to the right. If n is negative, rotates left. +[clinic start generated code]*/ + +static PyObject * +deque_rotate_impl(dequeobject *deque, Py_ssize_t n) +/*[clinic end generated code: output=96c2402a371eb15d input=d22070f49cc06c76]*/ +{ if (!_deque_rotate(deque, n)) Py_RETURN_NONE; return NULL; } -PyDoc_STRVAR(rotate_doc, -"Rotate the deque n steps to the right (default n=1). If n is negative, rotates left."); +/*[clinic input] +_collections.deque.reverse as deque_reverse + + deque: dequeobject + +Reverse *IN PLACE*. +[clinic start generated code]*/ static PyObject * -deque_reverse(dequeobject *deque, PyObject *unused) +deque_reverse_impl(dequeobject *deque) +/*[clinic end generated code: output=bdeebc2cf8c1f064 input=f139787f406101c9]*/ { block *leftblock = deque->leftblock; block *rightblock = deque->rightblock; @@ -991,11 +1076,19 @@ deque_reverse(dequeobject *deque, PyObject *unused) Py_RETURN_NONE; } -PyDoc_STRVAR(reverse_doc, -"D.reverse() -- reverse *IN PLACE*"); +/*[clinic input] +_collections.deque.count as deque_count + + deque: dequeobject + value as v: object + / + +Return number of occurrences of value. 
+[clinic start generated code]*/ static PyObject * deque_count(dequeobject *deque, PyObject *v) +/*[clinic end generated code: output=7405d289d94d7b9b input=1892925260ff5d78]*/ { block *b = deque->leftblock; Py_ssize_t index = deque->leftindex; @@ -1030,9 +1123,6 @@ deque_count(dequeobject *deque, PyObject *v) return PyLong_FromSsize_t(count); } -PyDoc_STRVAR(count_doc, -"D.count(value) -- return number of occurrences of value"); - static int deque_contains(dequeobject *deque, PyObject *v) { @@ -1071,22 +1161,33 @@ deque_len(dequeobject *deque) return Py_SIZE(deque); } +/*[clinic input] +@text_signature "($self, value, [start, [stop]])" +_collections.deque.index as deque_index + + deque: dequeobject + value as v: object + start: object(converter='_PyEval_SliceIndexNotNone', type='Py_ssize_t', c_default='0') = NULL + stop: object(converter='_PyEval_SliceIndexNotNone', type='Py_ssize_t', c_default='Py_SIZE(deque)') = NULL + / + +Return first index of value. + +Raises ValueError if the value is not present. 
+[clinic start generated code]*/ + static PyObject * -deque_index(dequeobject *deque, PyObject *const *args, Py_ssize_t nargs) +deque_index_impl(dequeobject *deque, PyObject *v, Py_ssize_t start, + Py_ssize_t stop) +/*[clinic end generated code: output=df45132753175ef9 input=140210c099830f64]*/ { - Py_ssize_t i, n, start=0, stop=Py_SIZE(deque); - PyObject *v, *item; + Py_ssize_t i, n; + PyObject *item; block *b = deque->leftblock; Py_ssize_t index = deque->leftindex; size_t start_state = deque->state; int cmp; - if (!_PyArg_ParseStack(args, nargs, "O|O&O&:index", &v, - _PyEval_SliceIndexNotNone, &start, - _PyEval_SliceIndexNotNone, &stop)) { - return NULL; - } - if (start < 0) { start += Py_SIZE(deque); if (start < 0) @@ -1138,10 +1239,6 @@ deque_index(dequeobject *deque, PyObject *const *args, Py_ssize_t nargs) return NULL; } -PyDoc_STRVAR(index_doc, -"D.index(value, [start, [stop]]) -- return first index of value.\n" -"Raises ValueError if the value is not present."); - /* insert(), remove(), and delitem() are implemented in terms of rotate() for simplicity and reasonable performance near the end points. If for some reason these methods become popular, it is not @@ -1150,18 +1247,24 @@ PyDoc_STRVAR(index_doc, boost (by moving each pointer only once instead of twice). */ +/*[clinic input] +_collections.deque.insert as deque_insert + + deque: dequeobject + index: Py_ssize_t + value: object + / + +Insert value before index. 
+[clinic start generated code]*/ + static PyObject * -deque_insert(dequeobject *deque, PyObject *const *args, Py_ssize_t nargs) +deque_insert_impl(dequeobject *deque, Py_ssize_t index, PyObject *value) +/*[clinic end generated code: output=ef4d2c15d5532b80 input=3e5c1c120d70c0e6]*/ { - Py_ssize_t index; Py_ssize_t n = Py_SIZE(deque); - PyObject *value; PyObject *rv; - if (!_PyArg_ParseStack(args, nargs, "nO:insert", &index, &value)) { - return NULL; - } - if (deque->maxlen == Py_SIZE(deque)) { PyErr_SetString(PyExc_IndexError, "deque already at its maximum size"); return NULL; @@ -1184,12 +1287,6 @@ deque_insert(dequeobject *deque, PyObject *const *args, Py_ssize_t nargs) Py_RETURN_NONE; } -PyDoc_STRVAR(insert_doc, -"D.insert(index, object) -- insert object before index"); - -PyDoc_STRVAR(remove_doc, -"D.remove(value) -- remove first occurrence of value."); - static int valid_index(Py_ssize_t i, Py_ssize_t limit) { @@ -1246,15 +1343,26 @@ deque_del_item(dequeobject *deque, Py_ssize_t i) assert (i >= 0 && i < Py_SIZE(deque)); if (_deque_rotate(deque, -i)) return -1; - item = deque_popleft(deque, NULL); + item = deque_popleft_impl(deque); rv = _deque_rotate(deque, i); assert (item != NULL); Py_DECREF(item); return rv; } +/*[clinic input] +_collections.deque.remove as deque_remove + + deque: dequeobject + value: object + / + +Remove first occurrence of value. +[clinic start generated code]*/ + static PyObject * deque_remove(dequeobject *deque, PyObject *value) +/*[clinic end generated code: output=49e1666d612fe911 input=d972f32d15990880]*/ { PyObject *item; block *b = deque->leftblock; @@ -1375,8 +1483,17 @@ deque_traverse(dequeobject *deque, visitproc visit, void *arg) return 0; } +/*[clinic input] +_collections.deque.__reduce__ as deque___reduce__ + + deque: dequeobject + +Return state information for pickling. 
+[clinic start generated code]*/ + static PyObject * -deque_reduce(dequeobject *deque, PyObject *Py_UNUSED(ignored)) +deque___reduce___impl(dequeobject *deque) +/*[clinic end generated code: output=cb85d9e0b7d2c5ad input=991a933a5bc7a526]*/ { PyObject *state, *it; @@ -1510,26 +1627,23 @@ deque_richcompare(PyObject *v, PyObject *w, int op) return NULL; } +/*[clinic input] +@text_signature "([iterable[, maxlen]])" +_collections.deque.__init__ as deque_init + + deque: dequeobject + iterable: object = NULL + maxlen as maxlenobj: object = NULL + +A list-like sequence optimized for data accesses near its endpoints. +[clinic start generated code]*/ + static int -deque_init(dequeobject *deque, PyObject *args, PyObject *kwdargs) +deque_init_impl(dequeobject *deque, PyObject *iterable, PyObject *maxlenobj) +/*[clinic end generated code: output=7084a39d71218dcd input=5ebdffc48a2d27ae]*/ + { - PyObject *iterable = NULL; - PyObject *maxlenobj = NULL; Py_ssize_t maxlen = -1; - char *kwlist[] = {"iterable", "maxlen", 0}; - - if (kwdargs == NULL && PyTuple_GET_SIZE(args) <= 2) { - if (PyTuple_GET_SIZE(args) > 0) { - iterable = PyTuple_GET_ITEM(args, 0); - } - if (PyTuple_GET_SIZE(args) > 1) { - maxlenobj = PyTuple_GET_ITEM(args, 1); - } - } else { - if (!PyArg_ParseTupleAndKeywords(args, kwdargs, "|OO:deque", kwlist, - &iterable, &maxlenobj)) - return -1; - } if (maxlenobj != NULL && maxlenobj != Py_None) { maxlen = PyLong_AsSsize_t(maxlenobj); if (maxlen == -1 && PyErr_Occurred()) @@ -1551,8 +1665,17 @@ deque_init(dequeobject *deque, PyObject *args, PyObject *kwdargs) return 0; } +/*[clinic input] +_collections.deque.__sizeof__ as deque___sizeof__ + + deque: dequeobject + +Return the size of the deque in memory, in bytes. 
+[clinic start generated code]*/ + static PyObject * -deque_sizeof(dequeobject *deque, void *unused) +deque___sizeof___impl(dequeobject *deque) +/*[clinic end generated code: output=4d36e9fb4f30bbaf input=4e7c9a00c03c3290]*/ { size_t res = _PyObject_SIZE(Py_TYPE(deque)); size_t blocks; @@ -1563,9 +1686,6 @@ deque_sizeof(dequeobject *deque, void *unused) return PyLong_FromSize_t(res); } -PyDoc_STRVAR(sizeof_doc, -"D.__sizeof__() -- size of D in memory, in bytes"); - static PyObject * deque_get_maxlen(dequeobject *deque, void *Py_UNUSED(ignored)) { @@ -1574,6 +1694,22 @@ deque_get_maxlen(dequeobject *deque, void *Py_UNUSED(ignored)) return PyLong_FromSsize_t(deque->maxlen); } +static PyObject *deque_reviter(dequeobject *deque); + +/*[clinic input] +_collections.deque.__reversed__ as deque___reversed__ + + deque: dequeobject + +Return a reverse iterator over the deque. +[clinic start generated code]*/ + +static PyObject * +deque___reversed___impl(dequeobject *deque) +/*[clinic end generated code: output=3e7e7e715883cf2e input=3d494c25a6fe5c7e]*/ +{ + return deque_reviter(deque); +} /* deque object ********************************************************/ @@ -1584,47 +1720,26 @@ static PyGetSetDef deque_getset[] = { }; static PyObject *deque_iter(dequeobject *deque); -static PyObject *deque_reviter(dequeobject *deque, PyObject *Py_UNUSED(ignored)); -PyDoc_STRVAR(reversed_doc, - "D.__reversed__() -- return a reverse iterator over the deque"); static PyMethodDef deque_methods[] = { - {"append", (PyCFunction)deque_append, - METH_O, append_doc}, - {"appendleft", (PyCFunction)deque_appendleft, - METH_O, appendleft_doc}, - {"clear", (PyCFunction)deque_clearmethod, - METH_NOARGS, clear_doc}, - {"__copy__", deque_copy, - METH_NOARGS, copy_doc}, - {"copy", deque_copy, - METH_NOARGS, copy_doc}, - {"count", (PyCFunction)deque_count, - METH_O, count_doc}, - {"extend", (PyCFunction)deque_extend, - METH_O, extend_doc}, - {"extendleft", (PyCFunction)deque_extendleft, - METH_O, 
extendleft_doc}, - {"index", _PyCFunction_CAST(deque_index), - METH_FASTCALL, index_doc}, - {"insert", _PyCFunction_CAST(deque_insert), - METH_FASTCALL, insert_doc}, - {"pop", (PyCFunction)deque_pop, - METH_NOARGS, pop_doc}, - {"popleft", (PyCFunction)deque_popleft, - METH_NOARGS, popleft_doc}, - {"__reduce__", (PyCFunction)deque_reduce, - METH_NOARGS, reduce_doc}, - {"remove", (PyCFunction)deque_remove, - METH_O, remove_doc}, - {"__reversed__", (PyCFunction)deque_reviter, - METH_NOARGS, reversed_doc}, - {"reverse", (PyCFunction)deque_reverse, - METH_NOARGS, reverse_doc}, - {"rotate", _PyCFunction_CAST(deque_rotate), - METH_FASTCALL, rotate_doc}, - {"__sizeof__", (PyCFunction)deque_sizeof, - METH_NOARGS, sizeof_doc}, + DEQUE_APPEND_METHODDEF + DEQUE_APPENDLEFT_METHODDEF + DEQUE_CLEARMETHOD_METHODDEF + DEQUE___COPY___METHODDEF + DEQUE_COPY_METHODDEF + DEQUE_COUNT_METHODDEF + DEQUE_EXTEND_METHODDEF + DEQUE_EXTENDLEFT_METHODDEF + DEQUE_INDEX_METHODDEF + DEQUE_INSERT_METHODDEF + DEQUE_POP_METHODDEF + DEQUE_POPLEFT_METHODDEF + DEQUE___REDUCE___METHODDEF + DEQUE_REMOVE_METHODDEF + DEQUE___REVERSED___METHODDEF + DEQUE_REVERSE_METHODDEF + DEQUE_ROTATE_METHODDEF + DEQUE___SIZEOF___METHODDEF {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")}, {NULL, NULL} /* sentinel */ @@ -1635,17 +1750,12 @@ static PyMemberDef deque_members[] = { {NULL}, }; -PyDoc_STRVAR(deque_doc, -"deque([iterable[, maxlen]]) --> deque object\n\ -\n\ -A list-like sequence optimized for data accesses near its endpoints."); - static PyType_Slot deque_slots[] = { {Py_tp_dealloc, deque_dealloc}, {Py_tp_repr, deque_repr}, {Py_tp_hash, PyObject_HashNotImplemented}, {Py_tp_getattro, PyObject_GenericGetAttr}, - {Py_tp_doc, (void *)deque_doc}, + {Py_tp_doc, (void *)deque_init__doc__}, {Py_tp_traverse, deque_traverse}, {Py_tp_clear, deque_clear}, {Py_tp_richcompare, deque_richcompare}, @@ -1834,7 +1944,7 @@ static PyType_Spec dequeiter_spec = { /*********************** Deque 
Reverse Iterator **************************/ static PyObject * -deque_reviter(dequeobject *deque, PyObject *Py_UNUSED(ignored)) +deque_reviter(dequeobject *deque) { dequeiterobject *it; collections_state *state = find_module_state_by_def(Py_TYPE(deque)); @@ -1889,7 +1999,7 @@ dequereviter_new(PyTypeObject *type, PyObject *args, PyObject *kwds) return NULL; assert(type == state->dequereviter_type); - it = (dequeiterobject*)deque_reviter((dequeobject *)deque, NULL); + it = (dequeiterobject *)deque_reviter((dequeobject *)deque); if (!it) return NULL; /* consume items from the queue */ diff --git a/Modules/clinic/_collectionsmodule.c.h b/Modules/clinic/_collectionsmodule.c.h index 591ab50c76a8e8..60fb12a2231619 100644 --- a/Modules/clinic/_collectionsmodule.c.h +++ b/Modules/clinic/_collectionsmodule.c.h @@ -2,9 +2,425 @@ preserve [clinic start generated code]*/ +#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +# include "pycore_gc.h" // PyGC_Head +# include "pycore_runtime.h" // _Py_ID() +#endif #include "pycore_abstract.h" // _PyNumber_Index() #include "pycore_modsupport.h" // _PyArg_CheckPositional() +PyDoc_STRVAR(deque_pop__doc__, +"pop($self, /)\n" +"--\n" +"\n" +"Remove and return the rightmost element."); + +#define DEQUE_POP_METHODDEF \ + {"pop", (PyCFunction)deque_pop, METH_NOARGS, deque_pop__doc__}, + +static PyObject * +deque_pop_impl(dequeobject *deque); + +static PyObject * +deque_pop(dequeobject *deque, PyObject *Py_UNUSED(ignored)) +{ + return deque_pop_impl(deque); +} + +PyDoc_STRVAR(deque_popleft__doc__, +"popleft($self, /)\n" +"--\n" +"\n" +"Remove and return the leftmost element."); + +#define DEQUE_POPLEFT_METHODDEF \ + {"popleft", (PyCFunction)deque_popleft, METH_NOARGS, deque_popleft__doc__}, + +static PyObject * +deque_popleft_impl(dequeobject *deque); + +static PyObject * +deque_popleft(dequeobject *deque, PyObject *Py_UNUSED(ignored)) +{ + return deque_popleft_impl(deque); +} + +PyDoc_STRVAR(deque_append__doc__, +"append($self, 
item, /)\n" +"--\n" +"\n" +"Add an element to the right side of the deque."); + +#define DEQUE_APPEND_METHODDEF \ + {"append", (PyCFunction)deque_append, METH_O, deque_append__doc__}, + +PyDoc_STRVAR(deque_appendleft__doc__, +"appendleft($self, item, /)\n" +"--\n" +"\n" +"Add an element to the left side of the deque."); + +#define DEQUE_APPENDLEFT_METHODDEF \ + {"appendleft", (PyCFunction)deque_appendleft, METH_O, deque_appendleft__doc__}, + +PyDoc_STRVAR(deque_extend__doc__, +"extend($self, iterable, /)\n" +"--\n" +"\n" +"Extend the right side of the deque with elements from the iterable."); + +#define DEQUE_EXTEND_METHODDEF \ + {"extend", (PyCFunction)deque_extend, METH_O, deque_extend__doc__}, + +PyDoc_STRVAR(deque_extendleft__doc__, +"extendleft($self, iterable, /)\n" +"--\n" +"\n" +"Extend the left side of the deque with elements from the iterable."); + +#define DEQUE_EXTENDLEFT_METHODDEF \ + {"extendleft", (PyCFunction)deque_extendleft, METH_O, deque_extendleft__doc__}, + +PyDoc_STRVAR(deque_copy__doc__, +"copy($self, /)\n" +"--\n" +"\n" +"Return a shallow copy of a deque."); + +#define DEQUE_COPY_METHODDEF \ + {"copy", (PyCFunction)deque_copy, METH_NOARGS, deque_copy__doc__}, + +static PyObject * +deque_copy_impl(dequeobject *deque); + +static PyObject * +deque_copy(dequeobject *deque, PyObject *Py_UNUSED(ignored)) +{ + return deque_copy_impl(deque); +} + +PyDoc_STRVAR(deque___copy____doc__, +"__copy__($self, /)\n" +"--\n" +"\n" +"Return a shallow copy of a deque."); + +#define DEQUE___COPY___METHODDEF \ + {"__copy__", (PyCFunction)deque___copy__, METH_NOARGS, deque___copy____doc__}, + +static PyObject * +deque___copy___impl(dequeobject *deque); + +static PyObject * +deque___copy__(dequeobject *deque, PyObject *Py_UNUSED(ignored)) +{ + return deque___copy___impl(deque); +} + +PyDoc_STRVAR(deque_clearmethod__doc__, +"clear($self, /)\n" +"--\n" +"\n" +"Remove all elements from the deque."); + +#define DEQUE_CLEARMETHOD_METHODDEF \ + {"clear", 
(PyCFunction)deque_clearmethod, METH_NOARGS, deque_clearmethod__doc__}, + +static PyObject * +deque_clearmethod_impl(dequeobject *deque); + +static PyObject * +deque_clearmethod(dequeobject *deque, PyObject *Py_UNUSED(ignored)) +{ + return deque_clearmethod_impl(deque); +} + +PyDoc_STRVAR(deque_rotate__doc__, +"rotate($self, n=1, /)\n" +"--\n" +"\n" +"Rotate the deque n steps to the right. If n is negative, rotates left."); + +#define DEQUE_ROTATE_METHODDEF \ + {"rotate", _PyCFunction_CAST(deque_rotate), METH_FASTCALL, deque_rotate__doc__}, + +static PyObject * +deque_rotate_impl(dequeobject *deque, Py_ssize_t n); + +static PyObject * +deque_rotate(dequeobject *deque, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + Py_ssize_t n = 1; + + if (!_PyArg_CheckPositional("rotate", nargs, 0, 1)) { + goto exit; + } + if (nargs < 1) { + goto skip_optional; + } + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[0]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + n = ival; + } +skip_optional: + return_value = deque_rotate_impl(deque, n); + +exit: + return return_value; +} + +PyDoc_STRVAR(deque_reverse__doc__, +"reverse($self, /)\n" +"--\n" +"\n" +"Reverse *IN PLACE*."); + +#define DEQUE_REVERSE_METHODDEF \ + {"reverse", (PyCFunction)deque_reverse, METH_NOARGS, deque_reverse__doc__}, + +static PyObject * +deque_reverse_impl(dequeobject *deque); + +static PyObject * +deque_reverse(dequeobject *deque, PyObject *Py_UNUSED(ignored)) +{ + return deque_reverse_impl(deque); +} + +PyDoc_STRVAR(deque_count__doc__, +"count($self, value, /)\n" +"--\n" +"\n" +"Return number of occurrences of value."); + +#define DEQUE_COUNT_METHODDEF \ + {"count", (PyCFunction)deque_count, METH_O, deque_count__doc__}, + +PyDoc_STRVAR(deque_index__doc__, +"index($self, value, [start, [stop]])\n" +"--\n" +"\n" +"Return first index of value.\n" +"\n" +"Raises ValueError if 
the value is not present."); + +#define DEQUE_INDEX_METHODDEF \ + {"index", _PyCFunction_CAST(deque_index), METH_FASTCALL, deque_index__doc__}, + +static PyObject * +deque_index_impl(dequeobject *deque, PyObject *v, Py_ssize_t start, + Py_ssize_t stop); + +static PyObject * +deque_index(dequeobject *deque, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *v; + Py_ssize_t start = 0; + Py_ssize_t stop = Py_SIZE(deque); + + if (!_PyArg_CheckPositional("index", nargs, 1, 3)) { + goto exit; + } + v = args[0]; + if (nargs < 2) { + goto skip_optional; + } + if (!_PyEval_SliceIndexNotNone(args[1], &start)) { + goto exit; + } + if (nargs < 3) { + goto skip_optional; + } + if (!_PyEval_SliceIndexNotNone(args[2], &stop)) { + goto exit; + } +skip_optional: + return_value = deque_index_impl(deque, v, start, stop); + +exit: + return return_value; +} + +PyDoc_STRVAR(deque_insert__doc__, +"insert($self, index, value, /)\n" +"--\n" +"\n" +"Insert value before index."); + +#define DEQUE_INSERT_METHODDEF \ + {"insert", _PyCFunction_CAST(deque_insert), METH_FASTCALL, deque_insert__doc__}, + +static PyObject * +deque_insert_impl(dequeobject *deque, Py_ssize_t index, PyObject *value); + +static PyObject * +deque_insert(dequeobject *deque, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + Py_ssize_t index; + PyObject *value; + + if (!_PyArg_CheckPositional("insert", nargs, 2, 2)) { + goto exit; + } + { + Py_ssize_t ival = -1; + PyObject *iobj = _PyNumber_Index(args[0]); + if (iobj != NULL) { + ival = PyLong_AsSsize_t(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + index = ival; + } + value = args[1]; + return_value = deque_insert_impl(deque, index, value); + +exit: + return return_value; +} + +PyDoc_STRVAR(deque_remove__doc__, +"remove($self, value, /)\n" +"--\n" +"\n" +"Remove first occurrence of value."); + +#define DEQUE_REMOVE_METHODDEF \ + {"remove", 
(PyCFunction)deque_remove, METH_O, deque_remove__doc__}, + +PyDoc_STRVAR(deque___reduce____doc__, +"__reduce__($self, /)\n" +"--\n" +"\n" +"Return state information for pickling."); + +#define DEQUE___REDUCE___METHODDEF \ + {"__reduce__", (PyCFunction)deque___reduce__, METH_NOARGS, deque___reduce____doc__}, + +static PyObject * +deque___reduce___impl(dequeobject *deque); + +static PyObject * +deque___reduce__(dequeobject *deque, PyObject *Py_UNUSED(ignored)) +{ + return deque___reduce___impl(deque); +} + +PyDoc_STRVAR(deque_init__doc__, +"deque([iterable[, maxlen]])\n" +"--\n" +"\n" +"A list-like sequence optimized for data accesses near its endpoints."); + +static int +deque_init_impl(dequeobject *deque, PyObject *iterable, PyObject *maxlenobj); + +static int +deque_init(PyObject *deque, PyObject *args, PyObject *kwargs) +{ + int return_value = -1; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(iterable), &_Py_ID(maxlen), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"iterable", "maxlen", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "deque", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + PyObject * const *fastargs; + Py_ssize_t nargs = PyTuple_GET_SIZE(args); + Py_ssize_t noptargs = nargs + (kwargs ? 
PyDict_GET_SIZE(kwargs) : 0) - 0; + PyObject *iterable = NULL; + PyObject *maxlenobj = NULL; + + fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, 0, 2, 0, argsbuf); + if (!fastargs) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + if (fastargs[0]) { + iterable = fastargs[0]; + if (!--noptargs) { + goto skip_optional_pos; + } + } + maxlenobj = fastargs[1]; +skip_optional_pos: + return_value = deque_init_impl((dequeobject *)deque, iterable, maxlenobj); + +exit: + return return_value; +} + +PyDoc_STRVAR(deque___sizeof____doc__, +"__sizeof__($self, /)\n" +"--\n" +"\n" +"Return the size of the deque in memory, in bytes."); + +#define DEQUE___SIZEOF___METHODDEF \ + {"__sizeof__", (PyCFunction)deque___sizeof__, METH_NOARGS, deque___sizeof____doc__}, + +static PyObject * +deque___sizeof___impl(dequeobject *deque); + +static PyObject * +deque___sizeof__(dequeobject *deque, PyObject *Py_UNUSED(ignored)) +{ + return deque___sizeof___impl(deque); +} + +PyDoc_STRVAR(deque___reversed____doc__, +"__reversed__($self, /)\n" +"--\n" +"\n" +"Return a reverse iterator over the deque."); + +#define DEQUE___REVERSED___METHODDEF \ + {"__reversed__", (PyCFunction)deque___reversed__, METH_NOARGS, deque___reversed____doc__}, + +static PyObject * +deque___reversed___impl(dequeobject *deque); + +static PyObject * +deque___reversed__(dequeobject *deque, PyObject *Py_UNUSED(ignored)) +{ + return deque___reversed___impl(deque); +} + PyDoc_STRVAR(_collections__count_elements__doc__, "_count_elements($module, mapping, iterable, /)\n" "--\n" @@ -72,4 +488,4 @@ tuplegetter_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) exit: return return_value; } -/*[clinic end generated code: output=c896a72f8c45930d input=a9049054013a1b77]*/ +/*[clinic end generated code: output=3633a5cbc23e8440 input=a9049054013a1b77]*/ From 15fe8cea174772060b24c96d335a498aba3b8ed4 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Mon, 29 Jan 2024 
16:45:31 +0100 Subject: [PATCH 147/160] gh-91325: Skip Stable ABI checks with Py_TRACE_REFS special build (GH-92046) Skip Stable ABI checks with Py_TRACE_REFS special build This build is not compatible with Py_LIMITED_API nor with the stable ABI. --- Lib/test/test_stable_abi_ctypes.py | 36 +++++++++++++++++++--------- Misc/stable_abi.toml | 4 ++++ Modules/_testcapi_feature_macros.inc | 9 +++++++ Tools/build/stable_abi.py | 19 ++++++++------- 4 files changed, 48 insertions(+), 20 deletions(-) diff --git a/Lib/test/test_stable_abi_ctypes.py b/Lib/test/test_stable_abi_ctypes.py index 4976ac3642bbe4..90d45272838420 100644 --- a/Lib/test/test_stable_abi_ctypes.py +++ b/Lib/test/test_stable_abi_ctypes.py @@ -9,6 +9,13 @@ from _testcapi import get_feature_macros feature_macros = get_feature_macros() + +# Stable ABI is incompatible with Py_TRACE_REFS builds due to PyObject +# layout differences. +# See https://github.com/python/cpython/issues/88299#issuecomment-1113366226 +if feature_macros['Py_TRACE_REFS']: + raise unittest.SkipTest("incompatible with Py_TRACE_REFS.") + ctypes_test = import_module('ctypes') class TestStableABIAvailability(unittest.TestCase): @@ -441,7 +448,9 @@ def test_windows_feature_macros(self): "PyModule_AddObjectRef", "PyModule_AddStringConstant", "PyModule_AddType", + "PyModule_Create2", "PyModule_ExecDef", + "PyModule_FromDefAndSpec2", "PyModule_GetDef", "PyModule_GetDict", "PyModule_GetFilename", @@ -911,6 +920,13 @@ def test_windows_feature_macros(self): "_Py_TrueStruct", "_Py_VaBuildValue_SizeT", ) +if feature_macros['HAVE_FORK']: + SYMBOL_NAMES += ( + 'PyOS_AfterFork', + 'PyOS_AfterFork_Child', + 'PyOS_AfterFork_Parent', + 'PyOS_BeforeFork', + ) if feature_macros['MS_WINDOWS']: SYMBOL_NAMES += ( 'PyErr_SetExcFromWindowsErr', @@ -926,17 +942,6 @@ def test_windows_feature_macros(self): 'PyUnicode_DecodeMBCSStateful', 'PyUnicode_EncodeCodePage', ) -if feature_macros['HAVE_FORK']: - SYMBOL_NAMES += ( - 'PyOS_AfterFork', - 'PyOS_AfterFork_Child', - 
'PyOS_AfterFork_Parent', - 'PyOS_BeforeFork', - ) -if feature_macros['USE_STACKCHECK']: - SYMBOL_NAMES += ( - 'PyOS_CheckStack', - ) if feature_macros['PY_HAVE_THREAD_NATIVE_ID']: SYMBOL_NAMES += ( 'PyThread_get_thread_native_id', @@ -946,14 +951,23 @@ def test_windows_feature_macros(self): '_Py_NegativeRefcount', '_Py_RefTotal', ) +if feature_macros['Py_TRACE_REFS']: + SYMBOL_NAMES += ( + ) +if feature_macros['USE_STACKCHECK']: + SYMBOL_NAMES += ( + 'PyOS_CheckStack', + ) EXPECTED_FEATURE_MACROS = set(['HAVE_FORK', 'MS_WINDOWS', 'PY_HAVE_THREAD_NATIVE_ID', 'Py_REF_DEBUG', + 'Py_TRACE_REFS', 'USE_STACKCHECK']) WINDOWS_FEATURE_MACROS = {'HAVE_FORK': False, 'MS_WINDOWS': True, 'PY_HAVE_THREAD_NATIVE_ID': True, 'Py_REF_DEBUG': 'maybe', + 'Py_TRACE_REFS': 'maybe', 'USE_STACKCHECK': 'maybe'} diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml index 22b25dd0ec141f..2e6b0fff9cd770 100644 --- a/Misc/stable_abi.toml +++ b/Misc/stable_abi.toml @@ -78,6 +78,10 @@ [feature_macro.Py_REF_DEBUG] doc = 'when Python is compiled in debug mode (with Py_REF_DEBUG)' windows = 'maybe' +[feature_macro.Py_TRACE_REFS] + # nb. This mode is not compatible with Stable ABI/Limited API. 
+ doc = 'when Python is compiled with Py_TRACE_REFS' + windows = 'maybe' # Mentioned in PEP 384: diff --git a/Modules/_testcapi_feature_macros.inc b/Modules/_testcapi_feature_macros.inc index a076e714980074..f5f3524f2c0177 100644 --- a/Modules/_testcapi_feature_macros.inc +++ b/Modules/_testcapi_feature_macros.inc @@ -38,6 +38,15 @@ if (res) { Py_DECREF(result); return NULL; } +#ifdef Py_TRACE_REFS + res = PyDict_SetItemString(result, "Py_TRACE_REFS", Py_True); +#else + res = PyDict_SetItemString(result, "Py_TRACE_REFS", Py_False); +#endif +if (res) { + Py_DECREF(result); return NULL; +} + #ifdef USE_STACKCHECK res = PyDict_SetItemString(result, "USE_STACKCHECK", Py_True); #else diff --git a/Tools/build/stable_abi.py b/Tools/build/stable_abi.py index 85c437d521a15a..83146622c74f94 100644 --- a/Tools/build/stable_abi.py +++ b/Tools/build/stable_abi.py @@ -278,6 +278,13 @@ def gen_ctypes_test(manifest, args, outfile): from _testcapi import get_feature_macros feature_macros = get_feature_macros() + + # Stable ABI is incompatible with Py_TRACE_REFS builds due to PyObject + # layout differences. + # See https://github.com/python/cpython/issues/88299#issuecomment-1113366226 + if feature_macros['Py_TRACE_REFS']: + raise unittest.SkipTest("incompatible with Py_TRACE_REFS.") + ctypes_test = import_module('ctypes') class TestStableABIAvailability(unittest.TestCase): @@ -308,16 +315,11 @@ def test_windows_feature_macros(self): {'function', 'data'}, include_abi_only=True, ) - optional_items = {} + feature_macros = list(manifest.select({'feature_macro'})) + optional_items = {m.name: [] for m in feature_macros} for item in items: - if item.name in ( - # Some symbols aren't exported on all platforms. 
- # This is a bug: https://bugs.python.org/issue44133 - 'PyModule_Create2', 'PyModule_FromDefAndSpec2', - ): - continue if item.ifdef: - optional_items.setdefault(item.ifdef, []).append(item.name) + optional_items[item.ifdef].append(item.name) else: write(f' "{item.name}",') write(")") @@ -328,7 +330,6 @@ def test_windows_feature_macros(self): write(f" {name!r},") write(" )") write("") - feature_macros = list(manifest.select({'feature_macro'})) feature_names = sorted(m.name for m in feature_macros) write(f"EXPECTED_FEATURE_MACROS = set({pprint.pformat(feature_names)})") From 0f54ee4c6cdba74492183eb2dd142393c7dba403 Mon Sep 17 00:00:00 2001 From: Steven Ward Date: Mon, 29 Jan 2024 11:00:15 -0500 Subject: [PATCH 148/160] Remove limit in calendar CLI help message for year arg (GH-114719) The limit was removed in 66c88ce30ca2b23daa37038e1a3c0de98f241f50 (GH-4109). --- Lib/calendar.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/calendar.py b/Lib/calendar.py index 03469d8ac96bcd..3c79540f986b63 100644 --- a/Lib/calendar.py +++ b/Lib/calendar.py @@ -737,7 +737,7 @@ def main(args=None): parser.add_argument( "year", nargs='?', type=int, - help="year number (1-9999)" + help="year number" ) parser.add_argument( "month", From e351ca3c205860e94cad5da25c74bd76933f5f11 Mon Sep 17 00:00:00 2001 From: Soumendra Ganguly <67527439+8vasu@users.noreply.github.com> Date: Mon, 29 Jan 2024 17:10:28 +0100 Subject: [PATCH 149/160] gh-85984: Add POSIX pseudo-terminal functions. (GH-102413) Signed-off-by: Soumendra Ganguly Co-authored-by: Gregory P. 
Smith Co-authored-by: Petr Viktorin --- Doc/conf.py | 5 + Doc/library/os.rst | 59 ++++++ Lib/test/test_os.py | 45 ++++- ...3-03-15-03-21-18.gh-issue-85984.Xaq6ZN.rst | 2 + Modules/clinic/posixmodule.c.h | 168 +++++++++++++++++- Modules/posixmodule.c | 147 +++++++++++++++ configure | 30 ++++ configure.ac | 8 +- pyconfig.h.in | 15 ++ 9 files changed, 468 insertions(+), 11 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-03-15-03-21-18.gh-issue-85984.Xaq6ZN.rst diff --git a/Doc/conf.py b/Doc/conf.py index a96e7787d167a3..e12128ad356e1b 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -97,12 +97,16 @@ ('c:func', 'free'), ('c:func', 'gettimeofday'), ('c:func', 'gmtime'), + ('c:func', 'grantpt'), ('c:func', 'localeconv'), ('c:func', 'localtime'), ('c:func', 'main'), ('c:func', 'malloc'), ('c:func', 'mktime'), + ('c:func', 'posix_openpt'), ('c:func', 'printf'), + ('c:func', 'ptsname'), + ('c:func', 'ptsname_r'), ('c:func', 'realloc'), ('c:func', 'snprintf'), ('c:func', 'sprintf'), @@ -110,6 +114,7 @@ ('c:func', 'strftime'), ('c:func', 'system'), ('c:func', 'time'), + ('c:func', 'unlockpt'), ('c:func', 'vsnprintf'), # Standard C types ('c:type', 'FILE'), diff --git a/Doc/library/os.rst b/Doc/library/os.rst index 0008ec6a40c76f..cc9f3e75a80c51 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -1122,6 +1122,20 @@ as internal buffering of data. .. versionchanged:: 3.12 Added support for pipes on Windows. + +.. function:: grantpt(fd, /) + + Grant access to the slave pseudo-terminal device associated with the + master pseudo-terminal device to which the file descriptor *fd* refers. + The file descriptor *fd* is not closed upon failure. + + Calls the C standard library function :c:func:`grantpt`. + + .. availability:: Unix, not Emscripten, not WASI. + + .. versionadded:: 3.13 + + .. function:: isatty(fd, /) Return ``True`` if the file descriptor *fd* is open and connected to a @@ -1429,6 +1443,23 @@ or `the MSDN `_ on Windo .. versionadded:: 3.3 +.. 
function:: posix_openpt(oflag, /) + + Open and return a file descriptor for a master pseudo-terminal device. + + Calls the C standard library function :c:func:`posix_openpt`. The *oflag* + argument is used to set file status flags and file access modes as + specified in the manual page of :c:func:`posix_openpt` of your system. + + The returned file descriptor is :ref:`non-inheritable <fd_inheritance>`. + If the value :data:`O_CLOEXEC` is available on the system, it is added to + *oflag*. + + .. availability:: Unix, not Emscripten, not WASI. + + .. versionadded:: 3.13 + + .. function:: preadv(fd, buffers, offset, flags=0, /) Read from a file descriptor *fd* at a position of *offset* into mutable @@ -1486,6 +1517,21 @@ or `the MSDN `_ on Windo .. versionadded:: 3.7 +.. function:: ptsname(fd, /) + + Return the name of the slave pseudo-terminal device associated with the + master pseudo-terminal device to which the file descriptor *fd* refers. + The file descriptor *fd* is not closed upon failure. + + Calls the reentrant C standard library function :c:func:`ptsname_r` if + it is available; otherwise, the C standard library function + :c:func:`ptsname`, which is not guaranteed to be thread-safe, is called. + + .. availability:: Unix, not Emscripten, not WASI. + + .. versionadded:: 3.13 + + .. function:: pwrite(fd, str, offset, /) Write the bytestring in *str* to file descriptor *fd* at position of @@ -1738,6 +1784,19 @@ or `the MSDN `_ on Windo .. availability:: Unix. +.. function:: unlockpt(fd, /) + + Unlock the slave pseudo-terminal device associated with the master + pseudo-terminal device to which the file descriptor *fd* refers. + The file descriptor *fd* is not closed upon failure. + + Calls the C standard library function :c:func:`unlockpt`. + + .. availability:: Unix, not Emscripten, not WASI. + + .. versionadded:: 3.13 + + .. function:: write(fd, str, /) Write the bytestring in *str* to file descriptor *fd*. 
diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index e6f0dfde8cb4ae..ed79a2c24ef30b 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -4536,13 +4536,46 @@ def test_dup2(self): self.assertEqual(os.dup2(fd, fd3, inheritable=False), fd3) self.assertFalse(os.get_inheritable(fd3)) - @unittest.skipUnless(hasattr(os, 'openpty'), "need os.openpty()") +@unittest.skipUnless(hasattr(os, 'openpty'), "need os.openpty()") +class PseudoterminalTests(unittest.TestCase): + def open_pty(self): + """Open a pty fd-pair, and schedule cleanup for it""" + main_fd, second_fd = os.openpty() + self.addCleanup(os.close, main_fd) + self.addCleanup(os.close, second_fd) + return main_fd, second_fd + def test_openpty(self): - master_fd, slave_fd = os.openpty() - self.addCleanup(os.close, master_fd) - self.addCleanup(os.close, slave_fd) - self.assertEqual(os.get_inheritable(master_fd), False) - self.assertEqual(os.get_inheritable(slave_fd), False) + main_fd, second_fd = self.open_pty() + self.assertEqual(os.get_inheritable(main_fd), False) + self.assertEqual(os.get_inheritable(second_fd), False) + + @unittest.skipUnless(hasattr(os, 'ptsname'), "need os.ptsname()") + @unittest.skipUnless(hasattr(os, 'O_RDWR'), "need os.O_RDWR") + @unittest.skipUnless(hasattr(os, 'O_NOCTTY'), "need os.O_NOCTTY") + def test_open_via_ptsname(self): + main_fd, second_fd = self.open_pty() + second_path = os.ptsname(main_fd) + reopened_second_fd = os.open(second_path, os.O_RDWR|os.O_NOCTTY) + self.addCleanup(os.close, reopened_second_fd) + os.write(reopened_second_fd, b'foo') + self.assertEqual(os.read(main_fd, 3), b'foo') + + @unittest.skipUnless(hasattr(os, 'posix_openpt'), "need os.posix_openpt()") + @unittest.skipUnless(hasattr(os, 'grantpt'), "need os.grantpt()") + @unittest.skipUnless(hasattr(os, 'unlockpt'), "need os.unlockpt()") + @unittest.skipUnless(hasattr(os, 'ptsname'), "need os.ptsname()") + @unittest.skipUnless(hasattr(os, 'O_RDWR'), "need os.O_RDWR") + @unittest.skipUnless(hasattr(os, 
'O_NOCTTY'), "need os.O_NOCTTY") + def test_posix_pty_functions(self): + mother_fd = os.posix_openpt(os.O_RDWR|os.O_NOCTTY) + self.addCleanup(os.close, mother_fd) + os.grantpt(mother_fd) + os.unlockpt(mother_fd) + son_path = os.ptsname(mother_fd) + son_fd = os.open(son_path, os.O_RDWR|os.O_NOCTTY) + self.addCleanup(os.close, son_fd) + self.assertEqual(os.ptsname(mother_fd), os.ttyname(son_fd)) @unittest.skipUnless(hasattr(os, 'spawnl'), "need os.openpty()") def test_pipe_spawnl(self): diff --git a/Misc/NEWS.d/next/Library/2023-03-15-03-21-18.gh-issue-85984.Xaq6ZN.rst b/Misc/NEWS.d/next/Library/2023-03-15-03-21-18.gh-issue-85984.Xaq6ZN.rst new file mode 100644 index 00000000000000..0e54a1fe3c8a1c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-03-15-03-21-18.gh-issue-85984.Xaq6ZN.rst @@ -0,0 +1,2 @@ +Add POSIX pseudo-terminal functions :func:`os.posix_openpt`, +:func:`os.grantpt`, :func:`os.unlockpt`, and :func:`os.ptsname`. diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index ba3e1cfa8dbc21..1373bdef03ba5e 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -4465,6 +4465,156 @@ os_sched_getaffinity(PyObject *module, PyObject *arg) #endif /* defined(HAVE_SCHED_H) && defined(HAVE_SCHED_SETAFFINITY) */ +#if defined(HAVE_POSIX_OPENPT) + +PyDoc_STRVAR(os_posix_openpt__doc__, +"posix_openpt($module, oflag, /)\n" +"--\n" +"\n" +"Open and return a file descriptor for a master pseudo-terminal device.\n" +"\n" +"Performs a posix_openpt() C function call. 
The oflag argument is used to\n" +"set file status flags and file access modes as specified in the manual page\n" +"of posix_openpt() of your system."); + +#define OS_POSIX_OPENPT_METHODDEF \ + {"posix_openpt", (PyCFunction)os_posix_openpt, METH_O, os_posix_openpt__doc__}, + +static int +os_posix_openpt_impl(PyObject *module, int oflag); + +static PyObject * +os_posix_openpt(PyObject *module, PyObject *arg) +{ + PyObject *return_value = NULL; + int oflag; + int _return_value; + + oflag = PyLong_AsInt(arg); + if (oflag == -1 && PyErr_Occurred()) { + goto exit; + } + _return_value = os_posix_openpt_impl(module, oflag); + if ((_return_value == -1) && PyErr_Occurred()) { + goto exit; + } + return_value = PyLong_FromLong((long)_return_value); + +exit: + return return_value; +} + +#endif /* defined(HAVE_POSIX_OPENPT) */ + +#if defined(HAVE_GRANTPT) + +PyDoc_STRVAR(os_grantpt__doc__, +"grantpt($module, fd, /)\n" +"--\n" +"\n" +"Grant access to the slave pseudo-terminal device.\n" +"\n" +" fd\n" +" File descriptor of a master pseudo-terminal device.\n" +"\n" +"Performs a grantpt() C function call."); + +#define OS_GRANTPT_METHODDEF \ + {"grantpt", (PyCFunction)os_grantpt, METH_O, os_grantpt__doc__}, + +static PyObject * +os_grantpt_impl(PyObject *module, int fd); + +static PyObject * +os_grantpt(PyObject *module, PyObject *arg) +{ + PyObject *return_value = NULL; + int fd; + + if (!_PyLong_FileDescriptor_Converter(arg, &fd)) { + goto exit; + } + return_value = os_grantpt_impl(module, fd); + +exit: + return return_value; +} + +#endif /* defined(HAVE_GRANTPT) */ + +#if defined(HAVE_UNLOCKPT) + +PyDoc_STRVAR(os_unlockpt__doc__, +"unlockpt($module, fd, /)\n" +"--\n" +"\n" +"Unlock a pseudo-terminal master/slave pair.\n" +"\n" +" fd\n" +" File descriptor of a master pseudo-terminal device.\n" +"\n" +"Performs an unlockpt() C function call."); + +#define OS_UNLOCKPT_METHODDEF \ + {"unlockpt", (PyCFunction)os_unlockpt, METH_O, os_unlockpt__doc__}, + +static PyObject * 
+os_unlockpt_impl(PyObject *module, int fd); + +static PyObject * +os_unlockpt(PyObject *module, PyObject *arg) +{ + PyObject *return_value = NULL; + int fd; + + if (!_PyLong_FileDescriptor_Converter(arg, &fd)) { + goto exit; + } + return_value = os_unlockpt_impl(module, fd); + +exit: + return return_value; +} + +#endif /* defined(HAVE_UNLOCKPT) */ + +#if (defined(HAVE_PTSNAME) || defined(HAVE_PTSNAME_R)) + +PyDoc_STRVAR(os_ptsname__doc__, +"ptsname($module, fd, /)\n" +"--\n" +"\n" +"Return the name of the slave pseudo-terminal device.\n" +"\n" +" fd\n" +" File descriptor of a master pseudo-terminal device.\n" +"\n" +"If the ptsname_r() C function is available, it is called;\n" +"otherwise, performs a ptsname() C function call."); + +#define OS_PTSNAME_METHODDEF \ + {"ptsname", (PyCFunction)os_ptsname, METH_O, os_ptsname__doc__}, + +static PyObject * +os_ptsname_impl(PyObject *module, int fd); + +static PyObject * +os_ptsname(PyObject *module, PyObject *arg) +{ + PyObject *return_value = NULL; + int fd; + + if (!_PyLong_FileDescriptor_Converter(arg, &fd)) { + goto exit; + } + return_value = os_ptsname_impl(module, fd); + +exit: + return return_value; +} + +#endif /* (defined(HAVE_PTSNAME) || defined(HAVE_PTSNAME_R)) */ + #if (defined(HAVE_OPENPTY) || defined(HAVE__GETPTY) || defined(HAVE_DEV_PTMX)) PyDoc_STRVAR(os_openpty__doc__, @@ -11991,6 +12141,22 @@ os__supports_virtual_terminal(PyObject *module, PyObject *Py_UNUSED(ignored)) #define OS_SCHED_GETAFFINITY_METHODDEF #endif /* !defined(OS_SCHED_GETAFFINITY_METHODDEF) */ +#ifndef OS_POSIX_OPENPT_METHODDEF + #define OS_POSIX_OPENPT_METHODDEF +#endif /* !defined(OS_POSIX_OPENPT_METHODDEF) */ + +#ifndef OS_GRANTPT_METHODDEF + #define OS_GRANTPT_METHODDEF +#endif /* !defined(OS_GRANTPT_METHODDEF) */ + +#ifndef OS_UNLOCKPT_METHODDEF + #define OS_UNLOCKPT_METHODDEF +#endif /* !defined(OS_UNLOCKPT_METHODDEF) */ + +#ifndef OS_PTSNAME_METHODDEF + #define OS_PTSNAME_METHODDEF +#endif /* !defined(OS_PTSNAME_METHODDEF) */ + 
#ifndef OS_OPENPTY_METHODDEF #define OS_OPENPTY_METHODDEF #endif /* !defined(OS_OPENPTY_METHODDEF) */ @@ -12422,4 +12588,4 @@ os__supports_virtual_terminal(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #define OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #endif /* !defined(OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF) */ -/*[clinic end generated code: output=18c128534c355d84 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=43e4e557c771358a input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 007fc1cb116f84..40ff131b119d66 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -8358,6 +8358,149 @@ os_sched_getaffinity_impl(PyObject *module, pid_t pid) #endif /* HAVE_SCHED_H */ +#ifdef HAVE_POSIX_OPENPT +/*[clinic input] +os.posix_openpt -> int + + oflag: int + / + +Open and return a file descriptor for a master pseudo-terminal device. + +Performs a posix_openpt() C function call. The oflag argument is used to +set file status flags and file access modes as specified in the manual page +of posix_openpt() of your system. +[clinic start generated code]*/ + +static int +os_posix_openpt_impl(PyObject *module, int oflag) +/*[clinic end generated code: output=ee0bc2624305fc79 input=0de33d0e29693caa]*/ +{ + int fd; + +#if defined(O_CLOEXEC) + oflag |= O_CLOEXEC; +#endif + + fd = posix_openpt(oflag); + if (fd == -1) { + posix_error(); + return -1; + } + + // Just in case, likely a no-op given O_CLOEXEC above. + if (_Py_set_inheritable(fd, 0, NULL) < 0) { + close(fd); + return -1; + } + + return fd; +} +#endif /* HAVE_POSIX_OPENPT */ + +#ifdef HAVE_GRANTPT +/*[clinic input] +os.grantpt + + fd: fildes + File descriptor of a master pseudo-terminal device. + / + +Grant access to the slave pseudo-terminal device. + +Performs a grantpt() C function call. 
+[clinic start generated code]*/ + +static PyObject * +os_grantpt_impl(PyObject *module, int fd) +/*[clinic end generated code: output=dfd580015cf548ab input=0668e3b96760e849]*/ +{ + int ret; + int saved_errno; + PyOS_sighandler_t sig_saved; + + sig_saved = PyOS_setsig(SIGCHLD, SIG_DFL); + + ret = grantpt(fd); + if (ret == -1) + saved_errno = errno; + + PyOS_setsig(SIGCHLD, sig_saved); + + if (ret == -1) { + errno = saved_errno; + return posix_error(); + } + + Py_RETURN_NONE; +} +#endif /* HAVE_GRANTPT */ + +#ifdef HAVE_UNLOCKPT +/*[clinic input] +os.unlockpt + + fd: fildes + File descriptor of a master pseudo-terminal device. + / + +Unlock a pseudo-terminal master/slave pair. + +Performs an unlockpt() C function call. +[clinic start generated code]*/ + +static PyObject * +os_unlockpt_impl(PyObject *module, int fd) +/*[clinic end generated code: output=e08d354dec12d30c input=de7ab1f59f69a2b4]*/ +{ + if (unlockpt(fd) == -1) + return posix_error(); + + Py_RETURN_NONE; +} +#endif /* HAVE_UNLOCKPT */ + +#if defined(HAVE_PTSNAME) || defined(HAVE_PTSNAME_R) +/*[clinic input] +os.ptsname + + fd: fildes + File descriptor of a master pseudo-terminal device. + / + +Return the name of the slave pseudo-terminal device. + +If the ptsname_r() C function is available, it is called; +otherwise, performs a ptsname() C function call. +[clinic start generated code]*/ + +static PyObject * +os_ptsname_impl(PyObject *module, int fd) +/*[clinic end generated code: output=ef300fadc5675872 input=1369ccc0546f3130]*/ +{ +#ifdef HAVE_PTSNAME_R + int ret; + char name[MAXPATHLEN+1]; + + ret = ptsname_r(fd, name, sizeof(name)); + if (ret != 0) { + errno = ret; + return posix_error(); + } +#else + char *name; + + name = ptsname(fd); + /* POSIX manpage: Upon failure, ptsname() shall return a null pointer and may set errno. + *MAY* set errno? Hmm... 
*/ + if (name == NULL) + return posix_error(); +#endif /* HAVE_PTSNAME_R */ + + return PyUnicode_DecodeFSDefault(name); +} +#endif /* defined(HAVE_PTSNAME) || defined(HAVE_PTSNAME_R) */ + /* AIX uses /dev/ptc but is otherwise the same as /dev/ptmx */ #if defined(HAVE_DEV_PTC) && !defined(HAVE_DEV_PTMX) # define DEV_PTY_FILE "/dev/ptc" @@ -16275,6 +16418,10 @@ static PyMethodDef posix_methods[] = { OS_SCHED_YIELD_METHODDEF OS_SCHED_SETAFFINITY_METHODDEF OS_SCHED_GETAFFINITY_METHODDEF + OS_POSIX_OPENPT_METHODDEF + OS_GRANTPT_METHODDEF + OS_UNLOCKPT_METHODDEF + OS_PTSNAME_METHODDEF OS_OPENPTY_METHODDEF OS_LOGIN_TTY_METHODDEF OS_FORKPTY_METHODDEF diff --git a/configure b/configure index c563c3f5d3c7e6..adc5a8f014c795 100755 --- a/configure +++ b/configure @@ -17637,6 +17637,12 @@ if test "x$ac_cv_func_getwd" = xyes then : printf "%s\n" "#define HAVE_GETWD 1" >>confdefs.h +fi +ac_fn_c_check_func "$LINENO" "grantpt" "ac_cv_func_grantpt" +if test "x$ac_cv_func_grantpt" = xyes +then : + printf "%s\n" "#define HAVE_GRANTPT 1" >>confdefs.h + fi ac_fn_c_check_func "$LINENO" "if_nameindex" "ac_cv_func_if_nameindex" if test "x$ac_cv_func_if_nameindex" = xyes @@ -17823,6 +17829,12 @@ if test "x$ac_cv_func_posix_fallocate" = xyes then : printf "%s\n" "#define HAVE_POSIX_FALLOCATE 1" >>confdefs.h +fi +ac_fn_c_check_func "$LINENO" "posix_openpt" "ac_cv_func_posix_openpt" +if test "x$ac_cv_func_posix_openpt" = xyes +then : + printf "%s\n" "#define HAVE_POSIX_OPENPT 1" >>confdefs.h + fi ac_fn_c_check_func "$LINENO" "posix_spawn" "ac_cv_func_posix_spawn" if test "x$ac_cv_func_posix_spawn" = xyes @@ -17877,6 +17889,18 @@ if test "x$ac_cv_func_pthread_kill" = xyes then : printf "%s\n" "#define HAVE_PTHREAD_KILL 1" >>confdefs.h +fi +ac_fn_c_check_func "$LINENO" "ptsname" "ac_cv_func_ptsname" +if test "x$ac_cv_func_ptsname" = xyes +then : + printf "%s\n" "#define HAVE_PTSNAME 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "ptsname_r" "ac_cv_func_ptsname_r" +if test 
"x$ac_cv_func_ptsname_r" = xyes +then : + printf "%s\n" "#define HAVE_PTSNAME_R 1" >>confdefs.h + fi ac_fn_c_check_func "$LINENO" "pwrite" "ac_cv_func_pwrite" if test "x$ac_cv_func_pwrite" = xyes @@ -18285,6 +18309,12 @@ if test "x$ac_cv_func_unlinkat" = xyes then : printf "%s\n" "#define HAVE_UNLINKAT 1" >>confdefs.h +fi +ac_fn_c_check_func "$LINENO" "unlockpt" "ac_cv_func_unlockpt" +if test "x$ac_cv_func_unlockpt" = xyes +then : + printf "%s\n" "#define HAVE_UNLOCKPT 1" >>confdefs.h + fi ac_fn_c_check_func "$LINENO" "utimensat" "ac_cv_func_utimensat" if test "x$ac_cv_func_utimensat" = xyes diff --git a/configure.ac b/configure.ac index 13c46b3e80151d..f5fa17fd8b7d0d 100644 --- a/configure.ac +++ b/configure.ac @@ -4791,12 +4791,12 @@ AC_CHECK_FUNCS([ \ getgrnam_r getgrouplist getgroups gethostname getitimer getloadavg getlogin \ getpeername getpgid getpid getppid getpriority _getpty \ getpwent getpwnam_r getpwuid getpwuid_r getresgid getresuid getrusage getsid getspent \ - getspnam getuid getwd if_nameindex initgroups kill killpg lchown linkat \ + getspnam getuid getwd grantpt if_nameindex initgroups kill killpg lchown linkat \ lockf lstat lutimes madvise mbrtowc memrchr mkdirat mkfifo mkfifoat \ mknod mknodat mktime mmap mremap nice openat opendir pathconf pause pipe \ - pipe2 plock poll posix_fadvise posix_fallocate posix_spawn posix_spawnp \ + pipe2 plock poll posix_fadvise posix_fallocate posix_openpt posix_spawn posix_spawnp \ posix_spawn_file_actions_addclosefrom_np \ - pread preadv preadv2 pthread_condattr_setclock pthread_init pthread_kill \ + pread preadv preadv2 pthread_condattr_setclock pthread_init pthread_kill ptsname ptsname_r \ pwrite pwritev pwritev2 readlink readlinkat readv realpath renameat \ rtpSpawn sched_get_priority_max sched_rr_get_interval sched_setaffinity \ sched_setparam sched_setscheduler sem_clockwait sem_getvalue sem_open \ @@ -4806,7 +4806,7 @@ AC_CHECK_FUNCS([ \ sigfillset siginterrupt sigpending sigrelse sigtimedwait sigwait \ 
sigwaitinfo snprintf splice strftime strlcpy strsignal symlinkat sync \ sysconf system tcgetpgrp tcsetpgrp tempnam timegm times tmpfile \ - tmpnam tmpnam_r truncate ttyname umask uname unlinkat utimensat utimes vfork \ + tmpnam tmpnam_r truncate ttyname umask uname unlinkat unlockpt utimensat utimes vfork \ wait wait3 wait4 waitid waitpid wcscoll wcsftime wcsxfrm wmemcmp writev \ ]) diff --git a/pyconfig.h.in b/pyconfig.h.in index d8a9f68951afbd..02e33c7007196d 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -601,6 +601,9 @@ bcopy. */ #undef HAVE_GLIBC_MEMMOVE_BUG +/* Define to 1 if you have the `grantpt' function. */ +#undef HAVE_GRANTPT + /* Define to 1 if you have the header file. */ #undef HAVE_GRP_H @@ -899,6 +902,9 @@ /* Define to 1 if you have the `posix_fallocate' function. */ #undef HAVE_POSIX_FALLOCATE +/* Define to 1 if you have the `posix_openpt' function. */ +#undef HAVE_POSIX_OPENPT + /* Define to 1 if you have the `posix_spawn' function. */ #undef HAVE_POSIX_SPAWN @@ -951,6 +957,12 @@ /* Define if platform requires stubbed pthreads support */ #undef HAVE_PTHREAD_STUBS +/* Define to 1 if you have the `ptsname' function. */ +#undef HAVE_PTSNAME + +/* Define to 1 if you have the `ptsname_r' function. */ +#undef HAVE_PTSNAME_R + /* Define to 1 if you have the header file. */ #undef HAVE_PTY_H @@ -1459,6 +1471,9 @@ /* Define to 1 if you have the `unlinkat' function. */ #undef HAVE_UNLINKAT +/* Define to 1 if you have the `unlockpt' function. */ +#undef HAVE_UNLOCKPT + /* Define to 1 if you have the `unshare' function. */ #undef HAVE_UNSHARE From 39c766b579cabc71a4a50773d299d4350221a70b Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 29 Jan 2024 18:20:13 +0200 Subject: [PATCH 150/160] Fix more references to datetime and time classes (GH-114717) They could be confused with references to datetime and time modules. 
--- Doc/library/datetime.rst | 8 ++++---- Doc/library/mailbox.rst | 8 ++++---- Doc/whatsnew/3.8.rst | 4 ++-- Misc/NEWS.d/3.13.0a1.rst | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Doc/library/datetime.rst b/Doc/library/datetime.rst index 47ecb0ba331bdc..4ff049c8709289 100644 --- a/Doc/library/datetime.rst +++ b/Doc/library/datetime.rst @@ -1255,7 +1255,7 @@ Instance methods: ``tzinfo=None`` can be specified to create a naive datetime from an aware datetime with no conversion of date and time data. - :class:`datetime` objects are also supported by generic function + :class:`.datetime` objects are also supported by generic function :func:`copy.replace`. .. versionchanged:: 3.6 @@ -1678,7 +1678,7 @@ Usage of ``KabulTz`` from above:: :class:`.time` Objects ---------------------- -A :class:`time` object represents a (local) time of day, independent of any particular +A :class:`.time` object represents a (local) time of day, independent of any particular day, and subject to adjustment via a :class:`tzinfo` object. .. class:: time(hour=0, minute=0, second=0, microsecond=0, tzinfo=None, *, fold=0) @@ -1836,7 +1836,7 @@ Instance methods: ``tzinfo=None`` can be specified to create a naive :class:`.time` from an aware :class:`.time`, without conversion of the time data. - :class:`time` objects are also supported by generic function + :class:`.time` objects are also supported by generic function :func:`copy.replace`. .. versionchanged:: 3.6 @@ -2522,7 +2522,7 @@ information, which are supported in ``datetime.strptime`` but are discarded by ``time.strptime``. For :class:`.time` objects, the format codes for year, month, and day should not -be used, as :class:`time` objects have no such values. If they're used anyway, +be used, as :class:`!time` objects have no such values. If they're used anyway, ``1900`` is substituted for the year, and ``1`` for the month and day. 
For :class:`date` objects, the format codes for hours, minutes, seconds, and diff --git a/Doc/library/mailbox.rst b/Doc/library/mailbox.rst index fa5b273093f583..a613548c9e518e 100644 --- a/Doc/library/mailbox.rst +++ b/Doc/library/mailbox.rst @@ -1136,8 +1136,8 @@ When a :class:`!MaildirMessage` instance is created based upon a leading "From " or trailing newline. For convenience, *time_* may be specified and will be formatted appropriately and appended to *from_*. If *time_* is specified, it should be a :class:`time.struct_time` instance, a - tuple suitable for passing to :meth:`time.strftime`, or ``True`` (to use - :meth:`time.gmtime`). + tuple suitable for passing to :func:`time.strftime`, or ``True`` (to use + :func:`time.gmtime`). .. method:: get_flags() @@ -1508,8 +1508,8 @@ When a :class:`!BabylMessage` instance is created based upon an leading "From " or trailing newline. For convenience, *time_* may be specified and will be formatted appropriately and appended to *from_*. If *time_* is specified, it should be a :class:`time.struct_time` instance, a - tuple suitable for passing to :meth:`time.strftime`, or ``True`` (to use - :meth:`time.gmtime`). + tuple suitable for passing to :func:`time.strftime`, or ``True`` (to use + :func:`time.gmtime`). .. method:: get_flags() diff --git a/Doc/whatsnew/3.8.rst b/Doc/whatsnew/3.8.rst index 304d1b4ef4efe8..b041e592d61ed1 100644 --- a/Doc/whatsnew/3.8.rst +++ b/Doc/whatsnew/3.8.rst @@ -754,8 +754,8 @@ datetime -------- Added new alternate constructors :meth:`datetime.date.fromisocalendar` and -:meth:`datetime.datetime.fromisocalendar`, which construct :class:`date` and -:class:`datetime` objects respectively from ISO year, week number, and weekday; +:meth:`datetime.datetime.fromisocalendar`, which construct :class:`~datetime.date` and +:class:`~datetime.datetime` objects respectively from ISO year, week number, and weekday; these are the inverse of each class's ``isocalendar`` method. 
(Contributed by Paul Ganssle in :issue:`36004`.) diff --git a/Misc/NEWS.d/3.13.0a1.rst b/Misc/NEWS.d/3.13.0a1.rst index 102bddcee5c5c2..d385b6a4504f97 100644 --- a/Misc/NEWS.d/3.13.0a1.rst +++ b/Misc/NEWS.d/3.13.0a1.rst @@ -2276,7 +2276,7 @@ creation. .. nonce: m2H5Bk .. section: Library -Remove unnecessary extra ``__slots__`` in :py:class:`datetime`\'s pure +Remove unnecessary extra ``__slots__`` in :class:`~datetime.datetime`\'s pure python implementation to reduce memory size, as they are defined in the superclass. Patch by James Hilton-Balfe From 2c089b09ac0872e08d146c55ed60d754154761c3 Mon Sep 17 00:00:00 2001 From: Steven Ward Date: Mon, 29 Jan 2024 11:58:21 -0500 Subject: [PATCH 151/160] gh-112240: Add option to calendar module CLI to specify the weekday to start each week (GH-112241) --- Doc/library/calendar.rst | 8 +++++++- Lib/calendar.py | 7 +++++++ .../2023-11-18-16-30-21.gh-issue-112240.YXS0tj.rst | 2 ++ 3 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2023-11-18-16-30-21.gh-issue-112240.YXS0tj.rst diff --git a/Doc/library/calendar.rst b/Doc/library/calendar.rst index 6586f539a8da4f..c4dcf5641d6066 100644 --- a/Doc/library/calendar.rst +++ b/Doc/library/calendar.rst @@ -586,10 +586,16 @@ The following options are accepted: or as an HTML document. +.. option:: --first-weekday WEEKDAY, -f WEEKDAY + + The weekday to start each week. + Must be a number between 0 (Monday) and 6 (Sunday). + Defaults to 0. + + .. option:: year The year to print the calendar for. - Must be a number between 1 and 9999. Defaults to the current year. 
diff --git a/Lib/calendar.py b/Lib/calendar.py index 3c79540f986b63..833ce331b14a0c 100644 --- a/Lib/calendar.py +++ b/Lib/calendar.py @@ -734,6 +734,11 @@ def main(args=None): choices=("text", "html"), help="output type (text or html)" ) + parser.add_argument( + "-f", "--first-weekday", + type=int, default=0, + help="weekday (0 is Monday, 6 is Sunday) to start each week (default 0)" + ) parser.add_argument( "year", nargs='?', type=int, @@ -761,6 +766,7 @@ def main(args=None): cal = LocaleHTMLCalendar(locale=locale) else: cal = HTMLCalendar() + cal.setfirstweekday(options.first_weekday) encoding = options.encoding if encoding is None: encoding = sys.getdefaultencoding() @@ -775,6 +781,7 @@ def main(args=None): cal = LocaleTextCalendar(locale=locale) else: cal = TextCalendar() + cal.setfirstweekday(options.first_weekday) optdict = dict(w=options.width, l=options.lines) if options.month is None: optdict["c"] = options.spacing diff --git a/Misc/NEWS.d/next/Library/2023-11-18-16-30-21.gh-issue-112240.YXS0tj.rst b/Misc/NEWS.d/next/Library/2023-11-18-16-30-21.gh-issue-112240.YXS0tj.rst new file mode 100644 index 00000000000000..686f0311e80dcb --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-11-18-16-30-21.gh-issue-112240.YXS0tj.rst @@ -0,0 +1,2 @@ +Add option to calendar module CLI to specify the weekday to start each week. +Patch by Steven Ward. 
From 3d716655d22dc14e79ac0d30f33eef0a49efdac0 Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Mon, 29 Jan 2024 09:38:03 -0800 Subject: [PATCH 152/160] gh-112075: Use PyMem_* for allocating dict keys objects (#114543) Use PyMem_* for keys allocation --- Objects/dictobject.c | 66 +++++++++++++++----------------------------- 1 file changed, 23 insertions(+), 43 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index e608b91679b568..c5477ab15f8dc9 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -262,7 +262,7 @@ _PyDict_ClearFreeList(PyInterpreterState *interp) PyObject_GC_Del(op); } while (state->keys_numfree) { - PyObject_Free(state->keys_free_list[--state->keys_numfree]); + PyMem_Free(state->keys_free_list[--state->keys_numfree]); } #endif } @@ -332,6 +332,22 @@ dictkeys_decref(PyInterpreterState *interp, PyDictKeysObject *dk) _Py_DecRefTotal(_PyInterpreterState_GET()); #endif if (--dk->dk_refcnt == 0) { + if (DK_IS_UNICODE(dk)) { + PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(dk); + Py_ssize_t i, n; + for (i = 0, n = dk->dk_nentries; i < n; i++) { + Py_XDECREF(entries[i].me_key); + Py_XDECREF(entries[i].me_value); + } + } + else { + PyDictKeyEntry *entries = DK_ENTRIES(dk); + Py_ssize_t i, n; + for (i = 0, n = dk->dk_nentries; i < n; i++) { + Py_XDECREF(entries[i].me_key); + Py_XDECREF(entries[i].me_value); + } + } free_keys_object(interp, dk); } } @@ -640,9 +656,9 @@ new_keys_object(PyInterpreterState *interp, uint8_t log2_size, bool unicode) else #endif { - dk = PyObject_Malloc(sizeof(PyDictKeysObject) - + ((size_t)1 << log2_bytes) - + entry_size * usable); + dk = PyMem_Malloc(sizeof(PyDictKeysObject) + + ((size_t)1 << log2_bytes) + + entry_size * usable); if (dk == NULL) { PyErr_NoMemory(); return NULL; @@ -666,23 +682,6 @@ new_keys_object(PyInterpreterState *interp, uint8_t log2_size, bool unicode) static void free_keys_object(PyInterpreterState *interp, PyDictKeysObject *keys) { - assert(keys != Py_EMPTY_KEYS); - if 
(DK_IS_UNICODE(keys)) { - PyDictUnicodeEntry *entries = DK_UNICODE_ENTRIES(keys); - Py_ssize_t i, n; - for (i = 0, n = keys->dk_nentries; i < n; i++) { - Py_XDECREF(entries[i].me_key); - Py_XDECREF(entries[i].me_value); - } - } - else { - PyDictKeyEntry *entries = DK_ENTRIES(keys); - Py_ssize_t i, n; - for (i = 0, n = keys->dk_nentries; i < n; i++) { - Py_XDECREF(entries[i].me_key); - Py_XDECREF(entries[i].me_value); - } - } #if PyDict_MAXFREELIST > 0 struct _Py_dict_state *state = get_dict_state(interp); #ifdef Py_DEBUG @@ -697,7 +696,7 @@ free_keys_object(PyInterpreterState *interp, PyDictKeysObject *keys) return; } #endif - PyObject_Free(keys); + PyMem_Free(keys); } static inline PyDictValues* @@ -798,7 +797,7 @@ clone_combined_dict_keys(PyDictObject *orig) assert(orig->ma_keys->dk_refcnt == 1); size_t keys_size = _PyDict_KeysSize(orig->ma_keys); - PyDictKeysObject *keys = PyObject_Malloc(keys_size); + PyDictKeysObject *keys = PyMem_Malloc(keys_size); if (keys == NULL) { PyErr_NoMemory(); return NULL; @@ -1544,32 +1543,13 @@ dictresize(PyInterpreterState *interp, PyDictObject *mp, } } - // We can not use free_keys_object here because key's reference - // are moved already. 
if (oldkeys != Py_EMPTY_KEYS) { #ifdef Py_REF_DEBUG _Py_DecRefTotal(_PyInterpreterState_GET()); #endif assert(oldkeys->dk_kind != DICT_KEYS_SPLIT); assert(oldkeys->dk_refcnt == 1); -#if PyDict_MAXFREELIST > 0 - struct _Py_dict_state *state = get_dict_state(interp); -#ifdef Py_DEBUG - // dictresize() must not be called after _PyDict_Fini() - assert(state->keys_numfree != -1); -#endif - if (DK_LOG_SIZE(oldkeys) == PyDict_LOG_MINSIZE && - DK_IS_UNICODE(oldkeys) && - state->keys_numfree < PyDict_MAXFREELIST) - { - state->keys_free_list[state->keys_numfree++] = oldkeys; - OBJECT_STAT_INC(to_freelist); - } - else -#endif - { - PyObject_Free(oldkeys); - } + free_keys_object(interp, oldkeys); } } From 0cd9bacb8ad41fe86f95b326e9199caa749539eb Mon Sep 17 00:00:00 2001 From: Dino Viehland Date: Mon, 29 Jan 2024 09:47:54 -0800 Subject: [PATCH 153/160] gh-112075: Dictionary global version counter should use atomic increments (#114568) Dictionary global version counter should use atomic increments --- Include/internal/pycore_dict.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index d96870e9197bbf..b4e1f8cf1e320b 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -209,8 +209,14 @@ static inline PyDictUnicodeEntry* DK_UNICODE_ENTRIES(PyDictKeysObject *dk) { #define DICT_VERSION_INCREMENT (1 << DICT_MAX_WATCHERS) #define DICT_VERSION_MASK (DICT_VERSION_INCREMENT - 1) +#ifdef Py_GIL_DISABLED +#define DICT_NEXT_VERSION(INTERP) \ + (_Py_atomic_add_uint64(&(INTERP)->dict_state.global_version, DICT_VERSION_INCREMENT) + DICT_VERSION_INCREMENT) + +#else #define DICT_NEXT_VERSION(INTERP) \ ((INTERP)->dict_state.global_version += DICT_VERSION_INCREMENT) +#endif void _PyDict_SendEvent(int watcher_bits, From aa3402ad451777d8dd3ec560e14cb16dc8540c0e Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 29 Jan 2024 19:58:31 +0200 Subject: [PATCH 154/160] gh-114678: Fix incorrect 
deprecation warning for 'N' specifier in Decimal format (GH-114683) Co-authored-by: Stefan Krah --- Lib/test/test_decimal.py | 10 +++++++++- .../2024-01-28-19-40-40.gh-issue-114678.kYKcJw.rst | 3 +++ Modules/_decimal/_decimal.c | 14 ++++++++------ 3 files changed, 20 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-01-28-19-40-40.gh-issue-114678.kYKcJw.rst diff --git a/Lib/test/test_decimal.py b/Lib/test/test_decimal.py index 7a5fe62b467372..1423bc61c7f690 100644 --- a/Lib/test/test_decimal.py +++ b/Lib/test/test_decimal.py @@ -41,6 +41,7 @@ darwin_malloc_err_warning, is_emscripten) from test.support.import_helper import import_fresh_module from test.support import threading_helper +from test.support import warnings_helper import random import inspect import threading @@ -1237,7 +1238,14 @@ def test_deprecated_N_format(self): else: self.assertRaises(ValueError, format, h, 'N') self.assertRaises(ValueError, format, h, '010.3N') - + with warnings_helper.check_no_warnings(self): + self.assertEqual(format(h, 'N>10.3'), 'NN6.63E-34') + self.assertEqual(format(h, 'N>10.3n'), 'NN6.63e-34') + self.assertEqual(format(h, 'N>10.3e'), 'N6.626e-34') + self.assertEqual(format(h, 'N>10.3f'), 'NNNNN0.000') + self.assertRaises(ValueError, format, h, '>Nf') + self.assertRaises(ValueError, format, h, '10Nf') + self.assertRaises(ValueError, format, h, 'Nx') @run_with_locale('LC_ALL', 'ps_AF') def test_wide_char_separator_decimal_point(self): diff --git a/Misc/NEWS.d/next/Library/2024-01-28-19-40-40.gh-issue-114678.kYKcJw.rst b/Misc/NEWS.d/next/Library/2024-01-28-19-40-40.gh-issue-114678.kYKcJw.rst new file mode 100644 index 00000000000000..2306af4a39dcf6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-28-19-40-40.gh-issue-114678.kYKcJw.rst @@ -0,0 +1,3 @@ +Ensure that deprecation warning for 'N' specifier in :class:`~decimal.Decimal` +format is not raised for cases where 'N' appears in other places +in the format specifier. 
Based on patch by Stefan Krah. diff --git a/Modules/_decimal/_decimal.c b/Modules/_decimal/_decimal.c index 8b93f8e2cbcf0b..127f5f2887d4cd 100644 --- a/Modules/_decimal/_decimal.c +++ b/Modules/_decimal/_decimal.c @@ -3446,6 +3446,14 @@ dec_format(PyObject *dec, PyObject *args) if (fmt == NULL) { return NULL; } + + if (size > 0 && fmt[size-1] == 'N') { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "Format specifier 'N' is deprecated", 1) < 0) { + return NULL; + } + } + /* NOTE: If https://github.com/python/cpython/pull/29438 lands, the * format string manipulation below can be eliminated by enhancing * the forked mpd_parse_fmt_str(). */ @@ -3593,12 +3601,6 @@ dec_format(PyObject *dec, PyObject *args) if (replace_fillchar) { dec_replace_fillchar(decstring); } - if (strchr(fmt, 'N') != NULL) { - if (PyErr_WarnEx(PyExc_DeprecationWarning, - "Format specifier 'N' is deprecated", 1) < 0) { - goto finish; - } - } result = PyUnicode_DecodeUTF8(decstring, size, NULL); From 29952c86f3f8a972203a1ccd8381448efe145ada Mon Sep 17 00:00:00 2001 From: Matan Perelman Date: Mon, 29 Jan 2024 21:12:33 +0200 Subject: [PATCH 155/160] TaskGroup: Use explicit None check for cancellation error (#114708) --- Lib/asyncio/taskgroups.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/asyncio/taskgroups.py b/Lib/asyncio/taskgroups.py index e1c56d140bef7d..f322b1f6653f6a 100644 --- a/Lib/asyncio/taskgroups.py +++ b/Lib/asyncio/taskgroups.py @@ -132,7 +132,7 @@ async def __aexit__(self, et, exc, tb): # Propagate CancelledError if there is one, except if there # are other errors -- those have priority. - if propagate_cancellation_error and not self._errors: + if propagate_cancellation_error is not None and not self._errors: raise propagate_cancellation_error if et is not None and not issubclass(et, exceptions.CancelledError): From 53d921ed96e1c57b2e42f984d3a5ca8347fedb81 Mon Sep 17 00:00:00 2001 From: "Erlend E. 
Aasland" Date: Mon, 29 Jan 2024 21:48:49 +0100 Subject: [PATCH 156/160] gh-114569: Use PyMem_* APIs for non-PyObjects in unicodeobject.c (#114690) --- Objects/unicodeobject.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 4b03cc3f4da5fa..b236ddba9cdc69 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -996,7 +996,7 @@ resize_compact(PyObject *unicode, Py_ssize_t length) new_size = (struct_size + (length + 1) * char_size); if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) { - PyObject_Free(_PyUnicode_UTF8(unicode)); + PyMem_Free(_PyUnicode_UTF8(unicode)); _PyUnicode_UTF8(unicode) = NULL; _PyUnicode_UTF8_LENGTH(unicode) = 0; } @@ -1049,7 +1049,7 @@ resize_inplace(PyObject *unicode, Py_ssize_t length) if (!share_utf8 && _PyUnicode_HAS_UTF8_MEMORY(unicode)) { - PyObject_Free(_PyUnicode_UTF8(unicode)); + PyMem_Free(_PyUnicode_UTF8(unicode)); _PyUnicode_UTF8(unicode) = NULL; _PyUnicode_UTF8_LENGTH(unicode) = 0; } @@ -1590,10 +1590,10 @@ unicode_dealloc(PyObject *unicode) return; } if (_PyUnicode_HAS_UTF8_MEMORY(unicode)) { - PyObject_Free(_PyUnicode_UTF8(unicode)); + PyMem_Free(_PyUnicode_UTF8(unicode)); } if (!PyUnicode_IS_COMPACT(unicode) && _PyUnicode_DATA_ANY(unicode)) { - PyObject_Free(_PyUnicode_DATA_ANY(unicode)); + PyMem_Free(_PyUnicode_DATA_ANY(unicode)); } Py_TYPE(unicode)->tp_free(unicode); @@ -5203,7 +5203,7 @@ unicode_fill_utf8(PyObject *unicode) PyBytes_AS_STRING(writer.buffer); Py_ssize_t len = end - start; - char *cache = PyObject_Malloc(len + 1); + char *cache = PyMem_Malloc(len + 1); if (cache == NULL) { _PyBytesWriter_Dealloc(&writer); PyErr_NoMemory(); @@ -14674,7 +14674,7 @@ unicode_subtype_new(PyTypeObject *type, PyObject *unicode) PyErr_NoMemory(); goto onError; } - data = PyObject_Malloc((length + 1) * char_size); + data = PyMem_Malloc((length + 1) * char_size); if (data == NULL) { PyErr_NoMemory(); goto onError; From 
3996cbdd33a479b7e59757b81489cbb3370f85e5 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Mon, 29 Jan 2024 23:24:21 +0200 Subject: [PATCH 157/160] Set `hosted_on` for Read the Docs builds (#114697) --- Doc/conf.py | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/Doc/conf.py b/Doc/conf.py index e12128ad356e1b..c2d57696aeeaa3 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -6,7 +6,9 @@ # The contents of this file are pickled, so don't put values in the namespace # that aren't pickleable (module imports are okay, they're removed automatically). -import sys, os, time +import os +import sys +import time sys.path.append(os.path.abspath('tools/extensions')) sys.path.append(os.path.abspath('includes')) @@ -55,7 +57,7 @@ # General substitutions. project = 'Python' -copyright = '2001-%s, Python Software Foundation' % time.strftime('%Y') +copyright = f"2001-{time.strftime('%Y')}, Python Software Foundation" # We look for the Include/patchlevel.h file in the current Python source tree # and replace the values accordingly. @@ -302,6 +304,9 @@ 'root_include_title': False # We use the version switcher instead. } +if os.getenv("READTHEDOCS"): + html_theme_options["hosted_on"] = 'Read the Docs' + # Override stylesheet fingerprinting for Windows CHM htmlhelp to fix GH-91207 # https://github.com/python/cpython/issues/91207 if any('htmlhelp' in arg for arg in sys.argv): @@ -310,7 +315,7 @@ print("It may be removed in the future\n") # Short title used e.g. for HTML tags. -html_short_title = '%s Documentation' % release +html_short_title = f'{release} Documentation' # Deployment preview information # (See .readthedocs.yml and https://docs.readthedocs.io/en/stable/reference/environment-variables.html) @@ -359,12 +364,9 @@ latex_engine = 'xelatex' -# Get LaTeX to handle Unicode correctly latex_elements = { -} - -# Additional stuff for the LaTeX preamble. 
-latex_elements['preamble'] = r''' + # For the LaTeX preamble. + 'preamble': r''' \authoraddress{ \sphinxstrong{Python Software Foundation}\\ Email: \sphinxemail{docs@python.org} @@ -372,13 +374,12 @@ \let\Verbatim=\OriginalVerbatim \let\endVerbatim=\endOriginalVerbatim \setcounter{tocdepth}{2} -''' - -# The paper size ('letter' or 'a4'). -latex_elements['papersize'] = 'a4' - -# The font size ('10pt', '11pt' or '12pt'). -latex_elements['pointsize'] = '10pt' +''', + # The paper size ('letter' or 'a4'). + 'papersize': 'a4', + # The font size ('10pt', '11pt' or '12pt'). + 'pointsize': '10pt', +} # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, document class [howto/manual]). @@ -441,9 +442,9 @@ # Regexes to find C items in the source files. coverage_c_regexes = { - 'cfunction': (r'^PyAPI_FUNC\(.*\)\s+([^_][\w_]+)'), - 'data': (r'^PyAPI_DATA\(.*\)\s+([^_][\w_]+)'), - 'macro': (r'^#define ([^_][\w_]+)\(.*\)[\s|\\]'), + 'cfunction': r'^PyAPI_FUNC\(.*\)\s+([^_][\w_]+)', + 'data': r'^PyAPI_DATA\(.*\)\s+([^_][\w_]+)', + 'macro': r'^#define ([^_][\w_]+)\(.*\)[\s|\\]', } # The coverage checker will ignore all C items whose names match these regexes From 8612230c1cacab6d48bfbeb9e17d04ef5a9acf21 Mon Sep 17 00:00:00 2001 From: "Erlend E. 
Aasland" <erlend@python.org> Date: Tue, 30 Jan 2024 00:04:34 +0100 Subject: [PATCH 158/160] gh-114569: Use PyMem_* APIs for non-PyObjects in compiler (#114587) --- Python/compile.c | 25 ++++++++++++------------- Python/flowgraph.c | 6 +++--- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/Python/compile.c b/Python/compile.c index 7cf05dd0683119..4c1d3bb2d2b475 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -160,7 +160,7 @@ _PyCompile_EnsureArrayLargeEnough(int idx, void **array, int *alloc, if (idx >= new_alloc) { new_alloc = idx + default_alloc; } - arr = PyObject_Calloc(new_alloc, item_size); + arr = PyMem_Calloc(new_alloc, item_size); if (arr == NULL) { PyErr_NoMemory(); return ERROR; @@ -181,7 +181,7 @@ _PyCompile_EnsureArrayLargeEnough(int idx, void **array, int *alloc, } assert(newsize > 0); - void *tmp = PyObject_Realloc(arr, newsize); + void *tmp = PyMem_Realloc(arr, newsize); if (tmp == NULL) { PyErr_NoMemory(); return ERROR; @@ -282,10 +282,10 @@ instr_sequence_insert_instruction(instr_sequence *seq, int pos, static void instr_sequence_fini(instr_sequence *seq) { - PyObject_Free(seq->s_labelmap); + PyMem_Free(seq->s_labelmap); seq->s_labelmap = NULL; - PyObject_Free(seq->s_instrs); + PyMem_Free(seq->s_instrs); seq->s_instrs = NULL; } @@ -690,7 +690,7 @@ compiler_unit_free(struct compiler_unit *u) Py_CLEAR(u->u_metadata.u_cellvars); Py_CLEAR(u->u_metadata.u_fasthidden); Py_CLEAR(u->u_private); - PyObject_Free(u); + PyMem_Free(u); } static int @@ -1262,8 +1262,7 @@ compiler_enter_scope(struct compiler *c, identifier name, struct compiler_unit *u; - u = (struct compiler_unit *)PyObject_Calloc(1, sizeof( - struct compiler_unit)); + u = (struct compiler_unit *)PyMem_Calloc(1, sizeof(struct compiler_unit)); if (!u) { PyErr_NoMemory(); return ERROR; @@ -6657,7 +6656,7 @@ ensure_fail_pop(struct compiler *c, pattern_context *pc, Py_ssize_t n) return SUCCESS; } Py_ssize_t needed = sizeof(jump_target_label) * size; - jump_target_label 
*resized = PyObject_Realloc(pc->fail_pop, needed); + jump_target_label *resized = PyMem_Realloc(pc->fail_pop, needed); if (resized == NULL) { PyErr_NoMemory(); return ERROR; @@ -6696,13 +6695,13 @@ emit_and_reset_fail_pop(struct compiler *c, location loc, USE_LABEL(c, pc->fail_pop[pc->fail_pop_size]); if (codegen_addop_noarg(INSTR_SEQUENCE(c), POP_TOP, loc) < 0) { pc->fail_pop_size = 0; - PyObject_Free(pc->fail_pop); + PyMem_Free(pc->fail_pop); pc->fail_pop = NULL; return ERROR; } } USE_LABEL(c, pc->fail_pop[0]); - PyObject_Free(pc->fail_pop); + PyMem_Free(pc->fail_pop); pc->fail_pop = NULL; return SUCCESS; } @@ -7206,7 +7205,7 @@ compiler_pattern_or(struct compiler *c, pattern_ty p, pattern_context *pc) Py_DECREF(pc->stores); *pc = old_pc; Py_INCREF(pc->stores); - // Need to NULL this for the PyObject_Free call in the error block. + // Need to NULL this for the PyMem_Free call in the error block. old_pc.fail_pop = NULL; // No match. Pop the remaining copy of the subject and fail: if (codegen_addop_noarg(INSTR_SEQUENCE(c), POP_TOP, LOC(p)) < 0 || @@ -7252,7 +7251,7 @@ compiler_pattern_or(struct compiler *c, pattern_ty p, pattern_context *pc) diff: compiler_error(c, LOC(p), "alternative patterns bind different names"); error: - PyObject_Free(old_pc.fail_pop); + PyMem_Free(old_pc.fail_pop); Py_DECREF(old_pc.stores); Py_XDECREF(control); return ERROR; @@ -7453,7 +7452,7 @@ compiler_match(struct compiler *c, stmt_ty s) pattern_context pc; pc.fail_pop = NULL; int result = compiler_match_inner(c, s, &pc); - PyObject_Free(pc.fail_pop); + PyMem_Free(pc.fail_pop); return result; } diff --git a/Python/flowgraph.c b/Python/flowgraph.c index 96610b3cb11a43..bfc23a298ff492 100644 --- a/Python/flowgraph.c +++ b/Python/flowgraph.c @@ -162,7 +162,7 @@ basicblock_last_instr(const basicblock *b) { static basicblock * cfg_builder_new_block(cfg_builder *g) { - basicblock *b = (basicblock *)PyObject_Calloc(1, sizeof(basicblock)); + basicblock *b = (basicblock *)PyMem_Calloc(1, 
sizeof(basicblock)); if (b == NULL) { PyErr_NoMemory(); return NULL; @@ -437,10 +437,10 @@ _PyCfgBuilder_Free(cfg_builder *g) basicblock *b = g->g_block_list; while (b != NULL) { if (b->b_instr) { - PyObject_Free((void *)b->b_instr); + PyMem_Free((void *)b->b_instr); } basicblock *next = b->b_list; - PyObject_Free((void *)b); + PyMem_Free((void *)b); b = next; } PyMem_Free(g); From 742ba6081c92744ba30f16a0bb17ef9d9e809611 Mon Sep 17 00:00:00 2001 From: Brandt Bucher <brandtbucher@microsoft.com> Date: Mon, 29 Jan 2024 16:29:54 -0800 Subject: [PATCH 159/160] GH-113464: Make Brandt a codeowner for JIT stuff (GH-114739) --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index ae915423ece955..f4d0411504a832 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -21,6 +21,7 @@ configure* @erlend-aasland @corona10 **/*context* @1st1 **/*genobject* @markshannon **/*hamt* @1st1 +**/*jit* @brandtbucher Objects/set* @rhettinger Objects/dict* @methane @markshannon Objects/typevarobject.c @JelleZijlstra @@ -37,7 +38,6 @@ Python/ast_opt.c @isidentical Python/bytecodes.c @markshannon @gvanrossum Python/optimizer*.c @markshannon @gvanrossum Lib/test/test_patma.py @brandtbucher -Lib/test/test_peepholer.py @brandtbucher Lib/test/test_type_*.py @JelleZijlstra Lib/test/test_capi/test_misc.py @markshannon @gvanrossum Tools/c-analyzer/ @ericsnowcurrently From 963904335e579bfe39101adf3fd6a0cf705975ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sviatoslav=20Sydorenko=20=28=D0=A1=D0=B2=D1=8F=D1=82=D0=BE?= =?UTF-8?q?=D1=81=D0=BB=D0=B0=D0=B2=20=D0=A1=D0=B8=D0=B4=D0=BE=D1=80=D0=B5?= =?UTF-8?q?=D0=BD=D0=BA=D0=BE=29?= <wk@sydorenko.org.ua> Date: Tue, 30 Jan 2024 02:25:31 +0100 Subject: [PATCH 160/160] GH-80789: Get rid of the ``ensurepip`` infra for many wheels (#109245) Co-authored-by: vstinner@python.org Co-authored-by: Pradyun Gedam <pradyunsg@gmail.com> Co-authored-by: Adam Turner 
<9087854+aa-turner@users.noreply.github.com> --- Lib/ensurepip/__init__.py | 129 ++++++++++--------------- Lib/test/test_ensurepip.py | 46 ++++----- Tools/build/verify_ensurepip_wheels.py | 6 +- 3 files changed, 73 insertions(+), 108 deletions(-) diff --git a/Lib/ensurepip/__init__.py b/Lib/ensurepip/__init__.py index a09bf3201e1fb7..80ee125cfd4ed3 100644 --- a/Lib/ensurepip/__init__.py +++ b/Lib/ensurepip/__init__.py @@ -1,78 +1,64 @@ -import collections import os -import os.path import subprocess import sys import sysconfig import tempfile +from contextlib import nullcontext from importlib import resources +from pathlib import Path +from shutil import copy2 __all__ = ["version", "bootstrap"] -_PACKAGE_NAMES = ('pip',) _PIP_VERSION = "23.3.2" -_PROJECTS = [ - ("pip", _PIP_VERSION, "py3"), -] - -# Packages bundled in ensurepip._bundled have wheel_name set. -# Packages from WHEEL_PKG_DIR have wheel_path set. -_Package = collections.namedtuple('Package', - ('version', 'wheel_name', 'wheel_path')) # Directory of system wheel packages. Some Linux distribution packaging # policies recommend against bundling dependencies. For example, Fedora # installs wheel packages in the /usr/share/python-wheels/ directory and don't # install the ensurepip._bundled package. -_WHEEL_PKG_DIR = sysconfig.get_config_var('WHEEL_PKG_DIR') +if (_pkg_dir := sysconfig.get_config_var('WHEEL_PKG_DIR')) is not None: + _WHEEL_PKG_DIR = Path(_pkg_dir).resolve() +else: + _WHEEL_PKG_DIR = None + +def _find_wheel_pkg_dir_pip(): + if _WHEEL_PKG_DIR is None: + # NOTE: The compile-time `WHEEL_PKG_DIR` is unset so there is no place + # NOTE: for looking up the wheels. 
+ return None -def _find_packages(path): - packages = {} + dist_matching_wheels = _WHEEL_PKG_DIR.glob('pip-*.whl') try: - filenames = os.listdir(path) - except OSError: - # Ignore: path doesn't exist or permission error - filenames = () - # Make the code deterministic if a directory contains multiple wheel files - # of the same package, but don't attempt to implement correct version - # comparison since this case should not happen. - filenames = sorted(filenames) - for filename in filenames: - # filename is like 'pip-21.2.4-py3-none-any.whl' - if not filename.endswith(".whl"): - continue - for name in _PACKAGE_NAMES: - prefix = name + '-' - if filename.startswith(prefix): - break - else: - continue - - # Extract '21.2.4' from 'pip-21.2.4-py3-none-any.whl' - version = filename.removeprefix(prefix).partition('-')[0] - wheel_path = os.path.join(path, filename) - packages[name] = _Package(version, None, wheel_path) - return packages - - -def _get_packages(): - global _PACKAGES, _WHEEL_PKG_DIR - if _PACKAGES is not None: - return _PACKAGES - - packages = {} - for name, version, py_tag in _PROJECTS: - wheel_name = f"{name}-{version}-{py_tag}-none-any.whl" - packages[name] = _Package(version, wheel_name, None) - if _WHEEL_PKG_DIR: - dir_packages = _find_packages(_WHEEL_PKG_DIR) - # only used the wheel package directory if all packages are found there - if all(name in dir_packages for name in _PACKAGE_NAMES): - packages = dir_packages - _PACKAGES = packages - return packages -_PACKAGES = None + last_matching_dist_wheel = sorted(dist_matching_wheels)[-1] + except IndexError: + # NOTE: `WHEEL_PKG_DIR` does not contain any wheel files for `pip`. + return None + + return nullcontext(last_matching_dist_wheel) + + +def _get_pip_whl_path_ctx(): + # Prefer pip from the wheel package directory, if present. 
+ if (alternative_pip_wheel_path := _find_wheel_pkg_dir_pip()) is not None: + return alternative_pip_wheel_path + + return resources.as_file( + resources.files('ensurepip') + / '_bundled' + / f'pip-{_PIP_VERSION}-py3-none-any.whl' + ) + + +def _get_pip_version(): + with _get_pip_whl_path_ctx() as bundled_wheel_path: + wheel_name = bundled_wheel_path.name + return ( + # Extract '21.2.4' from 'pip-21.2.4-py3-none-any.whl' + wheel_name. + removeprefix('pip-'). + partition('-')[0] + ) def _run_pip(args, additional_paths=None): @@ -105,7 +91,7 @@ def version(): """ Returns a string specifying the bundled version of pip. """ - return _get_packages()['pip'].version + return _get_pip_version() def _disable_pip_configuration_settings(): @@ -167,24 +153,10 @@ def _bootstrap(*, root=None, upgrade=False, user=False, with tempfile.TemporaryDirectory() as tmpdir: # Put our bundled wheels into a temporary directory and construct the # additional paths that need added to sys.path - additional_paths = [] - for name, package in _get_packages().items(): - if package.wheel_name: - # Use bundled wheel package - wheel_name = package.wheel_name - wheel_path = resources.files("ensurepip") / "_bundled" / wheel_name - whl = wheel_path.read_bytes() - else: - # Use the wheel package directory - with open(package.wheel_path, "rb") as fp: - whl = fp.read() - wheel_name = os.path.basename(package.wheel_path) - - filename = os.path.join(tmpdir, wheel_name) - with open(filename, "wb") as fp: - fp.write(whl) - - additional_paths.append(filename) + tmpdir_path = Path(tmpdir) + with _get_pip_whl_path_ctx() as bundled_wheel_path: + tmp_wheel_path = tmpdir_path / bundled_wheel_path.name + copy2(bundled_wheel_path, tmp_wheel_path) # Construct the arguments to be passed to the pip command args = ["install", "--no-cache-dir", "--no-index", "--find-links", tmpdir] @@ -197,7 +169,8 @@ def _bootstrap(*, root=None, upgrade=False, user=False, if verbosity: args += ["-" + "v" * verbosity] - return 
_run_pip([*args, *_PACKAGE_NAMES], additional_paths) + return _run_pip([*args, "pip"], [os.fsdecode(tmp_wheel_path)]) + def _uninstall_helper(*, verbosity=0): """Helper to support a clean default uninstall process on Windows @@ -227,7 +200,7 @@ def _uninstall_helper(*, verbosity=0): if verbosity: args += ["-" + "v" * verbosity] - return _run_pip([*args, *reversed(_PACKAGE_NAMES)]) + return _run_pip([*args, "pip"]) def _main(argv=None): diff --git a/Lib/test/test_ensurepip.py b/Lib/test/test_ensurepip.py index 69ab2a4feaa938..a4b36a90d8815e 100644 --- a/Lib/test/test_ensurepip.py +++ b/Lib/test/test_ensurepip.py @@ -6,6 +6,8 @@ import test.support import unittest import unittest.mock +from importlib.resources.abc import Traversable +from pathlib import Path import ensurepip import ensurepip._uninstall @@ -20,41 +22,35 @@ def test_version(self): # Test version() with tempfile.TemporaryDirectory() as tmpdir: self.touch(tmpdir, "pip-1.2.3b1-py2.py3-none-any.whl") - with (unittest.mock.patch.object(ensurepip, '_PACKAGES', None), - unittest.mock.patch.object(ensurepip, '_WHEEL_PKG_DIR', tmpdir)): + with unittest.mock.patch.object(ensurepip, '_WHEEL_PKG_DIR', Path(tmpdir)): self.assertEqual(ensurepip.version(), '1.2.3b1') - def test_get_packages_no_dir(self): - # Test _get_packages() without a wheel package directory - with (unittest.mock.patch.object(ensurepip, '_PACKAGES', None), - unittest.mock.patch.object(ensurepip, '_WHEEL_PKG_DIR', None)): - packages = ensurepip._get_packages() - - # when bundled wheel packages are used, we get _PIP_VERSION + def test_version_no_dir(self): + # Test version() without a wheel package directory + with unittest.mock.patch.object(ensurepip, '_WHEEL_PKG_DIR', None): + # when the bundled pip wheel is used, we get _PIP_VERSION self.assertEqual(ensurepip._PIP_VERSION, ensurepip.version()) - # use bundled wheel packages - self.assertIsNotNone(packages['pip'].wheel_name) + def test_selected_wheel_path_no_dir(self): + pip_filename = 
f'pip-{ensurepip._PIP_VERSION}-py3-none-any.whl' + with unittest.mock.patch.object(ensurepip, '_WHEEL_PKG_DIR', None): + with ensurepip._get_pip_whl_path_ctx() as bundled_wheel_path: + self.assertEqual(pip_filename, bundled_wheel_path.name) - def test_get_packages_with_dir(self): - # Test _get_packages() with a wheel package directory + def test_selected_wheel_path_with_dir(self): + # Test _get_pip_whl_path_ctx() with a wheel package directory pip_filename = "pip-20.2.2-py2.py3-none-any.whl" with tempfile.TemporaryDirectory() as tmpdir: self.touch(tmpdir, pip_filename) - # not used, make sure that it's ignored + # not used, make sure that they're ignored + self.touch(tmpdir, "pip-1.2.3-py2.py3-none-any.whl") self.touch(tmpdir, "wheel-0.34.2-py2.py3-none-any.whl") + self.touch(tmpdir, "pip-script.py") - with (unittest.mock.patch.object(ensurepip, '_PACKAGES', None), - unittest.mock.patch.object(ensurepip, '_WHEEL_PKG_DIR', tmpdir)): - packages = ensurepip._get_packages() - - self.assertEqual(packages['pip'].version, '20.2.2') - self.assertEqual(packages['pip'].wheel_path, - os.path.join(tmpdir, pip_filename)) - - # wheel package is ignored - self.assertEqual(sorted(packages), ['pip']) + with unittest.mock.patch.object(ensurepip, '_WHEEL_PKG_DIR', Path(tmpdir)): + with ensurepip._get_pip_whl_path_ctx() as bundled_wheel_path: + self.assertEqual(pip_filename, bundled_wheel_path.name) class EnsurepipMixin: @@ -69,7 +65,7 @@ def setUp(self): real_devnull = os.devnull os_patch = unittest.mock.patch("ensurepip.os") patched_os = os_patch.start() - # But expose os.listdir() used by _find_packages() + # But expose os.listdir() used by _find_wheel_pkg_dir_pip() patched_os.listdir = os.listdir self.addCleanup(os_patch.stop) patched_os.devnull = real_devnull diff --git a/Tools/build/verify_ensurepip_wheels.py b/Tools/build/verify_ensurepip_wheels.py index 29897425da6c03..a37da2f70757e5 100755 --- a/Tools/build/verify_ensurepip_wheels.py +++ 
b/Tools/build/verify_ensurepip_wheels.py @@ -14,7 +14,6 @@ from pathlib import Path from urllib.request import urlopen -PACKAGE_NAMES = ("pip",) ENSURE_PIP_ROOT = Path(__file__).parent.parent.parent / "Lib/ensurepip" WHEEL_DIR = ENSURE_PIP_ROOT / "_bundled" ENSURE_PIP_INIT_PY_TEXT = (ENSURE_PIP_ROOT / "__init__.py").read_text(encoding="utf-8") @@ -97,8 +96,5 @@ def verify_wheel(package_name: str) -> bool: if __name__ == "__main__": - exit_status = 0 - for package_name in PACKAGE_NAMES: - if not verify_wheel(package_name): - exit_status = 1 + exit_status = int(not verify_wheel("pip")) raise SystemExit(exit_status)