From e2713409cff5b71b1176b0e3fa63dae447548672 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Mon, 2 Dec 2024 10:38:17 +0900 Subject: [PATCH 01/76] gh-115999: Add partial free-thread specialization for BINARY_SUBSCR (gh-127227) --- Include/internal/pycore_list.h | 3 + Include/internal/pycore_opcode_metadata.h | 2 +- Include/internal/pycore_uop_metadata.h | 2 +- Lib/test/test_dis.py | 21 ----- Lib/test/test_opcache.py | 107 ++++++++++++++++------ Objects/listobject.c | 6 ++ Python/bytecodes.c | 10 +- Python/executor_cases.c.h | 11 +++ Python/generated_cases.c.h | 12 ++- Python/specialize.c | 25 +++-- 10 files changed, 128 insertions(+), 71 deletions(-) diff --git a/Include/internal/pycore_list.h b/Include/internal/pycore_list.h index 2c666f9be4bd79..f03e484f5ef8b0 100644 --- a/Include/internal/pycore_list.h +++ b/Include/internal/pycore_list.h @@ -10,6 +10,9 @@ extern "C" { PyAPI_FUNC(PyObject*) _PyList_Extend(PyListObject *, PyObject *); extern void _PyList_DebugMallocStats(FILE *out); +// _PyList_GetItemRef should be used only when the object is known as a list +// because it doesn't raise TypeError when the object is not a list, whereas PyList_GetItemRef does. 
+extern PyObject* _PyList_GetItemRef(PyListObject *, Py_ssize_t i); #define _PyList_ITEMS(op) _Py_RVALUE(_PyList_CAST(op)->ob_item) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index 5ce172856e1b19..d63c8df8ca6690 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -1952,7 +1952,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [BINARY_SUBSCR] = { true, INSTR_FMT_IXC, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_SUBSCR_DICT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [BINARY_SUBSCR_GETITEM] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG }, - [BINARY_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG }, + [BINARY_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, [BINARY_SUBSCR_STR_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG }, [BINARY_SUBSCR_TUPLE_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG }, [BUILD_LIST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG }, diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 1c1f478c3833c8..1825bb3a5abc80 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -84,7 +84,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_BINARY_SUBSCR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_BINARY_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, - [_BINARY_SUBSCR_LIST_INT] = HAS_DEOPT_FLAG, + [_BINARY_SUBSCR_LIST_INT] = HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, [_BINARY_SUBSCR_STR_INT] = HAS_DEOPT_FLAG, [_BINARY_SUBSCR_TUPLE_INT] = HAS_DEOPT_FLAG, [_BINARY_SUBSCR_DICT] = HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index f26411ace8fa73..55890e58ed4bae 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -1260,27 +1260,6 @@ def 
test_super_instructions(self): got = self.get_disassembly(load_test, adaptive=True) self.do_disassembly_compare(got, dis_load_test_quickened_code) - @cpython_only - @requires_specialization - def test_binary_subscr_specialize(self): - binary_subscr_quicken = """\ - 0 RESUME_CHECK 0 - - 1 LOAD_NAME 0 (a) - LOAD_SMALL_INT 0 - %s - RETURN_VALUE -""" - co_list = compile('a[0]', "", "eval") - self.code_quicken(lambda: exec(co_list, {}, {'a': [0]})) - got = self.get_disassembly(co_list, adaptive=True) - self.do_disassembly_compare(got, binary_subscr_quicken % "BINARY_SUBSCR_LIST_INT") - - co_dict = compile('a[0]', "", "eval") - self.code_quicken(lambda: exec(co_dict, {}, {'a': {0: '1'}})) - got = self.get_disassembly(co_dict, adaptive=True) - self.do_disassembly_compare(got, binary_subscr_quicken % "BINARY_SUBSCR_DICT") - @cpython_only @requires_specialization def test_load_attr_specialize(self): diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index 1a6eac236009c3..b989b21cd9b3a9 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -617,7 +617,7 @@ def write(items): opname = "BINARY_SUBSCR_GETITEM" self.assert_races_do_not_crash(opname, get_items, read, write) - @requires_specialization + @requires_specialization_ft def test_binary_subscr_list_int(self): def get_items(): items = [] @@ -1023,7 +1023,7 @@ def write(items): opname = "STORE_ATTR_WITH_HINT" self.assert_races_do_not_crash(opname, get_items, read, write) - @requires_specialization + @requires_specialization_ft def test_store_subscr_list_int(self): def get_items(): items = [] @@ -1229,48 +1229,48 @@ class TestSpecializer(TestBase): @cpython_only @requires_specialization_ft def test_binary_op(self): - def f(): + def binary_op_add_int(): for _ in range(100): a, b = 1, 2 c = a + b self.assertEqual(c, 3) - f() - self.assert_specialized(f, "BINARY_OP_ADD_INT") - self.assert_no_opcode(f, "BINARY_OP") + binary_op_add_int() + self.assert_specialized(binary_op_add_int, 
"BINARY_OP_ADD_INT") + self.assert_no_opcode(binary_op_add_int, "BINARY_OP") - def g(): + def binary_op_add_unicode(): for _ in range(100): a, b = "foo", "bar" c = a + b self.assertEqual(c, "foobar") - g() - self.assert_specialized(g, "BINARY_OP_ADD_UNICODE") - self.assert_no_opcode(g, "BINARY_OP") + binary_op_add_unicode() + self.assert_specialized(binary_op_add_unicode, "BINARY_OP_ADD_UNICODE") + self.assert_no_opcode(binary_op_add_unicode, "BINARY_OP") @cpython_only @requires_specialization_ft def test_contain_op(self): - def f(): + def contains_op_dict(): for _ in range(100): a, b = 1, {1: 2, 2: 5} self.assertTrue(a in b) self.assertFalse(3 in b) - f() - self.assert_specialized(f, "CONTAINS_OP_DICT") - self.assert_no_opcode(f, "CONTAINS_OP") + contains_op_dict() + self.assert_specialized(contains_op_dict, "CONTAINS_OP_DICT") + self.assert_no_opcode(contains_op_dict, "CONTAINS_OP") - def g(): + def contains_op_set(): for _ in range(100): a, b = 1, {1, 2} self.assertTrue(a in b) self.assertFalse(3 in b) - g() - self.assert_specialized(g, "CONTAINS_OP_SET") - self.assert_no_opcode(g, "CONTAINS_OP") + contains_op_set() + self.assert_specialized(contains_op_set, "CONTAINS_OP_SET") + self.assert_no_opcode(contains_op_set, "CONTAINS_OP") @cpython_only @requires_specialization_ft @@ -1342,34 +1342,81 @@ def to_bool_str(): @cpython_only @requires_specialization_ft def test_unpack_sequence(self): - def f(): + def unpack_sequence_two_tuple(): for _ in range(100): a, b = 1, 2 self.assertEqual(a, 1) self.assertEqual(b, 2) - f() - self.assert_specialized(f, "UNPACK_SEQUENCE_TWO_TUPLE") - self.assert_no_opcode(f, "UNPACK_SEQUENCE") + unpack_sequence_two_tuple() + self.assert_specialized(unpack_sequence_two_tuple, + "UNPACK_SEQUENCE_TWO_TUPLE") + self.assert_no_opcode(unpack_sequence_two_tuple, "UNPACK_SEQUENCE") - def g(): + def unpack_sequence_tuple(): for _ in range(100): a, = 1, self.assertEqual(a, 1) - g() - self.assert_specialized(g, "UNPACK_SEQUENCE_TUPLE") - 
self.assert_no_opcode(g, "UNPACK_SEQUENCE") + unpack_sequence_tuple() + self.assert_specialized(unpack_sequence_tuple, "UNPACK_SEQUENCE_TUPLE") + self.assert_no_opcode(unpack_sequence_tuple, "UNPACK_SEQUENCE") - def x(): + def unpack_sequence_list(): for _ in range(100): a, b = [1, 2] self.assertEqual(a, 1) self.assertEqual(b, 2) - x() - self.assert_specialized(x, "UNPACK_SEQUENCE_LIST") - self.assert_no_opcode(x, "UNPACK_SEQUENCE") + unpack_sequence_list() + self.assert_specialized(unpack_sequence_list, "UNPACK_SEQUENCE_LIST") + self.assert_no_opcode(unpack_sequence_list, "UNPACK_SEQUENCE") + + @cpython_only + @requires_specialization_ft + def test_binary_subscr(self): + def binary_subscr_list_int(): + for _ in range(100): + a = [1, 2, 3] + for idx, expected in enumerate(a): + self.assertEqual(a[idx], expected) + + binary_subscr_list_int() + self.assert_specialized(binary_subscr_list_int, + "BINARY_SUBSCR_LIST_INT") + self.assert_no_opcode(binary_subscr_list_int, "BINARY_SUBSCR") + + def binary_subscr_tuple_int(): + for _ in range(100): + a = (1, 2, 3) + for idx, expected in enumerate(a): + self.assertEqual(a[idx], expected) + + binary_subscr_tuple_int() + self.assert_specialized(binary_subscr_tuple_int, + "BINARY_SUBSCR_TUPLE_INT") + self.assert_no_opcode(binary_subscr_tuple_int, "BINARY_SUBSCR") + + def binary_subscr_dict(): + for _ in range(100): + a = {1: 2, 2: 3} + self.assertEqual(a[1], 2) + self.assertEqual(a[2], 3) + + binary_subscr_dict() + self.assert_specialized(binary_subscr_dict, "BINARY_SUBSCR_DICT") + self.assert_no_opcode(binary_subscr_dict, "BINARY_SUBSCR") + + def binary_subscr_str_int(): + for _ in range(100): + a = "foobar" + for idx, expected in enumerate(a): + self.assertEqual(a[idx], expected) + + binary_subscr_str_int() + self.assert_specialized(binary_subscr_str_int, "BINARY_SUBSCR_STR_INT") + self.assert_no_opcode(binary_subscr_str_int, "BINARY_SUBSCR") + if __name__ == "__main__": unittest.main() diff --git a/Objects/listobject.c 
b/Objects/listobject.c index bb0040cbe9f272..4b24f4a428e18b 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -391,6 +391,12 @@ PyList_GetItemRef(PyObject *op, Py_ssize_t i) return item; } +PyObject * +_PyList_GetItemRef(PyListObject *list, Py_ssize_t i) +{ + return list_get_item_ref(list, i); +} + int PyList_SetItem(PyObject *op, Py_ssize_t i, PyObject *newitem) diff --git a/Python/bytecodes.c b/Python/bytecodes.c index a14b32b8108be8..c07ec42ec68f8b 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -704,7 +704,7 @@ dummy_func( }; specializing op(_SPECIALIZE_BINARY_SUBSCR, (counter/1, container, sub -- container, sub)) { - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT assert(frame->stackpointer == NULL); if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; @@ -713,7 +713,7 @@ dummy_func( } OPCODE_DEFERRED_INC(BINARY_SUBSCR); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } op(_BINARY_SUBSCR, (container, sub -- res)) { @@ -790,11 +790,17 @@ dummy_func( // Deopt unless 0 <= sub < PyList_Size(list) DEOPT_IF(!_PyLong_IsNonNegativeCompact((PyLongObject *)sub)); Py_ssize_t index = ((PyLongObject*)sub)->long_value.ob_digit[0]; +#ifdef Py_GIL_DISABLED + PyObject *res_o = _PyList_GetItemRef((PyListObject*)list, index); + DEOPT_IF(res_o == NULL); + STAT_INC(BINARY_SUBSCR, hit); +#else DEOPT_IF(index >= PyList_GET_SIZE(list)); STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = PyList_GET_ITEM(list, index); assert(res_o != NULL); Py_INCREF(res_o); +#endif PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); DEAD(sub_st); PyStackRef_CLOSE(list_st); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index d46412a193332b..c91257b06cad11 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -981,6 +981,16 @@ JUMP_TO_JUMP_TARGET(); } Py_ssize_t index = ((PyLongObject*)sub)->long_value.ob_digit[0]; + #ifdef 
Py_GIL_DISABLED + _PyFrame_SetStackPointer(frame, stack_pointer); + PyObject *res_o = _PyList_GetItemRef((PyListObject*)list, index); + stack_pointer = _PyFrame_GetStackPointer(frame); + if (res_o == NULL) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } + STAT_INC(BINARY_SUBSCR, hit); + #else if (index >= PyList_GET_SIZE(list)) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); @@ -989,6 +999,7 @@ PyObject *res_o = PyList_GET_ITEM(list, index); assert(res_o != NULL); Py_INCREF(res_o); + #endif PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); PyStackRef_CLOSE(list_st); res = PyStackRef_FromPyObjectSteal(res_o); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index c9a5132269398c..45bcc4242af9d7 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -433,7 +433,7 @@ container = stack_pointer[-2]; uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT assert(frame->stackpointer == NULL); if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; @@ -444,7 +444,7 @@ } OPCODE_DEFERRED_INC(BINARY_SUBSCR); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } // _BINARY_SUBSCR { @@ -577,11 +577,19 @@ // Deopt unless 0 <= sub < PyList_Size(list) DEOPT_IF(!_PyLong_IsNonNegativeCompact((PyLongObject *)sub), BINARY_SUBSCR); Py_ssize_t index = ((PyLongObject*)sub)->long_value.ob_digit[0]; + #ifdef Py_GIL_DISABLED + _PyFrame_SetStackPointer(frame, stack_pointer); + PyObject *res_o = _PyList_GetItemRef((PyListObject*)list, index); + stack_pointer = _PyFrame_GetStackPointer(frame); + DEOPT_IF(res_o == NULL, BINARY_SUBSCR); + STAT_INC(BINARY_SUBSCR, hit); + #else DEOPT_IF(index >= PyList_GET_SIZE(list), BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); PyObject *res_o = PyList_GET_ITEM(list, index); assert(res_o != NULL); Py_INCREF(res_o); + #endif 
PyStackRef_CLOSE_SPECIALIZED(sub_st, (destructor)PyObject_Free); PyStackRef_CLOSE(list_st); res = PyStackRef_FromPyObjectSteal(res_o); diff --git a/Python/specialize.c b/Python/specialize.c index 172dae7d374602..d03310de782fe7 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1717,15 +1717,15 @@ _Py_Specialize_BinarySubscr( PyObject *container = PyStackRef_AsPyObjectBorrow(container_st); PyObject *sub = PyStackRef_AsPyObjectBorrow(sub_st); - assert(ENABLE_SPECIALIZATION); + assert(ENABLE_SPECIALIZATION_FT); assert(_PyOpcode_Caches[BINARY_SUBSCR] == INLINE_CACHE_ENTRIES_BINARY_SUBSCR); - _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)(instr + 1); PyTypeObject *container_type = Py_TYPE(container); + uint8_t specialized_op; if (container_type == &PyList_Type) { if (PyLong_CheckExact(sub)) { if (_PyLong_IsNonNegativeCompact((PyLongObject *)sub)) { - instr->op.code = BINARY_SUBSCR_LIST_INT; + specialized_op = BINARY_SUBSCR_LIST_INT; goto success; } SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_RANGE); @@ -1738,7 +1738,7 @@ _Py_Specialize_BinarySubscr( if (container_type == &PyTuple_Type) { if (PyLong_CheckExact(sub)) { if (_PyLong_IsNonNegativeCompact((PyLongObject *)sub)) { - instr->op.code = BINARY_SUBSCR_TUPLE_INT; + specialized_op = BINARY_SUBSCR_TUPLE_INT; goto success; } SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_RANGE); @@ -1751,7 +1751,7 @@ _Py_Specialize_BinarySubscr( if (container_type == &PyUnicode_Type) { if (PyLong_CheckExact(sub)) { if (_PyLong_IsNonNegativeCompact((PyLongObject *)sub)) { - instr->op.code = BINARY_SUBSCR_STR_INT; + specialized_op = BINARY_SUBSCR_STR_INT; goto success; } SPECIALIZATION_FAIL(BINARY_SUBSCR, SPEC_FAIL_OUT_OF_RANGE); @@ -1762,9 +1762,10 @@ _Py_Specialize_BinarySubscr( goto fail; } if (container_type == &PyDict_Type) { - instr->op.code = BINARY_SUBSCR_DICT; + specialized_op = BINARY_SUBSCR_DICT; goto success; } +#ifndef Py_GIL_DISABLED PyTypeObject *cls = Py_TYPE(container); PyObject 
*descriptor = _PyType_Lookup(cls, &_Py_ID(__getitem__)); if (descriptor && Py_TYPE(descriptor) == &PyFunction_Type) { @@ -1797,21 +1798,17 @@ _Py_Specialize_BinarySubscr( // struct _specialization_cache): ht->_spec_cache.getitem = descriptor; ht->_spec_cache.getitem_version = version; - instr->op.code = BINARY_SUBSCR_GETITEM; + specialized_op = BINARY_SUBSCR_GETITEM; goto success; } +#endif // Py_GIL_DISABLED SPECIALIZATION_FAIL(BINARY_SUBSCR, binary_subscr_fail_kind(container_type, sub)); fail: - STAT_INC(BINARY_SUBSCR, failure); - assert(!PyErr_Occurred()); - instr->op.code = BINARY_SUBSCR; - cache->counter = adaptive_counter_backoff(cache->counter); + unspecialize(instr); return; success: - STAT_INC(BINARY_SUBSCR, success); - assert(!PyErr_Occurred()); - cache->counter = adaptive_counter_cooldown(); + specialize(instr, specialized_op); } From 2950bc50af8fc2539e64731359bfb39b335a614d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filipe=20La=C3=ADns=20=F0=9F=87=B5=F0=9F=87=B8?= Date: Mon, 2 Dec 2024 07:12:36 +0000 Subject: [PATCH 02/76] GH-127429: fix sysconfig data generation on cross-builds (#127430) --- .github/workflows/jit.yml | 2 +- Lib/sysconfig/__init__.py | 49 ++++++++++++++----- Lib/sysconfig/__main__.py | 7 ++- ...-11-29-23-02-43.gh-issue-127429.dQf2w4.rst | 3 ++ configure | 6 +-- configure.ac | 2 +- 6 files changed, 49 insertions(+), 20 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-29-23-02-43.gh-issue-127429.dQf2w4.rst diff --git a/.github/workflows/jit.yml b/.github/workflows/jit.yml index 7dbbe71b2131e7..4ef543d7369734 100644 --- a/.github/workflows/jit.yml +++ b/.github/workflows/jit.yml @@ -104,7 +104,7 @@ jobs: # The `find` line is required as a result of https://github.com/actions/runner-images/issues/9966. # This is a bug in the macOS runner image where the pre-installed Python is installed in the same - # directory as the Homebrew Python, which causes the build to fail for macos-13. 
This line removes + # directory as the Homebrew Python, which causes the build to fail for macos-13. This line removes # the symlink to the pre-installed Python so that the Homebrew Python is used instead. - name: Native macOS if: runner.os == 'macOS' diff --git a/Lib/sysconfig/__init__.py b/Lib/sysconfig/__init__.py index ee52700b51fd07..ad86609016e478 100644 --- a/Lib/sysconfig/__init__.py +++ b/Lib/sysconfig/__init__.py @@ -318,14 +318,22 @@ def get_default_scheme(): def get_makefile_filename(): """Return the path of the Makefile.""" + + # GH-127429: When cross-compiling, use the Makefile from the target, instead of the host Python. + if cross_base := os.environ.get('_PYTHON_PROJECT_BASE'): + return os.path.join(cross_base, 'Makefile') + if _PYTHON_BUILD: return os.path.join(_PROJECT_BASE, "Makefile") + if hasattr(sys, 'abiflags'): config_dir_name = f'config-{_PY_VERSION_SHORT}{sys.abiflags}' else: config_dir_name = 'config' + if hasattr(sys.implementation, '_multiarch'): config_dir_name += f'-{sys.implementation._multiarch}' + return os.path.join(get_path('stdlib'), config_dir_name, 'Makefile') @@ -464,27 +472,44 @@ def get_path(name, scheme=get_default_scheme(), vars=None, expand=True): def _init_config_vars(): global _CONFIG_VARS _CONFIG_VARS = {} + + prefix = _PREFIX + exec_prefix = _EXEC_PREFIX + base_prefix = _BASE_PREFIX + base_exec_prefix = _BASE_EXEC_PREFIX + + try: + abiflags = sys.abiflags + except AttributeError: + abiflags = '' + + if os.name == 'posix': + _init_posix(_CONFIG_VARS) + # If we are cross-compiling, load the prefixes from the Makefile instead. + if '_PYTHON_PROJECT_BASE' in os.environ: + prefix = _CONFIG_VARS['prefix'] + exec_prefix = _CONFIG_VARS['exec_prefix'] + base_prefix = _CONFIG_VARS['prefix'] + base_exec_prefix = _CONFIG_VARS['exec_prefix'] + abiflags = _CONFIG_VARS['ABIFLAGS'] + # Normalized versions of prefix and exec_prefix are handy to have; # in fact, these are the standard versions used most places in the # Distutils. 
- _CONFIG_VARS['prefix'] = _PREFIX - _CONFIG_VARS['exec_prefix'] = _EXEC_PREFIX + _CONFIG_VARS['prefix'] = prefix + _CONFIG_VARS['exec_prefix'] = exec_prefix _CONFIG_VARS['py_version'] = _PY_VERSION _CONFIG_VARS['py_version_short'] = _PY_VERSION_SHORT _CONFIG_VARS['py_version_nodot'] = _PY_VERSION_SHORT_NO_DOT - _CONFIG_VARS['installed_base'] = _BASE_PREFIX - _CONFIG_VARS['base'] = _PREFIX - _CONFIG_VARS['installed_platbase'] = _BASE_EXEC_PREFIX - _CONFIG_VARS['platbase'] = _EXEC_PREFIX + _CONFIG_VARS['installed_base'] = base_prefix + _CONFIG_VARS['base'] = prefix + _CONFIG_VARS['installed_platbase'] = base_exec_prefix + _CONFIG_VARS['platbase'] = exec_prefix _CONFIG_VARS['projectbase'] = _PROJECT_BASE _CONFIG_VARS['platlibdir'] = sys.platlibdir _CONFIG_VARS['implementation'] = _get_implementation() _CONFIG_VARS['implementation_lower'] = _get_implementation().lower() - try: - _CONFIG_VARS['abiflags'] = sys.abiflags - except AttributeError: - # sys.abiflags may not be defined on all platforms. 
- _CONFIG_VARS['abiflags'] = '' + _CONFIG_VARS['abiflags'] = abiflags try: _CONFIG_VARS['py_version_nodot_plat'] = sys.winver.replace('.', '') except AttributeError: @@ -493,8 +518,6 @@ def _init_config_vars(): if os.name == 'nt': _init_non_posix(_CONFIG_VARS) _CONFIG_VARS['VPATH'] = sys._vpath - if os.name == 'posix': - _init_posix(_CONFIG_VARS) if _HAS_USER_BASE: # Setting 'userbase' is done below the call to the # init function to enable using 'get_config_var' in diff --git a/Lib/sysconfig/__main__.py b/Lib/sysconfig/__main__.py index 5660a6c5105b9f..10728c709e1811 100644 --- a/Lib/sysconfig/__main__.py +++ b/Lib/sysconfig/__main__.py @@ -7,6 +7,7 @@ _PYTHON_BUILD, _get_sysconfigdata_name, get_config_h_filename, + get_config_var, get_config_vars, get_default_scheme, get_makefile_filename, @@ -161,7 +162,7 @@ def _print_config_dict(d, stream): def _get_pybuilddir(): pybuilddir = f'build/lib.{get_platform()}-{get_python_version()}' - if hasattr(sys, "gettotalrefcount"): + if get_config_var('Py_DEBUG') == '1': pybuilddir += '-pydebug' return pybuilddir @@ -229,11 +230,15 @@ def _generate_posix_vars(): f.write('build_time_vars = ') _print_config_dict(vars, stream=f) + print(f'Written {destfile}') + # Write a JSON file with the output of sysconfig.get_config_vars jsonfile = os.path.join(pybuilddir, _get_json_data_name()) with open(jsonfile, 'w') as f: json.dump(get_config_vars(), f, indent=2) + print(f'Written {jsonfile}') + # Create file used for sys.path fixup -- see Modules/getpath.c with open('pybuilddir.txt', 'w', encoding='utf8') as f: f.write(pybuilddir) diff --git a/Misc/NEWS.d/next/Library/2024-11-29-23-02-43.gh-issue-127429.dQf2w4.rst b/Misc/NEWS.d/next/Library/2024-11-29-23-02-43.gh-issue-127429.dQf2w4.rst new file mode 100644 index 00000000000000..708c1a6437d812 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-29-23-02-43.gh-issue-127429.dQf2w4.rst @@ -0,0 +1,3 @@ +Fixed bug where, on cross-builds, the :mod:`sysconfig` POSIX data was being +generated 
with the host Python's ``Makefile``. The data is now generated from +current build's ``Makefile``. diff --git a/configure b/configure index 84b74ac3584bcd..4e4043260ed2df 100755 --- a/configure +++ b/configure @@ -944,8 +944,8 @@ AR LINK_PYTHON_OBJS LINK_PYTHON_DEPS LIBRARY_DEPS -NODE HOSTRUNNER +NODE STATIC_LIBPYTHON GNULD EXPORTSFROM @@ -1147,7 +1147,6 @@ LDFLAGS LIBS CPPFLAGS CPP -HOSTRUNNER PROFILE_TASK BOLT_INSTRUMENT_FLAGS BOLT_APPLY_FLAGS @@ -1968,7 +1967,6 @@ Some influential environment variables: CPPFLAGS (Objective) C/C++ preprocessor flags, e.g. -I if you have headers in a nonstandard directory CPP C preprocessor - HOSTRUNNER Program to run CPython for the host platform PROFILE_TASK Python args for PGO generation task BOLT_INSTRUMENT_FLAGS @@ -7622,9 +7620,9 @@ if test "$cross_compiling" = yes; then RUNSHARED= fi +# HOSTRUNNER - Program to run CPython for the host platform { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking HOSTRUNNER" >&5 printf %s "checking HOSTRUNNER... 
" >&6; } - if test -z "$HOSTRUNNER" then case $ac_sys_system in #( diff --git a/configure.ac b/configure.ac index 8fa6cb60900ad1..4cfced10432491 100644 --- a/configure.ac +++ b/configure.ac @@ -1609,8 +1609,8 @@ if test "$cross_compiling" = yes; then RUNSHARED= fi +# HOSTRUNNER - Program to run CPython for the host platform AC_MSG_CHECKING([HOSTRUNNER]) -AC_ARG_VAR([HOSTRUNNER], [Program to run CPython for the host platform]) if test -z "$HOSTRUNNER" then AS_CASE([$ac_sys_system], From 1f8267b85dda655282922ba20df90d0ac6bea634 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Mon, 2 Dec 2024 09:21:00 +0100 Subject: [PATCH 03/76] gh-127443: Fix some entries in `Doc/data/refcounts.dat` (#127451) Fix incorrect entries in `Doc/data/refcounts.dat` --- Doc/data/refcounts.dat | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat index 6bfcc191b2270b..3f49c88c3cc028 100644 --- a/Doc/data/refcounts.dat +++ b/Doc/data/refcounts.dat @@ -180,7 +180,7 @@ PyCapsule_IsValid:const char*:name:: PyCapsule_New:PyObject*::+1: PyCapsule_New:void*:pointer:: PyCapsule_New:const char *:name:: -PyCapsule_New::void (* destructor)(PyObject* ):: +PyCapsule_New:void (*)(PyObject *):destructor:: PyCapsule_SetContext:int::: PyCapsule_SetContext:PyObject*:self:0: @@ -349,11 +349,11 @@ PyComplex_CheckExact:int::: PyComplex_CheckExact:PyObject*:p:0: PyComplex_FromCComplex:PyObject*::+1: -PyComplex_FromCComplex::Py_complex v:: +PyComplex_FromCComplex:Py_complex:v:: PyComplex_FromDoubles:PyObject*::+1: -PyComplex_FromDoubles::double real:: -PyComplex_FromDoubles::double imag:: +PyComplex_FromDoubles:double:real:: +PyComplex_FromDoubles:double:imag:: PyComplex_ImagAsDouble:double::: PyComplex_ImagAsDouble:PyObject*:op:0: @@ -620,7 +620,9 @@ PyErr_GetExcInfo:PyObject**:pvalue:+1: PyErr_GetExcInfo:PyObject**:ptraceback:+1: 
PyErr_GetRaisedException:PyObject*::+1: -PyErr_SetRaisedException:::: + +PyErr_SetRaisedException:void::: +PyErr_SetRaisedException:PyObject *:exc:0:stolen PyErr_GivenExceptionMatches:int::: PyErr_GivenExceptionMatches:PyObject*:given:0: @@ -640,9 +642,9 @@ PyErr_NewExceptionWithDoc:PyObject*:dict:0: PyErr_NoMemory:PyObject*::null: PyErr_NormalizeException:void::: -PyErr_NormalizeException:PyObject**:exc::??? -PyErr_NormalizeException:PyObject**:val::??? -PyErr_NormalizeException:PyObject**:tb::??? +PyErr_NormalizeException:PyObject**:exc:+1:??? +PyErr_NormalizeException:PyObject**:val:+1:??? +PyErr_NormalizeException:PyObject**:tb:+1:??? PyErr_Occurred:PyObject*::0: @@ -1314,7 +1316,7 @@ PyMapping_GetItemString:const char*:key:: PyMapping_HasKey:int::: PyMapping_HasKey:PyObject*:o:0: -PyMapping_HasKey:PyObject*:key:: +PyMapping_HasKey:PyObject*:key:0: PyMapping_HasKeyString:int::: PyMapping_HasKeyString:PyObject*:o:0: @@ -1474,7 +1476,7 @@ PyModule_GetState:void*::: PyModule_GetState:PyObject*:module:0: PyModule_New:PyObject*::+1: -PyModule_New::char* name:: +PyModule_New:char*:name:: PyModule_NewObject:PyObject*::+1: PyModule_NewObject:PyObject*:name:+1: @@ -1484,7 +1486,7 @@ PyModule_SetDocString:PyObject*:module:0: PyModule_SetDocString:const char*:docstring:: PyModuleDef_Init:PyObject*::0: -PyModuleDef_Init:PyModuleDef*:def:0: +PyModuleDef_Init:PyModuleDef*:def:: PyNumber_Absolute:PyObject*::+1: PyNumber_Absolute:PyObject*:o:0: @@ -1984,10 +1986,10 @@ PyRun_StringFlags:PyObject*:locals:0: PyRun_StringFlags:PyCompilerFlags*:flags:: PySeqIter_Check:int::: -PySeqIter_Check::op:: +PySeqIter_Check:PyObject *:op:0: PySeqIter_New:PyObject*::+1: -PySeqIter_New:PyObject*:seq:: +PySeqIter_New:PyObject*:seq:0: PySequence_Check:int::: PySequence_Check:PyObject*:o:0: @@ -2421,7 +2423,7 @@ PyUnicode_GET_LENGTH:PyObject*:o:0: PyUnicode_KIND:int::: PyUnicode_KIND:PyObject*:o:0: -PyUnicode_MAX_CHAR_VALUE:::: +PyUnicode_MAX_CHAR_VALUE:Py_UCS4::: 
PyUnicode_MAX_CHAR_VALUE:PyObject*:o:0: Py_UNICODE_ISALNUM:int::: @@ -2488,7 +2490,7 @@ PyUnicode_FromWideChar:const wchar_t*:w:: PyUnicode_FromWideChar:Py_ssize_t:size:: PyUnicode_AsWideChar:Py_ssize_t::: -PyUnicode_AsWideChar:PyObject*:*unicode:0: +PyUnicode_AsWideChar:PyObject*:unicode:0: PyUnicode_AsWideChar:wchar_t*:w:: PyUnicode_AsWideChar:Py_ssize_t:size:: @@ -2541,7 +2543,7 @@ PyUnicode_AsUTF8String:PyObject*:unicode:0: PyUnicode_AsUTF8AndSize:const char*::: PyUnicode_AsUTF8AndSize:PyObject*:unicode:0: -PyUnicode_AsUTF8AndSize:Py_ssize_t*:size:0: +PyUnicode_AsUTF8AndSize:Py_ssize_t*:size:: PyUnicode_AsUTF8:const char*::: PyUnicode_AsUTF8:PyObject*:unicode:0: @@ -2864,13 +2866,13 @@ PyUnicodeDecodeError_SetStart:PyObject*:exc:0: PyUnicodeDecodeError_SetStart:Py_ssize_t:start:: PyWeakref_Check:int::: -PyWeakref_Check:PyObject*:ob:: +PyWeakref_Check:PyObject*:ob:0: PyWeakref_CheckProxy:int::: -PyWeakref_CheckProxy:PyObject*:ob:: +PyWeakref_CheckProxy:PyObject*:ob:0: PyWeakref_CheckRef:int::: -PyWeakref_CheckRef:PyObject*:ob:: +PyWeakref_CheckRef:PyObject*:ob:0: PyWeakref_GET_OBJECT:PyObject*::0: PyWeakref_GET_OBJECT:PyObject*:ref:0: From 2a373da7700cf928e0a5ce3998d19351a3565df4 Mon Sep 17 00:00:00 2001 From: CF Bolz-Tereick Date: Mon, 2 Dec 2024 11:11:28 +0100 Subject: [PATCH 04/76] add missing gc_collect() calls in sqlite3 tests (#127446) --- Lib/test/test_sqlite3/test_regression.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Lib/test/test_sqlite3/test_regression.py b/Lib/test/test_sqlite3/test_regression.py index db4e13222da9da..50cced3891d13a 100644 --- a/Lib/test/test_sqlite3/test_regression.py +++ b/Lib/test/test_sqlite3/test_regression.py @@ -433,6 +433,7 @@ def test_table_lock_cursor_dealloc(self): con.commit() cur = con.execute("select t from t") del cur + support.gc_collect() con.execute("drop table t") con.commit() @@ -448,6 +449,7 @@ def dup(v): con.create_function("dup", 1, dup) cur = con.execute("select dup(t) from t") del cur + 
support.gc_collect() con.execute("drop table t") con.commit() From a8dd821d5b25b42c0adeae6642e9b3f9228580f9 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 2 Dec 2024 10:12:17 +0000 Subject: [PATCH 05/76] GH-126491: GC: Mark objects reachable from roots before doing cycle collection (GH-127110) * Mark almost all reachable objects before doing collection phase * Add stats for objects marked * Visit new frames before each increment * Update docs * Clearer calculation of work to do. --- Include/cpython/pystats.h | 2 + Include/internal/pycore_frame.h | 3 + Include/internal/pycore_gc.h | 10 +- Include/internal/pycore_object.h | 4 +- Include/internal/pycore_runtime_init.h | 1 + InternalDocs/garbage_collector.md | 39 ++ Lib/test/libregrtest/refleak.py | 2 +- Lib/test/test_gc.py | 24 +- ...-11-21-16-13-52.gh-issue-126491.0YvL94.rst | 4 + Modules/_testinternalcapi.c | 6 + Python/ceval.c | 1 + Python/gc.c | 355 +++++++++++++----- Python/specialize.c | 2 + Tools/scripts/summarize_stats.py | 5 +- 14 files changed, 355 insertions(+), 103 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-11-21-16-13-52.gh-issue-126491.0YvL94.rst diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h index f1ca54839fbc38..2ae48002d720e9 100644 --- a/Include/cpython/pystats.h +++ b/Include/cpython/pystats.h @@ -99,6 +99,8 @@ typedef struct _gc_stats { uint64_t collections; uint64_t object_visits; uint64_t objects_collected; + uint64_t objects_transitively_reachable; + uint64_t objects_not_transitively_reachable; } GCStats; typedef struct _uop_stats { diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 8c0100390d036e..b786c5f49e9831 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -75,6 +75,7 @@ typedef struct _PyInterpreterFrame { _PyStackRef *stackpointer; uint16_t return_offset; /* Only relevant during a function call */ char owner; + char visited; /* Locals and stack */ _PyStackRef 
localsplus[1]; } _PyInterpreterFrame; @@ -207,6 +208,7 @@ _PyFrame_Initialize( #endif frame->return_offset = 0; frame->owner = FRAME_OWNED_BY_THREAD; + frame->visited = 0; for (int i = null_locals_from; i < code->co_nlocalsplus; i++) { frame->localsplus[i] = PyStackRef_NULL; @@ -389,6 +391,7 @@ _PyFrame_PushTrampolineUnchecked(PyThreadState *tstate, PyCodeObject *code, int frame->instr_ptr = _PyCode_CODE(code); #endif frame->owner = FRAME_OWNED_BY_THREAD; + frame->visited = 0; frame->return_offset = 0; #ifdef Py_GIL_DISABLED diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index 479fe10d00066d..4ff34bf8ead7d0 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -10,11 +10,11 @@ extern "C" { /* GC information is stored BEFORE the object structure. */ typedef struct { - // Pointer to next object in the list. + // Tagged pointer to next object in the list. // 0 means the object is not tracked uintptr_t _gc_next; - // Pointer to previous object in the list. + // Tagged pointer to previous object in the list. // Lowest two bits are used for flags documented later. uintptr_t _gc_prev; } PyGC_Head; @@ -284,6 +284,11 @@ struct gc_generation_stats { Py_ssize_t uncollectable; }; +enum _GCPhase { + GC_PHASE_MARK = 0, + GC_PHASE_COLLECT = 1 +}; + struct _gc_runtime_state { /* List of objects that still need to be cleaned up, singly linked * via their gc headers' gc_prev pointers. 
*/ @@ -311,6 +316,7 @@ struct _gc_runtime_state { Py_ssize_t work_to_do; /* Which of the old spaces is the visited space */ int visited_space; + int phase; #ifdef Py_GIL_DISABLED /* This is the number of objects that survived the last full diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index 34d835a7f84ee7..c52ed8f14707ba 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -471,8 +471,8 @@ static inline void _PyObject_GC_TRACK( PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev); _PyGCHead_SET_NEXT(last, gc); _PyGCHead_SET_PREV(gc, last); - /* Young objects will be moved into the visited space during GC, so set the bit here */ - gc->_gc_next = ((uintptr_t)generation0) | (uintptr_t)interp->gc.visited_space; + uintptr_t not_visited = 1 ^ interp->gc.visited_space; + gc->_gc_next = ((uintptr_t)generation0) | not_visited; generation0->_gc_prev = (uintptr_t)gc; #endif } diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index 9f6748945bab36..1260b957ce9482 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -137,6 +137,7 @@ extern PyTypeObject _PyExc_MemoryError; { .threshold = 0, }, \ }, \ .work_to_do = -5000, \ + .phase = GC_PHASE_MARK, \ }, \ .qsbr = { \ .wr_seq = QSBR_INITIAL, \ diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index 9e01a5864e33f8..08db080a200ea4 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -477,6 +477,45 @@ specifically in a generation by calling `gc.collect(generation=NUM)`. ``` +Optimization: visiting reachable objects +======================================== + +An object cannot be garbage if it can be reached. + +To avoid having to identify reference cycles across the whole heap, we can +reduce the amount of work done considerably by first moving most reachable objects +to the `visited` space. 
Empirically, most reachable objects can be reached from a +small set of global objects and local variables. +This step does much less work per object, so reduces the time spent +performing garbage collection by at least half. + +> [!NOTE] +> Objects that are not determined to be reachable by this pass are not necessarily +> unreachable. We still need to perform the main algorithm to determine which objects +> are actually unreachable. +We use the same technique of forming a transitive closure as the incremental +collector does to find reachable objects, seeding the list with some global +objects and the currently executing frames. + +This phase moves objects to the `visited` space, as follows: + +1. All objects directly referred to by any builtin class, the `sys` module, the `builtins` +module and all objects directly referred to from stack frames are added to a working +set of reachable objects. +2. Until this working set is empty: + 1. Pop an object from the set and move it to the `visited` space + 2. For each object directly reachable from that object: + * If it is not already in `visited` space and it is a GC object, + add it to the working set + + +Before each increment of collection is performed, the stacks are scanned +to check for any new stack frames that have been created since the last +increment. All objects directly referred to from those stack frames are +added to the working set. +Then the above algorithm is repeated, starting from step 2. 
+ + Optimization: reusing fields to save memory =========================================== diff --git a/Lib/test/libregrtest/refleak.py b/Lib/test/libregrtest/refleak.py index e783475cc7a36b..d0d1c8cdc9a11b 100644 --- a/Lib/test/libregrtest/refleak.py +++ b/Lib/test/libregrtest/refleak.py @@ -123,9 +123,9 @@ def get_pooled_int(value): xml_filename = 'refleak-xml.tmp' result = None dash_R_cleanup(fs, ps, pic, zdc, abcs) - support.gc_collect() for i in rep_range: + support.gc_collect() current = refleak_helper._hunting_for_refleaks refleak_helper._hunting_for_refleaks = True try: diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 0372815b9bfd27..b5140057a69d36 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -31,6 +31,11 @@ def __new__(cls, *args, **kwargs): return C ContainerNoGC = None +try: + import _testinternalcapi +except ImportError: + _testinternalcapi = None + ### Support code ############################################################################### @@ -1130,6 +1135,7 @@ def setUp(self): def tearDown(self): gc.disable() + @unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi") @requires_gil_enabled("Free threading does not support incremental GC") # Use small increments to emulate longer running process in a shorter time @gc_threshold(200, 10) @@ -1167,20 +1173,15 @@ def make_ll(depth): enabled = gc.isenabled() gc.enable() olds = [] + initial_heap_size = _testinternalcapi.get_tracked_heap_size() for i in range(20_000): newhead = make_ll(20) count += 20 newhead.surprise = head olds.append(newhead) if len(olds) == 20: - stats = gc.get_stats() - young = stats[0] - incremental = stats[1] - old = stats[2] - collected = young['collected'] + incremental['collected'] + old['collected'] - count += CORRECTION - live = count - collected - self.assertLess(live, 25000) + new_objects = _testinternalcapi.get_tracked_heap_size() - initial_heap_size + self.assertLess(new_objects, 27_000, f"Heap growing. 
Reached limit after {i} iterations") del olds[:] if not enabled: gc.disable() @@ -1322,7 +1323,8 @@ def test_refcount_errors(self): from test.support import gc_collect, SuppressCrashReport a = [1, 2, 3] - b = [a] + b = [a, a] + a.append(b) # Avoid coredump when Py_FatalError() calls abort() SuppressCrashReport().__enter__() @@ -1332,6 +1334,8 @@ def test_refcount_errors(self): # (to avoid deallocating it): import ctypes ctypes.pythonapi.Py_DecRef(ctypes.py_object(a)) + del a + del b # The garbage collector should now have a fatal error # when it reaches the broken object @@ -1360,7 +1364,7 @@ def test_refcount_errors(self): self.assertRegex(stderr, br'object type name: list') self.assertRegex(stderr, - br'object repr : \[1, 2, 3\]') + br'object repr : \[1, 2, 3, \[\[...\], \[...\]\]\]') class GCTogglingTests(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-11-21-16-13-52.gh-issue-126491.0YvL94.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-21-16-13-52.gh-issue-126491.0YvL94.rst new file mode 100644 index 00000000000000..9ef2b8dc33ed0f --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-21-16-13-52.gh-issue-126491.0YvL94.rst @@ -0,0 +1,4 @@ +Add a marking phase to the GC. All objects that can be transitively reached +from builtin modules or the stacks are marked as reachable before cycle +detection. This reduces the amount of work done by the GC by approximately +half. 
diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index a925191d479bd6..1bb71a3e80b39d 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -2076,6 +2076,11 @@ has_deferred_refcount(PyObject *self, PyObject *op) return PyBool_FromLong(_PyObject_HasDeferredRefcount(op)); } +static PyObject * +get_tracked_heap_size(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + return PyLong_FromInt64(PyInterpreterState_Get()->gc.heap_size); +} static PyMethodDef module_functions[] = { {"get_configs", get_configs, METH_NOARGS}, @@ -2174,6 +2179,7 @@ static PyMethodDef module_functions[] = { {"get_static_builtin_types", get_static_builtin_types, METH_NOARGS}, {"identify_type_slot_wrappers", identify_type_slot_wrappers, METH_NOARGS}, {"has_deferred_refcount", has_deferred_refcount, METH_O}, + {"get_tracked_heap_size", get_tracked_heap_size, METH_NOARGS}, {NULL, NULL} /* sentinel */ }; diff --git a/Python/ceval.c b/Python/ceval.c index eba0f233a81ef3..f9514a6bf25c1b 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -818,6 +818,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int entry_frame.instr_ptr = (_Py_CODEUNIT *)_Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS + 1; entry_frame.stackpointer = entry_frame.localsplus; entry_frame.owner = FRAME_OWNED_BY_CSTACK; + entry_frame.visited = 0; entry_frame.return_offset = 0; /* Push frame */ entry_frame.previous = tstate->current_frame; diff --git a/Python/gc.c b/Python/gc.c index 63adecf0e05114..5b9588c8741b97 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -106,7 +106,7 @@ gc_old_space(PyGC_Head *g) } static inline int -flip_old_space(int space) +other_space(int space) { assert(space == 0 || space == 1); return space ^ _PyGC_NEXT_MASK_OLD_SPACE_1; @@ -430,24 +430,32 @@ validate_list(PyGC_Head *head, enum flagstates flags) #endif #ifdef GC_EXTRA_DEBUG + + static void -validate_old(GCState *gcstate) +gc_list_validate_space(PyGC_Head *head, int space) { + PyGC_Head *gc = 
GC_NEXT(head); + while (gc != head) { + assert(gc_old_space(gc) == space); + gc = GC_NEXT(gc); + } +} + +static void +validate_spaces(GCState *gcstate) { + int visited = gcstate->visited_space; + int not_visited = other_space(visited); + gc_list_validate_space(&gcstate->young.head, not_visited); for (int space = 0; space < 2; space++) { - PyGC_Head *head = &gcstate->old[space].head; - PyGC_Head *gc = GC_NEXT(head); - while (gc != head) { - PyGC_Head *next = GC_NEXT(gc); - assert(gc_old_space(gc) == space); - gc = next; - } + gc_list_validate_space(&gcstate->old[space].head, space); } + gc_list_validate_space(&gcstate->permanent_generation.head, visited); } static void validate_consistent_old_space(PyGC_Head *head) { - PyGC_Head *prev = head; PyGC_Head *gc = GC_NEXT(head); if (gc == head) { return; @@ -457,23 +465,13 @@ validate_consistent_old_space(PyGC_Head *head) PyGC_Head *truenext = GC_NEXT(gc); assert(truenext != NULL); assert(gc_old_space(gc) == old_space); - prev = gc; gc = truenext; } - assert(prev == GC_PREV(head)); } -static void -gc_list_validate_space(PyGC_Head *head, int space) { - PyGC_Head *gc = GC_NEXT(head); - while (gc != head) { - assert(gc_old_space(gc) == space); - gc = GC_NEXT(gc); - } -} #else -#define validate_old(g) do{}while(0) +#define validate_spaces(g) do{}while(0) #define validate_consistent_old_space(l) do{}while(0) #define gc_list_validate_space(l, s) do{}while(0) #endif @@ -494,7 +492,7 @@ update_refs(PyGC_Head *containers) next = GC_NEXT(gc); PyObject *op = FROM_GC(gc); if (_Py_IsImmortal(op)) { - gc_list_move(gc, &get_gc_state()->permanent_generation.head); + _PyObject_GC_UNTRACK(op); gc = next; continue; } @@ -733,13 +731,25 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) unreachable->_gc_next &= _PyGC_PREV_MASK; } +/* In theory, all tuples should be younger than the +* objects they refer to, as tuples are immutable. 
+* Therefore, untracking tuples in oldest-first order in the +* young generation before promoting them should have tracked +* all the tuples that can be untracked. +* +* Unfortunately, the C API allows tuples to be created +* and then filled in. So this won't untrack all tuples +* that can be untracked. It should untrack most of them +* and is much faster than a more complex approach that +* would untrack all relevant tuples. +*/ static void untrack_tuples(PyGC_Head *head) { - PyGC_Head *next, *gc = GC_NEXT(head); + PyGC_Head *gc = GC_NEXT(head); while (gc != head) { PyObject *op = FROM_GC(gc); - next = GC_NEXT(gc); + PyGC_Head *next = GC_NEXT(gc); if (PyTuple_CheckExact(op)) { _PyTuple_MaybeUntrack(op); } @@ -1293,8 +1303,10 @@ gc_collect_young(PyThreadState *tstate, struct gc_collection_stats *stats) { GCState *gcstate = &tstate->interp->gc; + validate_spaces(gcstate); PyGC_Head *young = &gcstate->young.head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; + untrack_tuples(young); GC_STAT_ADD(0, collections, 1); #ifdef Py_STATS { @@ -1308,39 +1320,21 @@ gc_collect_young(PyThreadState *tstate, PyGC_Head survivors; gc_list_init(&survivors); + gc_list_set_space(young, gcstate->visited_space); gc_collect_region(tstate, young, &survivors, stats); - Py_ssize_t survivor_count = 0; - if (gcstate->visited_space) { - /* objects in visited space have bit set, so we set it here */ - survivor_count = gc_list_set_space(&survivors, 1); - } - else { - PyGC_Head *gc; - for (gc = GC_NEXT(&survivors); gc != &survivors; gc = GC_NEXT(gc)) { -#ifdef GC_DEBUG - assert(gc_old_space(gc) == 0); -#endif - survivor_count++; - } - } - (void)survivor_count; // Silence compiler warning gc_list_merge(&survivors, visited); - validate_old(gcstate); + validate_spaces(gcstate); gcstate->young.count = 0; gcstate->old[gcstate->visited_space].count++; - Py_ssize_t scale_factor = gcstate->old[0].threshold; - if (scale_factor < 1) { - scale_factor = 1; - } - gcstate->work_to_do += 
gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; add_stats(gcstate, 0, stats); + validate_spaces(gcstate); } #ifndef NDEBUG static inline int IS_IN_VISITED(PyGC_Head *gc, int visited_space) { - assert(visited_space == 0 || flip_old_space(visited_space) == 0); + assert(visited_space == 0 || other_space(visited_space) == 0); return gc_old_space(gc) == visited_space; } #endif @@ -1348,7 +1342,7 @@ IS_IN_VISITED(PyGC_Head *gc, int visited_space) struct container_and_flag { PyGC_Head *container; int visited_space; - uintptr_t size; + intptr_t size; }; /* A traversal callback for adding to container) */ @@ -1371,7 +1365,7 @@ visit_add_to_container(PyObject *op, void *arg) return 0; } -static uintptr_t +static intptr_t expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCState *gcstate) { struct container_and_flag arg = { @@ -1385,6 +1379,7 @@ expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCStat * have been marked as visited */ assert(IS_IN_VISITED(gc, gcstate->visited_space)); PyObject *op = FROM_GC(gc); + assert(_PyObject_GC_IS_TRACKED(op)); if (_Py_IsImmortal(op)) { PyGC_Head *next = GC_NEXT(gc); gc_list_move(gc, &get_gc_state()->permanent_generation.head); @@ -1402,22 +1397,191 @@ expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCStat /* Do bookkeeping for a completed GC cycle */ static void -completed_cycle(GCState *gcstate) -{ -#ifdef Py_DEBUG - PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head; - assert(gc_list_is_empty(not_visited)); -#endif - gcstate->visited_space = flip_old_space(gcstate->visited_space); - /* Make sure all young objects have old space bit set correctly */ - PyGC_Head *young = &gcstate->young.head; - PyGC_Head *gc = GC_NEXT(young); - while (gc != young) { - PyGC_Head *next = GC_NEXT(gc); - gc_set_old_space(gc, gcstate->visited_space); - gc = next; +completed_scavenge(GCState *gcstate) +{ + /* We must observe two invariants: + * 1. 
Members of the permanent generation must be marked visited. + * 2. We cannot touch members of the permanent generation. */ + int visited; + if (gc_list_is_empty(&gcstate->permanent_generation.head)) { + /* Permanent generation is empty so we can flip spaces bit */ + int not_visited = gcstate->visited_space; + visited = other_space(not_visited); + gcstate->visited_space = visited; + /* Make sure all objects have visited bit set correctly */ + gc_list_set_space(&gcstate->young.head, not_visited); } + else { + /* We must move the objects from visited to pending space. */ + visited = gcstate->visited_space; + int not_visited = other_space(visited); + assert(gc_list_is_empty(&gcstate->old[not_visited].head)); + gc_list_merge(&gcstate->old[visited].head, &gcstate->old[not_visited].head); + gc_list_set_space(&gcstate->old[not_visited].head, not_visited); + } + assert(gc_list_is_empty(&gcstate->old[visited].head)); gcstate->work_to_do = 0; + gcstate->phase = GC_PHASE_MARK; +} + +static intptr_t +move_to_reachable(PyObject *op, PyGC_Head *reachable, int visited_space) +{ + if (op != NULL && !_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { + PyGC_Head *gc = AS_GC(op); + if (_PyObject_GC_IS_TRACKED(op) && + gc_old_space(gc) != visited_space) { + gc_flip_old_space(gc); + gc_list_move(gc, reachable); + return 1; + } + } + return 0; +} + +static intptr_t +mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space) +{ + // Transitively traverse all objects from reachable, until empty + struct container_and_flag arg = { + .container = reachable, + .visited_space = visited_space, + .size = 0 + }; + while (!gc_list_is_empty(reachable)) { + PyGC_Head *gc = _PyGCHead_NEXT(reachable); + assert(gc_old_space(gc) == visited_space); + gc_list_move(gc, visited); + PyObject *op = FROM_GC(gc); + traverseproc traverse = Py_TYPE(op)->tp_traverse; + (void) traverse(op, + visit_add_to_container, + &arg); + } + gc_list_validate_space(visited, visited_space); + return arg.size; +} 
+ +static intptr_t +mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, bool start) +{ + PyGC_Head reachable; + gc_list_init(&reachable); + Py_ssize_t objects_marked = 0; + // Move all objects on stacks to reachable + _PyRuntimeState *runtime = &_PyRuntime; + HEAD_LOCK(runtime); + PyThreadState* ts = PyInterpreterState_ThreadHead(interp); + HEAD_UNLOCK(runtime); + while (ts) { + _PyInterpreterFrame *frame = ts->current_frame; + while (frame) { + if (frame->owner == FRAME_OWNED_BY_CSTACK) { + frame = frame->previous; + continue; + } + _PyStackRef *locals = frame->localsplus; + _PyStackRef *sp = frame->stackpointer; + objects_marked += move_to_reachable(frame->f_locals, &reachable, visited_space); + PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj); + objects_marked += move_to_reachable(func, &reachable, visited_space); + while (sp > locals) { + sp--; + if (PyStackRef_IsNull(*sp)) { + continue; + } + PyObject *op = PyStackRef_AsPyObjectBorrow(*sp); + if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { + PyGC_Head *gc = AS_GC(op); + if (_PyObject_GC_IS_TRACKED(op) && + gc_old_space(gc) != visited_space) { + gc_flip_old_space(gc); + objects_marked++; + gc_list_move(gc, &reachable); + } + } + } + if (!start && frame->visited) { + // If this frame has already been visited, then the lower frames + // will have already been visited and will not have changed + break; + } + frame->visited = 1; + frame = frame->previous; + } + HEAD_LOCK(runtime); + ts = PyThreadState_Next(ts); + HEAD_UNLOCK(runtime); + } + objects_marked += mark_all_reachable(&reachable, visited, visited_space); + assert(gc_list_is_empty(&reachable)); + return objects_marked; +} + +static intptr_t +mark_global_roots(PyInterpreterState *interp, PyGC_Head *visited, int visited_space) +{ + PyGC_Head reachable; + gc_list_init(&reachable); + Py_ssize_t objects_marked = 0; + objects_marked += move_to_reachable(interp->sysdict, &reachable, visited_space); + objects_marked += 
move_to_reachable(interp->builtins, &reachable, visited_space); + objects_marked += move_to_reachable(interp->dict, &reachable, visited_space); + struct types_state *types = &interp->types; + for (int i = 0; i < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; i++) { + objects_marked += move_to_reachable(types->builtins.initialized[i].tp_dict, &reachable, visited_space); + objects_marked += move_to_reachable(types->builtins.initialized[i].tp_subclasses, &reachable, visited_space); + } + for (int i = 0; i < _Py_MAX_MANAGED_STATIC_EXT_TYPES; i++) { + objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_dict, &reachable, visited_space); + objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_subclasses, &reachable, visited_space); + } + objects_marked += mark_all_reachable(&reachable, visited, visited_space); + assert(gc_list_is_empty(&reachable)); + return objects_marked; +} + +static intptr_t +mark_at_start(PyThreadState *tstate) +{ + // TO DO -- Make this incremental + GCState *gcstate = &tstate->interp->gc; + PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; + Py_ssize_t objects_marked = mark_global_roots(tstate->interp, visited, gcstate->visited_space); + objects_marked += mark_stacks(tstate->interp, visited, gcstate->visited_space, true); + gcstate->work_to_do -= objects_marked; + gcstate->phase = GC_PHASE_COLLECT; + validate_spaces(gcstate); + return objects_marked; +} + +static intptr_t +assess_work_to_do(GCState *gcstate) +{ + /* The amount of work we want to do depends on three things. + * 1. The number of new objects created + * 2. The growth in heap size since the last collection + * 3. The heap size (up to the number of new objects, to avoid quadratic effects) + * + * For a steady state heap, the amount of work to do is three times the number + * of new objects added to the heap. This ensures that we stay ahead in the + * worst case of all new objects being garbage. 
+ * + * This could be improved by tracking survival rates, but it is still a + * large improvement on the non-marking approach. + */ + intptr_t scale_factor = gcstate->old[0].threshold; + if (scale_factor < 2) { + scale_factor = 2; + } + intptr_t new_objects = gcstate->young.count; + intptr_t max_heap_fraction = new_objects*3/2; + intptr_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; + if (heap_fraction > max_heap_fraction) { + heap_fraction = max_heap_fraction; + } + gcstate->young.count = 0; + return new_objects + heap_fraction; } static void @@ -1425,18 +1589,30 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) { GC_STAT_ADD(1, collections, 1); GCState *gcstate = &tstate->interp->gc; + gcstate->work_to_do += assess_work_to_do(gcstate); + untrack_tuples(&gcstate->young.head); + if (gcstate->phase == GC_PHASE_MARK) { + Py_ssize_t objects_marked = mark_at_start(tstate); + GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); + gcstate->work_to_do -= objects_marked; + validate_spaces(gcstate); + return; + } PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; PyGC_Head increment; gc_list_init(&increment); - Py_ssize_t scale_factor = gcstate->old[0].threshold; - if (scale_factor < 1) { - scale_factor = 1; - } + int scale_factor = gcstate->old[0].threshold; + if (scale_factor < 2) { + scale_factor = 2; + } + intptr_t objects_marked = mark_stacks(tstate->interp, visited, gcstate->visited_space, false); + GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); + gcstate->work_to_do -= objects_marked; + gc_list_set_space(&gcstate->young.head, gcstate->visited_space); gc_list_merge(&gcstate->young.head, &increment); - gcstate->young.count = 0; gc_list_validate_space(&increment, gcstate->visited_space); - Py_ssize_t increment_size = 0; + Py_ssize_t increment_size = gc_list_size(&increment); while (increment_size < 
gcstate->work_to_do) { if (gc_list_is_empty(not_visited)) { break; @@ -1444,54 +1620,56 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) PyGC_Head *gc = _PyGCHead_NEXT(not_visited); gc_list_move(gc, &increment); increment_size++; + assert(!_Py_IsImmortal(FROM_GC(gc))); gc_set_old_space(gc, gcstate->visited_space); increment_size += expand_region_transitively_reachable(&increment, gc, gcstate); } + GC_STAT_ADD(1, objects_not_transitively_reachable, increment_size); validate_list(&increment, collecting_clear_unreachable_clear); gc_list_validate_space(&increment, gcstate->visited_space); PyGC_Head survivors; gc_list_init(&survivors); gc_collect_region(tstate, &increment, &survivors, stats); - gc_list_validate_space(&survivors, gcstate->visited_space); gc_list_merge(&survivors, visited); assert(gc_list_is_empty(&increment)); gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; gcstate->work_to_do -= increment_size; - validate_old(gcstate); add_stats(gcstate, 1, stats); if (gc_list_is_empty(not_visited)) { - completed_cycle(gcstate); + completed_scavenge(gcstate); } + validate_spaces(gcstate); } - static void gc_collect_full(PyThreadState *tstate, struct gc_collection_stats *stats) { GC_STAT_ADD(2, collections, 1); GCState *gcstate = &tstate->interp->gc; - validate_old(gcstate); + validate_spaces(gcstate); PyGC_Head *young = &gcstate->young.head; PyGC_Head *pending = &gcstate->old[gcstate->visited_space^1].head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; + untrack_tuples(young); /* merge all generations into visited */ - gc_list_validate_space(young, gcstate->visited_space); - gc_list_set_space(pending, gcstate->visited_space); gc_list_merge(young, pending); + gc_list_validate_space(pending, 1-gcstate->visited_space); + gc_list_set_space(pending, gcstate->visited_space); gcstate->young.count = 0; gc_list_merge(pending, visited); + validate_spaces(gcstate); gc_collect_region(tstate, visited, 
visited, stats); + validate_spaces(gcstate); gcstate->young.count = 0; gcstate->old[0].count = 0; gcstate->old[1].count = 0; - - gcstate->work_to_do = - gcstate->young.threshold * 2; + completed_scavenge(gcstate); _PyGC_ClearAllFreeLists(tstate->interp); - validate_old(gcstate); + validate_spaces(gcstate); add_stats(gcstate, 2, stats); } @@ -1733,20 +1911,23 @@ void _PyGC_Freeze(PyInterpreterState *interp) { GCState *gcstate = &interp->gc; - /* The permanent_generation has its old space bit set to zero */ - if (gcstate->visited_space) { - gc_list_set_space(&gcstate->young.head, 0); - } + /* The permanent_generation must be visited */ + gc_list_set_space(&gcstate->young.head, gcstate->visited_space); gc_list_merge(&gcstate->young.head, &gcstate->permanent_generation.head); gcstate->young.count = 0; PyGC_Head*old0 = &gcstate->old[0].head; PyGC_Head*old1 = &gcstate->old[1].head; + if (gcstate->visited_space) { + gc_list_set_space(old0, 1); + } + else { + gc_list_set_space(old1, 0); + } gc_list_merge(old0, &gcstate->permanent_generation.head); gcstate->old[0].count = 0; - gc_list_set_space(old1, 0); gc_list_merge(old1, &gcstate->permanent_generation.head); gcstate->old[1].count = 0; - validate_old(gcstate); + validate_spaces(gcstate); } void @@ -1754,8 +1935,8 @@ _PyGC_Unfreeze(PyInterpreterState *interp) { GCState *gcstate = &interp->gc; gc_list_merge(&gcstate->permanent_generation.head, - &gcstate->old[0].head); - validate_old(gcstate); + &gcstate->old[gcstate->visited_space].head); + validate_spaces(gcstate); } Py_ssize_t @@ -1860,7 +2041,7 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) _Py_stats->object_stats.object_visits = 0; } #endif - validate_old(gcstate); + validate_spaces(gcstate); _Py_atomic_store_int(&gcstate->collecting, 0); return stats.uncollectable + stats.collected; } diff --git a/Python/specialize.c b/Python/specialize.c index d03310de782fe7..504eef4f448429 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ 
-231,6 +231,8 @@ print_gc_stats(FILE *out, GCStats *stats) fprintf(out, "GC[%d] collections: %" PRIu64 "\n", i, stats[i].collections); fprintf(out, "GC[%d] object visits: %" PRIu64 "\n", i, stats[i].object_visits); fprintf(out, "GC[%d] objects collected: %" PRIu64 "\n", i, stats[i].objects_collected); + fprintf(out, "GC[%d] objects reachable from roots: %" PRIu64 "\n", i, stats[i].objects_transitively_reachable); + fprintf(out, "GC[%d] objects not reachable from roots: %" PRIu64 "\n", i, stats[i].objects_not_transitively_reachable); } } diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py index abfdea78253760..bc7ccfe33e777d 100644 --- a/Tools/scripts/summarize_stats.py +++ b/Tools/scripts/summarize_stats.py @@ -1118,6 +1118,8 @@ def calc_gc_stats(stats: Stats) -> Rows: Count(gen["collections"]), Count(gen["objects collected"]), Count(gen["object visits"]), + Count(gen["objects reachable from roots"]), + Count(gen["objects not reachable from roots"]), ) for (i, gen) in enumerate(gc_stats) ] @@ -1127,7 +1129,8 @@ def calc_gc_stats(stats: Stats) -> Rows: "GC collections and effectiveness", [ Table( - ("Generation:", "Collections:", "Objects collected:", "Object visits:"), + ("Generation:", "Collections:", "Objects collected:", "Object visits:", + "Reachable from roots:", "Not reachable from roots:"), calc_gc_stats, ) ], From bf21e2160d1dc6869fb230b90a23ab030835395b Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 2 Dec 2024 11:14:47 +0100 Subject: [PATCH 06/76] Fix Unicode encode_wstr_utf8() (#127420) Raise RuntimeError instead of RuntimeWarning. 
--- Objects/unicodeobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 33fa21d4c7d1bf..463da06445984b 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -16158,7 +16158,7 @@ encode_wstr_utf8(wchar_t *wstr, char **str, const char *name) int res; res = _Py_EncodeUTF8Ex(wstr, str, NULL, NULL, 1, _Py_ERROR_STRICT); if (res == -2) { - PyErr_Format(PyExc_RuntimeWarning, "cannot encode %s", name); + PyErr_Format(PyExc_RuntimeError, "cannot encode %s", name); return -1; } if (res < 0) { From c112de1da2d18e3b5c2ea30b0e409f18e574efd8 Mon Sep 17 00:00:00 2001 From: Peter Bierma Date: Mon, 2 Dec 2024 07:50:34 -0500 Subject: [PATCH 07/76] gh-126890: Restore stripped `ssl` docstrings (GH-127281) --- Modules/_ssl.c | 63 ++++++++++++++++++++++++++++++--------- Modules/clinic/_ssl.c.h | 65 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 114 insertions(+), 14 deletions(-) diff --git a/Modules/_ssl.c b/Modules/_ssl.c index e5b8bf21002ea5..59c414f9ce1ceb 100644 --- a/Modules/_ssl.c +++ b/Modules/_ssl.c @@ -2162,11 +2162,17 @@ _ssl__SSLSocket_compression_impl(PySSLSocket *self) @critical_section @getter _ssl._SSLSocket.context + +This changes the context associated with the SSLSocket. + +This is typically used from within a callback function set by the sni_callback +on the SSLContext to change the certificate information associated with the +SSLSocket before the cryptographic exchange handshake messages. [clinic start generated code]*/ static PyObject * _ssl__SSLSocket_context_get_impl(PySSLSocket *self) -/*[clinic end generated code: output=d23e82f72f32e3d7 input=25aa82e4d9fa344a]*/ +/*[clinic end generated code: output=d23e82f72f32e3d7 input=7cbb97407c2ace30]*/ { return Py_NewRef(self->ctx); } @@ -2233,11 +2239,15 @@ _ssl__SSLSocket_server_hostname_get_impl(PySSLSocket *self) @critical_section @getter _ssl._SSLSocket.owner + +The Python-level owner of this object. 
+ +Passed as "self" in servername callback. [clinic start generated code]*/ static PyObject * _ssl__SSLSocket_owner_get_impl(PySSLSocket *self) -/*[clinic end generated code: output=1f278cb930382927 input=bc2861ff3cf1402e]*/ +/*[clinic end generated code: output=1f278cb930382927 input=978a8382d9c25c92]*/ { if (self->owner == NULL) { Py_RETURN_NONE; @@ -2851,11 +2861,13 @@ _ssl__SSLSocket_verify_client_post_handshake_impl(PySSLSocket *self) @critical_section @getter _ssl._SSLSocket.session + +The underlying SSLSession object. [clinic start generated code]*/ static PyObject * _ssl__SSLSocket_session_get_impl(PySSLSocket *self) -/*[clinic end generated code: output=a5cd5755b35da670 input=b9792df9255a9f63]*/ +/*[clinic end generated code: output=a5cd5755b35da670 input=d427318604244bf8]*/ { /* get_session can return sessions from a server-side connection, * it does not check for handshake done or client socket. */ @@ -3657,11 +3669,13 @@ _ssl__SSLContext_maximum_version_set_impl(PySSLContext *self, @critical_section @getter _ssl._SSLContext.num_tickets + +Control the number of TLSv1.3 session tickets. [clinic start generated code]*/ static PyObject * _ssl__SSLContext_num_tickets_get_impl(PySSLContext *self) -/*[clinic end generated code: output=3d06d016318846c9 input=1dee26d75163c073]*/ +/*[clinic end generated code: output=3d06d016318846c9 input=1e2599a2e22564ff]*/ { // Clinic seems to be misbehaving when the comment is wrapped with in directive #if defined(TLS1_3_VERSION) && !defined(OPENSSL_NO_TLS1_3) @@ -3705,11 +3719,13 @@ _ssl__SSLContext_num_tickets_set_impl(PySSLContext *self, PyObject *value) @critical_section @getter _ssl._SSLContext.security_level + +The current security level. 
[clinic start generated code]*/ static PyObject * _ssl__SSLContext_security_level_get_impl(PySSLContext *self) -/*[clinic end generated code: output=56ece09e6a9572d0 input=a0416598e07c3183]*/ +/*[clinic end generated code: output=56ece09e6a9572d0 input=2bdeecb57bb86e3f]*/ { PyObject *res = PyLong_FromLong(SSL_CTX_get_security_level(self->ctx)); return res; @@ -4666,11 +4682,18 @@ _servername_callback(SSL *s, int *al, void *args) @critical_section @getter _ssl._SSLContext.sni_callback + +Set a callback that will be called when a server name is provided by the SSL/TLS client in the SNI extension. + +If the argument is None then the callback is disabled. The method is called +with the SSLSocket, the server name as a string, and the SSLContext object. + +See RFC 6066 for details of the SNI extension. [clinic start generated code]*/ static PyObject * _ssl__SSLContext_sni_callback_get_impl(PySSLContext *self) -/*[clinic end generated code: output=961e6575cdfaf036 input=22dd28c31fdc4318]*/ +/*[clinic end generated code: output=961e6575cdfaf036 input=9b2473c5e984cfe6]*/ { PyObject *cb = self->set_sni_cb; if (cb == NULL) { @@ -5243,11 +5266,13 @@ memory_bio_dealloc(PySSLMemoryBIO *self) @critical_section @getter _ssl.MemoryBIO.pending + +The number of bytes pending in the memory BIO. [clinic start generated code]*/ static PyObject * _ssl_MemoryBIO_pending_get_impl(PySSLMemoryBIO *self) -/*[clinic end generated code: output=19236a32a51ac8ff input=c0b6d14eba107f6a]*/ +/*[clinic end generated code: output=19236a32a51ac8ff input=02d9063d8ac31732]*/ { size_t res = BIO_ctrl_pending(self->bio); return PyLong_FromSize_t(res); @@ -5257,11 +5282,13 @@ _ssl_MemoryBIO_pending_get_impl(PySSLMemoryBIO *self) @critical_section @getter _ssl.MemoryBIO.eof + +Whether the memory BIO is at EOF. 
[clinic start generated code]*/ static PyObject * _ssl_MemoryBIO_eof_get_impl(PySSLMemoryBIO *self) -/*[clinic end generated code: output=c255a9ea16e31b92 input=0f5c6be69752e04c]*/ +/*[clinic end generated code: output=c255a9ea16e31b92 input=c6ecc12c4509de1f]*/ { size_t pending = BIO_ctrl_pending(self->bio); return PyBool_FromLong((pending == 0) && self->eof_written); @@ -5502,11 +5529,13 @@ PySSLSession_clear(PySSLSession *self) @critical_section @getter _ssl.SSLSession.time + +Session creation time (seconds since epoch). [clinic start generated code]*/ static PyObject * _ssl_SSLSession_time_get_impl(PySSLSession *self) -/*[clinic end generated code: output=4b887b9299de9be4 input=8d1e4afd09103279]*/ +/*[clinic end generated code: output=4b887b9299de9be4 input=67f2325284450ae2]*/ { #if OPENSSL_VERSION_NUMBER >= 0x30300000L return _PyLong_FromTime_t(SSL_SESSION_get_time_ex(self->session)); @@ -5519,11 +5548,13 @@ _ssl_SSLSession_time_get_impl(PySSLSession *self) @critical_section @getter _ssl.SSLSession.timeout + +Session timeout (delta in seconds). [clinic start generated code]*/ static PyObject * _ssl_SSLSession_timeout_get_impl(PySSLSession *self) -/*[clinic end generated code: output=82339c148ab2f7d1 input=ae5e84a9d85df60d]*/ +/*[clinic end generated code: output=82339c148ab2f7d1 input=cd17c2b087c442f2]*/ { long timeout = SSL_SESSION_get_timeout(self->session); PyObject *res = PyLong_FromLong(timeout); @@ -5534,11 +5565,13 @@ _ssl_SSLSession_timeout_get_impl(PySSLSession *self) @critical_section @getter _ssl.SSLSession.ticket_lifetime_hint + +Ticket life time hint. 
[clinic start generated code]*/ static PyObject * _ssl_SSLSession_ticket_lifetime_hint_get_impl(PySSLSession *self) -/*[clinic end generated code: output=c8b6db498136c275 input=d0e06942ddd8d07f]*/ +/*[clinic end generated code: output=c8b6db498136c275 input=f0e2df50961a7806]*/ { unsigned long hint = SSL_SESSION_get_ticket_lifetime_hint(self->session); return PyLong_FromUnsignedLong(hint); @@ -5548,11 +5581,13 @@ _ssl_SSLSession_ticket_lifetime_hint_get_impl(PySSLSession *self) @critical_section @getter _ssl.SSLSession.id + +Session ID. [clinic start generated code]*/ static PyObject * _ssl_SSLSession_id_get_impl(PySSLSession *self) -/*[clinic end generated code: output=c532fb96b10c5adf input=e7322372cf6325dd]*/ +/*[clinic end generated code: output=c532fb96b10c5adf input=0a379e64312b776d]*/ { const unsigned char *id; @@ -5565,11 +5600,13 @@ _ssl_SSLSession_id_get_impl(PySSLSession *self) @critical_section @getter _ssl.SSLSession.has_ticket + +Does the session contain a ticket? [clinic start generated code]*/ static PyObject * _ssl_SSLSession_has_ticket_get_impl(PySSLSession *self) -/*[clinic end generated code: output=aa3ccfc40b10b96d input=1a48ae8955fa9601]*/ +/*[clinic end generated code: output=aa3ccfc40b10b96d input=fa475555f53a5086]*/ { int res = SSL_SESSION_has_ticket(self->session); return res ? 
Py_True : Py_False; diff --git a/Modules/clinic/_ssl.c.h b/Modules/clinic/_ssl.c.h index 957f5ced3a2cee..1ff85e32ffe5a0 100644 --- a/Modules/clinic/_ssl.c.h +++ b/Modules/clinic/_ssl.c.h @@ -258,6 +258,14 @@ _ssl__SSLSocket_compression(PySSLSocket *self, PyObject *Py_UNUSED(ignored)) return _ssl__SSLSocket_compression_impl(self); } +PyDoc_STRVAR(_ssl__SSLSocket_context__doc__, +"This changes the context associated with the SSLSocket.\n" +"\n" +"This is typically used from within a callback function set by the sni_callback\n" +"on the SSLContext to change the certificate information associated with the\n" +"SSLSocket before the cryptographic exchange handshake messages."); +#define _ssl__SSLSocket_context_DOCSTR _ssl__SSLSocket_context__doc__ + #if !defined(_ssl__SSLSocket_context_DOCSTR) # define _ssl__SSLSocket_context_DOCSTR NULL #endif @@ -366,6 +374,12 @@ _ssl__SSLSocket_server_hostname_get(PySSLSocket *self, void *Py_UNUSED(context)) return return_value; } +PyDoc_STRVAR(_ssl__SSLSocket_owner__doc__, +"The Python-level owner of this object.\n" +"\n" +"Passed as \"self\" in servername callback."); +#define _ssl__SSLSocket_owner_DOCSTR _ssl__SSLSocket_owner__doc__ + #if !defined(_ssl__SSLSocket_owner_DOCSTR) # define _ssl__SSLSocket_owner_DOCSTR NULL #endif @@ -652,6 +666,10 @@ _ssl__SSLSocket_verify_client_post_handshake(PySSLSocket *self, PyObject *Py_UNU return return_value; } +PyDoc_STRVAR(_ssl__SSLSocket_session__doc__, +"The underlying SSLSession object."); +#define _ssl__SSLSocket_session_DOCSTR _ssl__SSLSocket_session__doc__ + #if !defined(_ssl__SSLSocket_session_DOCSTR) # define _ssl__SSLSocket_session_DOCSTR NULL #endif @@ -1057,6 +1075,10 @@ _ssl__SSLContext_maximum_version_set(PySSLContext *self, PyObject *value, void * return return_value; } +PyDoc_STRVAR(_ssl__SSLContext_num_tickets__doc__, +"Control the number of TLSv1.3 session tickets."); +#define _ssl__SSLContext_num_tickets_DOCSTR _ssl__SSLContext_num_tickets__doc__ + #if 
!defined(_ssl__SSLContext_num_tickets_DOCSTR) # define _ssl__SSLContext_num_tickets_DOCSTR NULL #endif @@ -1107,6 +1129,10 @@ _ssl__SSLContext_num_tickets_set(PySSLContext *self, PyObject *value, void *Py_U return return_value; } +PyDoc_STRVAR(_ssl__SSLContext_security_level__doc__, +"The current security level."); +#define _ssl__SSLContext_security_level_DOCSTR _ssl__SSLContext_security_level__doc__ + #if !defined(_ssl__SSLContext_security_level_DOCSTR) # define _ssl__SSLContext_security_level_DOCSTR NULL #endif @@ -1745,6 +1771,15 @@ _ssl__SSLContext_set_ecdh_curve(PySSLContext *self, PyObject *name) return return_value; } +PyDoc_STRVAR(_ssl__SSLContext_sni_callback__doc__, +"Set a callback that will be called when a server name is provided by the SSL/TLS client in the SNI extension.\n" +"\n" +"If the argument is None then the callback is disabled. The method is called\n" +"with the SSLSocket, the server name as a string, and the SSLContext object.\n" +"\n" +"See RFC 6066 for details of the SNI extension."); +#define _ssl__SSLContext_sni_callback_DOCSTR _ssl__SSLContext_sni_callback__doc__ + #if !defined(_ssl__SSLContext_sni_callback_DOCSTR) # define _ssl__SSLContext_sni_callback_DOCSTR NULL #endif @@ -2063,6 +2098,10 @@ _ssl_MemoryBIO(PyTypeObject *type, PyObject *args, PyObject *kwargs) return return_value; } +PyDoc_STRVAR(_ssl_MemoryBIO_pending__doc__, +"The number of bytes pending in the memory BIO."); +#define _ssl_MemoryBIO_pending_DOCSTR _ssl_MemoryBIO_pending__doc__ + #if !defined(_ssl_MemoryBIO_pending_DOCSTR) # define _ssl_MemoryBIO_pending_DOCSTR NULL #endif @@ -2088,6 +2127,10 @@ _ssl_MemoryBIO_pending_get(PySSLMemoryBIO *self, void *Py_UNUSED(context)) return return_value; } +PyDoc_STRVAR(_ssl_MemoryBIO_eof__doc__, +"Whether the memory BIO is at EOF."); +#define _ssl_MemoryBIO_eof_DOCSTR _ssl_MemoryBIO_eof__doc__ + #if !defined(_ssl_MemoryBIO_eof_DOCSTR) # define _ssl_MemoryBIO_eof_DOCSTR NULL #endif @@ -2217,6 +2260,10 @@ 
_ssl_MemoryBIO_write_eof(PySSLMemoryBIO *self, PyObject *Py_UNUSED(ignored)) return return_value; } +PyDoc_STRVAR(_ssl_SSLSession_time__doc__, +"Session creation time (seconds since epoch)."); +#define _ssl_SSLSession_time_DOCSTR _ssl_SSLSession_time__doc__ + #if !defined(_ssl_SSLSession_time_DOCSTR) # define _ssl_SSLSession_time_DOCSTR NULL #endif @@ -2242,6 +2289,10 @@ _ssl_SSLSession_time_get(PySSLSession *self, void *Py_UNUSED(context)) return return_value; } +PyDoc_STRVAR(_ssl_SSLSession_timeout__doc__, +"Session timeout (delta in seconds)."); +#define _ssl_SSLSession_timeout_DOCSTR _ssl_SSLSession_timeout__doc__ + #if !defined(_ssl_SSLSession_timeout_DOCSTR) # define _ssl_SSLSession_timeout_DOCSTR NULL #endif @@ -2267,6 +2318,10 @@ _ssl_SSLSession_timeout_get(PySSLSession *self, void *Py_UNUSED(context)) return return_value; } +PyDoc_STRVAR(_ssl_SSLSession_ticket_lifetime_hint__doc__, +"Ticket life time hint."); +#define _ssl_SSLSession_ticket_lifetime_hint_DOCSTR _ssl_SSLSession_ticket_lifetime_hint__doc__ + #if !defined(_ssl_SSLSession_ticket_lifetime_hint_DOCSTR) # define _ssl_SSLSession_ticket_lifetime_hint_DOCSTR NULL #endif @@ -2292,6 +2347,10 @@ _ssl_SSLSession_ticket_lifetime_hint_get(PySSLSession *self, void *Py_UNUSED(con return return_value; } +PyDoc_STRVAR(_ssl_SSLSession_id__doc__, +"Session ID."); +#define _ssl_SSLSession_id_DOCSTR _ssl_SSLSession_id__doc__ + #if !defined(_ssl_SSLSession_id_DOCSTR) # define _ssl_SSLSession_id_DOCSTR NULL #endif @@ -2317,6 +2376,10 @@ _ssl_SSLSession_id_get(PySSLSession *self, void *Py_UNUSED(context)) return return_value; } +PyDoc_STRVAR(_ssl_SSLSession_has_ticket__doc__, +"Does the session contain a ticket?"); +#define _ssl_SSLSession_has_ticket_DOCSTR _ssl_SSLSession_has_ticket__doc__ + #if !defined(_ssl_SSLSession_has_ticket_DOCSTR) # define _ssl_SSLSession_has_ticket_DOCSTR NULL #endif @@ -2767,4 +2830,4 @@ _ssl_enum_crls(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObje #ifndef 
_SSL_ENUM_CRLS_METHODDEF #define _SSL_ENUM_CRLS_METHODDEF #endif /* !defined(_SSL_ENUM_CRLS_METHODDEF) */ -/*[clinic end generated code: output=44ab066d21277ee5 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=654d6d7af659f6cd input=a9049054013a1b77]*/ From 930ba0ce605eee9e3b992fa368b00a3f2b7dc4c1 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Mon, 2 Dec 2024 16:14:40 +0300 Subject: [PATCH 08/76] gh-126618: fix repr(itertools.count(sys.maxsize)) (#127048) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Lib/test/test_itertools.py | 23 +++++++++++++++++++ ...-11-20-08-54-11.gh-issue-126618.ef_53g.rst | 2 ++ Modules/itertoolsmodule.c | 9 +++----- 3 files changed, 28 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-20-08-54-11.gh-issue-126618.ef_53g.rst diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py index b94d688738f9e8..f0fd1d28f56f55 100644 --- a/Lib/test/test_itertools.py +++ b/Lib/test/test_itertools.py @@ -518,6 +518,15 @@ def test_count(self): self.assertEqual(next(c), -8) self.assertEqual(repr(count(10.25)), 'count(10.25)') self.assertEqual(repr(count(10.0)), 'count(10.0)') + + self.assertEqual(repr(count(maxsize)), f'count({maxsize})') + c = count(maxsize - 1) + self.assertEqual(repr(c), f'count({maxsize - 1})') + next(c) # c is now at maxsize + self.assertEqual(repr(c), f'count({maxsize})') + next(c) + self.assertEqual(repr(c), f'count({maxsize + 1})') + self.assertEqual(type(next(count(10.0))), float) for i in (-sys.maxsize-5, -sys.maxsize+5 ,-10, -1, 0, 10, sys.maxsize-5, sys.maxsize+5): # Test repr @@ -578,6 +587,20 @@ def test_count_with_step(self): self.assertEqual(type(next(c)), int) self.assertEqual(type(next(c)), float) + c = count(maxsize -2, 2) + self.assertEqual(repr(c), f'count({maxsize - 2}, 2)') + next(c) # c is now at maxsize +
self.assertEqual(repr(c), f'count({maxsize}, 2)') + next(c) + self.assertEqual(repr(c), f'count({maxsize + 2}, 2)') + + c = count(maxsize + 1, -1) + self.assertEqual(repr(c), f'count({maxsize + 1}, -1)') + next(c) # c is now at maxsize + self.assertEqual(repr(c), f'count({maxsize}, -1)') + next(c) + self.assertEqual(repr(c), f'count({maxsize - 1}, -1)') + @threading_helper.requires_working_threading() def test_count_threading(self, step=1): # this test verifies multithreading consistency, which is diff --git a/Misc/NEWS.d/next/Library/2024-11-20-08-54-11.gh-issue-126618.ef_53g.rst b/Misc/NEWS.d/next/Library/2024-11-20-08-54-11.gh-issue-126618.ef_53g.rst new file mode 100644 index 00000000000000..7a0a7b7517b70d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-20-08-54-11.gh-issue-126618.ef_53g.rst @@ -0,0 +1,2 @@ +Fix the representation of :class:`itertools.count` objects when the count +value is :data:`sys.maxsize`. diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c index 78fbdcdf77a923..3f736f0cf19968 100644 --- a/Modules/itertoolsmodule.c +++ b/Modules/itertoolsmodule.c @@ -3235,7 +3235,7 @@ typedef struct { fast_mode: when cnt an integer < PY_SSIZE_T_MAX and no step is specified. - assert(cnt != PY_SSIZE_T_MAX && long_cnt == NULL && long_step==PyLong(1)); + assert(long_cnt == NULL && long_step==PyLong(1)); Advances with: cnt += 1 When count hits PY_SSIZE_T_MAX, switch to slow_mode.
@@ -3291,9 +3291,6 @@ itertools_count_impl(PyTypeObject *type, PyObject *long_cnt, PyErr_Clear(); fast_mode = 0; } - else if (cnt == PY_SSIZE_T_MAX) { - fast_mode = 0; - } } } else { cnt = 0; @@ -3325,7 +3322,7 @@ itertools_count_impl(PyTypeObject *type, PyObject *long_cnt, else cnt = PY_SSIZE_T_MAX; - assert((cnt != PY_SSIZE_T_MAX && long_cnt == NULL && fast_mode) || + assert((long_cnt == NULL && fast_mode) || (cnt == PY_SSIZE_T_MAX && long_cnt != NULL && !fast_mode)); assert(!fast_mode || (PyLong_Check(long_step) && PyLong_AS_LONG(long_step) == 1)); @@ -3418,7 +3415,7 @@ count_next(countobject *lz) static PyObject * count_repr(countobject *lz) { - if (lz->cnt != PY_SSIZE_T_MAX) + if (lz->long_cnt == NULL) return PyUnicode_FromFormat("%s(%zd)", _PyType_Name(Py_TYPE(lz)), lz->cnt); From 31f16e427b545f66a9a45ea9dd6c933975ce0e4c Mon Sep 17 00:00:00 2001 From: Giovanni Siragusa Date: Mon, 2 Dec 2024 14:18:30 +0100 Subject: [PATCH 09/76] gh-109523: Raise a BlockingIOError if reading text from a non-blocking stream cannot immediately return bytes. (GH-122933) --- Doc/library/io.rst | 20 +++++++++++++++++++ Doc/whatsnew/3.14.rst | 9 +++++++++ Lib/_pyio.py | 5 ++++- Lib/test/test_io.py | 16 +++++++++++++++ Misc/ACKS | 1 + ...-08-12-10-15-19.gh-issue-109523.S2c3fi.rst | 1 + Modules/_io/textio.c | 6 ++++++ 7 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/C_API/2024-08-12-10-15-19.gh-issue-109523.S2c3fi.rst diff --git a/Doc/library/io.rst b/Doc/library/io.rst index f793d7a7ef9a84..0d8cc5171d5476 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -64,6 +64,12 @@ In-memory text streams are also available as :class:`StringIO` objects:: f = io.StringIO("some initial text data") +.. note:: + + When working with a non-blocking stream, be aware that read operations on text I/O objects + might raise a :exc:`BlockingIOError` if the stream cannot perform the operation + immediately. 
+ The text stream API is described in detail in the documentation of :class:`TextIOBase`. @@ -770,6 +776,11 @@ than raw I/O does. Read and return *size* bytes, or if *size* is not given or negative, until EOF or if the read call would block in non-blocking mode. + .. note:: + + When the underlying raw stream is non-blocking, a :exc:`BlockingIOError` + may be raised if a read operation cannot be completed immediately. + .. method:: read1(size=-1, /) Read and return up to *size* bytes with only one call on the raw stream. @@ -779,6 +790,10 @@ than raw I/O does. .. versionchanged:: 3.7 The *size* argument is now optional. + .. note:: + + When the underlying raw stream is non-blocking, a :exc:`BlockingIOError` + may be raised if a read operation cannot be completed immediately. .. class:: BufferedWriter(raw, buffer_size=DEFAULT_BUFFER_SIZE) @@ -1007,6 +1022,11 @@ Text I/O .. versionchanged:: 3.10 The *encoding* argument now supports the ``"locale"`` dummy encoding name. + .. note:: + + When the underlying raw stream is non-blocking, a :exc:`BlockingIOError` + may be raised if a read operation cannot be completed immediately. + :class:`TextIOWrapper` provides these data attributes and methods in addition to those from :class:`TextIOBase` and :class:`IOBase`: diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index f9322da3d4fbb0..75d027d33ccd16 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -404,6 +404,15 @@ inspect (Contributed by Zhikang Yan in :gh:`125634`.) + +io +-- + +* Reading text from a non-blocking stream with ``read`` may now raise a + :exc:`BlockingIOError` if the operation cannot immediately return bytes. + (Contributed by Giovanni Siragusa in :gh:`109523`.) 
+ + json ---- diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 42b0aea4e2eb2e..14961c39d3541d 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -2545,9 +2545,12 @@ def read(self, size=None): size = size_index() decoder = self._decoder or self._get_decoder() if size < 0: + chunk = self.buffer.read() + if chunk is None: + raise BlockingIOError("Read returned None.") # Read everything. result = (self._get_decoded_chars() + - decoder.decode(self.buffer.read(), final=True)) + decoder.decode(chunk, final=True)) if self._snapshot is not None: self._set_decoded_chars('') self._snapshot = None diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index f1f8ce57668f3b..81c17b2731cc58 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -3932,6 +3932,22 @@ def test_issue35928(self): f.write(res) self.assertEqual(res + f.readline(), 'foo\nbar\n') + @unittest.skipUnless(hasattr(os, "pipe"), "requires os.pipe()") + def test_read_non_blocking(self): + import os + r, w = os.pipe() + try: + os.set_blocking(r, False) + with self.io.open(r, 'rt') as textfile: + r = None + # Nothing has been written so a non-blocking read raises a BlockingIOError exception. + with self.assertRaises(BlockingIOError): + textfile.read() + finally: + if r is not None: + os.close(r) + os.close(w) + class MemviewBytesIO(io.BytesIO): '''A BytesIO object whose read method returns memoryviews diff --git a/Misc/ACKS b/Misc/ACKS index fc4b83a0e2b823..913f7c8ecf5f1e 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1736,6 +1736,7 @@ Ng Pheng Siong Yann Sionneau George Sipe J. 
Sipprell +Giovanni Siragusa Ngalim Siregar Kragen Sitaker Kaartic Sivaraam diff --git a/Misc/NEWS.d/next/C_API/2024-08-12-10-15-19.gh-issue-109523.S2c3fi.rst b/Misc/NEWS.d/next/C_API/2024-08-12-10-15-19.gh-issue-109523.S2c3fi.rst new file mode 100644 index 00000000000000..9d6b2e0c565623 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2024-08-12-10-15-19.gh-issue-109523.S2c3fi.rst @@ -0,0 +1 @@ +Reading text from a non-blocking stream with ``read`` may now raise a :exc:`BlockingIOError` if the operation cannot immediately return bytes. diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 0d851ee211511c..791ee070401fe5 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1992,6 +1992,12 @@ _io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n) if (bytes == NULL) goto fail; + if (bytes == Py_None){ + Py_DECREF(bytes); + PyErr_SetString(PyExc_BlockingIOError, "Read returned None."); + return NULL; + } + _PyIO_State *state = self->state; if (Py_IS_TYPE(self->decoder, state->PyIncrementalNewlineDecoder_Type)) decoded = _PyIncrementalNewlineDecoder_decode(self->decoder, From 3e812253ab6b2f98fc5d17bfb82947e392b0b2a2 Mon Sep 17 00:00:00 2001 From: Yuki Kobayashi Date: Mon, 2 Dec 2024 22:51:35 +0900 Subject: [PATCH 10/76] gh-101100: Fix Sphinx warnings about list methods (#127054) --- Doc/library/collections.rst | 4 ++-- Doc/tools/.nitignore | 1 - Doc/tutorial/datastructures.rst | 10 +++++----- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/Doc/library/collections.rst b/Doc/library/collections.rst index 0cc9063f153aba..5b4e445762e076 100644 --- a/Doc/library/collections.rst +++ b/Doc/library/collections.rst @@ -783,10 +783,10 @@ sequence of key-value pairs into a dictionary of lists: When each key is encountered for the first time, it is not already in the mapping; so an entry is automatically created using the :attr:`~defaultdict.default_factory` -function which returns an empty :class:`list`. 
The :meth:`list.append` +function which returns an empty :class:`list`. The :meth:`!list.append` operation then attaches the value to the new list. When keys are encountered again, the look-up proceeds normally (returning the list for that key) and the -:meth:`list.append` operation adds another value to the list. This technique is +:meth:`!list.append` operation adds another value to the list. This technique is simpler and faster than an equivalent technique using :meth:`dict.setdefault`: >>> d = {} diff --git a/Doc/tools/.nitignore b/Doc/tools/.nitignore index 711c0b64095bd2..39d1f5975e331c 100644 --- a/Doc/tools/.nitignore +++ b/Doc/tools/.nitignore @@ -71,7 +71,6 @@ Doc/library/xmlrpc.server.rst Doc/library/zlib.rst Doc/reference/compound_stmts.rst Doc/reference/datamodel.rst -Doc/tutorial/datastructures.rst Doc/using/windows.rst Doc/whatsnew/2.4.rst Doc/whatsnew/2.5.rst diff --git a/Doc/tutorial/datastructures.rst b/Doc/tutorial/datastructures.rst index 31941bc112a135..263b0c2e2815a1 100644 --- a/Doc/tutorial/datastructures.rst +++ b/Doc/tutorial/datastructures.rst @@ -142,8 +142,8 @@ Using Lists as Stacks The list methods make it very easy to use a list as a stack, where the last element added is the first element retrieved ("last-in, first-out"). To add an -item to the top of the stack, use :meth:`~list.append`. To retrieve an item from the -top of the stack, use :meth:`~list.pop` without an explicit index. For example:: +item to the top of the stack, use :meth:`!~list.append`. To retrieve an item from the +top of the stack, use :meth:`!~list.pop` without an explicit index. For example:: >>> stack = [3, 4, 5] >>> stack.append(6) @@ -340,7 +340,7 @@ The :keyword:`!del` statement ============================= There is a way to remove an item from a list given its index instead of its -value: the :keyword:`del` statement. This differs from the :meth:`~list.pop` method +value: the :keyword:`del` statement. 
This differs from the :meth:`!~list.pop` method which returns a value. The :keyword:`!del` statement can also be used to remove slices from a list or clear the entire list (which we did earlier by assignment of an empty list to the slice). For example:: @@ -500,8 +500,8 @@ any immutable type; strings and numbers can always be keys. Tuples can be used as keys if they contain only strings, numbers, or tuples; if a tuple contains any mutable object either directly or indirectly, it cannot be used as a key. You can't use lists as keys, since lists can be modified in place using index -assignments, slice assignments, or methods like :meth:`~list.append` and -:meth:`~list.extend`. +assignments, slice assignments, or methods like :meth:`!~list.append` and +:meth:`!~list.extend`. It is best to think of a dictionary as a set of *key: value* pairs, with the requirement that the keys are unique (within one dictionary). A pair of From 7c2bd9b2266665ff4010b6c6c175bab18e08e4f8 Mon Sep 17 00:00:00 2001 From: Donghee Na Date: Tue, 3 Dec 2024 00:14:40 +0900 Subject: [PATCH 11/76] gh-115999: Use light-weight lock for UNPACK_SEQUENCE_LIST (gh-127514) --- Include/internal/pycore_opcode_metadata.h | 2 +- Include/internal/pycore_uop_metadata.h | 2 +- Python/bytecodes.c | 13 +++---------- Python/executor_cases.c.h | 22 ++++++---------------- Python/generated_cases.c.h | 19 +++---------------- 5 files changed, 14 insertions(+), 44 deletions(-) diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h index d63c8df8ca6690..81dde66a6f26c2 100644 --- a/Include/internal/pycore_opcode_metadata.h +++ b/Include/internal/pycore_opcode_metadata.h @@ -2148,7 +2148,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[266] = { [UNARY_NOT] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, [UNPACK_EX] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, [UNPACK_SEQUENCE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | 
HAS_ESCAPES_FLAG }, - [UNPACK_SEQUENCE_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, + [UNPACK_SEQUENCE_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, [UNPACK_SEQUENCE_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, [UNPACK_SEQUENCE_TWO_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, [WITH_EXCEPT_START] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h index 1825bb3a5abc80..89fce193f40bd8 100644 --- a/Include/internal/pycore_uop_metadata.h +++ b/Include/internal/pycore_uop_metadata.h @@ -112,7 +112,7 @@ const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { [_UNPACK_SEQUENCE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_UNPACK_SEQUENCE_TWO_TUPLE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, [_UNPACK_SEQUENCE_TUPLE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, - [_UNPACK_SEQUENCE_LIST] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG, + [_UNPACK_SEQUENCE_LIST] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, [_UNPACK_EX] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_STORE_ATTR] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, [_DELETE_ATTR] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, diff --git a/Python/bytecodes.c b/Python/bytecodes.c index c07ec42ec68f8b..e96674c3502ef1 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1438,14 +1438,9 @@ dummy_func( inst(UNPACK_SEQUENCE_LIST, (unused/1, seq -- values[oparg])) { PyObject *seq_o = PyStackRef_AsPyObjectBorrow(seq); DEOPT_IF(!PyList_CheckExact(seq_o)); - #ifdef Py_GIL_DISABLED - PyCriticalSection cs; - PyCriticalSection_Begin(&cs, seq_o); - #endif + DEOPT_IF(!LOCK_OBJECT(seq_o)); if (PyList_GET_SIZE(seq_o) != oparg) { - #ifdef Py_GIL_DISABLED - PyCriticalSection_End(&cs); - #endif + UNLOCK_OBJECT(seq_o); DEOPT_IF(true); } STAT_INC(UNPACK_SEQUENCE, hit); @@ -1453,9 +1448,7 @@ dummy_func( for (int i = oparg; 
--i >= 0; ) { *values++ = PyStackRef_FromPyObjectNew(items[i]); } - #ifdef Py_GIL_DISABLED - PyCriticalSection_End(&cs); - #endif + UNLOCK_OBJECT(seq_o); DECREF_INPUTS(); } diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index c91257b06cad11..580814657608db 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -1728,18 +1728,12 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - #ifdef Py_GIL_DISABLED - PyCriticalSection cs; - _PyFrame_SetStackPointer(frame, stack_pointer); - PyCriticalSection_Begin(&cs, seq_o); - stack_pointer = _PyFrame_GetStackPointer(frame); - #endif + if (!LOCK_OBJECT(seq_o)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } if (PyList_GET_SIZE(seq_o) != oparg) { - #ifdef Py_GIL_DISABLED - _PyFrame_SetStackPointer(frame, stack_pointer); - PyCriticalSection_End(&cs); - stack_pointer = _PyFrame_GetStackPointer(frame); - #endif + UNLOCK_OBJECT(seq_o); if (true) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); @@ -1750,11 +1744,7 @@ for (int i = oparg; --i >= 0; ) { *values++ = PyStackRef_FromPyObjectNew(items[i]); } - #ifdef Py_GIL_DISABLED - _PyFrame_SetStackPointer(frame, stack_pointer); - PyCriticalSection_End(&cs); - stack_pointer = _PyFrame_GetStackPointer(frame); - #endif + UNLOCK_OBJECT(seq_o); PyStackRef_CLOSE(seq); stack_pointer += -1 + oparg; assert(WITHIN_STACK_BOUNDS()); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 45bcc4242af9d7..e1f951558de7da 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -8040,18 +8040,9 @@ values = &stack_pointer[-1]; PyObject *seq_o = PyStackRef_AsPyObjectBorrow(seq); DEOPT_IF(!PyList_CheckExact(seq_o), UNPACK_SEQUENCE); - #ifdef Py_GIL_DISABLED - PyCriticalSection cs; - _PyFrame_SetStackPointer(frame, stack_pointer); - PyCriticalSection_Begin(&cs, seq_o); - stack_pointer = _PyFrame_GetStackPointer(frame); - #endif + DEOPT_IF(!LOCK_OBJECT(seq_o), UNPACK_SEQUENCE); if (PyList_GET_SIZE(seq_o) 
!= oparg) { - #ifdef Py_GIL_DISABLED - _PyFrame_SetStackPointer(frame, stack_pointer); - PyCriticalSection_End(&cs); - stack_pointer = _PyFrame_GetStackPointer(frame); - #endif + UNLOCK_OBJECT(seq_o); DEOPT_IF(true, UNPACK_SEQUENCE); } STAT_INC(UNPACK_SEQUENCE, hit); @@ -8059,11 +8050,7 @@ for (int i = oparg; --i >= 0; ) { *values++ = PyStackRef_FromPyObjectNew(items[i]); } - #ifdef Py_GIL_DISABLED - _PyFrame_SetStackPointer(frame, stack_pointer); - PyCriticalSection_End(&cs); - stack_pointer = _PyFrame_GetStackPointer(frame); - #endif + UNLOCK_OBJECT(seq_o); PyStackRef_CLOSE(seq); stack_pointer += -1 + oparg; assert(WITHIN_STACK_BOUNDS()); From c46acd3588864e97d0e0fe37a41aa5e94ac7af51 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Mon, 2 Dec 2024 16:51:50 +0100 Subject: [PATCH 12/76] gh-126876: Fix test_socket.testLargeTimeout() for missing _testcapi (#127517) --- Lib/test/test_socket.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_socket.py b/Lib/test/test_socket.py index 7b3914f30e5f52..307d6e886c617f 100644 --- a/Lib/test/test_socket.py +++ b/Lib/test/test_socket.py @@ -5136,7 +5136,10 @@ def testLargeTimeout(self): # gh-126876: Check that a timeout larger than INT_MAX is replaced with # INT_MAX in the poll() code path. The following assertion must not # fail: assert(INT_MIN <= ms && ms <= INT_MAX). 
- large_timeout = _testcapi.INT_MAX + 1 + if _testcapi is not None: + large_timeout = _testcapi.INT_MAX + 1 + else: + large_timeout = 2147483648 # test recv() with large timeout conn, addr = self.serv.accept() @@ -5151,7 +5154,10 @@ def testLargeTimeout(self): def _testLargeTimeout(self): # test sendall() with large timeout - large_timeout = _testcapi.INT_MAX + 1 + if _testcapi is not None: + large_timeout = _testcapi.INT_MAX + 1 + else: + large_timeout = 2147483648 self.cli.connect((HOST, self.port)) try: self.cli.settimeout(large_timeout) From c4303763dac4494300e299e54c079a4a11931a55 Mon Sep 17 00:00:00 2001 From: mpage Date: Mon, 2 Dec 2024 10:13:30 -0800 Subject: [PATCH 13/76] gh-127411: Fix invalid conversion of load of TLBC array when compiled in C++ (#127466) Cast the result of the load to the correct type --- Include/internal/pycore_code.h | 9 ++++++++- Include/internal/pycore_frame.h | 2 +- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index a0acf76db6f04d..d607a54aa4a2f5 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -609,12 +609,19 @@ PyAPI_DATA(const struct _PyCode8) _Py_InitCleanup; #ifdef Py_GIL_DISABLED +static inline _PyCodeArray * +_PyCode_GetTLBCArray(PyCodeObject *co) +{ + return _Py_STATIC_CAST(_PyCodeArray *, + _Py_atomic_load_ptr_acquire(&co->co_tlbc)); +} + // Return a pointer to the thread-local bytecode for the current thread, if it // exists. 
static inline _Py_CODEUNIT * _PyCode_GetTLBCFast(PyThreadState *tstate, PyCodeObject *co) { - _PyCodeArray *code = _Py_atomic_load_ptr_acquire(&co->co_tlbc); + _PyCodeArray *code = _PyCode_GetTLBCArray(co); int32_t idx = ((_PyThreadStateImpl*) tstate)->tlbc_index; if (idx < code->size && code->entries[idx] != NULL) { return (_Py_CODEUNIT *) code->entries[idx]; diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index b786c5f49e9831..96ae4dd22ecb43 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -94,7 +94,7 @@ _PyFrame_GetBytecode(_PyInterpreterFrame *f) { #ifdef Py_GIL_DISABLED PyCodeObject *co = _PyFrame_GetCode(f); - _PyCodeArray *tlbc = _Py_atomic_load_ptr_acquire(&co->co_tlbc); + _PyCodeArray *tlbc = _PyCode_GetTLBCArray(co); assert(f->tlbc_index >= 0 && f->tlbc_index < tlbc->size); return (_Py_CODEUNIT *)tlbc->entries[f->tlbc_index]; #else From c7dec02de2ed4baf3cd22ad094350265b52c18af Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Mon, 2 Dec 2024 19:38:26 +0000 Subject: [PATCH 14/76] gh-127521: Mark list as "shared" before resizing if necessary (#127524) In the free threading build, if a non-owning thread resizes a list, it must use QSBR to free the old list array because there may be a concurrent access (without a lock) from the owning thread. To match the pattern in dictobject.c, we just mark the list as "shared" before resizing if it's from a non-owning thread and not already marked as shared. --- Objects/listobject.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/Objects/listobject.c b/Objects/listobject.c index 4b24f4a428e18b..8abe9e8933420b 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -68,6 +68,20 @@ free_list_items(PyObject** items, bool use_qsbr) #endif } +static void +ensure_shared_on_resize(PyListObject *self) +{ +#ifdef Py_GIL_DISABLED + // Ensure that the list array is freed using QSBR if we are not the + // owning thread. 
+ if (!_Py_IsOwnedByCurrentThread((PyObject *)self) && + !_PyObject_GC_IS_SHARED(self)) + { + _PyObject_GC_SET_SHARED(self); + } +#endif +} + /* Ensure ob_item has room for at least newsize elements, and set * ob_size to newsize. If newsize > ob_size on entry, the content * of the new slots at exit is undefined heap trash; it's the caller's @@ -117,6 +131,8 @@ list_resize(PyListObject *self, Py_ssize_t newsize) if (newsize == 0) new_allocated = 0; + ensure_shared_on_resize(self); + #ifdef Py_GIL_DISABLED _PyListArray *array = list_allocate_array(new_allocated); if (array == NULL) { @@ -804,6 +820,9 @@ list_clear_impl(PyListObject *a, bool is_resize) Py_XDECREF(items[i]); } #ifdef Py_GIL_DISABLED + if (is_resize) { + ensure_shared_on_resize(a); + } bool use_qsbr = is_resize && _PyObject_GC_IS_SHARED(a); #else bool use_qsbr = false; @@ -3069,6 +3088,7 @@ list_sort_impl(PyListObject *self, PyObject *keyfunc, int reverse) Py_XDECREF(final_ob_item[i]); } #ifdef Py_GIL_DISABLED + ensure_shared_on_resize(self); bool use_qsbr = _PyObject_GC_IS_SHARED(self); #else bool use_qsbr = false; From edefb8678a11a20bdcdcbb8bb6a62ae22101bb51 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Mon, 2 Dec 2024 15:17:08 -0500 Subject: [PATCH 15/76] gh-127518: Fix pystats build after #127169 (#127526) gh-127518: Fix pystats build after #127619 --- Python/specialize.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Python/specialize.c b/Python/specialize.c index 504eef4f448429..ba13b02a29b133 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1816,8 +1816,9 @@ _Py_Specialize_BinarySubscr( #ifdef Py_STATS static int -store_subscr_fail_kind(PyObject *container_type) +store_subscr_fail_kind(PyObject *container, PyObject *sub) { + PyTypeObject *container_type = Py_TYPE(container); PyMappingMethods *as_mapping = container_type->tp_as_mapping; if (as_mapping && (as_mapping->mp_ass_subscript == PyDict_Type.tp_as_mapping->mp_ass_subscript)) { @@ -1915,7 
+1916,7 @@ _Py_Specialize_StoreSubscr(_PyStackRef container_st, _PyStackRef sub_st, _Py_COD specialize(instr, STORE_SUBSCR_DICT); return; } - SPECIALIZATION_FAIL(STORE_SUBSCR, store_subscr_fail_kind(container_type)); + SPECIALIZATION_FAIL(STORE_SUBSCR, store_subscr_fail_kind(container, sub)); unspecialize(instr); } From bfb0788bfcaab7474c1be0605552744e15082ee9 Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Tue, 3 Dec 2024 00:30:24 +0100 Subject: [PATCH 16/76] gh-127111: Emscripten Make web example work again (#127113) Moves the Emscripten web example into a standalone folder, and updates Makefile targets to build the web example. Instructions for usage have also been added. --- Makefile.pre.in | 51 +++++--- ...-11-30-16-36-09.gh-issue-127111.QI9mMZ.rst | 2 + Tools/wasm/README.md | 122 ++++++++++++------ .../{ => emscripten/web_example}/python.html | 4 +- .../web_example/python.worker.mjs} | 25 +++- .../web_example/server.py} | 8 +- .../web_example}/wasm_assets.py | 25 ++-- configure | 14 +- configure.ac | 16 ++- 9 files changed, 175 insertions(+), 92 deletions(-) create mode 100644 Misc/NEWS.d/next/Build/2024-11-30-16-36-09.gh-issue-127111.QI9mMZ.rst rename Tools/wasm/{ => emscripten/web_example}/python.html (99%) rename Tools/wasm/{python.worker.js => emscripten/web_example/python.worker.mjs} (65%) rename Tools/wasm/{wasm_webserver.py => emscripten/web_example/server.py} (85%) rename Tools/wasm/{ => emscripten/web_example}/wasm_assets.py (91%) diff --git a/Makefile.pre.in b/Makefile.pre.in index 724354746b8d81..dd8a3ab82eacd2 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -269,10 +269,6 @@ SRCDIRS= @SRCDIRS@ # Other subdirectories SUBDIRSTOO= Include Lib Misc -# assets for Emscripten browser builds -WASM_ASSETS_DIR=.$(prefix) -WASM_STDLIB=$(WASM_ASSETS_DIR)/lib/python$(VERSION)/os.py - # Files and directories to be distributed CONFIGFILES= configure configure.ac acconfig.h pyconfig.h.in Makefile.pre.in DISTFILES= README.rst ChangeLog $(CONFIGFILES) @@ 
-737,6 +733,9 @@ build_all: check-clean-src check-app-store-compliance $(BUILDPYTHON) platform sh build_wasm: check-clean-src $(BUILDPYTHON) platform sharedmods \ python-config checksharedmods +.PHONY: build_emscripten +build_emscripten: build_wasm web_example + # Check that the source is clean when building out of source. .PHONY: check-clean-src check-clean-src: @@ -1016,23 +1015,38 @@ $(DLLLIBRARY) libpython$(LDVERSION).dll.a: $(LIBRARY_OBJS) else true; \ fi -# wasm32-emscripten browser build -# wasm assets directory is relative to current build dir, e.g. "./usr/local". -# --preload-file turns a relative asset path into an absolute path. +# wasm32-emscripten browser web example + +WEBEX_DIR=$(srcdir)/Tools/wasm/emscripten/web_example/ +web_example/python.html: $(WEBEX_DIR)/python.html + @mkdir -p web_example + @cp $< $@ + +web_example/python.worker.mjs: $(WEBEX_DIR)/python.worker.mjs + @mkdir -p web_example + @cp $< $@ -.PHONY: wasm_stdlib -wasm_stdlib: $(WASM_STDLIB) -$(WASM_STDLIB): $(srcdir)/Lib/*.py $(srcdir)/Lib/*/*.py \ - $(srcdir)/Tools/wasm/wasm_assets.py \ +web_example/server.py: $(WEBEX_DIR)/server.py + @mkdir -p web_example + @cp $< $@ + +WEB_STDLIB=web_example/python$(VERSION)$(ABI_THREAD).zip +$(WEB_STDLIB): $(srcdir)/Lib/*.py $(srcdir)/Lib/*/*.py \ + $(WEBEX_DIR)/wasm_assets.py \ Makefile pybuilddir.txt Modules/Setup.local - $(PYTHON_FOR_BUILD) $(srcdir)/Tools/wasm/wasm_assets.py \ - --buildroot . --prefix $(prefix) + $(PYTHON_FOR_BUILD) $(WEBEX_DIR)/wasm_assets.py \ + --buildroot . 
--prefix $(prefix) -o $@ -python.html: $(srcdir)/Tools/wasm/python.html python.worker.js - @cp $(srcdir)/Tools/wasm/python.html $@ +web_example/python.mjs web_example/python.wasm: $(BUILDPYTHON) + @if test $(HOST_GNU_TYPE) != 'wasm32-unknown-emscripten' ; then \ + echo "Can only build web_example when target is Emscripten" ;\ + exit 1 ;\ + fi + cp python.mjs web_example/python.mjs + cp python.wasm web_example/python.wasm -python.worker.js: $(srcdir)/Tools/wasm/python.worker.js - @cp $(srcdir)/Tools/wasm/python.worker.js $@ +.PHONY: web_example +web_example: web_example/python.mjs web_example/python.worker.mjs web_example/python.html web_example/server.py $(WEB_STDLIB) ############################################################################ # Header files @@ -3053,8 +3067,7 @@ clean-retain-profile: pycremoval find build -name '*.py[co]' -exec rm -f {} ';' || true -rm -f pybuilddir.txt -rm -f _bootstrap_python - -rm -f python.html python*.js python.data python*.symbols python*.map - -rm -f $(WASM_STDLIB) + -rm -rf web_example python.mjs python.wasm python*.symbols python*.map -rm -f Programs/_testembed Programs/_freeze_module -rm -rf Python/deepfreeze -rm -f Python/frozen_modules/*.h diff --git a/Misc/NEWS.d/next/Build/2024-11-30-16-36-09.gh-issue-127111.QI9mMZ.rst b/Misc/NEWS.d/next/Build/2024-11-30-16-36-09.gh-issue-127111.QI9mMZ.rst new file mode 100644 index 00000000000000..d90067cd3bfaa3 --- /dev/null +++ b/Misc/NEWS.d/next/Build/2024-11-30-16-36-09.gh-issue-127111.QI9mMZ.rst @@ -0,0 +1,2 @@ +Updated the Emscripten web example to use ES6 modules and be built into a +distinct ``web_example`` subfolder. diff --git a/Tools/wasm/README.md b/Tools/wasm/README.md index 3f4211fb1dfb28..4802d9683de52e 100644 --- a/Tools/wasm/README.md +++ b/Tools/wasm/README.md @@ -23,9 +23,9 @@ https://github.com/psf/webassembly for more information. 
To cross compile to the ``wasm32-emscripten`` platform you need [the Emscripten compiler toolchain](https://emscripten.org/), -a Python interpreter, and an installation of Node version 18 or newer. Emscripten -version 3.1.42 or newer is recommended. All commands below are relative to a checkout -of the Python repository. +a Python interpreter, and an installation of Node version 18 or newer. +Emscripten version 3.1.73 or newer is recommended. All commands below are +relative to a checkout of the Python repository. #### Install [the Emscripten compiler toolchain](https://emscripten.org/docs/getting_started/downloads.html) @@ -50,7 +50,7 @@ sourced. Otherwise the source script removes the environment variable. export EM_COMPILER_WRAPPER=ccache ``` -### Compile and build Python interpreter +#### Compile and build Python interpreter You can use `python Tools/wasm/emscripten` to compile and build targetting Emscripten. You can do everything at once with: @@ -70,6 +70,88 @@ instance, to do a debug build, you can use: python Tools/wasm/emscripten build --with-py-debug ``` +### Running from node + +If you want to run the normal Python CLI, you can use `python.sh`. It takes the +same options as the normal Python CLI entrypoint, though the REPL does not +function and will crash. + +`python.sh` invokes `node_entry.mjs` which imports the Emscripten module for the +Python process and starts it up with the appropriate settings. If you wish to +make a node application that "embeds" the interpreter instead of acting like the +CLI you will need to write your own alternative to `node_entry.mjs`. + + +### The Web Example + +When building for Emscripten, the web example will be built automatically. It is +in the ``web_example`` directory. To run the web example, ``cd`` into the +``web_example`` directory, then run ``python server.py``. This will start a web +server; you can then visit ``http://localhost:8000/python.html`` in a browser to +see a simple REPL example. 
+ +The web example relies on a bug fix in Emscripten version 3.1.73 so if you build +with earlier versions of Emscripten it may not work. The web example uses +``SharedArrayBuffer``. For security reasons browsers only provide +``SharedArrayBuffer`` in secure environments with cross-origin isolation. The +webserver must send cross-origin headers and correct MIME types for the +JavaScript and WebAssembly files. Otherwise the terminal will fail to load with +an error message like ``ReferenceError: SharedArrayBuffer is not defined``. See +more information here: +https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/SharedArrayBuffer#security_requirements + +Note that ``SharedArrayBuffer`` is _not required_ to use Python itself, only the +web example. If cross-origin isolation is not appropriate for your use case you +may make your own application embedding `python.mjs` which does not use +``SharedArrayBuffer`` and serve it without the cross-origin isolation headers. + +### Embedding Python in a custom JavaScript application + +You can look at `python.worker.mjs` and `node_entry.mjs` for inspiration. At a +minimum you must import ``createEmscriptenModule`` and you need to call +``createEmscriptenModule`` with an appropriate settings object. This settings +object will need a prerun hook that installs the Python standard library into +the Emscripten file system. + +#### NodeJs + +In Node, you can use the NodeFS to mount the standard library in your native +file system into the Emscripten file system: +```js +import createEmscriptenModule from "./python.mjs"; + +await createEmscriptenModule({ + preRun(Module) { + Module.FS.mount( + Module.FS.filesystems.NODEFS, + { root: "/path/to/python/stdlib" }, + "/lib/", + ); + }, +}); +``` + +#### Browser + +In the browser, the simplest approach is to put the standard library in a zip +file and install it.
With Python 3.14 this could look like: +```js +import createEmscriptenModule from "./python.mjs"; + +await createEmscriptenModule({ + async preRun(Module) { + Module.FS.mkdirTree("/lib/python3.14/lib-dynload/"); + Module.addRunDependency("install-stdlib"); + const resp = await fetch("python3.14.zip"); + const stdlibBuffer = await resp.arrayBuffer(); + Module.FS.writeFile(`/lib/python314.zip`, new Uint8Array(stdlibBuffer), { + canOwn: true, + }); + Module.removeRunDependency("install-stdlib"); + }, +}); +``` + ### Limitations and issues #### Network stack @@ -151,38 +233,6 @@ python Tools/wasm/emscripten build --with-py-debug - Test modules are disabled by default. Use ``--enable-test-modules`` build test modules like ``_testcapi``. -### wasm32-emscripten in node - -Node builds use ``NODERAWFS``. - -- Node RawFS allows direct access to the host file system without need to - perform ``FS.mount()`` call. - -### Hosting Python WASM builds - -The simple REPL terminal uses SharedArrayBuffer. For security reasons -browsers only provide the feature in secure environments with cross-origin -isolation. The webserver must send cross-origin headers and correct MIME types -for the JavaScript and WASM files. Otherwise the terminal will fail to load -with an error message like ``Browsers disable shared array buffer``. - -#### Apache HTTP .htaccess - -Place a ``.htaccess`` file in the same directory as ``python.wasm``. - -``` -# .htaccess -Header set Cross-Origin-Opener-Policy same-origin -Header set Cross-Origin-Embedder-Policy require-corp - -AddType application/javascript js -AddType application/wasm wasm - - - AddOutputFilterByType DEFLATE text/html application/javascript application/wasm - -``` - ## WASI (wasm32-wasi) See [the devguide on how to build and run for WASI](https://devguide.python.org/getting-started/setup-building/#wasi). 
diff --git a/Tools/wasm/python.html b/Tools/wasm/emscripten/web_example/python.html similarity index 99% rename from Tools/wasm/python.html rename to Tools/wasm/emscripten/web_example/python.html index 81a035a5c4cd93..fae1e9ad4e8acb 100644 --- a/Tools/wasm/python.html +++ b/Tools/wasm/emscripten/web_example/python.html @@ -47,7 +47,7 @@ async initialiseWorker() { if (!this.worker) { - this.worker = new Worker(this.workerURL) + this.worker = new Worker(this.workerURL, {type: "module"}) this.worker.addEventListener('message', this.handleMessageFromWorker) } } @@ -347,7 +347,7 @@ programRunning(false) } - const pythonWorkerManager = new WorkerManager('./python.worker.js', stdio, readyCallback, finishedCallback) + const pythonWorkerManager = new WorkerManager('./python.worker.mjs', stdio, readyCallback, finishedCallback) } diff --git a/Tools/wasm/python.worker.js b/Tools/wasm/emscripten/web_example/python.worker.mjs similarity index 65% rename from Tools/wasm/python.worker.js rename to Tools/wasm/emscripten/web_example/python.worker.mjs index 4ce4e16fc0fa19..42c2e1e08af24b 100644 --- a/Tools/wasm/python.worker.js +++ b/Tools/wasm/emscripten/web_example/python.worker.mjs @@ -1,3 +1,5 @@ +import createEmscriptenModule from "./python.mjs"; + class StdinBuffer { constructor() { this.sab = new SharedArrayBuffer(128 * Int32Array.BYTES_PER_ELEMENT) @@ -59,24 +61,40 @@ const stderr = (charCode) => { const stdinBuffer = new StdinBuffer() -var Module = { +const emscriptenSettings = { noInitialRun: true, stdin: stdinBuffer.stdin, stdout: stdout, stderr: stderr, onRuntimeInitialized: () => { postMessage({type: 'ready', stdinBuffer: stdinBuffer.sab}) + }, + async preRun(Module) { + const versionHex = Module.HEAPU32[Module._Py_Version/4].toString(16); + const versionTuple = versionHex.padStart(8, "0").match(/.{1,2}/g).map((x) => parseInt(x, 16)); + const [major, minor, ..._] = versionTuple; + // Prevent complaints about not finding exec-prefix by making a lib-dynload directory + 
Module.FS.mkdirTree(`/lib/python${major}.${minor}/lib-dynload/`); + Module.addRunDependency("install-stdlib"); + const resp = await fetch(`python${major}.${minor}.zip`); + const stdlibBuffer = await resp.arrayBuffer(); + Module.FS.writeFile(`/lib/python${major}${minor}.zip`, new Uint8Array(stdlibBuffer), { canOwn: true }); + Module.removeRunDependency("install-stdlib"); } } -onmessage = (event) => { +const modulePromise = createEmscriptenModule(emscriptenSettings); + + +onmessage = async (event) => { if (event.data.type === 'run') { + const Module = await modulePromise; if (event.data.files) { for (const [filename, contents] of Object.entries(event.data.files)) { Module.FS.writeFile(filename, contents) } } - const ret = callMain(event.data.args) + const ret = Module.callMain(event.data.args); postMessage({ type: 'finished', returnCode: ret @@ -84,4 +102,3 @@ onmessage = (event) => { } } -importScripts('python.js') diff --git a/Tools/wasm/wasm_webserver.py b/Tools/wasm/emscripten/web_example/server.py similarity index 85% rename from Tools/wasm/wasm_webserver.py rename to Tools/wasm/emscripten/web_example/server.py index 3d1d5d42a1e8c4..768e6f84e07798 100755 --- a/Tools/wasm/wasm_webserver.py +++ b/Tools/wasm/emscripten/web_example/server.py @@ -14,13 +14,6 @@ class MyHTTPRequestHandler(server.SimpleHTTPRequestHandler): - extensions_map = server.SimpleHTTPRequestHandler.extensions_map.copy() - extensions_map.update( - { - ".wasm": "application/wasm", - } - ) - def end_headers(self) -> None: self.send_my_headers() super().end_headers() @@ -42,5 +35,6 @@ def main() -> None: bind=args.bind, ) + if __name__ == "__main__": main() diff --git a/Tools/wasm/wasm_assets.py b/Tools/wasm/emscripten/web_example/wasm_assets.py similarity index 91% rename from Tools/wasm/wasm_assets.py rename to Tools/wasm/emscripten/web_example/wasm_assets.py index ffa5e303412c46..7f0fa7ae7c10ec 100755 --- a/Tools/wasm/wasm_assets.py +++ b/Tools/wasm/emscripten/web_example/wasm_assets.py @@ -19,7 
+19,7 @@ from typing import Dict # source directory -SRCDIR = pathlib.Path(__file__).parent.parent.parent.absolute() +SRCDIR = pathlib.Path(__file__).parents[4].absolute() SRCDIR_LIB = SRCDIR / "Lib" @@ -28,9 +28,7 @@ WASM_STDLIB_ZIP = ( WASM_LIB / f"python{sys.version_info.major}{sys.version_info.minor}.zip" ) -WASM_STDLIB = ( - WASM_LIB / f"python{sys.version_info.major}.{sys.version_info.minor}" -) +WASM_STDLIB = WASM_LIB / f"python{sys.version_info.major}.{sys.version_info.minor}" WASM_DYNLOAD = WASM_STDLIB / "lib-dynload" @@ -114,9 +112,7 @@ def get_sysconfigdata(args: argparse.Namespace) -> pathlib.Path: assert isinstance(args.builddir, pathlib.Path) data_name: str = sysconfig._get_sysconfigdata_name() # type: ignore[attr-defined] if not data_name.startswith(SYSCONFIG_NAMES): - raise ValueError( - f"Invalid sysconfig data name '{data_name}'.", SYSCONFIG_NAMES - ) + raise ValueError(f"Invalid sysconfig data name '{data_name}'.", SYSCONFIG_NAMES) filename = data_name + ".py" return args.builddir / filename @@ -131,7 +127,7 @@ def filterfunc(filename: str) -> bool: return pathname not in args.omit_files_absolute with zipfile.PyZipFile( - args.wasm_stdlib_zip, + args.output, mode="w", compression=args.compression, optimize=optimize, @@ -195,6 +191,12 @@ def path(val: str) -> pathlib.Path: default=pathlib.Path("/usr/local"), type=path, ) +parser.add_argument( + "-o", + "--output", + help="output file", + type=path, +) def main() -> None: @@ -204,7 +206,6 @@ def main() -> None: args.srcdir = SRCDIR args.srcdir_lib = SRCDIR_LIB args.wasm_root = args.buildroot / relative_prefix - args.wasm_stdlib_zip = args.wasm_root / WASM_STDLIB_ZIP args.wasm_stdlib = args.wasm_root / WASM_STDLIB args.wasm_dynload = args.wasm_root / WASM_DYNLOAD @@ -234,12 +235,10 @@ def main() -> None: args.wasm_dynload.mkdir(parents=True, exist_ok=True) marker = args.wasm_dynload / ".empty" marker.touch() - # os.py is a marker for finding the correct lib directory. 
- shutil.copy(args.srcdir_lib / "os.py", args.wasm_stdlib) # The rest of stdlib that's useful in a WASM context. create_stdlib_zip(args) - size = round(args.wasm_stdlib_zip.stat().st_size / 1024**2, 2) - parser.exit(0, f"Created {args.wasm_stdlib_zip} ({size} MiB)\n") + size = round(args.output.stat().st_size / 1024**2, 2) + parser.exit(0, f"Created {args.output} ({size} MiB)\n") if __name__ == "__main__": diff --git a/configure b/configure index 4e4043260ed2df..7efda041ae69d4 100755 --- a/configure +++ b/configure @@ -8333,8 +8333,12 @@ fi fi -elif test "$ac_sys_system" = "Emscripten" -o "$ac_sys_system" = "WASI"; then - DEF_MAKE_ALL_RULE="build_wasm" +elif test "$ac_sys_system" = "Emscripten"; then + DEF_MAKE_ALL_RULE="build_emscripten" + REQUIRE_PGO="no" + DEF_MAKE_RULE="all" +elif test "$ac_sys_system" = "WASI"; then + DEF_MAKE_ALL_RULE="build_wasm" REQUIRE_PGO="no" DEF_MAKE_RULE="all" else @@ -9425,12 +9429,13 @@ else $as_nop wasm_debug=no fi - as_fn_append LDFLAGS_NODIST " -sALLOW_MEMORY_GROWTH -sTOTAL_MEMORY=20971520" + as_fn_append LDFLAGS_NODIST " -sALLOW_MEMORY_GROWTH -sINITIAL_MEMORY=20971520" as_fn_append LDFLAGS_NODIST " -sWASM_BIGINT" as_fn_append LDFLAGS_NODIST " -sFORCE_FILESYSTEM -lidbfs.js -lnodefs.js -lproxyfs.js -lworkerfs.js" - as_fn_append LDFLAGS_NODIST " -sEXPORTED_RUNTIME_METHODS=FS" + as_fn_append LDFLAGS_NODIST " -sEXPORTED_RUNTIME_METHODS=FS,callMain" + as_fn_append LDFLAGS_NODIST " -sEXPORTED_FUNCTIONS=_main,_Py_Version" if test "x$enable_wasm_dynamic_linking" = xyes then : @@ -9447,7 +9452,6 @@ then : as_fn_append LINKFORSHARED " -sPROXY_TO_PTHREAD" fi - as_fn_append LDFLAGS_NODIST " -sALLOW_MEMORY_GROWTH" as_fn_append LDFLAGS_NODIST " -sEXIT_RUNTIME" WASM_LINKFORSHARED_DEBUG="-gseparate-dwarf --emit-symbol-map" diff --git a/configure.ac b/configure.ac index 4cfced10432491..15f7d07f22473b 100644 --- a/configure.ac +++ b/configure.ac @@ -1854,9 +1854,13 @@ if test "$Py_OPT" = 'true' ; then LDFLAGS_NODIST="$LDFLAGS_NODIST 
-fno-semantic-interposition" ], [], [-Werror]) ]) -elif test "$ac_sys_system" = "Emscripten" -o "$ac_sys_system" = "WASI"; then - dnl Emscripten does not support shared extensions yet. Build - dnl "python.[js,wasm]", "pybuilddir.txt", and "platform" files. +elif test "$ac_sys_system" = "Emscripten"; then + dnl Build "python.[js,wasm]", "pybuilddir.txt", and "platform" files. + DEF_MAKE_ALL_RULE="build_emscripten" + REQUIRE_PGO="no" + DEF_MAKE_RULE="all" +elif test "$ac_sys_system" = "WASI"; then + dnl Build "python.wasm", "pybuilddir.txt", and "platform" files. DEF_MAKE_ALL_RULE="build_wasm" REQUIRE_PGO="no" DEF_MAKE_RULE="all" @@ -2321,14 +2325,15 @@ AS_CASE([$ac_sys_system], AS_VAR_IF([Py_DEBUG], [yes], [wasm_debug=yes], [wasm_debug=no]) dnl Start with 20 MB and allow to grow - AS_VAR_APPEND([LDFLAGS_NODIST], [" -sALLOW_MEMORY_GROWTH -sTOTAL_MEMORY=20971520"]) + AS_VAR_APPEND([LDFLAGS_NODIST], [" -sALLOW_MEMORY_GROWTH -sINITIAL_MEMORY=20971520"]) dnl map int64_t and uint64_t to JS bigint AS_VAR_APPEND([LDFLAGS_NODIST], [" -sWASM_BIGINT"]) dnl Include file system support AS_VAR_APPEND([LDFLAGS_NODIST], [" -sFORCE_FILESYSTEM -lidbfs.js -lnodefs.js -lproxyfs.js -lworkerfs.js"]) - AS_VAR_APPEND([LDFLAGS_NODIST], [" -sEXPORTED_RUNTIME_METHODS=FS"]) + AS_VAR_APPEND([LDFLAGS_NODIST], [" -sEXPORTED_RUNTIME_METHODS=FS,callMain"]) + AS_VAR_APPEND([LDFLAGS_NODIST], [" -sEXPORTED_FUNCTIONS=_main,_Py_Version"]) AS_VAR_IF([enable_wasm_dynamic_linking], [yes], [ AS_VAR_APPEND([LINKFORSHARED], [" -sMAIN_MODULE"]) @@ -2339,7 +2344,6 @@ AS_CASE([$ac_sys_system], AS_VAR_APPEND([LDFLAGS_NODIST], [" -sUSE_PTHREADS"]) AS_VAR_APPEND([LINKFORSHARED], [" -sPROXY_TO_PTHREAD"]) ]) - AS_VAR_APPEND([LDFLAGS_NODIST], [" -sALLOW_MEMORY_GROWTH"]) dnl not completely sure whether or not we want -sEXIT_RUNTIME, keeping it for now. 
AS_VAR_APPEND([LDFLAGS_NODIST], [" -sEXIT_RUNTIME"]) WASM_LINKFORSHARED_DEBUG="-gseparate-dwarf --emit-symbol-map" From dffb90911a585a0921664c8b1c229d0883e65ee7 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Mon, 2 Dec 2024 20:45:36 -0600 Subject: [PATCH 17/76] Speed-up lazy heapq import in collections (gh-127538) --- Lib/collections/__init__.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Lib/collections/__init__.py b/Lib/collections/__init__.py index d688141f9b183d..78229ac54b80da 100644 --- a/Lib/collections/__init__.py +++ b/Lib/collections/__init__.py @@ -59,6 +59,8 @@ except ImportError: pass +heapq = None # Lazily imported + ################################################################################ ### OrderedDict @@ -633,7 +635,10 @@ def most_common(self, n=None): return sorted(self.items(), key=_itemgetter(1), reverse=True) # Lazy import to speedup Python startup time - import heapq + global heapq + if heapq is None: + import heapq + return heapq.nlargest(n, self.items(), key=_itemgetter(1)) def elements(self): From 8c3fd1f245fbdc747966daedfd22ed48491309dc Mon Sep 17 00:00:00 2001 From: Alexander Stepchenko Date: Tue, 3 Dec 2024 09:52:12 +0300 Subject: [PATCH 18/76] docs(logging): fix phrasing from "operation on" to "operate on" (#127543) --- Doc/howto/logging.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/howto/logging.rst b/Doc/howto/logging.rst index 3182d5664ab6ec..2982cf88bf97b4 100644 --- a/Doc/howto/logging.rst +++ b/Doc/howto/logging.rst @@ -127,7 +127,7 @@ that; formatting options will also be explained later. Notice that in this example, we use functions directly on the ``logging`` module, like ``logging.debug``, rather than creating a logger and calling -functions on it. These functions operation on the root logger, but can be useful +functions on it. 
These functions operate on the root logger, but can be useful as they will call :func:`~logging.basicConfig` for you if it has not been called yet, like in this example. In larger programs you'll usually want to control the logging configuration explicitly however - so for that reason as well as others, it's From 35d37d6592d1be71ea76042165f6cbfa6c4c3a17 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Tue, 3 Dec 2024 13:30:27 +0100 Subject: [PATCH 19/76] gh-127253: Note that Stable ABI is about ABI stability (GH-127254) --- Doc/c-api/stable.rst | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/Doc/c-api/stable.rst b/Doc/c-api/stable.rst index 5b9e43874c7f2b..124e58cf950b7a 100644 --- a/Doc/c-api/stable.rst +++ b/Doc/c-api/stable.rst @@ -66,7 +66,7 @@ Limited C API Python 3.2 introduced the *Limited API*, a subset of Python's C API. Extensions that only use the Limited API can be -compiled once and work with multiple versions of Python. +compiled once and be loaded on multiple versions of Python. Contents of the Limited API are :ref:`listed below `. .. c:macro:: Py_LIMITED_API @@ -76,7 +76,7 @@ Contents of the Limited API are :ref:`listed below `. Define ``Py_LIMITED_API`` to the value of :c:macro:`PY_VERSION_HEX` corresponding to the lowest Python version your extension supports. - The extension will work without recompilation with all Python 3 releases + The extension will be ABI-compatible with all Python 3 releases from the specified one onward, and can use Limited API introduced up to that version. @@ -94,7 +94,15 @@ Stable ABI ---------- To enable this, Python provides a *Stable ABI*: a set of symbols that will -remain compatible across Python 3.x versions. +remain ABI-compatible across Python 3.x versions. + +.. note:: + + The Stable ABI prevents ABI issues, like linker errors due to missing + symbols or data corruption due to changes in structure layouts or function + signatures. 
+ However, other changes in Python can change the *behavior* of extensions. + See Python's Backwards Compatibility Policy (:pep:`387`) for details. The Stable ABI contains symbols exposed in the :ref:`Limited API `, but also other ones – for example, functions necessary to From 84ff1313d04e8cbeec7b2cbe4503d86f1f5b449d Mon Sep 17 00:00:00 2001 From: "RUANG (James Roy)" Date: Tue, 3 Dec 2024 20:45:50 +0800 Subject: [PATCH 20/76] gh-126585: Add EHWPOISON error code (#126586) --- Doc/library/errno.rst | 7 +++++++ Doc/whatsnew/3.14.rst | 5 +++++ .../Library/2024-11-04-22-02-30.gh-issue-85046.Y5d_ZN.rst | 1 + Modules/errnomodule.c | 3 +++ 4 files changed, 16 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-11-04-22-02-30.gh-issue-85046.Y5d_ZN.rst diff --git a/Doc/library/errno.rst b/Doc/library/errno.rst index 4983b8961b1c3f..824d489818fac9 100644 --- a/Doc/library/errno.rst +++ b/Doc/library/errno.rst @@ -613,6 +613,13 @@ defined by the module. The specific list of defined symbols is available as No route to host +.. data:: EHWPOISON + + Memory page has hardware error. + + .. versionadded:: next + + .. data:: EALREADY Operation already in progress. This error is mapped to the diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 75d027d33ccd16..7bb9657e6ed9da 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -345,6 +345,11 @@ dis This feature is also exposed via :option:`dis --show-positions`. (Contributed by Bénédikt Tran in :gh:`123165`.) +errno +----- + +* Add :data:`errno.EHWPOISON` error code. + (Contributed by James Roy in :gh:`126585`.) 
fractions --------- diff --git a/Misc/NEWS.d/next/Library/2024-11-04-22-02-30.gh-issue-85046.Y5d_ZN.rst b/Misc/NEWS.d/next/Library/2024-11-04-22-02-30.gh-issue-85046.Y5d_ZN.rst new file mode 100644 index 00000000000000..ae1392e2caf387 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-04-22-02-30.gh-issue-85046.Y5d_ZN.rst @@ -0,0 +1 @@ +Add :data:`~errno.EHWPOISON` error code to :mod:`errno`. diff --git a/Modules/errnomodule.c b/Modules/errnomodule.c index 3f96f2f846d612..9557d68e759497 100644 --- a/Modules/errnomodule.c +++ b/Modules/errnomodule.c @@ -845,6 +845,9 @@ errno_exec(PyObject *module) #ifdef ENOKEY add_errcode("ENOKEY", ENOKEY, "Required key not available"); #endif +#ifdef EHWPOISON + add_errcode("EHWPOISON", EHWPOISON, "Memory page has hardware error"); +#endif #ifdef EKEYEXPIRED add_errcode("EKEYEXPIRED", EKEYEXPIRED, "Key has expired"); #endif From 979bf2489d0c59ae451b97d7e3c148f47e259f0b Mon Sep 17 00:00:00 2001 From: Daniele Parmeggiani <8658291+dpdani@users.noreply.github.com> Date: Tue, 3 Dec 2024 15:41:53 +0100 Subject: [PATCH 21/76] gh-117657: TSAN Fix races in `PyMember_Get` and `PyMember_Set` for C extensions (GH-123211) --- Include/cpython/pyatomic.h | 45 ++++ Include/cpython/pyatomic_gcc.h | 64 +++++ Include/cpython/pyatomic_msc.h | 102 ++++++++ Include/cpython/pyatomic_std.h | 136 ++++++++++ .../internal/pycore_pyatomic_ft_wrappers.h | 73 +++++- Lib/test/libregrtest/tsan.py | 1 + Lib/test/test_capi/test_structmembers.py | 13 + Lib/test/test_free_threading/test_slots.py | 244 ++++++++++++++++++ Modules/_testcapi/structmember.c | 10 +- Python/structmember.c | 97 +++---- 10 files changed, 735 insertions(+), 50 deletions(-) diff --git a/Include/cpython/pyatomic.h b/Include/cpython/pyatomic.h index 4ecef4f56edf42..6d106c1b499c69 100644 --- a/Include/cpython/pyatomic.h +++ b/Include/cpython/pyatomic.h @@ -321,6 +321,27 @@ _Py_atomic_load_ptr(const void *obj); static inline int _Py_atomic_load_int_relaxed(const int *obj); +static inline char 
+_Py_atomic_load_char_relaxed(const char *obj); + +static inline unsigned char +_Py_atomic_load_uchar_relaxed(const unsigned char *obj); + +static inline short +_Py_atomic_load_short_relaxed(const short *obj); + +static inline unsigned short +_Py_atomic_load_ushort_relaxed(const unsigned short *obj); + +static inline long +_Py_atomic_load_long_relaxed(const long *obj); + +static inline double +_Py_atomic_load_double_relaxed(const double *obj); + +static inline long long +_Py_atomic_load_llong_relaxed(const long long *obj); + static inline int8_t _Py_atomic_load_int8_relaxed(const int8_t *obj); @@ -458,6 +479,30 @@ static inline void _Py_atomic_store_ullong_relaxed(unsigned long long *obj, unsigned long long value); +static inline void +_Py_atomic_store_char_relaxed(char *obj, char value); + +static inline void +_Py_atomic_store_uchar_relaxed(unsigned char *obj, unsigned char value); + +static inline void +_Py_atomic_store_short_relaxed(short *obj, short value); + +static inline void +_Py_atomic_store_ushort_relaxed(unsigned short *obj, unsigned short value); + +static inline void +_Py_atomic_store_long_relaxed(long *obj, long value); + +static inline void +_Py_atomic_store_float_relaxed(float *obj, float value); + +static inline void +_Py_atomic_store_double_relaxed(double *obj, double value); + +static inline void +_Py_atomic_store_llong_relaxed(long long *obj, long long value); + // --- _Py_atomic_load_ptr_acquire / _Py_atomic_store_ptr_release ------------ diff --git a/Include/cpython/pyatomic_gcc.h b/Include/cpython/pyatomic_gcc.h index ef09954d53ac1d..b179e4c9a185a9 100644 --- a/Include/cpython/pyatomic_gcc.h +++ b/Include/cpython/pyatomic_gcc.h @@ -306,6 +306,34 @@ static inline int _Py_atomic_load_int_relaxed(const int *obj) { return __atomic_load_n(obj, __ATOMIC_RELAXED); } +static inline char +_Py_atomic_load_char_relaxed(const char *obj) +{ return __atomic_load_n(obj, __ATOMIC_RELAXED); } + +static inline unsigned char +_Py_atomic_load_uchar_relaxed(const 
unsigned char *obj) +{ return __atomic_load_n(obj, __ATOMIC_RELAXED); } + +static inline short +_Py_atomic_load_short_relaxed(const short *obj) +{ return __atomic_load_n(obj, __ATOMIC_RELAXED); } + +static inline unsigned short +_Py_atomic_load_ushort_relaxed(const unsigned short *obj) +{ return __atomic_load_n(obj, __ATOMIC_RELAXED); } + +static inline long +_Py_atomic_load_long_relaxed(const long *obj) +{ return __atomic_load_n(obj, __ATOMIC_RELAXED); } + +static inline float +_Py_atomic_load_float_relaxed(const float *obj) +{ float ret; __atomic_load(obj, &ret, __ATOMIC_RELAXED); return ret; } + +static inline double +_Py_atomic_load_double_relaxed(const double *obj) +{ double ret; __atomic_load(obj, &ret, __ATOMIC_RELAXED); return ret; } + static inline int8_t _Py_atomic_load_int8_relaxed(const int8_t *obj) { return __atomic_load_n(obj, __ATOMIC_RELAXED); } @@ -362,6 +390,10 @@ static inline unsigned long long _Py_atomic_load_ullong_relaxed(const unsigned long long *obj) { return __atomic_load_n(obj, __ATOMIC_RELAXED); } +static inline long long +_Py_atomic_load_llong_relaxed(const long long *obj) +{ return __atomic_load_n(obj, __ATOMIC_RELAXED); } + // --- _Py_atomic_store ------------------------------------------------------ @@ -485,6 +517,38 @@ _Py_atomic_store_ullong_relaxed(unsigned long long *obj, unsigned long long value) { __atomic_store_n(obj, value, __ATOMIC_RELAXED); } +static inline void +_Py_atomic_store_char_relaxed(char *obj, char value) +{ __atomic_store_n(obj, value, __ATOMIC_RELAXED); } + +static inline void +_Py_atomic_store_uchar_relaxed(unsigned char *obj, unsigned char value) +{ __atomic_store_n(obj, value, __ATOMIC_RELAXED); } + +static inline void +_Py_atomic_store_short_relaxed(short *obj, short value) +{ __atomic_store_n(obj, value, __ATOMIC_RELAXED); } + +static inline void +_Py_atomic_store_ushort_relaxed(unsigned short *obj, unsigned short value) +{ __atomic_store_n(obj, value, __ATOMIC_RELAXED); } + +static inline void 
+_Py_atomic_store_long_relaxed(long *obj, long value) +{ __atomic_store_n(obj, value, __ATOMIC_RELAXED); } + +static inline void +_Py_atomic_store_float_relaxed(float *obj, float value) +{ __atomic_store(obj, &value, __ATOMIC_RELAXED); } + +static inline void +_Py_atomic_store_double_relaxed(double *obj, double value) +{ __atomic_store(obj, &value, __ATOMIC_RELAXED); } + +static inline void +_Py_atomic_store_llong_relaxed(long long *obj, long long value) +{ __atomic_store_n(obj, value, __ATOMIC_RELAXED); } + // --- _Py_atomic_load_ptr_acquire / _Py_atomic_store_ptr_release ------------ diff --git a/Include/cpython/pyatomic_msc.h b/Include/cpython/pyatomic_msc.h index 84da21bdcbff4f..d155955df0cddf 100644 --- a/Include/cpython/pyatomic_msc.h +++ b/Include/cpython/pyatomic_msc.h @@ -634,6 +634,48 @@ _Py_atomic_load_int_relaxed(const int *obj) return *(volatile int *)obj; } +static inline char +_Py_atomic_load_char_relaxed(const char *obj) +{ + return *(volatile char *)obj; +} + +static inline unsigned char +_Py_atomic_load_uchar_relaxed(const unsigned char *obj) +{ + return *(volatile unsigned char *)obj; +} + +static inline short +_Py_atomic_load_short_relaxed(const short *obj) +{ + return *(volatile short *)obj; +} + +static inline unsigned short +_Py_atomic_load_ushort_relaxed(const unsigned short *obj) +{ + return *(volatile unsigned short *)obj; +} + +static inline long +_Py_atomic_load_long_relaxed(const long *obj) +{ + return *(volatile long *)obj; +} + +static inline float +_Py_atomic_load_float_relaxed(const float *obj) +{ + return *(volatile float *)obj; +} + +static inline double +_Py_atomic_load_double_relaxed(const double *obj) +{ + return *(volatile double *)obj; +} + static inline int8_t _Py_atomic_load_int8_relaxed(const int8_t *obj) { @@ -718,6 +760,12 @@ _Py_atomic_load_ullong_relaxed(const unsigned long long *obj) return *(volatile unsigned long long *)obj; } +static inline long long +_Py_atomic_load_llong_relaxed(const long long *obj) +{ + return 
*(volatile long long *)obj; +} + // --- _Py_atomic_store ------------------------------------------------------ @@ -899,6 +947,60 @@ _Py_atomic_store_ullong_relaxed(unsigned long long *obj, *(volatile unsigned long long *)obj = value; } +static inline void +_Py_atomic_store_char_relaxed(char *obj, char value) +{ + *(volatile char *)obj = value; +} + +static inline void +_Py_atomic_store_uchar_relaxed(unsigned char *obj, unsigned char value) +{ + *(volatile unsigned char *)obj = value; +} + +static inline void +_Py_atomic_store_short_relaxed(short *obj, short value) +{ + *(volatile short *)obj = value; +} + +static inline void +_Py_atomic_store_ushort_relaxed(unsigned short *obj, unsigned short value) +{ + *(volatile unsigned short *)obj = value; +} + +static inline void +_Py_atomic_store_uint_release(unsigned int *obj, unsigned int value) +{ + *(volatile unsigned int *)obj = value; +} + +static inline void +_Py_atomic_store_long_relaxed(long *obj, long value) +{ + *(volatile long *)obj = value; +} + +static inline void +_Py_atomic_store_float_relaxed(float *obj, float value) +{ + *(volatile float *)obj = value; +} + +static inline void +_Py_atomic_store_double_relaxed(double *obj, double value) +{ + *(volatile double *)obj = value; +} + +static inline void +_Py_atomic_store_llong_relaxed(long long *obj, long long value) +{ + *(volatile long long *)obj = value; +} + // --- _Py_atomic_load_ptr_acquire / _Py_atomic_store_ptr_release ------------ diff --git a/Include/cpython/pyatomic_std.h b/Include/cpython/pyatomic_std.h index 7c71e94c68f8e6..69a8b9e615ea5f 100644 --- a/Include/cpython/pyatomic_std.h +++ b/Include/cpython/pyatomic_std.h @@ -515,6 +515,62 @@ _Py_atomic_load_int_relaxed(const int *obj) memory_order_relaxed); } +static inline char +_Py_atomic_load_char_relaxed(const char *obj) +{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(char)*)obj, + memory_order_relaxed); +} + +static inline unsigned char +_Py_atomic_load_uchar_relaxed(const 
unsigned char *obj) +{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(unsigned char)*)obj, + memory_order_relaxed); +} + +static inline short +_Py_atomic_load_short_relaxed(const short *obj) +{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(short)*)obj, + memory_order_relaxed); +} + +static inline unsigned short +_Py_atomic_load_ushort_relaxed(const unsigned short *obj) +{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(unsigned short)*)obj, + memory_order_relaxed); +} + +static inline long +_Py_atomic_load_long_relaxed(const long *obj) +{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(long)*)obj, + memory_order_relaxed); +} + +static inline float +_Py_atomic_load_float_relaxed(const float *obj) +{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(float)*)obj, + memory_order_relaxed); +} + +static inline double +_Py_atomic_load_double_relaxed(const double *obj) +{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(double)*)obj, + memory_order_relaxed); +} + static inline int8_t _Py_atomic_load_int8_relaxed(const int8_t *obj) { @@ -627,6 +683,14 @@ _Py_atomic_load_ullong_relaxed(const unsigned long long *obj) memory_order_relaxed); } +static inline long long +_Py_atomic_load_llong_relaxed(const long long *obj) +{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(long long)*)obj, + memory_order_relaxed); +} + // --- _Py_atomic_store ------------------------------------------------------ @@ -852,6 +916,78 @@ _Py_atomic_store_ullong_relaxed(unsigned long long *obj, memory_order_relaxed); } +static inline void +_Py_atomic_store_char_relaxed(char *obj, char value) +{ + _Py_USING_STD; + atomic_store_explicit((_Atomic(char)*)obj, value, + memory_order_relaxed); +} + +static inline void +_Py_atomic_store_uchar_relaxed(unsigned char *obj, unsigned char value) +{ + _Py_USING_STD; + atomic_store_explicit((_Atomic(unsigned char)*)obj, value, + memory_order_relaxed); +} + +static inline 
void +_Py_atomic_store_short_relaxed(short *obj, short value) +{ + _Py_USING_STD; + atomic_store_explicit((_Atomic(short)*)obj, value, + memory_order_relaxed); +} + +static inline void +_Py_atomic_store_ushort_relaxed(unsigned short *obj, unsigned short value) +{ + _Py_USING_STD; + atomic_store_explicit((_Atomic(unsigned short)*)obj, value, + memory_order_relaxed); +} + +static inline void +_Py_atomic_store_uint_release(unsigned int *obj, unsigned int value) +{ + _Py_USING_STD; + atomic_store_explicit((_Atomic(unsigned int)*)obj, value, + memory_order_release); +} + +static inline void +_Py_atomic_store_long_relaxed(long *obj, long value) +{ + _Py_USING_STD; + atomic_store_explicit((_Atomic(long)*)obj, value, + memory_order_relaxed); +} + +static inline void +_Py_atomic_store_float_relaxed(float *obj, float value) +{ + _Py_USING_STD; + atomic_store_explicit((_Atomic(float)*)obj, value, + memory_order_relaxed); +} + +static inline void +_Py_atomic_store_double_relaxed(double *obj, double value) +{ + _Py_USING_STD; + atomic_store_explicit((_Atomic(double)*)obj, value, + memory_order_relaxed); +} + +static inline void +_Py_atomic_store_llong_relaxed(long long *obj, long long value) +{ + _Py_USING_STD; + atomic_store_explicit((_Atomic(long long)*)obj, value, + memory_order_relaxed); +} + // --- _Py_atomic_load_ptr_acquire / _Py_atomic_store_ptr_release ------------ diff --git a/Include/internal/pycore_pyatomic_ft_wrappers.h b/Include/internal/pycore_pyatomic_ft_wrappers.h index a1bb383bcd22e9..d755d03a5fa190 100644 --- a/Include/internal/pycore_pyatomic_ft_wrappers.h +++ b/Include/internal/pycore_pyatomic_ft_wrappers.h @@ -61,6 +61,54 @@ extern "C" { _Py_atomic_store_uint16_relaxed(&value, new_value) #define FT_ATOMIC_STORE_UINT32_RELAXED(value, new_value) \ _Py_atomic_store_uint32_relaxed(&value, new_value) +#define FT_ATOMIC_STORE_CHAR_RELAXED(value, new_value) \ + _Py_atomic_store_char_relaxed(&value, new_value) +#define FT_ATOMIC_LOAD_CHAR_RELAXED(value) \ + 
_Py_atomic_load_char_relaxed(&value) +#define FT_ATOMIC_STORE_UCHAR_RELAXED(value, new_value) \ + _Py_atomic_store_uchar_relaxed(&value, new_value) +#define FT_ATOMIC_LOAD_UCHAR_RELAXED(value) \ + _Py_atomic_load_uchar_relaxed(&value) +#define FT_ATOMIC_STORE_SHORT_RELAXED(value, new_value) \ + _Py_atomic_store_short_relaxed(&value, new_value) +#define FT_ATOMIC_LOAD_SHORT_RELAXED(value) \ + _Py_atomic_load_short_relaxed(&value) +#define FT_ATOMIC_STORE_USHORT_RELAXED(value, new_value) \ + _Py_atomic_store_ushort_relaxed(&value, new_value) +#define FT_ATOMIC_LOAD_USHORT_RELAXED(value) \ + _Py_atomic_load_ushort_relaxed(&value) +#define FT_ATOMIC_STORE_INT_RELAXED(value, new_value) \ + _Py_atomic_store_int_relaxed(&value, new_value) +#define FT_ATOMIC_LOAD_INT_RELAXED(value) \ + _Py_atomic_load_int_relaxed(&value) +#define FT_ATOMIC_STORE_UINT_RELAXED(value, new_value) \ + _Py_atomic_store_uint_relaxed(&value, new_value) +#define FT_ATOMIC_LOAD_UINT_RELAXED(value) \ + _Py_atomic_load_uint_relaxed(&value) +#define FT_ATOMIC_STORE_LONG_RELAXED(value, new_value) \ + _Py_atomic_store_long_relaxed(&value, new_value) +#define FT_ATOMIC_LOAD_LONG_RELAXED(value) \ + _Py_atomic_load_long_relaxed(&value) +#define FT_ATOMIC_STORE_ULONG_RELAXED(value, new_value) \ + _Py_atomic_store_ulong_relaxed(&value, new_value) +#define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) \ + _Py_atomic_store_ssize_relaxed(&value, new_value) +#define FT_ATOMIC_STORE_FLOAT_RELAXED(value, new_value) \ + _Py_atomic_store_float_relaxed(&value, new_value) +#define FT_ATOMIC_LOAD_FLOAT_RELAXED(value) \ + _Py_atomic_load_float_relaxed(&value) +#define FT_ATOMIC_STORE_DOUBLE_RELAXED(value, new_value) \ + _Py_atomic_store_double_relaxed(&value, new_value) +#define FT_ATOMIC_LOAD_DOUBLE_RELAXED(value) \ + _Py_atomic_load_double_relaxed(&value) +#define FT_ATOMIC_STORE_LLONG_RELAXED(value, new_value) \ + _Py_atomic_store_llong_relaxed(&value, new_value) +#define FT_ATOMIC_LOAD_LLONG_RELAXED(value) \ + 
_Py_atomic_load_llong_relaxed(&value) +#define FT_ATOMIC_STORE_ULLONG_RELAXED(value, new_value) \ + _Py_atomic_store_ullong_relaxed(&value, new_value) +#define FT_ATOMIC_LOAD_ULLONG_RELAXED(value) \ + _Py_atomic_load_ullong_relaxed(&value) #else #define FT_ATOMIC_LOAD_PTR(value) value @@ -68,7 +116,6 @@ extern "C" { #define FT_ATOMIC_LOAD_SSIZE(value) value #define FT_ATOMIC_LOAD_SSIZE_ACQUIRE(value) value #define FT_ATOMIC_LOAD_SSIZE_RELAXED(value) value -#define FT_ATOMIC_STORE_PTR(value, new_value) value = new_value #define FT_ATOMIC_LOAD_PTR_ACQUIRE(value) value #define FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(value) value #define FT_ATOMIC_LOAD_PTR_RELAXED(value) value @@ -85,6 +132,30 @@ extern "C" { #define FT_ATOMIC_STORE_UINT8_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_STORE_UINT16_RELAXED(value, new_value) value = new_value #define FT_ATOMIC_STORE_UINT32_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_LOAD_CHAR_RELAXED(value) value +#define FT_ATOMIC_STORE_CHAR_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_LOAD_UCHAR_RELAXED(value) value +#define FT_ATOMIC_STORE_UCHAR_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_LOAD_SHORT_RELAXED(value) value +#define FT_ATOMIC_STORE_SHORT_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_LOAD_USHORT_RELAXED(value) value +#define FT_ATOMIC_STORE_USHORT_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_LOAD_INT_RELAXED(value) value +#define FT_ATOMIC_STORE_INT_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_LOAD_UINT_RELAXED(value) value +#define FT_ATOMIC_STORE_UINT_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_LOAD_LONG_RELAXED(value) value +#define FT_ATOMIC_STORE_LONG_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_STORE_ULONG_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_LOAD_FLOAT_RELAXED(value) 
value +#define FT_ATOMIC_STORE_FLOAT_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_LOAD_DOUBLE_RELAXED(value) value +#define FT_ATOMIC_STORE_DOUBLE_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_LOAD_LLONG_RELAXED(value) value +#define FT_ATOMIC_STORE_LLONG_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_LOAD_ULLONG_RELAXED(value) value +#define FT_ATOMIC_STORE_ULLONG_RELAXED(value, new_value) value = new_value #endif diff --git a/Lib/test/libregrtest/tsan.py b/Lib/test/libregrtest/tsan.py index 822ac0f4044d9e..00d5779d950e72 100644 --- a/Lib/test/libregrtest/tsan.py +++ b/Lib/test/libregrtest/tsan.py @@ -25,6 +25,7 @@ 'test_threading_local', 'test_threadsignals', 'test_weakref', + 'test_free_threading.test_slots', ] diff --git a/Lib/test/test_capi/test_structmembers.py b/Lib/test/test_capi/test_structmembers.py index 6b27dc512a7d15..ae9168fc39243f 100644 --- a/Lib/test/test_capi/test_structmembers.py +++ b/Lib/test/test_capi/test_structmembers.py @@ -37,6 +37,9 @@ def _make_test_object(cls): 9.99999,# T_FLOAT 10.1010101010, # T_DOUBLE "hi", # T_STRING_INPLACE + 12, # T_LONGLONG + 13, # T_ULONGLONG + "c", # T_CHAR ) @@ -162,6 +165,16 @@ def test_inplace_string(self): self.assertRaises(TypeError, setattr, ts, "T_STRING_INPLACE", "s") self.assertRaises(TypeError, delattr, ts, "T_STRING_INPLACE") + def test_char(self): + ts = self.ts + self.assertEqual(ts.T_CHAR, "c") + ts.T_CHAR = "z" + self.assertEqual(ts.T_CHAR, "z") + self.assertRaises(TypeError, setattr, ts, "T_CHAR", "") + self.assertRaises(TypeError, setattr, ts, "T_CHAR", b"a") + self.assertRaises(TypeError, setattr, ts, "T_CHAR", bytearray(b"b")) + self.assertRaises(TypeError, delattr, ts, "T_CHAR") + class ReadWriteTests_OldAPI(ReadWriteTests, unittest.TestCase): cls = _test_structmembersType_OldAPI diff --git a/Lib/test/test_free_threading/test_slots.py b/Lib/test/test_free_threading/test_slots.py index 758f74f54d0b56..a3b9f4b0175ae7 100644 --- 
a/Lib/test/test_free_threading/test_slots.py +++ b/Lib/test/test_free_threading/test_slots.py @@ -1,3 +1,4 @@ +import _testcapi import threading from test.support import threading_helper from unittest import TestCase @@ -41,3 +42,246 @@ def reader(): assert 0 <= eggs <= iters run_in_threads([writer, reader, reader, reader]) + + def test_T_BOOL(self): + spam_old = _testcapi._test_structmembersType_OldAPI() + spam_new = _testcapi._test_structmembersType_NewAPI() + + def writer(): + for _ in range(1_000): + # different code paths for True and False + spam_old.T_BOOL = True + spam_new.T_BOOL = True + spam_old.T_BOOL = False + spam_new.T_BOOL = False + + def reader(): + for _ in range(1_000): + spam_old.T_BOOL + spam_new.T_BOOL + + run_in_threads([writer, reader]) + + def test_T_BYTE(self): + spam_old = _testcapi._test_structmembersType_OldAPI() + spam_new = _testcapi._test_structmembersType_NewAPI() + + def writer(): + for _ in range(1_000): + spam_old.T_BYTE = 0 + spam_new.T_BYTE = 0 + + def reader(): + for _ in range(1_000): + spam_old.T_BYTE + spam_new.T_BYTE + + run_in_threads([writer, reader]) + + def test_T_UBYTE(self): + spam_old = _testcapi._test_structmembersType_OldAPI() + spam_new = _testcapi._test_structmembersType_NewAPI() + + def writer(): + for _ in range(1_000): + spam_old.T_UBYTE = 0 + spam_new.T_UBYTE = 0 + + def reader(): + for _ in range(1_000): + spam_old.T_UBYTE + spam_new.T_UBYTE + + run_in_threads([writer, reader]) + + def test_T_SHORT(self): + spam_old = _testcapi._test_structmembersType_OldAPI() + spam_new = _testcapi._test_structmembersType_NewAPI() + + def writer(): + for _ in range(1_000): + spam_old.T_SHORT = 0 + spam_new.T_SHORT = 0 + + def reader(): + for _ in range(1_000): + spam_old.T_SHORT + spam_new.T_SHORT + + run_in_threads([writer, reader]) + + def test_T_USHORT(self): + spam_old = _testcapi._test_structmembersType_OldAPI() + spam_new = _testcapi._test_structmembersType_NewAPI() + + def writer(): + for _ in range(1_000): + 
spam_old.T_USHORT = 0 + spam_new.T_USHORT = 0 + + def reader(): + for _ in range(1_000): + spam_old.T_USHORT + spam_new.T_USHORT + + run_in_threads([writer, reader]) + + def test_T_INT(self): + spam_old = _testcapi._test_structmembersType_OldAPI() + spam_new = _testcapi._test_structmembersType_NewAPI() + + def writer(): + for _ in range(1_000): + spam_old.T_INT = 0 + spam_new.T_INT = 0 + + def reader(): + for _ in range(1_000): + spam_old.T_INT + spam_new.T_INT + + run_in_threads([writer, reader]) + + def test_T_UINT(self): + spam_old = _testcapi._test_structmembersType_OldAPI() + spam_new = _testcapi._test_structmembersType_NewAPI() + + def writer(): + for _ in range(1_000): + spam_old.T_UINT = 0 + spam_new.T_UINT = 0 + + def reader(): + for _ in range(1_000): + spam_old.T_UINT + spam_new.T_UINT + + run_in_threads([writer, reader]) + + def test_T_LONG(self): + spam_old = _testcapi._test_structmembersType_OldAPI() + spam_new = _testcapi._test_structmembersType_NewAPI() + + def writer(): + for _ in range(1_000): + spam_old.T_LONG = 0 + spam_new.T_LONG = 0 + + def reader(): + for _ in range(1_000): + spam_old.T_LONG + spam_new.T_LONG + + run_in_threads([writer, reader]) + + def test_T_ULONG(self): + spam_old = _testcapi._test_structmembersType_OldAPI() + spam_new = _testcapi._test_structmembersType_NewAPI() + + def writer(): + for _ in range(1_000): + spam_old.T_ULONG = 0 + spam_new.T_ULONG = 0 + + def reader(): + for _ in range(1_000): + spam_old.T_ULONG + spam_new.T_ULONG + + run_in_threads([writer, reader]) + + def test_T_PYSSIZET(self): + spam_old = _testcapi._test_structmembersType_OldAPI() + spam_new = _testcapi._test_structmembersType_NewAPI() + + def writer(): + for _ in range(1_000): + spam_old.T_PYSSIZET = 0 + spam_new.T_PYSSIZET = 0 + + def reader(): + for _ in range(1_000): + spam_old.T_PYSSIZET + spam_new.T_PYSSIZET + + run_in_threads([writer, reader]) + + def test_T_FLOAT(self): + spam_old = _testcapi._test_structmembersType_OldAPI() + spam_new = 
_testcapi._test_structmembersType_NewAPI() + + def writer(): + for _ in range(1_000): + spam_old.T_FLOAT = 0.0 + spam_new.T_FLOAT = 0.0 + + def reader(): + for _ in range(1_000): + spam_old.T_FLOAT + spam_new.T_FLOAT + + run_in_threads([writer, reader]) + + def test_T_DOUBLE(self): + spam_old = _testcapi._test_structmembersType_OldAPI() + spam_new = _testcapi._test_structmembersType_NewAPI() + + def writer(): + for _ in range(1_000): + spam_old.T_DOUBLE = 0.0 + spam_new.T_DOUBLE = 0.0 + + def reader(): + for _ in range(1_000): + spam_old.T_DOUBLE + spam_new.T_DOUBLE + + run_in_threads([writer, reader]) + + def test_T_LONGLONG(self): + spam_old = _testcapi._test_structmembersType_OldAPI() + spam_new = _testcapi._test_structmembersType_NewAPI() + + def writer(): + for _ in range(1_000): + spam_old.T_LONGLONG = 0 + spam_new.T_LONGLONG = 0 + + def reader(): + for _ in range(1_000): + spam_old.T_LONGLONG + spam_new.T_LONGLONG + + run_in_threads([writer, reader]) + + def test_T_ULONGLONG(self): + spam_old = _testcapi._test_structmembersType_OldAPI() + spam_new = _testcapi._test_structmembersType_NewAPI() + + def writer(): + for _ in range(1_000): + spam_old.T_ULONGLONG = 0 + spam_new.T_ULONGLONG = 0 + + def reader(): + for _ in range(1_000): + spam_old.T_ULONGLONG + spam_new.T_ULONGLONG + + run_in_threads([writer, reader]) + + def test_T_CHAR(self): + spam_old = _testcapi._test_structmembersType_OldAPI() + spam_new = _testcapi._test_structmembersType_NewAPI() + + def writer(): + for _ in range(1_000): + spam_old.T_CHAR = "c" + spam_new.T_CHAR = "c" + + def reader(): + for _ in range(1_000): + spam_old.T_CHAR + spam_new.T_CHAR + + run_in_threads([writer, reader]) diff --git a/Modules/_testcapi/structmember.c b/Modules/_testcapi/structmember.c index 096eaecd40855f..c1861db18c4af2 100644 --- a/Modules/_testcapi/structmember.c +++ b/Modules/_testcapi/structmember.c @@ -22,6 +22,7 @@ typedef struct { char inplace_member[6]; long long longlong_member; unsigned long long 
ulonglong_member; + char char_member; } all_structmembers; typedef struct { @@ -46,6 +47,7 @@ static struct PyMemberDef test_members_newapi[] = { {"T_STRING_INPLACE", Py_T_STRING_INPLACE, offsetof(test_structmembers, structmembers.inplace_member), 0, NULL}, {"T_LONGLONG", Py_T_LONGLONG, offsetof(test_structmembers, structmembers.longlong_member), 0, NULL}, {"T_ULONGLONG", Py_T_ULONGLONG, offsetof(test_structmembers, structmembers.ulonglong_member), 0, NULL}, + {"T_CHAR", Py_T_CHAR, offsetof(test_structmembers, structmembers.char_member), 0, NULL}, {NULL} }; @@ -56,9 +58,9 @@ test_structmembers_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) "T_BOOL", "T_BYTE", "T_UBYTE", "T_SHORT", "T_USHORT", "T_INT", "T_UINT", "T_LONG", "T_ULONG", "T_PYSSIZET", "T_FLOAT", "T_DOUBLE", "T_STRING_INPLACE", - "T_LONGLONG", "T_ULONGLONG", + "T_LONGLONG", "T_ULONGLONG", "T_CHAR", NULL}; - static const char fmt[] = "|bbBhHiIlknfds#LK"; + static const char fmt[] = "|bbBhHiIlknfds#LKC"; test_structmembers *ob; const char *s = NULL; Py_ssize_t string_len = 0; @@ -82,7 +84,8 @@ test_structmembers_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) &ob->structmembers.double_member, &s, &string_len, &ob->structmembers.longlong_member, - &ob->structmembers.ulonglong_member)) + &ob->structmembers.ulonglong_member, + &ob->structmembers.char_member)) { Py_DECREF(ob); return NULL; @@ -132,6 +135,7 @@ static struct PyMemberDef test_members[] = { {"T_STRING_INPLACE", T_STRING_INPLACE, offsetof(test_structmembers, structmembers.inplace_member), 0, NULL}, {"T_LONGLONG", T_LONGLONG, offsetof(test_structmembers, structmembers.longlong_member), 0, NULL}, {"T_ULONGLONG", T_ULONGLONG, offsetof(test_structmembers, structmembers.ulonglong_member), 0, NULL}, + {"T_CHAR", T_CHAR, offsetof(test_structmembers, structmembers.char_member), 0, NULL}, {NULL} }; diff --git a/Python/structmember.c b/Python/structmember.c index d5e7ab83093dc8..d36e049d6b5d20 100644 --- a/Python/structmember.c +++ 
b/Python/structmember.c @@ -34,40 +34,40 @@ PyMember_GetOne(const char *obj_addr, PyMemberDef *l) const char* addr = obj_addr + l->offset; switch (l->type) { case Py_T_BOOL: - v = PyBool_FromLong(*(char*)addr); + v = PyBool_FromLong(FT_ATOMIC_LOAD_CHAR_RELAXED(*(char*)addr)); break; case Py_T_BYTE: - v = PyLong_FromLong(*(char*)addr); + v = PyLong_FromLong(FT_ATOMIC_LOAD_CHAR_RELAXED(*(char*)addr)); break; case Py_T_UBYTE: - v = PyLong_FromUnsignedLong(*(unsigned char*)addr); + v = PyLong_FromUnsignedLong(FT_ATOMIC_LOAD_UCHAR_RELAXED(*(unsigned char*)addr)); break; case Py_T_SHORT: - v = PyLong_FromLong(*(short*)addr); + v = PyLong_FromLong(FT_ATOMIC_LOAD_SHORT_RELAXED(*(short*)addr)); break; case Py_T_USHORT: - v = PyLong_FromUnsignedLong(*(unsigned short*)addr); + v = PyLong_FromUnsignedLong(FT_ATOMIC_LOAD_USHORT_RELAXED(*(unsigned short*)addr)); break; case Py_T_INT: - v = PyLong_FromLong(*(int*)addr); + v = PyLong_FromLong(FT_ATOMIC_LOAD_INT_RELAXED(*(int*)addr)); break; case Py_T_UINT: - v = PyLong_FromUnsignedLong(*(unsigned int*)addr); + v = PyLong_FromUnsignedLong(FT_ATOMIC_LOAD_UINT_RELAXED(*(unsigned int*)addr)); break; case Py_T_LONG: - v = PyLong_FromLong(*(long*)addr); + v = PyLong_FromLong(FT_ATOMIC_LOAD_LONG_RELAXED(*(long*)addr)); break; case Py_T_ULONG: - v = PyLong_FromUnsignedLong(*(unsigned long*)addr); + v = PyLong_FromUnsignedLong(FT_ATOMIC_LOAD_ULONG_RELAXED(*(unsigned long*)addr)); break; case Py_T_PYSSIZET: - v = PyLong_FromSsize_t(*(Py_ssize_t*)addr); + v = PyLong_FromSsize_t(FT_ATOMIC_LOAD_SSIZE_RELAXED(*(Py_ssize_t*)addr)); break; case Py_T_FLOAT: - v = PyFloat_FromDouble((double)*(float*)addr); + v = PyFloat_FromDouble((double)FT_ATOMIC_LOAD_FLOAT_RELAXED(*(float*)addr)); break; case Py_T_DOUBLE: - v = PyFloat_FromDouble(*(double*)addr); + v = PyFloat_FromDouble(FT_ATOMIC_LOAD_DOUBLE_RELAXED(*(double*)addr)); break; case Py_T_STRING: if (*(char**)addr == NULL) { @@ -79,9 +79,11 @@ PyMember_GetOne(const char *obj_addr, PyMemberDef *l) 
case Py_T_STRING_INPLACE: v = PyUnicode_FromString((char*)addr); break; - case Py_T_CHAR: - v = PyUnicode_FromStringAndSize((char*)addr, 1); + case Py_T_CHAR: { + char char_val = FT_ATOMIC_LOAD_CHAR_RELAXED(*addr); + v = PyUnicode_FromStringAndSize(&char_val, 1); break; + } case _Py_T_OBJECT: v = *(PyObject **)addr; if (v == NULL) @@ -104,10 +106,10 @@ PyMember_GetOne(const char *obj_addr, PyMemberDef *l) #endif break; case Py_T_LONGLONG: - v = PyLong_FromLongLong(*(long long *)addr); + v = PyLong_FromLongLong(FT_ATOMIC_LOAD_LLONG_RELAXED(*(long long *)addr)); break; case Py_T_ULONGLONG: - v = PyLong_FromUnsignedLongLong(*(unsigned long long *)addr); + v = PyLong_FromUnsignedLongLong(FT_ATOMIC_LOAD_ULLONG_RELAXED(*(unsigned long long *)addr)); break; case _Py_T_NONE: // doesn't require free-threading code path @@ -169,16 +171,16 @@ PyMember_SetOne(char *addr, PyMemberDef *l, PyObject *v) return -1; } if (v == Py_True) - *(char*)addr = (char) 1; + FT_ATOMIC_STORE_CHAR_RELAXED(*(char*)addr, 1); else - *(char*)addr = (char) 0; + FT_ATOMIC_STORE_CHAR_RELAXED(*(char*)addr, 0); break; } case Py_T_BYTE:{ long long_val = PyLong_AsLong(v); if ((long_val == -1) && PyErr_Occurred()) return -1; - *(char*)addr = (char)long_val; + FT_ATOMIC_STORE_CHAR_RELAXED(*(char*)addr, (char)long_val); /* XXX: For compatibility, only warn about truncations for now. 
*/ if ((long_val > CHAR_MAX) || (long_val < CHAR_MIN)) @@ -189,7 +191,7 @@ PyMember_SetOne(char *addr, PyMemberDef *l, PyObject *v) long long_val = PyLong_AsLong(v); if ((long_val == -1) && PyErr_Occurred()) return -1; - *(unsigned char*)addr = (unsigned char)long_val; + FT_ATOMIC_STORE_UCHAR_RELAXED(*(unsigned char*)addr, (unsigned char)long_val); if ((long_val > UCHAR_MAX) || (long_val < 0)) WARN("Truncation of value to unsigned char"); break; @@ -198,7 +200,7 @@ PyMember_SetOne(char *addr, PyMemberDef *l, PyObject *v) long long_val = PyLong_AsLong(v); if ((long_val == -1) && PyErr_Occurred()) return -1; - *(short*)addr = (short)long_val; + FT_ATOMIC_STORE_SHORT_RELAXED(*(short*)addr, (short)long_val); if ((long_val > SHRT_MAX) || (long_val < SHRT_MIN)) WARN("Truncation of value to short"); break; @@ -207,7 +209,7 @@ PyMember_SetOne(char *addr, PyMemberDef *l, PyObject *v) long long_val = PyLong_AsLong(v); if ((long_val == -1) && PyErr_Occurred()) return -1; - *(unsigned short*)addr = (unsigned short)long_val; + FT_ATOMIC_STORE_USHORT_RELAXED(*(unsigned short*)addr, (unsigned short)long_val); if ((long_val > USHRT_MAX) || (long_val < 0)) WARN("Truncation of value to unsigned short"); break; @@ -216,7 +218,7 @@ PyMember_SetOne(char *addr, PyMemberDef *l, PyObject *v) long long_val = PyLong_AsLong(v); if ((long_val == -1) && PyErr_Occurred()) return -1; - *(int *)addr = (int)long_val; + FT_ATOMIC_STORE_INT_RELAXED(*(int *)addr, (int)long_val); if ((long_val > INT_MAX) || (long_val < INT_MIN)) WARN("Truncation of value to int"); break; @@ -234,7 +236,7 @@ PyMember_SetOne(char *addr, PyMemberDef *l, PyObject *v) if (long_val == -1 && PyErr_Occurred()) { return -1; } - *(unsigned int *)addr = (unsigned int)(unsigned long)long_val; + FT_ATOMIC_STORE_UINT_RELAXED(*(unsigned int *)addr, (unsigned int)(unsigned long)long_val); WARN("Writing negative value into unsigned field"); } else { @@ -243,19 +245,20 @@ PyMember_SetOne(char *addr, PyMemberDef *l, PyObject *v) if 
(ulong_val == (unsigned long)-1 && PyErr_Occurred()) { return -1; } - *(unsigned int*)addr = (unsigned int)ulong_val; + FT_ATOMIC_STORE_UINT_RELAXED(*(unsigned int *)addr, (unsigned int)ulong_val); if (ulong_val > UINT_MAX) { WARN("Truncation of value to unsigned int"); } } break; } - case Py_T_LONG:{ - *(long*)addr = PyLong_AsLong(v); - if ((*(long*)addr == -1) && PyErr_Occurred()) + case Py_T_LONG: { + const long long_val = PyLong_AsLong(v); + if ((long_val == -1) && PyErr_Occurred()) return -1; + FT_ATOMIC_STORE_LONG_RELAXED(*(long*)addr, long_val); break; - } + } case Py_T_ULONG: { /* XXX: For compatibility, accept negative int values as well. */ @@ -269,7 +272,7 @@ PyMember_SetOne(char *addr, PyMemberDef *l, PyObject *v) if (long_val == -1 && PyErr_Occurred()) { return -1; } - *(unsigned long *)addr = (unsigned long)long_val; + FT_ATOMIC_STORE_ULONG_RELAXED(*(unsigned long *)addr, (unsigned long)long_val); WARN("Writing negative value into unsigned field"); } else { @@ -278,29 +281,31 @@ PyMember_SetOne(char *addr, PyMemberDef *l, PyObject *v) if (ulong_val == (unsigned long)-1 && PyErr_Occurred()) { return -1; } - *(unsigned long*)addr = ulong_val; + FT_ATOMIC_STORE_ULONG_RELAXED(*(unsigned long *)addr, ulong_val); } break; } - case Py_T_PYSSIZET:{ - *(Py_ssize_t*)addr = PyLong_AsSsize_t(v); - if ((*(Py_ssize_t*)addr == (Py_ssize_t)-1) - && PyErr_Occurred()) - return -1; + case Py_T_PYSSIZET: { + const Py_ssize_t ssize_val = PyLong_AsSsize_t(v); + if ((ssize_val == (Py_ssize_t)-1) && PyErr_Occurred()) + return -1; + FT_ATOMIC_STORE_SSIZE_RELAXED(*(Py_ssize_t*)addr, ssize_val); break; - } + } case Py_T_FLOAT:{ double double_val = PyFloat_AsDouble(v); if ((double_val == -1) && PyErr_Occurred()) return -1; - *(float*)addr = (float)double_val; + FT_ATOMIC_STORE_FLOAT_RELAXED(*(float*)addr, (float)double_val); break; } - case Py_T_DOUBLE: - *(double*)addr = PyFloat_AsDouble(v); - if ((*(double*)addr == -1) && PyErr_Occurred()) + case Py_T_DOUBLE: { + const double 
double_val = PyFloat_AsDouble(v); + if ((double_val == -1) && PyErr_Occurred()) return -1; + FT_ATOMIC_STORE_DOUBLE_RELAXED(*(double *) addr, double_val); break; + } case _Py_T_OBJECT: case Py_T_OBJECT_EX: Py_BEGIN_CRITICAL_SECTION(obj); @@ -318,7 +323,7 @@ PyMember_SetOne(char *addr, PyMemberDef *l, PyObject *v) PyErr_BadArgument(); return -1; } - *(char*)addr = string[0]; + FT_ATOMIC_STORE_CHAR_RELAXED(*(char*)addr, string[0]); break; } case Py_T_STRING: @@ -326,10 +331,10 @@ PyMember_SetOne(char *addr, PyMemberDef *l, PyObject *v) PyErr_SetString(PyExc_TypeError, "readonly attribute"); return -1; case Py_T_LONGLONG:{ - long long value; - *(long long*)addr = value = PyLong_AsLongLong(v); + long long value = PyLong_AsLongLong(v); if ((value == -1) && PyErr_Occurred()) return -1; + FT_ATOMIC_STORE_LLONG_RELAXED(*(long long*)addr, value); break; } case Py_T_ULONGLONG: { @@ -343,7 +348,7 @@ PyMember_SetOne(char *addr, PyMemberDef *l, PyObject *v) if (long_val == -1 && PyErr_Occurred()) { return -1; } - *(unsigned long long *)addr = (unsigned long long)(long long)long_val; + FT_ATOMIC_STORE_ULLONG_RELAXED(*(unsigned long long *)addr, (unsigned long long)(long long)long_val); WARN("Writing negative value into unsigned field"); } else { @@ -352,7 +357,7 @@ PyMember_SetOne(char *addr, PyMemberDef *l, PyObject *v) if (ulonglong_val == (unsigned long long)-1 && PyErr_Occurred()) { return -1; } - *(unsigned long long*)addr = ulonglong_val; + FT_ATOMIC_STORE_ULLONG_RELAXED(*(unsigned long long *)addr, ulonglong_val); } break; } From 412e11fe6e37f15971ef855f88b8b01bb3297679 Mon Sep 17 00:00:00 2001 From: Jun Komoda <45822440+junkmd@users.noreply.github.com> Date: Wed, 4 Dec 2024 00:35:08 +0900 Subject: [PATCH 22/76] gh-127255: Make `CopyComPointer` public and add to `ctypes` doc. 
(GH-127275) --- Doc/library/ctypes.rst | 18 ++++++++++++++++++ Doc/whatsnew/3.14.rst | 5 ++++- Lib/ctypes/__init__.py | 2 +- .../test_ctypes/test_win32_com_foreign_func.py | 3 +-- ...4-11-25-15-02-44.gh-issue-127255.UXeljc.rst | 2 ++ 5 files changed, 26 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-25-15-02-44.gh-issue-127255.UXeljc.rst diff --git a/Doc/library/ctypes.rst b/Doc/library/ctypes.rst index bd9529db9ee65a..bd88fa377fb39d 100644 --- a/Doc/library/ctypes.rst +++ b/Doc/library/ctypes.rst @@ -1949,6 +1949,24 @@ Utility functions It behaves similar to ``pointer(obj)``, but the construction is a lot faster. +.. function:: CopyComPointer(src, dst) + + Copies a COM pointer from *src* to *dst* and returns the Windows specific + :c:type:`!HRESULT` value. + + If *src* is not ``NULL``, its ``AddRef`` method is called, incrementing the + reference count. + + In contrast, the reference count of *dst* will not be decremented before + assigning the new value. Unless *dst* is ``NULL``, the caller is responsible + for decrementing the reference count by calling its ``Release`` method when + necessary. + + .. availability:: Windows + + .. versionadded:: next + + .. function:: cast(obj, type) This function is similar to the cast operator in C. It returns a new instance diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 7bb9657e6ed9da..52a6d6e4340194 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -313,9 +313,12 @@ ctypes to help match a non-default ABI. (Contributed by Petr Viktorin in :gh:`97702`.) -* The :exc:`~ctypes.COMError` exception is now public. +* On Windows, the :exc:`~ctypes.COMError` exception is now public. (Contributed by Jun Komoda in :gh:`126686`.) +* On Windows, the :func:`~ctypes.CopyComPointer` function is now public. + (Contributed by Jun Komoda in :gh:`127275`.) 
+ datetime -------- diff --git a/Lib/ctypes/__init__.py b/Lib/ctypes/__init__.py index ac6493892068e9..2f2b0ca9f38633 100644 --- a/Lib/ctypes/__init__.py +++ b/Lib/ctypes/__init__.py @@ -19,7 +19,7 @@ raise Exception("Version number mismatch", __version__, _ctypes_version) if _os.name == "nt": - from _ctypes import COMError, FormatError + from _ctypes import COMError, CopyComPointer, FormatError DEFAULT_MODE = RTLD_LOCAL if _os.name == "posix" and _sys.platform == "darwin": diff --git a/Lib/test/test_ctypes/test_win32_com_foreign_func.py b/Lib/test/test_ctypes/test_win32_com_foreign_func.py index 8d217fc17efa02..7e54f8f6c31d33 100644 --- a/Lib/test/test_ctypes/test_win32_com_foreign_func.py +++ b/Lib/test/test_ctypes/test_win32_com_foreign_func.py @@ -9,8 +9,7 @@ raise unittest.SkipTest("Windows-specific test") -from _ctypes import COMError, CopyComPointer -from ctypes import HRESULT +from ctypes import COMError, CopyComPointer, HRESULT COINIT_APARTMENTTHREADED = 0x2 diff --git a/Misc/NEWS.d/next/Library/2024-11-25-15-02-44.gh-issue-127255.UXeljc.rst b/Misc/NEWS.d/next/Library/2024-11-25-15-02-44.gh-issue-127255.UXeljc.rst new file mode 100644 index 00000000000000..9fe7815e93cf4f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-25-15-02-44.gh-issue-127255.UXeljc.rst @@ -0,0 +1,2 @@ +The :func:`~ctypes.CopyComPointer` function is now public. +Previously, this was private and only available in ``_ctypes``. From 8ba9f5bca9c0ce6130e1f4ba761a68f74f8457d0 Mon Sep 17 00:00:00 2001 From: "Tomas R." Date: Tue, 3 Dec 2024 17:08:39 +0100 Subject: [PATCH 23/76] gh-127347: Document `traceback.print_list` (#127348) Previously, `traceback.print_list` didn't have a documentation entry and was not exposed in `traceback.__all__`. Now it has a documentation entry and is exposed in `__all__`. 
--- Doc/library/traceback.rst | 7 +++++++ Lib/test/test_traceback.py | 3 +-- Lib/traceback.py | 2 +- .../2024-11-27-22-56-48.gh-issue-127347.xyddWS.rst | 1 + 4 files changed, 10 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Documentation/2024-11-27-22-56-48.gh-issue-127347.xyddWS.rst diff --git a/Doc/library/traceback.rst b/Doc/library/traceback.rst index 100a92b73d5497..8f94fc448f2482 100644 --- a/Doc/library/traceback.rst +++ b/Doc/library/traceback.rst @@ -157,6 +157,13 @@ Module-Level Functions arguments have the same meaning as for :func:`print_stack`. +.. function:: print_list(extracted_list, file=None) + + Print the list of tuples as returned by :func:`extract_tb` or + :func:`extract_stack` as a formatted stack trace to the given file. + If *file* is ``None``, the output is written to :data:`sys.stderr`. + + .. function:: format_list(extracted_list) Given a list of tuples or :class:`FrameSummary` objects as returned by diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index ec69412f5511eb..ea8d9f2137aca5 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -4488,9 +4488,8 @@ class MiscTest(unittest.TestCase): def test_all(self): expected = set() - denylist = {'print_list'} for name in dir(traceback): - if name.startswith('_') or name in denylist: + if name.startswith('_'): continue module_object = getattr(traceback, name) if getattr(module_object, '__module__', None) == 'traceback': diff --git a/Lib/traceback.py b/Lib/traceback.py index f73149271b9bc9..6367c00e4d4b86 100644 --- a/Lib/traceback.py +++ b/Lib/traceback.py @@ -15,7 +15,7 @@ 'format_tb', 'print_exc', 'format_exc', 'print_exception', 'print_last', 'print_stack', 'print_tb', 'clear_frames', 'FrameSummary', 'StackSummary', 'TracebackException', - 'walk_stack', 'walk_tb'] + 'walk_stack', 'walk_tb', 'print_list'] # # Formatting and printing lists of traceback lines. 
diff --git a/Misc/NEWS.d/next/Documentation/2024-11-27-22-56-48.gh-issue-127347.xyddWS.rst b/Misc/NEWS.d/next/Documentation/2024-11-27-22-56-48.gh-issue-127347.xyddWS.rst new file mode 100644 index 00000000000000..79b3faa3d0d385 --- /dev/null +++ b/Misc/NEWS.d/next/Documentation/2024-11-27-22-56-48.gh-issue-127347.xyddWS.rst @@ -0,0 +1 @@ +Publicly expose :func:`traceback.print_list` in :attr:`!traceback.__all__`. From 0cb52220790d8bc70ec325fd89d52b5f3b7ad29c Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Tue, 3 Dec 2024 09:32:26 -0800 Subject: [PATCH 24/76] gh-115999: Specialize `LOAD_SUPER_ATTR` in free-threaded builds (gh-127128) Use existing helpers to atomically modify the bytecode. Add unit tests to ensure specializing is happening as expected. Add test_specialize.py that can be used with ThreadSanitizer to detect data races. Fix thread safety issue with cell_set_contents(). --- Lib/test/test_opcache.py | 39 ++++++++++++++++++++++++++++++++++++++ Objects/cellobject.c | 5 +++-- Python/bytecodes.c | 4 ++-- Python/ceval.c | 1 - Python/generated_cases.c.h | 4 ++-- Python/specialize.c | 19 +++++-------------- 6 files changed, 51 insertions(+), 21 deletions(-) diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index b989b21cd9b3a9..527114445ac13b 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -1249,6 +1249,45 @@ def binary_op_add_unicode(): self.assert_specialized(binary_op_add_unicode, "BINARY_OP_ADD_UNICODE") self.assert_no_opcode(binary_op_add_unicode, "BINARY_OP") + @cpython_only + @requires_specialization_ft + def test_load_super_attr(self): + """Ensure that LOAD_SUPER_ATTR is specialized as expected.""" + + class A: + def __init__(self): + meth = super().__init__ + super().__init__() + + for _ in range(100): + A() + + self.assert_specialized(A.__init__, "LOAD_SUPER_ATTR_ATTR") + self.assert_specialized(A.__init__, "LOAD_SUPER_ATTR_METHOD") + self.assert_no_opcode(A.__init__, "LOAD_SUPER_ATTR") + + # Temporarily 
replace super() with something else. + real_super = super + + def fake_super(): + def init(self): + pass + + return init + + # Force unspecialize + globals()['super'] = fake_super + try: + # Should be unspecialized after enough calls. + for _ in range(100): + A() + finally: + globals()['super'] = real_super + + # Ensure the specialized instructions are not present + self.assert_no_opcode(A.__init__, "LOAD_SUPER_ATTR_ATTR") + self.assert_no_opcode(A.__init__, "LOAD_SUPER_ATTR_METHOD") + @cpython_only @requires_specialization_ft def test_contain_op(self): diff --git a/Objects/cellobject.c b/Objects/cellobject.c index 590c8a80857699..4ab9083af5e300 100644 --- a/Objects/cellobject.c +++ b/Objects/cellobject.c @@ -145,8 +145,9 @@ cell_get_contents(PyObject *self, void *closure) static int cell_set_contents(PyObject *self, PyObject *obj, void *Py_UNUSED(ignored)) { - PyCellObject *op = _PyCell_CAST(self); - Py_XSETREF(op->ob_ref, Py_XNewRef(obj)); + PyCellObject *cell = _PyCell_CAST(self); + Py_XINCREF(obj); + PyCell_SetTakeRef((PyCellObject *)cell, obj); return 0; } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index e96674c3502ef1..dd28aae6a3cb61 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1946,7 +1946,7 @@ dummy_func( }; specializing op(_SPECIALIZE_LOAD_SUPER_ATTR, (counter/1, global_super_st, class_st, unused -- global_super_st, class_st, unused)) { - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT int load_method = oparg & 1; if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; @@ -1955,7 +1955,7 @@ dummy_func( } OPCODE_DEFERRED_INC(LOAD_SUPER_ATTR); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } tier1 op(_LOAD_SUPER_ATTR, (global_super_st, class_st, self_st -- attr, null if (oparg & 1))) { diff --git a/Python/ceval.c b/Python/ceval.c index f9514a6bf25c1b..6795a160506231 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -28,7 +28,6 @@ 
#include "pycore_setobject.h" // _PySet_Update() #include "pycore_sliceobject.h" // _PyBuildSlice_ConsumeRefs #include "pycore_tuple.h" // _PyTuple_ITEMS() -#include "pycore_typeobject.h" // _PySuper_Lookup() #include "pycore_uop_ids.h" // Uops #include "pycore_pyerrors.h" diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index e1f951558de7da..c31601f6d82b77 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -6330,7 +6330,7 @@ global_super_st = stack_pointer[-3]; uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT int load_method = oparg & 1; if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; @@ -6341,7 +6341,7 @@ } OPCODE_DEFERRED_INC(LOAD_SUPER_ATTR); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } // _LOAD_SUPER_ATTR { diff --git a/Python/specialize.c b/Python/specialize.c index ba13b02a29b133..0fe4e7904de9f8 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -794,9 +794,8 @@ _Py_Specialize_LoadSuperAttr(_PyStackRef global_super_st, _PyStackRef cls_st, _P PyObject *global_super = PyStackRef_AsPyObjectBorrow(global_super_st); PyObject *cls = PyStackRef_AsPyObjectBorrow(cls_st); - assert(ENABLE_SPECIALIZATION); + assert(ENABLE_SPECIALIZATION_FT); assert(_PyOpcode_Caches[LOAD_SUPER_ATTR] == INLINE_CACHE_ENTRIES_LOAD_SUPER_ATTR); - _PySuperAttrCache *cache = (_PySuperAttrCache *)(instr + 1); if (global_super != (PyObject *)&PySuper_Type) { SPECIALIZATION_FAIL(LOAD_SUPER_ATTR, SPEC_FAIL_SUPER_SHADOWED); goto fail; @@ -805,19 +804,11 @@ _Py_Specialize_LoadSuperAttr(_PyStackRef global_super_st, _PyStackRef cls_st, _P SPECIALIZATION_FAIL(LOAD_SUPER_ATTR, SPEC_FAIL_SUPER_BAD_CLASS); goto fail; } - instr->op.code = load_method ? 
LOAD_SUPER_ATTR_METHOD : LOAD_SUPER_ATTR_ATTR; - goto success; - -fail: - STAT_INC(LOAD_SUPER_ATTR, failure); - assert(!PyErr_Occurred()); - instr->op.code = LOAD_SUPER_ATTR; - cache->counter = adaptive_counter_backoff(cache->counter); + uint8_t load_code = load_method ? LOAD_SUPER_ATTR_METHOD : LOAD_SUPER_ATTR_ATTR; + specialize(instr, load_code); return; -success: - STAT_INC(LOAD_SUPER_ATTR, success); - assert(!PyErr_Occurred()); - cache->counter = adaptive_counter_cooldown(); +fail: + unspecialize(instr); } typedef enum { From 13b68e1a61e92a032d255aff5d5af435bbb63e8b Mon Sep 17 00:00:00 2001 From: mpage Date: Tue, 3 Dec 2024 09:50:58 -0800 Subject: [PATCH 25/76] gh-127421: Fix race in test_start_new_thread_failed (#127549) Fix race in test_start_new_thread_failed When we succeed in starting a new thread, for example if setrlimit was ineffective, we must wait for the newly spawned thread to exit. Otherwise, we run the risk that the newly spawned thread will race with runtime finalization and access memory that has already been clobbered/freed. `_thread.start_new_thread()` only spawns daemon threads, which the runtime does not wait for at shutdown, and does not return a handle. Use `_thread.start_joinable_thread()` and join the resulting handle when the thread is started successfully. 
--- Lib/test/test_threading.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index b666533466e578..fe225558fc4f0b 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -1192,11 +1192,12 @@ def f(): resource.setrlimit(resource.RLIMIT_NPROC, (0, hard)) try: - _thread.start_new_thread(f, ()) + handle = _thread.start_joinable_thread(f) except RuntimeError: print('ok') else: print('!skip!') + handle.join() """ _, out, err = assert_python_ok("-u", "-c", code) out = out.strip() From 276cd66ccbbf85996a57bd1db3dd29b93a6eab64 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Tue, 3 Dec 2024 10:25:12 -0800 Subject: [PATCH 26/76] gh-115999: Add free-threaded specialization for `SEND` (gh-127426) No additional thread safety changes are required. Note that sending to a generator that is shared between threads is currently not safe in the free-threaded build. --- Lib/test/test_opcache.py | 42 ++++++++++++++++++++++++++++++++++++++ Python/bytecodes.c | 4 ++-- Python/generated_cases.c.h | 4 ++-- Python/specialize.c | 15 ++++---------- 4 files changed, 50 insertions(+), 15 deletions(-) diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index 527114445ac13b..b7a18133ab8b8a 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -1311,6 +1311,48 @@ def contains_op_set(): self.assert_specialized(contains_op_set, "CONTAINS_OP_SET") self.assert_no_opcode(contains_op_set, "CONTAINS_OP") + @cpython_only + @requires_specialization_ft + def test_send_with(self): + def run_async(coro): + while True: + try: + coro.send(None) + except StopIteration: + break + + class CM: + async def __aenter__(self): + return self + + async def __aexit__(self, *exc): + pass + + async def send_with(): + for i in range(100): + async with CM(): + x = 1 + + run_async(send_with()) + # Note there are still unspecialized "SEND" opcodes in the + # cleanup paths of the 'with' statement. 
+ self.assert_specialized(send_with, "SEND_GEN") + + @cpython_only + @requires_specialization_ft + def test_send_yield_from(self): + def g(): + yield None + + def send_yield_from(): + yield from g() + + for i in range(100): + list(send_yield_from()) + + self.assert_specialized(send_yield_from, "SEND_GEN") + self.assert_no_opcode(send_yield_from, "SEND") + @cpython_only @requires_specialization_ft def test_to_bool(self): diff --git a/Python/bytecodes.c b/Python/bytecodes.c index dd28aae6a3cb61..d6be3cebf80724 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -1117,7 +1117,7 @@ dummy_func( }; specializing op(_SPECIALIZE_SEND, (counter/1, receiver, unused -- receiver, unused)) { - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _Py_Specialize_Send(receiver, next_instr); @@ -1125,7 +1125,7 @@ dummy_func( } OPCODE_DEFERRED_INC(SEND); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } op(_SEND, (receiver, v -- receiver, retval)) { diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index c31601f6d82b77..ef191f6f697f24 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -7065,7 +7065,7 @@ receiver = stack_pointer[-2]; uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _PyFrame_SetStackPointer(frame, stack_pointer); @@ -7075,7 +7075,7 @@ } OPCODE_DEFERRED_INC(SEND); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } // _SEND { diff --git a/Python/specialize.c b/Python/specialize.c index 0fe4e7904de9f8..8b2d1a14c107e0 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -2627,28 +2627,21 @@ _Py_Specialize_Send(_PyStackRef receiver_st, _Py_CODEUNIT 
*instr) { PyObject *receiver = PyStackRef_AsPyObjectBorrow(receiver_st); - assert(ENABLE_SPECIALIZATION); + assert(ENABLE_SPECIALIZATION_FT); assert(_PyOpcode_Caches[SEND] == INLINE_CACHE_ENTRIES_SEND); - _PySendCache *cache = (_PySendCache *)(instr + 1); PyTypeObject *tp = Py_TYPE(receiver); if (tp == &PyGen_Type || tp == &PyCoro_Type) { if (_PyInterpreterState_GET()->eval_frame) { SPECIALIZATION_FAIL(SEND, SPEC_FAIL_OTHER); goto failure; } - instr->op.code = SEND_GEN; - goto success; + specialize(instr, SEND_GEN); + return; } SPECIALIZATION_FAIL(SEND, _PySpecialization_ClassifyIterator(receiver)); failure: - STAT_INC(SEND, failure); - instr->op.code = SEND; - cache->counter = adaptive_counter_backoff(cache->counter); - return; -success: - STAT_INC(SEND, success); - cache->counter = adaptive_counter_cooldown(); + unspecialize(instr); } #ifdef Py_STATS From fc5a0dc22483a35068888e828c65796d7a792c14 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Tue, 3 Dec 2024 10:33:06 -0800 Subject: [PATCH 27/76] gh-127271: Replace use of PyCell_GET/SET (gh-127272) * Replace uses of `PyCell_GET` and `PyCell_SET`. These macros are not safe to use in the free-threaded build. Use `PyCell_GetRef()` and `PyCell_SetTakeRef()` instead. * Since `PyCell_GetRef()` returns a strong rather than borrowed ref, some code restructuring was required, e.g. `frame_get_var()` returns a strong ref now. * Add critical sections to `PyCell_GET` and `PyCell_SET`. * Move critical_section.h earlier in the Python.h file. * Add `PyCell_GET` to the free-threading howto table of APIs that return borrowed refs. * Add additional unit tests for free-threading. 
--- Doc/howto/free-threading-extensions.rst | 2 + Include/Python.h | 2 +- Include/cpython/cellobject.h | 8 +- Lib/test/test_free_threading/test_races.py | 134 +++++++++++++++++++++ Objects/cellobject.c | 41 ++++--- Objects/frameobject.c | 28 +++-- Objects/typeobject.c | 57 ++++++--- Python/bltinmodule.c | 7 +- 8 files changed, 231 insertions(+), 48 deletions(-) create mode 100644 Lib/test/test_free_threading/test_races.py diff --git a/Doc/howto/free-threading-extensions.rst b/Doc/howto/free-threading-extensions.rst index 6abe93d71ad529..c1ad42e7e55ee5 100644 --- a/Doc/howto/free-threading-extensions.rst +++ b/Doc/howto/free-threading-extensions.rst @@ -167,6 +167,8 @@ that return :term:`strong references <strong reference>`. +-----------------------------------+-----------------------------------+ | :c:func:`PyImport_AddModule` | :c:func:`PyImport_AddModuleRef` | +-----------------------------------+-----------------------------------+ +| :c:func:`PyCell_GET` | :c:func:`PyCell_Get` | ++-----------------------------------+-----------------------------------+ Not all APIs that return borrowed references are problematic. For example, :c:func:`PyTuple_GetItem` is safe because tuples are immutable.
diff --git a/Include/Python.h b/Include/Python.h index 717e27feab62db..64be80145890a3 100644 --- a/Include/Python.h +++ b/Include/Python.h @@ -69,6 +69,7 @@ #include "pystats.h" #include "pyatomic.h" #include "lock.h" +#include "critical_section.h" #include "object.h" #include "refcount.h" #include "objimpl.h" @@ -130,7 +131,6 @@ #include "import.h" #include "abstract.h" #include "bltinmodule.h" -#include "critical_section.h" #include "cpython/pyctype.h" #include "pystrtod.h" #include "pystrcmp.h" diff --git a/Include/cpython/cellobject.h b/Include/cpython/cellobject.h index 47a6a491497ea0..85a63a13747d87 100644 --- a/Include/cpython/cellobject.h +++ b/Include/cpython/cellobject.h @@ -22,10 +22,14 @@ PyAPI_FUNC(PyObject *) PyCell_Get(PyObject *); PyAPI_FUNC(int) PyCell_Set(PyObject *, PyObject *); static inline PyObject* PyCell_GET(PyObject *op) { + PyObject *res; PyCellObject *cell; assert(PyCell_Check(op)); cell = _Py_CAST(PyCellObject*, op); - return cell->ob_ref; + Py_BEGIN_CRITICAL_SECTION(cell); + res = cell->ob_ref; + Py_END_CRITICAL_SECTION(); + return res; } #define PyCell_GET(op) PyCell_GET(_PyObject_CAST(op)) @@ -33,7 +37,9 @@ static inline void PyCell_SET(PyObject *op, PyObject *value) { PyCellObject *cell; assert(PyCell_Check(op)); cell = _Py_CAST(PyCellObject*, op); + Py_BEGIN_CRITICAL_SECTION(cell); cell->ob_ref = value; + Py_END_CRITICAL_SECTION(); } #define PyCell_SET(op, value) PyCell_SET(_PyObject_CAST(op), (value)) diff --git a/Lib/test/test_free_threading/test_races.py b/Lib/test/test_free_threading/test_races.py new file mode 100644 index 00000000000000..09e1d52e3509f9 --- /dev/null +++ b/Lib/test/test_free_threading/test_races.py @@ -0,0 +1,134 @@ +# It's most useful to run these tests with ThreadSanitizer enabled. 
+import sys +import functools +import threading +import time +import unittest + +from test.support import threading_helper + + +class TestBase(unittest.TestCase): + pass + + +def do_race(func1, func2): + """Run func1() and func2() repeatedly in separate threads.""" + n = 1000 + + barrier = threading.Barrier(2) + + def repeat(func): + barrier.wait() + for _i in range(n): + func() + + threads = [ + threading.Thread(target=functools.partial(repeat, func1)), + threading.Thread(target=functools.partial(repeat, func2)), + ] + for thread in threads: + thread.start() + for thread in threads: + thread.join() + + +@threading_helper.requires_working_threading() +class TestRaces(TestBase): + def test_racing_cell_set(self): + """Test cell object getter/setter properties.""" + + def nested_func(): + x = 0 + + def inner(): + nonlocal x + x += 1 + + # This doesn't race because LOAD_DEREF and STORE_DEREF on the + # cell object use critical sections. + do_race(nested_func, nested_func) + + def nested_func2(): + x = 0 + + def inner(): + y = x + frame = sys._getframe(1) + frame.f_locals["x"] = 2 + + return inner + + def mutate_func2(): + inner = nested_func2() + cell = inner.__closure__[0] + old_value = cell.cell_contents + cell.cell_contents = 1000 + time.sleep(0) + cell.cell_contents = old_value + time.sleep(0) + + # This revealed a race with cell_set_contents() since it was missing + # the critical section.
+ do_race(nested_func2, mutate_func2) + + def test_racing_cell_cmp_repr(self): + """Test cell object compare and repr methods.""" + + def nested_func(): + x = 0 + y = 0 + + def inner(): + return x + y + + return inner.__closure__ + + cell_a, cell_b = nested_func() + + def mutate(): + cell_a.cell_contents += 1 + + def access(): + cell_a == cell_b + s = repr(cell_a) + + # cell_richcompare() and cell_repr used to have data races + do_race(mutate, access) + + def test_racing_load_super_attr(self): + """Test (un)specialization of LOAD_SUPER_ATTR opcode.""" + + class C: + def __init__(self): + try: + super().__init__ + super().__init__() + except RuntimeError: + pass # happens if __class__ is replaced with non-type + + def access(): + C() + + def mutate(): + # Swap out the super() global with a different one + real_super = super + globals()["super"] = lambda s=1: s + time.sleep(0) + globals()["super"] = real_super + time.sleep(0) + # Swap out the __class__ closure value with a non-type + cell = C.__init__.__closure__[0] + real_class = cell.cell_contents + cell.cell_contents = 99 + time.sleep(0) + cell.cell_contents = real_class + + # The initial PR adding specialized opcodes for LOAD_SUPER_ATTR + # had some races (one with the super() global changing and one + # with the cell binding being changed). 
+ do_race(access, mutate) + + +if __name__ == "__main__": + unittest.main() diff --git a/Objects/cellobject.c b/Objects/cellobject.c index 4ab9083af5e300..ec2eeb1a855b63 100644 --- a/Objects/cellobject.c +++ b/Objects/cellobject.c @@ -82,6 +82,17 @@ cell_dealloc(PyObject *self) PyObject_GC_Del(op); } +static PyObject * +cell_compare_impl(PyObject *a, PyObject *b, int op) +{ + if (a != NULL && b != NULL) { + return PyObject_RichCompare(a, b, op); + } + else { + Py_RETURN_RICHCOMPARE(b == NULL, a == NULL, op); + } +} + static PyObject * cell_richcompare(PyObject *a, PyObject *b, int op) { @@ -92,27 +103,28 @@ cell_richcompare(PyObject *a, PyObject *b, int op) if (!PyCell_Check(a) || !PyCell_Check(b)) { Py_RETURN_NOTIMPLEMENTED; } + PyObject *a_ref = PyCell_GetRef((PyCellObject *)a); + PyObject *b_ref = PyCell_GetRef((PyCellObject *)b); /* compare cells by contents; empty cells come before anything else */ - a = ((PyCellObject *)a)->ob_ref; - b = ((PyCellObject *)b)->ob_ref; - if (a != NULL && b != NULL) - return PyObject_RichCompare(a, b, op); + PyObject *res = cell_compare_impl(a_ref, b_ref, op); - Py_RETURN_RICHCOMPARE(b == NULL, a == NULL, op); + Py_XDECREF(a_ref); + Py_XDECREF(b_ref); + return res; } static PyObject * cell_repr(PyObject *self) { - PyCellObject *op = _PyCell_CAST(self); - if (op->ob_ref == NULL) { - return PyUnicode_FromFormat("<cell at %p: empty>", op); + PyObject *ref = PyCell_GetRef((PyCellObject *)self); + if (ref == NULL) { + return PyUnicode_FromFormat("<cell at %p: empty>", self); } - - return PyUnicode_FromFormat("<cell at %p: %.80s object at %p>", - op, Py_TYPE(op->ob_ref)->tp_name, - op->ob_ref); + PyObject *res = PyUnicode_FromFormat("<cell at %p: %.80s object at %p>", + self, Py_TYPE(ref)->tp_name, ref); + Py_DECREF(ref); + return res; } static int @@ -135,11 +147,12 @@ static PyObject * cell_get_contents(PyObject *self, void *closure) { PyCellObject *op = _PyCell_CAST(self); - if (op->ob_ref == NULL) { + PyObject *res = PyCell_GetRef(op); + if (res == NULL) { PyErr_SetString(PyExc_ValueError, "Cell is empty"); return NULL; } - return
Py_NewRef(op->ob_ref); + return res; } static int diff --git a/Objects/frameobject.c b/Objects/frameobject.c index c743c254848d3a..03ed2b9480f8c9 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -8,6 +8,7 @@ #include "pycore_moduleobject.h" // _PyModule_GetDict() #include "pycore_modsupport.h" // _PyArg_CheckPositional() #include "pycore_object.h" // _PyObject_GC_UNTRACK() +#include "pycore_cell.h" // PyCell_GetRef() PyCell_SetTakeRef() #include "pycore_opcode_metadata.h" // _PyOpcode_Deopt, _PyOpcode_Caches @@ -187,11 +188,8 @@ framelocalsproxy_setitem(PyObject *self, PyObject *key, PyObject *value) } } if (cell != NULL) { - PyObject *oldvalue_o = PyCell_GET(cell); - if (value != oldvalue_o) { - PyCell_SET(cell, Py_XNewRef(value)); - Py_XDECREF(oldvalue_o); - } + Py_XINCREF(value); + PyCell_SetTakeRef((PyCellObject *)cell, value); } else if (value != PyStackRef_AsPyObjectBorrow(oldvalue)) { PyStackRef_XCLOSE(fast[i]); fast[i] = PyStackRef_FromPyObjectNew(value); @@ -1987,19 +1985,25 @@ frame_get_var(_PyInterpreterFrame *frame, PyCodeObject *co, int i, if (kind & CO_FAST_FREE) { // The cell was set by COPY_FREE_VARS. assert(value != NULL && PyCell_Check(value)); - value = PyCell_GET(value); + value = PyCell_GetRef((PyCellObject *)value); } else if (kind & CO_FAST_CELL) { if (value != NULL) { if (PyCell_Check(value)) { assert(!_PyFrame_IsIncomplete(frame)); - value = PyCell_GET(value); + value = PyCell_GetRef((PyCellObject *)value); + } + else { + // (likely) Otherwise it is an arg (kind & CO_FAST_LOCAL), + // with the initial value set when the frame was created... + // (unlikely) ...or it was set via the f_locals proxy. + Py_INCREF(value); } - // (likely) Otherwise it is an arg (kind & CO_FAST_LOCAL), - // with the initial value set when the frame was created... - // (unlikely) ...or it was set via the f_locals proxy. 
} } + else { + Py_XINCREF(value); + } } *pvalue = value; return 1; @@ -2076,14 +2080,14 @@ PyFrame_GetVar(PyFrameObject *frame_obj, PyObject *name) continue; } - PyObject *value; // borrowed reference + PyObject *value; if (!frame_get_var(frame, co, i, &value)) { break; } if (value == NULL) { break; } - return Py_NewRef(value); + return value; } PyErr_Format(PyExc_NameError, "variable %R does not exist", name); diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 2611404a3d0d61..bf9049bce3adeb 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -19,6 +19,7 @@ #include "pycore_typeobject.h" // struct type_cache #include "pycore_unionobject.h" // _Py_union_type_or #include "pycore_weakref.h" // _PyWeakref_GET_REF() +#include "pycore_cell.h" // PyCell_GetRef() #include "opcode.h" // MAKE_CELL #include <stddef.h> // ptrdiff_t @@ -11676,23 +11677,28 @@ super_init_without_args(_PyInterpreterFrame *cframe, PyTypeObject **type_p, assert(_PyFrame_GetCode(cframe)->co_nlocalsplus > 0); PyObject *firstarg = PyStackRef_AsPyObjectBorrow(_PyFrame_GetLocalsArray(cframe)[0]); + if (firstarg == NULL) { + PyErr_SetString(PyExc_RuntimeError, "super(): arg[0] deleted"); + return -1; + } // The first argument might be a cell. - if (firstarg != NULL && (_PyLocals_GetKind(co->co_localspluskinds, 0) & CO_FAST_CELL)) { - // "firstarg" is a cell here unless (very unlikely) super() - // was called from the C-API before the first MAKE_CELL op. - if (_PyInterpreterFrame_LASTI(cframe) >= 0) { - // MAKE_CELL and COPY_FREE_VARS have no quickened forms, so no need - // to use _PyOpcode_Deopt here: - assert(_PyCode_CODE(co)[0].op.code == MAKE_CELL || - _PyCode_CODE(co)[0].op.code == COPY_FREE_VARS); - assert(PyCell_Check(firstarg)); - firstarg = PyCell_GET(firstarg); + // "firstarg" is a cell here unless (very unlikely) super() + // was called from the C-API before the first MAKE_CELL op.
+ if ((_PyLocals_GetKind(co->co_localspluskinds, 0) & CO_FAST_CELL) && + (_PyInterpreterFrame_LASTI(cframe) >= 0)) { + // MAKE_CELL and COPY_FREE_VARS have no quickened forms, so no need + // to use _PyOpcode_Deopt here: + assert(_PyCode_CODE(co)[0].op.code == MAKE_CELL || + _PyCode_CODE(co)[0].op.code == COPY_FREE_VARS); + assert(PyCell_Check(firstarg)); + firstarg = PyCell_GetRef((PyCellObject *)firstarg); + if (firstarg == NULL) { + PyErr_SetString(PyExc_RuntimeError, "super(): arg[0] deleted"); + return -1; } } - if (firstarg == NULL) { - PyErr_SetString(PyExc_RuntimeError, - "super(): arg[0] deleted"); - return -1; + else { + Py_INCREF(firstarg); } // Look for __class__ in the free vars. @@ -11707,18 +11713,22 @@ super_init_without_args(_PyInterpreterFrame *cframe, PyTypeObject **type_p, if (cell == NULL || !PyCell_Check(cell)) { PyErr_SetString(PyExc_RuntimeError, "super(): bad __class__ cell"); + Py_DECREF(firstarg); return -1; } - type = (PyTypeObject *) PyCell_GET(cell); + type = (PyTypeObject *) PyCell_GetRef((PyCellObject *)cell); if (type == NULL) { PyErr_SetString(PyExc_RuntimeError, "super(): empty __class__ cell"); + Py_DECREF(firstarg); return -1; } if (!PyType_Check(type)) { PyErr_Format(PyExc_RuntimeError, "super(): __class__ is not a type (%s)", Py_TYPE(type)->tp_name); + Py_DECREF(type); + Py_DECREF(firstarg); return -1; } break; @@ -11727,6 +11737,7 @@ super_init_without_args(_PyInterpreterFrame *cframe, PyTypeObject **type_p, if (type == NULL) { PyErr_SetString(PyExc_RuntimeError, "super(): __class__ cell not found"); + Py_DECREF(firstarg); return -1; } @@ -11773,16 +11784,24 @@ super_init_impl(PyObject *self, PyTypeObject *type, PyObject *obj) { return -1; } } + else { + Py_INCREF(type); + Py_XINCREF(obj); + } - if (obj == Py_None) + if (obj == Py_None) { + Py_DECREF(obj); obj = NULL; + } if (obj != NULL) { obj_type = supercheck(type, obj); - if (obj_type == NULL) + if (obj_type == NULL) { + Py_DECREF(type); + Py_DECREF(obj); return -1; - 
Py_INCREF(obj); + } } - Py_XSETREF(su->type, (PyTypeObject*)Py_NewRef(type)); + Py_XSETREF(su->type, (PyTypeObject*)type); Py_XSETREF(su->obj, obj); Py_XSETREF(su->obj_type, obj_type); return 0; diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 17df9208f224f4..fb9868b3740b8c 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -13,6 +13,7 @@ #include "pycore_pythonrun.h" // _Py_SourceAsString() #include "pycore_sysmodule.h" // _PySys_GetAttr() #include "pycore_tuple.h" // _PyTuple_FromArray() +#include "pycore_cell.h" // PyCell_GetRef() #include "clinic/bltinmodule.c.h" @@ -209,7 +210,7 @@ builtin___build_class__(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *margs[3] = {name, bases, ns}; cls = PyObject_VectorcallDict(meta, margs, 3, mkw); if (cls != NULL && PyType_Check(cls) && PyCell_Check(cell)) { - PyObject *cell_cls = PyCell_GET(cell); + PyObject *cell_cls = PyCell_GetRef((PyCellObject *)cell); if (cell_cls != cls) { if (cell_cls == NULL) { const char *msg = @@ -221,9 +222,13 @@ builtin___build_class__(PyObject *self, PyObject *const *args, Py_ssize_t nargs, "__class__ set to %.200R defining %.200R as %.200R"; PyErr_Format(PyExc_TypeError, msg, cell_cls, name, cls); } + Py_XDECREF(cell_cls); Py_SETREF(cls, NULL); goto error; } + else { + Py_DECREF(cell_cls); + } } } error: From dabcecfd6dadb9430733105ba36925b290343d31 Mon Sep 17 00:00:00 2001 From: mpage Date: Tue, 3 Dec 2024 11:20:20 -0800 Subject: [PATCH 28/76] gh-115999: Enable specialization of `CALL` instructions in free-threaded builds (#127123) The CALL family of instructions were mostly thread-safe already and only required a small number of changes, which are documented below. A few changes were needed to make CALL_ALLOC_AND_ENTER_INIT thread-safe: Added _PyType_LookupRefAndVersion, which returns the type version corresponding to the returned ref. 
Added _PyType_CacheInitForSpecialization, which takes an init method and the corresponding type version and only populates the specialization cache if the current type version matches the supplied version. This prevents potentially caching a stale value in free-threaded builds if we race with an update to __init__. Only cache __init__ functions that are deferred in free-threaded builds. This ensures that the reference to __init__ that is stored in the specialization cache is valid if the type version guard in _CHECK_AND_ALLOCATE_OBJECT passes. Fix a bug in _CREATE_INIT_FRAME where the frame is pushed to the stack on failure. A few other miscellaneous changes were also needed: Use {LOCK,UNLOCK}_OBJECT in LIST_APPEND. This ensures that the list's per-object lock is held while we are appending to it. Add missing co_tlbc for _Py_InitCleanup. Stop/start the world around setting the eval frame hook. This allows us to read interp->eval_frame non-atomically and preserves the behavior of _CHECK_PEP_523 documented below. 
--- Include/internal/pycore_object.h | 14 ++++ Lib/test/test_monitoring.py | 17 +++-- Lib/test/test_opcache.py | 32 +++++++-- Lib/test/test_type_cache.py | 9 ++- Objects/typeobject.c | 62 +++++++++++++++-- Python/bytecodes.c | 16 +++-- Python/executor_cases.c.h | 21 ++++-- Python/generated_cases.c.h | 21 +++--- Python/perf_trampoline.c | 6 +- Python/pystate.c | 2 + Python/specialize.c | 112 ++++++++++++++++++------------- 11 files changed, 220 insertions(+), 92 deletions(-) diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index c52ed8f14707ba..ce876b093b2522 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -835,6 +835,20 @@ extern int _PyObject_StoreInstanceAttribute(PyObject *obj, PyObject *name, PyObject *value); extern bool _PyObject_TryGetInstanceAttribute(PyObject *obj, PyObject *name, PyObject **attr); +extern PyObject *_PyType_LookupRefAndVersion(PyTypeObject *, PyObject *, + unsigned int *); + +// Cache the provided init method in the specialization cache of type if the +// provided type version matches the current version of the type. +// +// The cached value is borrowed and is only valid if guarded by a type +// version check. In free-threaded builds the init method must also use +// deferred reference counting. +// +// Returns 1 if the value was cached or 0 otherwise. 
+extern int _PyType_CacheInitForSpecialization(PyHeapTypeObject *type, + PyObject *init, + unsigned int tp_version); #ifdef Py_GIL_DISABLED # define MANAGED_DICT_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-1) diff --git a/Lib/test/test_monitoring.py b/Lib/test/test_monitoring.py index b640aa08e4a812..5a4bcebedf19de 100644 --- a/Lib/test/test_monitoring.py +++ b/Lib/test/test_monitoring.py @@ -11,7 +11,7 @@ import unittest import test.support -from test.support import requires_specialization, script_helper +from test.support import requires_specialization_ft, script_helper from test.support.import_helper import import_module _testcapi = test.support.import_helper.import_module("_testcapi") @@ -850,6 +850,13 @@ def __init__(self, events): def __call__(self, code, offset, val): self.events.append(("return", code.co_name, val)) +# gh-127274: CALL_ALLOC_AND_ENTER_INIT will only cache __init__ methods that +# are deferred. We only defer functions defined at the top-level. +class ValueErrorRaiser: + def __init__(self): + raise ValueError() + + class ExceptionMonitoringTest(CheckEvents): exception_recorders = ( @@ -1045,16 +1052,12 @@ def func(): ) self.assertEqual(events[0], ("throw", IndexError)) - @requires_specialization + @requires_specialization_ft def test_no_unwind_for_shim_frame(self): - class B: - def __init__(self): - raise ValueError() - def f(): try: - return B() + return ValueErrorRaiser() except ValueError: pass diff --git a/Lib/test/test_opcache.py b/Lib/test/test_opcache.py index b7a18133ab8b8a..50b5f365165921 100644 --- a/Lib/test/test_opcache.py +++ b/Lib/test/test_opcache.py @@ -493,6 +493,18 @@ def f(): self.assertFalse(f()) +# gh-127274: CALL_ALLOC_AND_ENTER_INIT will only cache __init__ methods that +# are deferred. We only defer functions defined at the top-level. 
+class MyClass: + def __init__(self): + pass + + +class InitTakesArg: + def __init__(self, arg): + self.arg = arg + + class TestCallCache(TestBase): def test_too_many_defaults_0(self): def f(): @@ -522,12 +534,8 @@ def f(x, y): f() @disabling_optimizer - @requires_specialization + @requires_specialization_ft def test_assign_init_code(self): - class MyClass: - def __init__(self): - pass - def instantiate(): return MyClass() @@ -544,6 +552,20 @@ def count_args(self, *args): MyClass.__init__.__code__ = count_args.__code__ instantiate() + @disabling_optimizer + @requires_specialization_ft + def test_push_init_frame_fails(self): + def instantiate(): + return InitTakesArg() + + for _ in range(2): + with self.assertRaises(TypeError): + instantiate() + self.assert_specialized(instantiate, "CALL_ALLOC_AND_ENTER_INIT") + + with self.assertRaises(TypeError): + instantiate() + @threading_helper.requires_working_threading() class TestRacesDoNotCrash(TestBase): diff --git a/Lib/test/test_type_cache.py b/Lib/test/test_type_cache.py index 66abe73f8d766d..e109a65741309a 100644 --- a/Lib/test/test_type_cache.py +++ b/Lib/test/test_type_cache.py @@ -2,7 +2,7 @@ import unittest import dis from test import support -from test.support import import_helper, requires_specialization +from test.support import import_helper, requires_specialization, requires_specialization_ft try: from sys import _clear_type_cache except ImportError: @@ -110,7 +110,6 @@ class HolderSub(Holder): HolderSub.value @support.cpython_only -@requires_specialization class TypeCacheWithSpecializationTests(unittest.TestCase): def tearDown(self): _clear_type_cache() @@ -140,6 +139,7 @@ def _check_specialization(self, func, arg, opname, *, should_specialize): else: self.assertIn(opname, self._all_opnames(func)) + @requires_specialization def test_class_load_attr_specialization_user_type(self): class A: def foo(self): @@ -160,6 +160,7 @@ def load_foo_2(type_): self._check_specialization(load_foo_2, A, "LOAD_ATTR", 
should_specialize=False) + @requires_specialization def test_class_load_attr_specialization_static_type(self): self.assertNotEqual(type_get_version(str), 0) self.assertNotEqual(type_get_version(bytes), 0) @@ -171,6 +172,7 @@ def get_capitalize_1(type_): self.assertEqual(get_capitalize_1(str)('hello'), 'Hello') self.assertEqual(get_capitalize_1(bytes)(b'hello'), b'Hello') + @requires_specialization def test_property_load_attr_specialization_user_type(self): class G: @property @@ -192,6 +194,7 @@ def load_x_2(instance): self._check_specialization(load_x_2, G(), "LOAD_ATTR", should_specialize=False) + @requires_specialization def test_store_attr_specialization_user_type(self): class B: __slots__ = ("bar",) @@ -211,6 +214,7 @@ def store_bar_2(type_): self._check_specialization(store_bar_2, B(), "STORE_ATTR", should_specialize=False) + @requires_specialization_ft def test_class_call_specialization_user_type(self): class F: def __init__(self): @@ -231,6 +235,7 @@ def call_class_2(type_): self._check_specialization(call_class_2, F, "CALL", should_specialize=False) + @requires_specialization def test_to_bool_specialization_user_type(self): class H: pass diff --git a/Objects/typeobject.c b/Objects/typeobject.c index bf9049bce3adeb..2068d6aa9be52b 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -5528,9 +5528,12 @@ _PyTypes_AfterFork(void) } /* Internal API to look for a name through the MRO. - This returns a borrowed reference, and doesn't set an exception! */ + This returns a strong reference, and doesn't set an exception! + If nonzero, version is set to the value of type->tp_version at the time of + the lookup. 
+*/ PyObject * -_PyType_LookupRef(PyTypeObject *type, PyObject *name) +_PyType_LookupRefAndVersion(PyTypeObject *type, PyObject *name, unsigned int *version) { PyObject *res; int error; @@ -5553,6 +5556,9 @@ _PyType_LookupRef(PyTypeObject *type, PyObject *name) // If the sequence is still valid then we're done if (value == NULL || _Py_TryIncref(value)) { if (_PySeqLock_EndRead(&entry->sequence, sequence)) { + if (version != NULL) { + *version = entry_version; + } return value; } Py_XDECREF(value); @@ -5574,6 +5580,9 @@ _PyType_LookupRef(PyTypeObject *type, PyObject *name) OBJECT_STAT_INC_COND(type_cache_hits, !is_dunder_name(name)); OBJECT_STAT_INC_COND(type_cache_dunder_hits, is_dunder_name(name)); Py_XINCREF(entry->value); + if (version != NULL) { + *version = entry->version; + } return entry->value; } #endif @@ -5587,12 +5596,12 @@ _PyType_LookupRef(PyTypeObject *type, PyObject *name) // anyone else can modify our mro or mutate the type. int has_version = 0; - int version = 0; + unsigned int assigned_version = 0; BEGIN_TYPE_LOCK(); res = find_name_in_mro(type, name, &error); if (MCACHE_CACHEABLE_NAME(name)) { has_version = assign_version_tag(interp, type); - version = type->tp_version_tag; + assigned_version = type->tp_version_tag; } END_TYPE_LOCK(); @@ -5609,28 +5618,67 @@ _PyType_LookupRef(PyTypeObject *type, PyObject *name) if (error == -1) { PyErr_Clear(); } + if (version != NULL) { + // 0 is not a valid version + *version = 0; + } return NULL; } if (has_version) { #if Py_GIL_DISABLED - update_cache_gil_disabled(entry, name, version, res); + update_cache_gil_disabled(entry, name, assigned_version, res); #else - PyObject *old_value = update_cache(entry, name, version, res); + PyObject *old_value = update_cache(entry, name, assigned_version, res); Py_DECREF(old_value); #endif } + if (version != NULL) { + // 0 is not a valid version + *version = has_version ? assigned_version : 0; + } return res; } +/* Internal API to look for a name through the MRO. 
+ This returns a strong reference, and doesn't set an exception! +*/ +PyObject * +_PyType_LookupRef(PyTypeObject *type, PyObject *name) +{ + return _PyType_LookupRefAndVersion(type, name, NULL); +} + +/* Internal API to look for a name through the MRO. + This returns a borrowed reference, and doesn't set an exception! */ PyObject * _PyType_Lookup(PyTypeObject *type, PyObject *name) { - PyObject *res = _PyType_LookupRef(type, name); + PyObject *res = _PyType_LookupRefAndVersion(type, name, NULL); Py_XDECREF(res); return res; } +int +_PyType_CacheInitForSpecialization(PyHeapTypeObject *type, PyObject *init, + unsigned int tp_version) +{ + if (!init || !tp_version) { + return 0; + } + int can_cache; + BEGIN_TYPE_LOCK(); + can_cache = ((PyTypeObject*)type)->tp_version_tag == tp_version; + #ifdef Py_GIL_DISABLED + can_cache = can_cache && _PyObject_HasDeferredRefcount(init); + #endif + if (can_cache) { + FT_ATOMIC_STORE_PTR_RELEASE(type->_spec_cache.init, init); + } + END_TYPE_LOCK(); + return can_cache; +} + static void set_flags(PyTypeObject *self, unsigned long mask, unsigned long flags) { diff --git a/Python/bytecodes.c b/Python/bytecodes.c index d6be3cebf80724..3d280941b35244 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -3329,7 +3329,7 @@ dummy_func( }; specializing op(_SPECIALIZE_CALL, (counter/1, callable[1], self_or_null[1], args[oparg] -- callable[1], self_or_null[1], args[oparg])) { - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _Py_Specialize_Call(callable[0], next_instr, oparg + !PyStackRef_IsNull(self_or_null[0])); @@ -3337,7 +3337,7 @@ dummy_func( } OPCODE_DEFERRED_INC(CALL); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } op(_MAYBE_EXPAND_METHOD, (callable[1], self_or_null[1], args[oparg] -- func[1], maybe_self[1], args[oparg])) { @@ -3722,10 +3722,10 @@ dummy_func( 
DEOPT_IF(!PyStackRef_IsNull(null[0])); DEOPT_IF(!PyType_Check(callable_o)); PyTypeObject *tp = (PyTypeObject *)callable_o; - DEOPT_IF(tp->tp_version_tag != type_version); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version); assert(tp->tp_flags & Py_TPFLAGS_INLINE_VALUES); PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o; - PyFunctionObject *init_func = (PyFunctionObject *)cls->_spec_cache.init; + PyFunctionObject *init_func = (PyFunctionObject *)FT_ATOMIC_LOAD_PTR_ACQUIRE(cls->_spec_cache.init); PyCodeObject *code = (PyCodeObject *)init_func->func_code; DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize + _Py_InitCleanup.co_framesize)); STAT_INC(CALL, hit); @@ -3743,17 +3743,19 @@ dummy_func( _PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked( tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame); assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK); + assert(_PyFrame_GetBytecode(shim)[1].op.code == RETURN_VALUE); /* Push self onto stack of shim */ shim->localsplus[0] = PyStackRef_DUP(self[0]); DEAD(init); DEAD(self); - init_frame = _PyEvalFramePushAndInit( + _PyInterpreterFrame *temp = _PyEvalFramePushAndInit( tstate, init[0], NULL, args-1, oparg+1, NULL, shim); SYNC_SP(); - if (init_frame == NULL) { + if (temp == NULL) { _PyEval_FrameClearAndPop(tstate, shim); ERROR_NO_POP(); } + init_frame = temp; frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL; /* Account for pushing the extra frame. 
* We don't check recursion depth here, @@ -4000,8 +4002,10 @@ dummy_func( DEOPT_IF(callable_o != interp->callable_cache.list_append); assert(self_o != NULL); DEOPT_IF(!PyList_Check(self_o)); + DEOPT_IF(!LOCK_OBJECT(self_o)); STAT_INC(CALL, hit); int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); + UNLOCK_OBJECT(self_o); PyStackRef_CLOSE(self); PyStackRef_CLOSE(callable); ERROR_IF(err, error); diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index 580814657608db..987ff2e6419669 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -4500,13 +4500,13 @@ JUMP_TO_JUMP_TARGET(); } PyTypeObject *tp = (PyTypeObject *)callable_o; - if (tp->tp_version_tag != type_version) { + if (FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version) { UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } assert(tp->tp_flags & Py_TPFLAGS_INLINE_VALUES); PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o; - PyFunctionObject *init_func = (PyFunctionObject *)cls->_spec_cache.init; + PyFunctionObject *init_func = (PyFunctionObject *)FT_ATOMIC_LOAD_PTR_ACQUIRE(cls->_spec_cache.init); PyCodeObject *code = (PyCodeObject *)init_func->func_code; if (!_PyThreadState_HasStackSpace(tstate, code->co_framesize + _Py_InitCleanup.co_framesize)) { UOP_STAT_INC(uopcode, miss); @@ -4537,25 +4537,29 @@ _PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked( tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame); assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK); + assert(_PyFrame_GetBytecode(shim)[1].op.code == RETURN_VALUE); stack_pointer = _PyFrame_GetStackPointer(frame); /* Push self onto stack of shim */ shim->localsplus[0] = PyStackRef_DUP(self[0]); _PyFrame_SetStackPointer(frame, stack_pointer); - init_frame = _PyEvalFramePushAndInit( + _PyInterpreterFrame *temp = _PyEvalFramePushAndInit( tstate, init[0], NULL, args-1, oparg+1, NULL, shim); stack_pointer = _PyFrame_GetStackPointer(frame); - 
stack_pointer[-2 - oparg].bits = (uintptr_t)init_frame; - stack_pointer += -1 - oparg; + stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); - if (init_frame == NULL) { + if (temp == NULL) { _PyEval_FrameClearAndPop(tstate, shim); JUMP_TO_ERROR(); } + init_frame = temp; frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL; /* Account for pushing the extra frame. * We don't check recursion depth here, * as it will be checked after start_frame */ tstate->py_recursion_remaining--; + stack_pointer[0].bits = (uintptr_t)init_frame; + stack_pointer += 1; + assert(WITHIN_STACK_BOUNDS()); break; } @@ -4908,8 +4912,13 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } + if (!LOCK_OBJECT(self_o)) { + UOP_STAT_INC(uopcode, miss); + JUMP_TO_JUMP_TARGET(); + } STAT_INC(CALL, hit); int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); + UNLOCK_OBJECT(self_o); PyStackRef_CLOSE(self); PyStackRef_CLOSE(callable); if (err) JUMP_TO_ERROR(); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index ef191f6f697f24..33f32aba1e5145 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -880,7 +880,7 @@ callable = &stack_pointer[-2 - oparg]; uint16_t counter = read_u16(&this_instr[1].cache); (void)counter; - #if ENABLE_SPECIALIZATION + #if ENABLE_SPECIALIZATION_FT if (ADAPTIVE_COUNTER_TRIGGERS(counter)) { next_instr = this_instr; _PyFrame_SetStackPointer(frame, stack_pointer); @@ -890,7 +890,7 @@ } OPCODE_DEFERRED_INC(CALL); ADVANCE_ADAPTIVE_COUNTER(this_instr[1].counter); - #endif /* ENABLE_SPECIALIZATION */ + #endif /* ENABLE_SPECIALIZATION_FT */ } /* Skip 2 cache entries */ // _MAYBE_EXPAND_METHOD @@ -1048,10 +1048,10 @@ DEOPT_IF(!PyStackRef_IsNull(null[0]), CALL); DEOPT_IF(!PyType_Check(callable_o), CALL); PyTypeObject *tp = (PyTypeObject *)callable_o; - DEOPT_IF(tp->tp_version_tag != type_version, CALL); + DEOPT_IF(FT_ATOMIC_LOAD_UINT32_RELAXED(tp->tp_version_tag) != type_version, CALL); 
assert(tp->tp_flags & Py_TPFLAGS_INLINE_VALUES); PyHeapTypeObject *cls = (PyHeapTypeObject *)callable_o; - PyFunctionObject *init_func = (PyFunctionObject *)cls->_spec_cache.init; + PyFunctionObject *init_func = (PyFunctionObject *)FT_ATOMIC_LOAD_PTR_ACQUIRE(cls->_spec_cache.init); PyCodeObject *code = (PyCodeObject *)init_func->func_code; DEOPT_IF(!_PyThreadState_HasStackSpace(tstate, code->co_framesize + _Py_InitCleanup.co_framesize), CALL); STAT_INC(CALL, hit); @@ -1073,20 +1073,21 @@ _PyInterpreterFrame *shim = _PyFrame_PushTrampolineUnchecked( tstate, (PyCodeObject *)&_Py_InitCleanup, 1, frame); assert(_PyFrame_GetBytecode(shim)[0].op.code == EXIT_INIT_CHECK); + assert(_PyFrame_GetBytecode(shim)[1].op.code == RETURN_VALUE); stack_pointer = _PyFrame_GetStackPointer(frame); /* Push self onto stack of shim */ shim->localsplus[0] = PyStackRef_DUP(self[0]); _PyFrame_SetStackPointer(frame, stack_pointer); - init_frame = _PyEvalFramePushAndInit( + _PyInterpreterFrame *temp = _PyEvalFramePushAndInit( tstate, init[0], NULL, args-1, oparg+1, NULL, shim); stack_pointer = _PyFrame_GetStackPointer(frame); - stack_pointer[-2 - oparg].bits = (uintptr_t)init_frame; - stack_pointer += -1 - oparg; + stack_pointer += -2 - oparg; assert(WITHIN_STACK_BOUNDS()); - if (init_frame == NULL) { + if (temp == NULL) { _PyEval_FrameClearAndPop(tstate, shim); goto error; } + init_frame = temp; frame->return_offset = 1 + INLINE_CACHE_ENTRIES_CALL; /* Account for pushing the extra frame. * We don't check recursion depth here, @@ -1100,8 +1101,6 @@ // Eventually this should be the only occurrence of this code. 
assert(tstate->interp->eval_frame == NULL); _PyInterpreterFrame *temp = new_frame; - stack_pointer += -1; - assert(WITHIN_STACK_BOUNDS()); _PyFrame_SetStackPointer(frame, stack_pointer); assert(new_frame->previous == frame || new_frame->previous->previous == frame); CALL_STAT_INC(inlined_py_calls); @@ -2383,8 +2382,10 @@ DEOPT_IF(callable_o != interp->callable_cache.list_append, CALL); assert(self_o != NULL); DEOPT_IF(!PyList_Check(self_o), CALL); + DEOPT_IF(!LOCK_OBJECT(self_o), CALL); STAT_INC(CALL, hit); int err = _PyList_AppendTakeRef((PyListObject *)self_o, PyStackRef_AsPyObjectSteal(arg)); + UNLOCK_OBJECT(self_o); PyStackRef_CLOSE(self); PyStackRef_CLOSE(callable); if (err) goto pop_3_error; diff --git a/Python/perf_trampoline.c b/Python/perf_trampoline.c index f144f7d436fe68..ad077dc861b0a7 100644 --- a/Python/perf_trampoline.c +++ b/Python/perf_trampoline.c @@ -484,11 +484,11 @@ _PyPerfTrampoline_Init(int activate) return -1; } if (!activate) { - tstate->interp->eval_frame = NULL; + _PyInterpreterState_SetEvalFrameFunc(tstate->interp, NULL); perf_status = PERF_STATUS_NO_INIT; } else { - tstate->interp->eval_frame = py_trampoline_evaluator; + _PyInterpreterState_SetEvalFrameFunc(tstate->interp, py_trampoline_evaluator); if (new_code_arena() < 0) { return -1; } @@ -514,7 +514,7 @@ _PyPerfTrampoline_Fini(void) } PyThreadState *tstate = _PyThreadState_GET(); if (tstate->interp->eval_frame == py_trampoline_evaluator) { - tstate->interp->eval_frame = NULL; + _PyInterpreterState_SetEvalFrameFunc(tstate->interp, NULL); } if (perf_status == PERF_STATUS_OK) { trampoline_api.free_state(trampoline_api.state); diff --git a/Python/pystate.c b/Python/pystate.c index 3ceae229f75cd0..839413a65a42fb 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -2838,7 +2838,9 @@ _PyInterpreterState_SetEvalFrameFunc(PyInterpreterState *interp, } #endif RARE_EVENT_INC(set_eval_frame_func); + _PyEval_StopTheWorld(interp); interp->eval_frame = eval_frame; + 
_PyEval_StartTheWorld(interp); } diff --git a/Python/specialize.c b/Python/specialize.c index 8b2d1a14c107e0..ec2cd7025e5054 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -1911,38 +1911,38 @@ _Py_Specialize_StoreSubscr(_PyStackRef container_st, _PyStackRef sub_st, _Py_COD unspecialize(instr); } -/* Returns a borrowed reference. - * The reference is only valid if guarded by a type version check. - */ -static PyFunctionObject * -get_init_for_simple_managed_python_class(PyTypeObject *tp) +/* Returns a strong reference. */ +static PyObject * +get_init_for_simple_managed_python_class(PyTypeObject *tp, unsigned int *tp_version) { assert(tp->tp_new == PyBaseObject_Type.tp_new); if (tp->tp_alloc != PyType_GenericAlloc) { SPECIALIZATION_FAIL(CALL, SPEC_FAIL_OVERRIDDEN); return NULL; } - if ((tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) == 0) { + unsigned long tp_flags = PyType_GetFlags(tp); + if ((tp_flags & Py_TPFLAGS_INLINE_VALUES) == 0) { SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_INIT_NOT_INLINE_VALUES); return NULL; } - if (!(tp->tp_flags & Py_TPFLAGS_HEAPTYPE)) { + if (!(tp_flags & Py_TPFLAGS_HEAPTYPE)) { /* Is this possible? 
*/ SPECIALIZATION_FAIL(CALL, SPEC_FAIL_EXPECTED_ERROR); return NULL; } - PyObject *init = _PyType_Lookup(tp, &_Py_ID(__init__)); + PyObject *init = _PyType_LookupRefAndVersion(tp, &_Py_ID(__init__), tp_version); if (init == NULL || !PyFunction_Check(init)) { SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_INIT_NOT_PYTHON); + Py_XDECREF(init); return NULL; } int kind = function_kind((PyCodeObject *)PyFunction_GET_CODE(init)); if (kind != SIMPLE_FUNCTION) { SPECIALIZATION_FAIL(CALL, SPEC_FAIL_CALL_INIT_NOT_SIMPLE); + Py_DECREF(init); return NULL; } - ((PyHeapTypeObject *)tp)->_spec_cache.init = init; - return (PyFunctionObject *)init; + return init; } static int @@ -1954,20 +1954,20 @@ specialize_class_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs) int oparg = instr->op.arg; if (nargs == 1 && oparg == 1) { if (tp == &PyUnicode_Type) { - instr->op.code = CALL_STR_1; + specialize(instr, CALL_STR_1); return 0; } else if (tp == &PyType_Type) { - instr->op.code = CALL_TYPE_1; + specialize(instr, CALL_TYPE_1); return 0; } else if (tp == &PyTuple_Type) { - instr->op.code = CALL_TUPLE_1; + specialize(instr, CALL_TUPLE_1); return 0; } } if (tp->tp_vectorcall != NULL) { - instr->op.code = CALL_BUILTIN_CLASS; + specialize(instr, CALL_BUILTIN_CLASS); return 0; } goto generic; @@ -1976,19 +1976,25 @@ specialize_class_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs) goto generic; } if (tp->tp_new == PyBaseObject_Type.tp_new) { - PyFunctionObject *init = get_init_for_simple_managed_python_class(tp); - if (type_get_version(tp, CALL) == 0) { + unsigned int tp_version = 0; + PyObject *init = get_init_for_simple_managed_python_class(tp, &tp_version); + if (!tp_version) { + SPECIALIZATION_FAIL(CALL, SPEC_FAIL_OUT_OF_VERSIONS); + Py_XDECREF(init); return -1; } - if (init != NULL) { + if (init != NULL && _PyType_CacheInitForSpecialization( + (PyHeapTypeObject *)tp, init, tp_version)) { _PyCallCache *cache = (_PyCallCache *)(instr + 1); - write_u32(cache->func_version, 
tp->tp_version_tag); - _Py_SET_OPCODE(*instr, CALL_ALLOC_AND_ENTER_INIT); + write_u32(cache->func_version, tp_version); + specialize(instr, CALL_ALLOC_AND_ENTER_INIT); + Py_DECREF(init); return 0; } + Py_XDECREF(init); } generic: - instr->op.code = CALL_NON_PY_GENERAL; + specialize(instr, CALL_NON_PY_GENERAL); return 0; } @@ -2004,7 +2010,7 @@ specialize_method_descriptor(PyMethodDescrObject *descr, _Py_CODEUNIT *instr, SPECIALIZATION_FAIL(CALL, SPEC_FAIL_WRONG_NUMBER_ARGUMENTS); return -1; } - instr->op.code = CALL_METHOD_DESCRIPTOR_NOARGS; + specialize(instr, CALL_METHOD_DESCRIPTOR_NOARGS); return 0; } case METH_O: { @@ -2018,22 +2024,22 @@ specialize_method_descriptor(PyMethodDescrObject *descr, _Py_CODEUNIT *instr, bool pop = (next.op.code == POP_TOP); int oparg = instr->op.arg; if ((PyObject *)descr == list_append && oparg == 1 && pop) { - instr->op.code = CALL_LIST_APPEND; + specialize(instr, CALL_LIST_APPEND); return 0; } - instr->op.code = CALL_METHOD_DESCRIPTOR_O; + specialize(instr, CALL_METHOD_DESCRIPTOR_O); return 0; } case METH_FASTCALL: { - instr->op.code = CALL_METHOD_DESCRIPTOR_FAST; + specialize(instr, CALL_METHOD_DESCRIPTOR_FAST); return 0; } case METH_FASTCALL | METH_KEYWORDS: { - instr->op.code = CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS; + specialize(instr, CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS); return 0; } } - instr->op.code = CALL_NON_PY_GENERAL; + specialize(instr, CALL_NON_PY_GENERAL); return 0; } @@ -2063,12 +2069,15 @@ specialize_py_call(PyFunctionObject *func, _Py_CODEUNIT *instr, int nargs, return -1; } write_u32(cache->func_version, version); + uint8_t opcode; if (argcount == nargs + bound_method) { - instr->op.code = bound_method ? CALL_BOUND_METHOD_EXACT_ARGS : CALL_PY_EXACT_ARGS; + opcode = + bound_method ? CALL_BOUND_METHOD_EXACT_ARGS : CALL_PY_EXACT_ARGS; } else { - instr->op.code = bound_method ? CALL_BOUND_METHOD_GENERAL : CALL_PY_GENERAL; + opcode = bound_method ? 
CALL_BOUND_METHOD_GENERAL : CALL_PY_GENERAL; } + specialize(instr, opcode); return 0; } @@ -2117,10 +2126,10 @@ specialize_c_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs) /* len(o) */ PyInterpreterState *interp = _PyInterpreterState_GET(); if (callable == interp->callable_cache.len) { - instr->op.code = CALL_LEN; + specialize(instr, CALL_LEN); return 0; } - instr->op.code = CALL_BUILTIN_O; + specialize(instr, CALL_BUILTIN_O); return 0; } case METH_FASTCALL: { @@ -2128,19 +2137,19 @@ specialize_c_call(PyObject *callable, _Py_CODEUNIT *instr, int nargs) /* isinstance(o1, o2) */ PyInterpreterState *interp = _PyInterpreterState_GET(); if (callable == interp->callable_cache.isinstance) { - instr->op.code = CALL_ISINSTANCE; + specialize(instr, CALL_ISINSTANCE); return 0; } } - instr->op.code = CALL_BUILTIN_FAST; + specialize(instr, CALL_BUILTIN_FAST); return 0; } case METH_FASTCALL | METH_KEYWORDS: { - instr->op.code = CALL_BUILTIN_FAST_WITH_KEYWORDS; + specialize(instr, CALL_BUILTIN_FAST_WITH_KEYWORDS); return 0; } default: - instr->op.code = CALL_NON_PY_GENERAL; + specialize(instr, CALL_NON_PY_GENERAL); return 0; } } @@ -2150,10 +2159,9 @@ _Py_Specialize_Call(_PyStackRef callable_st, _Py_CODEUNIT *instr, int nargs) { PyObject *callable = PyStackRef_AsPyObjectBorrow(callable_st); - assert(ENABLE_SPECIALIZATION); + assert(ENABLE_SPECIALIZATION_FT); assert(_PyOpcode_Caches[CALL] == INLINE_CACHE_ENTRIES_CALL); assert(_Py_OPCODE(*instr) != INSTRUMENTED_CALL); - _PyCallCache *cache = (_PyCallCache *)(instr + 1); int fail; if (PyCFunction_CheckExact(callable)) { fail = specialize_c_call(callable, instr, nargs); @@ -2178,19 +2186,11 @@ _Py_Specialize_Call(_PyStackRef callable_st, _Py_CODEUNIT *instr, int nargs) } } else { - instr->op.code = CALL_NON_PY_GENERAL; + specialize(instr, CALL_NON_PY_GENERAL); fail = 0; } if (fail) { - STAT_INC(CALL, failure); - assert(!PyErr_Occurred()); - instr->op.code = CALL; - cache->counter = adaptive_counter_backoff(cache->counter); 
- } - else { - STAT_INC(CALL, success); - assert(!PyErr_Occurred()); - cache->counter = adaptive_counter_cooldown(); + unspecialize(instr); } } @@ -2793,6 +2793,16 @@ _Py_Specialize_ContainsOp(_PyStackRef value_st, _Py_CODEUNIT *instr) * Ends with a RESUME so that it is not traced. * This is used as a plain code object, not a function, * so must not access globals or builtins. + * There are a few other constraints imposed on the code + * by the free-threaded build: + * + * 1. The RESUME instruction must not be executed. Otherwise we may attempt to + * free the statically allocated TLBC array. + * 2. It must contain no specializable instructions. Specializing multiple + * copies of the same bytecode is not thread-safe in free-threaded builds. + * + * This should be dynamically allocated if either of those restrictions need to + * be lifted. */ #define NO_LOC_4 (128 | (PY_CODE_LOCATION_INFO_NONE << 3) | 3) @@ -2802,6 +2812,13 @@ static const PyBytesObject no_location = { .ob_sval = { NO_LOC_4 } }; +#ifdef Py_GIL_DISABLED +static _PyCodeArray init_cleanup_tlbc = { + .size = 1, + .entries = {(char*) &_Py_InitCleanup.co_code_adaptive}, +}; +#endif + const struct _PyCode8 _Py_InitCleanup = { _PyVarObject_HEAD_INIT(&PyCode_Type, 3), .co_consts = (PyObject *)&_Py_SINGLETON(tuple_empty), @@ -2817,6 +2834,9 @@ const struct _PyCode8 _Py_InitCleanup = { ._co_firsttraceable = 4, .co_stacksize = 2, .co_framesize = 2 + FRAME_SPECIALS_SIZE, +#ifdef Py_GIL_DISABLED + .co_tlbc = &init_cleanup_tlbc, +#endif .co_code_adaptive = { EXIT_INIT_CHECK, 0, RETURN_VALUE, 0, From 12397a5781664bf43da98454db07cdfdec3ab815 Mon Sep 17 00:00:00 2001 From: "RUANG (James Roy)" Date: Wed, 4 Dec 2024 06:33:13 +0800 Subject: [PATCH 29/76] gh-112192: Increase the trace module coverage precision to one decimal (#126972) --- Lib/test/test_regrtest.py | 2 +- Lib/test/test_trace.py | 4 ++-- Lib/trace.py | 7 +++---- .../Library/2024-11-18-23-18-27.gh-issue-112192.DRdRgP.rst | 1 + 4 files changed, 7 
insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-18-23-18-27.gh-issue-112192.DRdRgP.rst diff --git a/Lib/test/test_regrtest.py b/Lib/test/test_regrtest.py index d4f4a69a7a38c1..0ab7a23aca1df8 100644 --- a/Lib/test/test_regrtest.py +++ b/Lib/test/test_regrtest.py @@ -1138,7 +1138,7 @@ def test_coverage(self): output = self.run_tests("--coverage", test) self.check_executed_tests(output, [test], stats=1) regex = (r'lines +cov% +module +\(path\)\n' - r'(?: *[0-9]+ *[0-9]{1,2}% *[^ ]+ +\([^)]+\)+)+') + r'(?: *[0-9]+ *[0-9]{1,2}\.[0-9]% *[^ ]+ +\([^)]+\)+)+') self.check_line(output, regex) def test_wait(self): diff --git a/Lib/test/test_trace.py b/Lib/test/test_trace.py index 93966ee31d0a01..e7e42531916d0d 100644 --- a/Lib/test/test_trace.py +++ b/Lib/test/test_trace.py @@ -412,7 +412,7 @@ def test_issue9936(self): coverage = {} for line in stdout: lines, cov, module = line.split()[:3] - coverage[module] = (int(lines), int(cov[:-1])) + coverage[module] = (float(lines), float(cov[:-1])) # XXX This is needed to run regrtest.py as a script modname = trace._fullmodname(sys.modules[modname].__file__) self.assertIn(modname, coverage) @@ -553,7 +553,7 @@ def f(): stdout = stdout.decode() self.assertEqual(status, 0) self.assertIn('lines cov% module (path)', stdout) - self.assertIn(f'6 100% {modulename} ({filename})', stdout) + self.assertIn(f'6 100.0% {modulename} ({filename})', stdout) def test_run_as_module(self): assert_python_ok('-m', 'trace', '-l', '--module', 'timeit', '-n', '1') diff --git a/Lib/trace.py b/Lib/trace.py index bb3d34fd8d6550..a87bc6d61a884f 100644 --- a/Lib/trace.py +++ b/Lib/trace.py @@ -279,14 +279,13 @@ def write_results(self, show_missing=True, summary=False, coverdir=None, *, n_hits, n_lines = self.write_results_file(coverpath, source, lnotab, count, encoding) if summary and n_lines: - percent = int(100 * n_hits / n_lines) - sums[modulename] = n_lines, percent, modulename, filename + sums[modulename] = n_lines, n_hits, 
modulename, filename if summary and sums: print("lines cov% module (path)") for m in sorted(sums): - n_lines, percent, modulename, filename = sums[m] - print("%5d %3d%% %s (%s)" % sums[m]) + n_lines, n_hits, modulename, filename = sums[m] + print(f"{n_lines:5d} {n_hits/n_lines:.1%} {modulename} ({filename})") if self.outfile: # try and store counts and module info into self.outfile diff --git a/Misc/NEWS.d/next/Library/2024-11-18-23-18-27.gh-issue-112192.DRdRgP.rst b/Misc/NEWS.d/next/Library/2024-11-18-23-18-27.gh-issue-112192.DRdRgP.rst new file mode 100644 index 00000000000000..b169c1508d0d30 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-18-23-18-27.gh-issue-112192.DRdRgP.rst @@ -0,0 +1 @@ +In the :mod:`trace` module, increase the coverage precision (``cov%``) to one decimal. From 0f9107817022f0defac157e3795a4093a32ea320 Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Wed, 4 Dec 2024 01:06:20 +0100 Subject: [PATCH 30/76] gh-127146: Resolve some minor problems in Emscripten tests (#127565) Adjusts some Emscripten test exclusions regarding strftime, fma, and stack depth. 
--- Lib/test/test_marshal.py | 4 ++-- Lib/test/test_math.py | 2 +- Lib/test/test_support.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_marshal.py b/Lib/test/test_marshal.py index 93b8684c725d24..4ed9f1fc1b8020 100644 --- a/Lib/test/test_marshal.py +++ b/Lib/test/test_marshal.py @@ -1,5 +1,5 @@ from test import support -from test.support import is_apple_mobile, os_helper, requires_debug_ranges +from test.support import is_apple_mobile, os_helper, requires_debug_ranges, is_emscripten from test.support.script_helper import assert_python_ok import array import io @@ -294,7 +294,7 @@ def test_recursion_limit(self): #if os.name == 'nt' and support.Py_DEBUG: if os.name == 'nt': MAX_MARSHAL_STACK_DEPTH = 1000 - elif sys.platform == 'wasi' or is_apple_mobile: + elif sys.platform == 'wasi' or is_emscripten or is_apple_mobile: MAX_MARSHAL_STACK_DEPTH = 1500 else: MAX_MARSHAL_STACK_DEPTH = 2000 diff --git a/Lib/test/test_math.py b/Lib/test/test_math.py index fecafd53aa6e6f..6976a5d85da019 100644 --- a/Lib/test/test_math.py +++ b/Lib/test/test_math.py @@ -2722,7 +2722,7 @@ def test_fma_infinities(self): # gh-73468: On some platforms, libc fma() doesn't implement IEE 754-2008 # properly: it doesn't use the right sign when the result is zero. 
@unittest.skipIf( - sys.platform.startswith(("freebsd", "wasi", "netbsd")) + sys.platform.startswith(("freebsd", "wasi", "netbsd", "emscripten")) or (sys.platform == "android" and platform.machine() == "x86_64"), f"this platform doesn't implement IEE 754-2008 properly") def test_fma_zero_result(self): diff --git a/Lib/test/test_support.py b/Lib/test/test_support.py index 9a3cf140d81241..635ae03a404988 100644 --- a/Lib/test/test_support.py +++ b/Lib/test/test_support.py @@ -578,7 +578,7 @@ def test_print_warning(self): 'Warning -- a\nWarning -- b\n') def test_has_strftime_extensions(self): - if support.is_emscripten or sys.platform == "win32": + if sys.platform == "win32": self.assertFalse(support.has_strftime_extensions) else: self.assertTrue(support.has_strftime_extensions) From 7f882c88cfda486947974cb82c20a1ae7047edfc Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Tue, 3 Dec 2024 18:20:01 -0600 Subject: [PATCH 31/76] Itertool recipe additions (gh-127483) --- Doc/library/itertools.rst | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst index c138e903fa5a0f..03966f3d3d694b 100644 --- a/Doc/library/itertools.rst +++ b/Doc/library/itertools.rst @@ -877,6 +877,11 @@ and :term:`generators ` which incur interpreter overhead. "Returns the sequence elements n times." return chain.from_iterable(repeat(tuple(iterable), n)) + def loops(n): + "Loop n times. Like range(n) but without creating integers." + # for _ in loops(100): ... + return repeat(None, n) + def tail(n, iterable): "Return an iterator over the last n items." # tail(3, 'ABCDEFG') → E F G @@ -1099,6 +1104,11 @@ The following recipes have a more mathematical flavor: data[p*p : n : p+p] = bytes(len(range(p*p, n, p+p))) yield from iter_index(data, 1, start=3) + def is_prime(n): + "Return True if n is prime." 
+ # is_prime(1_000_000_000_000_403) → True + return n > 1 and all(n % p for p in sieve(math.isqrt(n) + 1)) + def factor(n): "Prime factors of n." # factor(99) → 3 3 11 @@ -1202,6 +1212,16 @@ The following recipes have a more mathematical flavor: [0, 2, 4, 6] + >>> for _ in loops(5): + ... print('hi') + ... + hi + hi + hi + hi + hi + + >>> list(tail(3, 'ABCDEFG')) ['E', 'F', 'G'] >>> # Verify the input is consumed greedily @@ -1475,6 +1495,23 @@ The following recipes have a more mathematical flavor: True + >>> small_primes = [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97] + >>> list(filter(is_prime, range(-100, 100))) == small_primes + True + >>> carmichael = {561, 1105, 1729, 2465, 2821, 6601, 8911} # https://oeis.org/A002997 + >>> any(map(is_prime, carmichael)) + False + >>> # https://www.wolframalpha.com/input?i=is+128884753939+prime + >>> is_prime(128_884_753_939) # large prime + True + >>> is_prime(999953 * 999983) # large semiprime + False + >>> is_prime(1_000_000_000_000_007) # factor() example + False + >>> is_prime(1_000_000_000_000_403) # factor() example + True + + >>> list(factor(99)) # Code example 1 [3, 3, 11] >>> list(factor(1_000_000_000_000_007)) # Code example 2 From 6fc643674983e27ec5cc312f2e83468050d1d364 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Wed, 4 Dec 2024 08:58:22 +0000 Subject: [PATCH 32/76] gh-127572: Fix `test_structmembers` initialization (GH-127577) gh-127572: Fix `test_structmembers` initialization. The 'C' format code expects an `int` as a destination (not a `char`). This led to test failures on big-endian platforms like s390x. Use the 'c' format code, which expects a `char` as the destination (but requires a Python bytes object instead of a str).
--- Lib/test/test_capi/test_structmembers.py | 2 +- Modules/_testcapi/structmember.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_capi/test_structmembers.py b/Lib/test/test_capi/test_structmembers.py index ae9168fc39243f..f14ad9a9a5f512 100644 --- a/Lib/test/test_capi/test_structmembers.py +++ b/Lib/test/test_capi/test_structmembers.py @@ -39,7 +39,7 @@ def _make_test_object(cls): "hi", # T_STRING_INPLACE 12, # T_LONGLONG 13, # T_ULONGLONG - "c", # T_CHAR + b"c", # T_CHAR ) diff --git a/Modules/_testcapi/structmember.c b/Modules/_testcapi/structmember.c index c1861db18c4af2..ef30a5a9944e3c 100644 --- a/Modules/_testcapi/structmember.c +++ b/Modules/_testcapi/structmember.c @@ -60,7 +60,7 @@ test_structmembers_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) "T_FLOAT", "T_DOUBLE", "T_STRING_INPLACE", "T_LONGLONG", "T_ULONGLONG", "T_CHAR", NULL}; - static const char fmt[] = "|bbBhHiIlknfds#LKC"; + static const char fmt[] = "|bbBhHiIlknfds#LKc"; test_structmembers *ob; const char *s = NULL; Py_ssize_t string_len = 0; From ad9d059eb10ef132edd73075fa6d8d96d95b8701 Mon Sep 17 00:00:00 2001 From: sobolevn Date: Wed, 4 Dec 2024 13:01:46 +0300 Subject: [PATCH 33/76] gh-126524: Run `regen-unicodedata` as a part of our CI (#126682) --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1f8c468475470c..55effee0e1e393 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -120,7 +120,7 @@ jobs: - name: Build CPython run: | make -j4 regen-all - make regen-stdlib-module-names regen-sbom + make regen-stdlib-module-names regen-sbom regen-unicodedata - name: Check for changes run: | git add -u From bc0f2e945993747c8b1a6dd66cbe902fddd5758b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Wed, 4 Dec 2024 14:13:52 +0100 Subject: [PATCH 34/76] 
gh-123378: Ensure results of `PyUnicode*Error_Get{Start,End}` are clamped (GH-123380) Co-authored-by: Sergey B Kirpichev --- Doc/c-api/exceptions.rst | 20 +- Doc/library/exceptions.rst | 6 + Lib/test/test_capi/test_exceptions.py | 150 +++++++++++ ...-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst | 6 + ...-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst | 6 + Modules/_testcapi/exceptions.c | 167 ++++++++++++ Objects/exceptions.c | 248 ++++++++++-------- 7 files changed, 492 insertions(+), 111 deletions(-) create mode 100644 Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst create mode 100644 Misc/NEWS.d/next/C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst diff --git a/Doc/c-api/exceptions.rst b/Doc/c-api/exceptions.rst index fc2336d120c259..c1f0bd750361d6 100644 --- a/Doc/c-api/exceptions.rst +++ b/Doc/c-api/exceptions.rst @@ -853,12 +853,23 @@ The following functions are used to create and modify Unicode exceptions from C. *\*start*. *start* must not be ``NULL``. Return ``0`` on success, ``-1`` on failure. + If the :attr:`UnicodeError.object` is an empty sequence, the resulting + *start* is ``0``. Otherwise, it is clipped to ``[0, len(object) - 1]``. + + .. seealso:: :attr:`UnicodeError.start` + .. c:function:: int PyUnicodeDecodeError_SetStart(PyObject *exc, Py_ssize_t start) int PyUnicodeEncodeError_SetStart(PyObject *exc, Py_ssize_t start) int PyUnicodeTranslateError_SetStart(PyObject *exc, Py_ssize_t start) - Set the *start* attribute of the given exception object to *start*. Return - ``0`` on success, ``-1`` on failure. + Set the *start* attribute of the given exception object to *start*. + Return ``0`` on success, ``-1`` on failure. + + .. note:: + + While passing a negative *start* does not raise an exception, + the corresponding getters will not consider it as a relative + offset. .. 
c:function:: int PyUnicodeDecodeError_GetEnd(PyObject *exc, Py_ssize_t *end) int PyUnicodeEncodeError_GetEnd(PyObject *exc, Py_ssize_t *end) @@ -868,6 +879,9 @@ The following functions are used to create and modify Unicode exceptions from C. *\*end*. *end* must not be ``NULL``. Return ``0`` on success, ``-1`` on failure. + If the :attr:`UnicodeError.object` is an empty sequence, the resulting + *end* is ``0``. Otherwise, it is clipped to ``[1, len(object)]``. + .. c:function:: int PyUnicodeDecodeError_SetEnd(PyObject *exc, Py_ssize_t end) int PyUnicodeEncodeError_SetEnd(PyObject *exc, Py_ssize_t end) int PyUnicodeTranslateError_SetEnd(PyObject *exc, Py_ssize_t end) @@ -875,6 +889,8 @@ The following functions are used to create and modify Unicode exceptions from C. Set the *end* attribute of the given exception object to *end*. Return ``0`` on success, ``-1`` on failure. + .. seealso:: :attr:`UnicodeError.end` + .. c:function:: PyObject* PyUnicodeDecodeError_GetReason(PyObject *exc) PyObject* PyUnicodeEncodeError_GetReason(PyObject *exc) PyObject* PyUnicodeTranslateError_GetReason(PyObject *exc) diff --git a/Doc/library/exceptions.rst b/Doc/library/exceptions.rst index b5ba86f1b19223..f72b11e34c5c3d 100644 --- a/Doc/library/exceptions.rst +++ b/Doc/library/exceptions.rst @@ -644,10 +644,16 @@ The following exceptions are the exceptions that are usually raised. The first index of invalid data in :attr:`object`. + This value should not be negative as it is interpreted as an + absolute offset but this constraint is not enforced at runtime. + .. attribute:: end The index after the last invalid data in :attr:`object`. + This value should not be negative as it is interpreted as an + absolute offset but this constraint is not enforced at runtime. + .. 
exception:: UnicodeEncodeError diff --git a/Lib/test/test_capi/test_exceptions.py b/Lib/test/test_capi/test_exceptions.py index b22ddd8ad858d4..666e2f2ab09548 100644 --- a/Lib/test/test_capi/test_exceptions.py +++ b/Lib/test/test_capi/test_exceptions.py @@ -415,6 +415,156 @@ def test_err_formatunraisable(self): # CRASHES formatunraisable(NULL, NULL) +class TestUnicodeTranslateError(UnicodeTranslateError): + # UnicodeTranslateError takes 4 arguments instead of 5, + # so we just make a UnicodeTranslateError class that is + # compatible with the UnicodeError.__init__. + def __init__(self, encoding, *args, **kwargs): + super().__init__(*args, **kwargs) + + +class TestUnicodeError(unittest.TestCase): + + def _check_no_crash(self, exc): + # ensure that the __str__() method does not crash + _ = str(exc) + + def test_unicode_encode_error_get_start(self): + get_start = _testcapi.unicode_encode_get_start + self._test_unicode_error_get_start('x', UnicodeEncodeError, get_start) + + def test_unicode_decode_error_get_start(self): + get_start = _testcapi.unicode_decode_get_start + self._test_unicode_error_get_start(b'x', UnicodeDecodeError, get_start) + + def test_unicode_translate_error_get_start(self): + get_start = _testcapi.unicode_translate_get_start + self._test_unicode_error_get_start('x', TestUnicodeTranslateError, get_start) + + def _test_unicode_error_get_start(self, literal, exc_type, get_start): + for obj_len, start, c_start in [ + # normal cases + (5, 0, 0), + (5, 1, 1), + (5, 2, 2), + # out of range start is clamped to max(0, obj_len - 1) + (0, 0, 0), + (0, 1, 0), + (0, 10, 0), + (5, 5, 4), + (5, 10, 4), + # negative values are allowed but clipped in the getter + (0, -1, 0), + (1, -1, 0), + (2, -1, 0), + (2, -2, 0), + ]: + obj = literal * obj_len + with self.subTest(obj, exc_type=exc_type, start=start): + exc = exc_type('utf-8', obj, start, obj_len, 'reason') + self.assertEqual(get_start(exc), c_start) + self._check_no_crash(exc) + + def 
test_unicode_encode_error_set_start(self): + set_start = _testcapi.unicode_encode_set_start + self._test_unicode_error_set_start('x', UnicodeEncodeError, set_start) + + def test_unicode_decode_error_set_start(self): + set_start = _testcapi.unicode_decode_set_start + self._test_unicode_error_set_start(b'x', UnicodeDecodeError, set_start) + + def test_unicode_translate_error_set_start(self): + set_start = _testcapi.unicode_translate_set_start + self._test_unicode_error_set_start('x', TestUnicodeTranslateError, set_start) + + def _test_unicode_error_set_start(self, literal, exc_type, set_start): + obj_len = 5 + obj = literal * obj_len + for new_start in range(-2 * obj_len, 2 * obj_len): + with self.subTest('C-API', obj=obj, exc_type=exc_type, new_start=new_start): + exc = exc_type('utf-8', obj, 0, obj_len, 'reason') + # arbitrary value is allowed in the C API setter + set_start(exc, new_start) + self.assertEqual(exc.start, new_start) + self._check_no_crash(exc) + + with self.subTest('Py-API', obj=obj, exc_type=exc_type, new_start=new_start): + exc = exc_type('utf-8', obj, 0, obj_len, 'reason') + # arbitrary value is allowed in the attribute setter + exc.start = new_start + self.assertEqual(exc.start, new_start) + self._check_no_crash(exc) + + def test_unicode_encode_error_get_end(self): + get_end = _testcapi.unicode_encode_get_end + self._test_unicode_error_get_end('x', UnicodeEncodeError, get_end) + + def test_unicode_decode_error_get_end(self): + get_end = _testcapi.unicode_decode_get_end + self._test_unicode_error_get_end(b'x', UnicodeDecodeError, get_end) + + def test_unicode_translate_error_get_end(self): + get_end = _testcapi.unicode_translate_get_end + self._test_unicode_error_get_end('x', TestUnicodeTranslateError, get_end) + + def _test_unicode_error_get_end(self, literal, exc_type, get_end): + for obj_len, end, c_end in [ + # normal cases + (5, 0, 1), + (5, 1, 1), + (5, 2, 2), + # out-of-range clipped in [MIN(1, OBJLEN), MAX(MIN(1, OBJLEN), OBJLEN)] + (0, 0, 
0), + (0, 1, 0), + (0, 10, 0), + (1, 1, 1), + (1, 2, 1), + (5, 5, 5), + (5, 5, 5), + (5, 10, 5), + # negative values are allowed but clipped in the getter + (0, -1, 0), + (1, -1, 1), + (2, -1, 1), + (2, -2, 1), + ]: + obj = literal * obj_len + with self.subTest(obj, exc_type=exc_type, end=end): + exc = exc_type('utf-8', obj, 0, end, 'reason') + self.assertEqual(get_end(exc), c_end) + self._check_no_crash(exc) + + def test_unicode_encode_error_set_end(self): + set_end = _testcapi.unicode_encode_set_end + self._test_unicode_error_set_end('x', UnicodeEncodeError, set_end) + + def test_unicode_decode_error_set_end(self): + set_end = _testcapi.unicode_decode_set_end + self._test_unicode_error_set_end(b'x', UnicodeDecodeError, set_end) + + def test_unicode_translate_error_set_end(self): + set_end = _testcapi.unicode_translate_set_end + self._test_unicode_error_set_end('x', TestUnicodeTranslateError, set_end) + + def _test_unicode_error_set_end(self, literal, exc_type, set_end): + obj_len = 5 + obj = literal * obj_len + for new_end in range(-2 * obj_len, 2 * obj_len): + with self.subTest('C-API', obj=obj, exc_type=exc_type, new_end=new_end): + exc = exc_type('utf-8', obj, 0, obj_len, 'reason') + # arbitrary value is allowed in the C API setter + set_end(exc, new_end) + self.assertEqual(exc.end, new_end) + self._check_no_crash(exc) + + with self.subTest('Py-API', obj=obj, exc_type=exc_type, new_end=new_end): + exc = exc_type('utf-8', obj, 0, obj_len, 'reason') + # arbitrary value is allowed in the attribute setter + exc.end = new_end + self.assertEqual(exc.end, new_end) + self._check_no_crash(exc) + + class Test_PyUnstable_Exc_PrepReraiseStar(ExceptionIsLikeMixin, unittest.TestCase): def setUp(self): diff --git a/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst b/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst new file mode 100644 index 00000000000000..2cfb8b8a1e245a --- /dev/null +++ 
b/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst @@ -0,0 +1,6 @@ +Ensure that the value of :attr:`UnicodeEncodeError.start <UnicodeError.start>` +retrieved by :c:func:`PyUnicodeEncodeError_GetStart` lies in +``[0, max(0, objlen - 1)]`` where *objlen* is the length of +:attr:`UnicodeEncodeError.object <UnicodeError.object>`. Similar +arguments apply to :exc:`UnicodeDecodeError` and :exc:`UnicodeTranslateError` +and their corresponding C interface. Patch by Bénédikt Tran. diff --git a/Misc/NEWS.d/next/C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst b/Misc/NEWS.d/next/C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst new file mode 100644 index 00000000000000..107751579c4d91 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2024-12-02-16-10-36.gh-issue-123378.Q6YRwe.rst @@ -0,0 +1,6 @@ +Ensure that the value of :attr:`UnicodeEncodeError.end <UnicodeError.end>` +retrieved by :c:func:`PyUnicodeEncodeError_GetEnd` lies in ``[min(1, objlen), +max(min(1, objlen), objlen)]`` where *objlen* is the length of +:attr:`UnicodeEncodeError.object <UnicodeError.object>`. Similar arguments +apply to :exc:`UnicodeDecodeError` and :exc:`UnicodeTranslateError` and their +corresponding C interface. Patch by Bénédikt Tran.
diff --git a/Modules/_testcapi/exceptions.c b/Modules/_testcapi/exceptions.c index 316ef0e7ad7e55..e92d9670e7c792 100644 --- a/Modules/_testcapi/exceptions.c +++ b/Modules/_testcapi/exceptions.c @@ -359,6 +359,161 @@ _testcapi_unstable_exc_prep_reraise_star_impl(PyObject *module, return PyUnstable_Exc_PrepReraiseStar(orig, excs); } +/* Test PyUnicodeEncodeError_GetStart */ +static PyObject * +unicode_encode_get_start(PyObject *Py_UNUSED(module), PyObject *arg) +{ + Py_ssize_t start; + if (PyUnicodeEncodeError_GetStart(arg, &start) < 0) { + return NULL; + } + RETURN_SIZE(start); +} + +/* Test PyUnicodeDecodeError_GetStart */ +static PyObject * +unicode_decode_get_start(PyObject *Py_UNUSED(module), PyObject *arg) +{ + Py_ssize_t start; + if (PyUnicodeDecodeError_GetStart(arg, &start) < 0) { + return NULL; + } + RETURN_SIZE(start); +} + +/* Test PyUnicodeTranslateError_GetStart */ +static PyObject * +unicode_translate_get_start(PyObject *Py_UNUSED(module), PyObject *arg) +{ + Py_ssize_t start; + if (PyUnicodeTranslateError_GetStart(arg, &start) < 0) { + return NULL; + } + RETURN_SIZE(start); +} + +/* Test PyUnicodeEncodeError_SetStart */ +static PyObject * +unicode_encode_set_start(PyObject *Py_UNUSED(module), PyObject *args) +{ + PyObject *exc; + Py_ssize_t start; + if (PyArg_ParseTuple(args, "On", &exc, &start) < 0) { + return NULL; + } + if (PyUnicodeEncodeError_SetStart(exc, start) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + +/* Test PyUnicodeDecodeError_SetStart */ +static PyObject * +unicode_decode_set_start(PyObject *Py_UNUSED(module), PyObject *args) +{ + PyObject *exc; + Py_ssize_t start; + if (PyArg_ParseTuple(args, "On", &exc, &start) < 0) { + return NULL; + } + if (PyUnicodeDecodeError_SetStart(exc, start) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + +/* Test PyUnicodeTranslateError_SetStart */ +static PyObject * +unicode_translate_set_start(PyObject *Py_UNUSED(module), PyObject *args) +{ + PyObject *exc; + Py_ssize_t start; + if 
(PyArg_ParseTuple(args, "On", &exc, &start) < 0) { + return NULL; + } + if (PyUnicodeTranslateError_SetStart(exc, start) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + +/* Test PyUnicodeEncodeError_GetEnd */ +static PyObject * +unicode_encode_get_end(PyObject *Py_UNUSED(module), PyObject *arg) +{ + Py_ssize_t end; + if (PyUnicodeEncodeError_GetEnd(arg, &end) < 0) { + return NULL; + } + RETURN_SIZE(end); +} + +/* Test PyUnicodeDecodeError_GetEnd */ +static PyObject * +unicode_decode_get_end(PyObject *Py_UNUSED(module), PyObject *arg) +{ + Py_ssize_t end; + if (PyUnicodeDecodeError_GetEnd(arg, &end) < 0) { + return NULL; + } + RETURN_SIZE(end); +} + +/* Test PyUnicodeTranslateError_GetEnd */ +static PyObject * +unicode_translate_get_end(PyObject *Py_UNUSED(module), PyObject *arg) +{ + Py_ssize_t end; + if (PyUnicodeTranslateError_GetEnd(arg, &end) < 0) { + return NULL; + } + RETURN_SIZE(end); +} + +/* Test PyUnicodeEncodeError_SetEnd */ +static PyObject * +unicode_encode_set_end(PyObject *Py_UNUSED(module), PyObject *args) +{ + PyObject *exc; + Py_ssize_t end; + if (PyArg_ParseTuple(args, "On", &exc, &end) < 0) { + return NULL; + } + if (PyUnicodeEncodeError_SetEnd(exc, end) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + +/* Test PyUnicodeDecodeError_SetEnd */ +static PyObject * +unicode_decode_set_end(PyObject *Py_UNUSED(module), PyObject *args) +{ + PyObject *exc; + Py_ssize_t end; + if (PyArg_ParseTuple(args, "On", &exc, &end) < 0) { + return NULL; + } + if (PyUnicodeDecodeError_SetEnd(exc, end) < 0) { + return NULL; + } + Py_RETURN_NONE; +} + +/* Test PyUnicodeTranslateError_SetEnd */ +static PyObject * +unicode_translate_set_end(PyObject *Py_UNUSED(module), PyObject *args) +{ + PyObject *exc; + Py_ssize_t end; + if (PyArg_ParseTuple(args, "On", &exc, &end) < 0) { + return NULL; + } + if (PyUnicodeTranslateError_SetEnd(exc, end) < 0) { + return NULL; + } + Py_RETURN_NONE; +} /* * Define the PyRecurdingInfinitelyError_Type @@ -403,6 +558,18 @@ static 
PyMethodDef test_methods[] = { _TESTCAPI_SET_EXCEPTION_METHODDEF _TESTCAPI_TRACEBACK_PRINT_METHODDEF _TESTCAPI_UNSTABLE_EXC_PREP_RERAISE_STAR_METHODDEF + {"unicode_encode_get_start", unicode_encode_get_start, METH_O}, + {"unicode_decode_get_start", unicode_decode_get_start, METH_O}, + {"unicode_translate_get_start", unicode_translate_get_start, METH_O}, + {"unicode_encode_set_start", unicode_encode_set_start, METH_VARARGS}, + {"unicode_decode_set_start", unicode_decode_set_start, METH_VARARGS}, + {"unicode_translate_set_start", unicode_translate_set_start, METH_VARARGS}, + {"unicode_encode_get_end", unicode_encode_get_end, METH_O}, + {"unicode_decode_get_end", unicode_decode_get_end, METH_O}, + {"unicode_translate_get_end", unicode_translate_get_end, METH_O}, + {"unicode_encode_set_end", unicode_encode_set_end, METH_VARARGS}, + {"unicode_decode_set_end", unicode_decode_set_end, METH_VARARGS}, + {"unicode_translate_set_end", unicode_translate_set_end, METH_VARARGS}, {NULL}, }; diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 6fbe0f197eaebf..124b591ee3a13f 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2708,6 +2708,46 @@ set_unicodefromstring(PyObject **attr, const char *value) return 0; } +/* + * Adjust the (inclusive) 'start' value of a UnicodeError object. + * + * The 'start' can be negative or not, but when adjusting the value, + * we clip it in [0, max(0, objlen - 1)] but do not interpret it as + * a relative offset. + */ +static inline Py_ssize_t +unicode_error_adjust_start(Py_ssize_t start, Py_ssize_t objlen) +{ + assert(objlen >= 0); + if (start < 0) { + start = 0; + } + if (start >= objlen) { + start = objlen == 0 ? 0 : objlen - 1; + } + return start; +} + +/* + * Adjust the (exclusive) 'end' value of a UnicodeError object. + * + * The 'end' can be negative or not, but when adjusting the value, + * we clip it in [min(1, objlen), max(min(1, objlen), objlen)] but + * do not interpret it as a relative offset.
+ */ +static inline Py_ssize_t +unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen) +{ + assert(objlen >= 0); + if (end < 1) { + end = 1; + } + if (end > objlen) { + end = objlen; + } + return end; +} + PyObject * PyUnicodeEncodeError_GetEncoding(PyObject *exc) { @@ -2739,38 +2779,31 @@ PyUnicodeTranslateError_GetObject(PyObject *exc) } int -PyUnicodeEncodeError_GetStart(PyObject *exc, Py_ssize_t *start) +PyUnicodeEncodeError_GetStart(PyObject *self, Py_ssize_t *start) { - Py_ssize_t size; - PyObject *obj = get_unicode(((PyUnicodeErrorObject *)exc)->object, - "object"); - if (!obj) + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyObject *obj = get_unicode(exc->object, "object"); + if (obj == NULL) { return -1; - *start = ((PyUnicodeErrorObject *)exc)->start; - size = PyUnicode_GET_LENGTH(obj); - if (*start<0) - *start = 0; /*XXX check for values <0*/ - if (*start>=size) - *start = size-1; + } + Py_ssize_t size = PyUnicode_GET_LENGTH(obj); Py_DECREF(obj); + *start = unicode_error_adjust_start(exc->start, size); return 0; } int -PyUnicodeDecodeError_GetStart(PyObject *exc, Py_ssize_t *start) +PyUnicodeDecodeError_GetStart(PyObject *self, Py_ssize_t *start) { - Py_ssize_t size; - PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object, "object"); - if (!obj) + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyObject *obj = get_string(exc->object, "object"); + if (obj == NULL) { return -1; - size = PyBytes_GET_SIZE(obj); - *start = ((PyUnicodeErrorObject *)exc)->start; - if (*start<0) - *start = 0; - if (*start>=size) - *start = size-1; + } + Py_ssize_t size = PyBytes_GET_SIZE(obj); Py_DECREF(obj); + *start = unicode_error_adjust_start(exc->start, size); return 0; } @@ -2782,63 +2815,61 @@ PyUnicodeTranslateError_GetStart(PyObject *exc, Py_ssize_t *start) } +static inline int +unicode_error_set_start_impl(PyObject *self, Py_ssize_t start) +{ + ((PyUnicodeErrorObject *)self)->start = start; + return 0; +} + + int 
PyUnicodeEncodeError_SetStart(PyObject *exc, Py_ssize_t start) { - ((PyUnicodeErrorObject *)exc)->start = start; - return 0; + return unicode_error_set_start_impl(exc, start); } int PyUnicodeDecodeError_SetStart(PyObject *exc, Py_ssize_t start) { - ((PyUnicodeErrorObject *)exc)->start = start; - return 0; + return unicode_error_set_start_impl(exc, start); } int PyUnicodeTranslateError_SetStart(PyObject *exc, Py_ssize_t start) { - ((PyUnicodeErrorObject *)exc)->start = start; - return 0; + return unicode_error_set_start_impl(exc, start); } int -PyUnicodeEncodeError_GetEnd(PyObject *exc, Py_ssize_t *end) +PyUnicodeEncodeError_GetEnd(PyObject *self, Py_ssize_t *end) { - Py_ssize_t size; - PyObject *obj = get_unicode(((PyUnicodeErrorObject *)exc)->object, - "object"); - if (!obj) + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyObject *obj = get_unicode(exc->object, "object"); + if (obj == NULL) { return -1; - *end = ((PyUnicodeErrorObject *)exc)->end; - size = PyUnicode_GET_LENGTH(obj); - if (*end<1) - *end = 1; - if (*end>size) - *end = size; + } + Py_ssize_t size = PyUnicode_GET_LENGTH(obj); Py_DECREF(obj); + *end = unicode_error_adjust_end(exc->end, size); return 0; } int -PyUnicodeDecodeError_GetEnd(PyObject *exc, Py_ssize_t *end) +PyUnicodeDecodeError_GetEnd(PyObject *self, Py_ssize_t *end) { - Py_ssize_t size; - PyObject *obj = get_string(((PyUnicodeErrorObject *)exc)->object, "object"); - if (!obj) + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + PyObject *obj = get_string(exc->object, "object"); + if (obj == NULL) { return -1; - size = PyBytes_GET_SIZE(obj); - *end = ((PyUnicodeErrorObject *)exc)->end; - if (*end<1) - *end = 1; - if (*end>size) - *end = size; + } + Py_ssize_t size = PyBytes_GET_SIZE(obj); Py_DECREF(obj); + *end = unicode_error_adjust_end(exc->end, size); return 0; } @@ -2850,27 +2881,32 @@ PyUnicodeTranslateError_GetEnd(PyObject *exc, Py_ssize_t *end) } -int -PyUnicodeEncodeError_SetEnd(PyObject *exc, Py_ssize_t 
end) +static inline int +unicode_error_set_end_impl(PyObject *exc, Py_ssize_t end) { ((PyUnicodeErrorObject *)exc)->end = end; return 0; } +int +PyUnicodeEncodeError_SetEnd(PyObject *exc, Py_ssize_t end) +{ + return unicode_error_set_end_impl(exc, end); +} + + int PyUnicodeDecodeError_SetEnd(PyObject *exc, Py_ssize_t end) { - ((PyUnicodeErrorObject *)exc)->end = end; - return 0; + return unicode_error_set_end_impl(exc, end); } int PyUnicodeTranslateError_SetEnd(PyObject *exc, Py_ssize_t end) { - ((PyUnicodeErrorObject *)exc)->end = end; - return 0; + return unicode_error_set_end_impl(exc, end); } PyObject * @@ -2966,28 +3002,25 @@ static PyMemberDef UnicodeError_members[] = { static int UnicodeEncodeError_init(PyObject *self, PyObject *args, PyObject *kwds) { - PyUnicodeErrorObject *err; - - if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) + if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) { return -1; + } - err = (PyUnicodeErrorObject *)self; - - Py_CLEAR(err->encoding); - Py_CLEAR(err->object); - Py_CLEAR(err->reason); + PyObject *encoding = NULL, *object = NULL, *reason = NULL; // borrowed + Py_ssize_t start = -1, end = -1; if (!PyArg_ParseTuple(args, "UUnnU", - &err->encoding, &err->object, - &err->start, &err->end, &err->reason)) { - err->encoding = err->object = err->reason = NULL; + &encoding, &object, &start, &end, &reason)) + { return -1; } - Py_INCREF(err->encoding); - Py_INCREF(err->object); - Py_INCREF(err->reason); - + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + Py_XSETREF(exc->encoding, Py_NewRef(encoding)); + Py_XSETREF(exc->object, Py_NewRef(object)); + exc->start = start; + exc->end = end; + Py_XSETREF(exc->reason, Py_NewRef(reason)); return 0; } @@ -3073,44 +3106,42 @@ PyObject *PyExc_UnicodeEncodeError = (PyObject *)&_PyExc_UnicodeEncodeError; static int UnicodeDecodeError_init(PyObject *self, PyObject *args, PyObject *kwds) { - PyUnicodeErrorObject *ude; - - if 
(BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) + if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) { return -1; + } - ude = (PyUnicodeErrorObject *)self; - - Py_CLEAR(ude->encoding); - Py_CLEAR(ude->object); - Py_CLEAR(ude->reason); + PyObject *encoding = NULL, *object = NULL, *reason = NULL; // borrowed + Py_ssize_t start = -1, end = -1; if (!PyArg_ParseTuple(args, "UOnnU", - &ude->encoding, &ude->object, - &ude->start, &ude->end, &ude->reason)) { - ude->encoding = ude->object = ude->reason = NULL; - return -1; + &encoding, &object, &start, &end, &reason)) + { + return -1; } - Py_INCREF(ude->encoding); - Py_INCREF(ude->object); - Py_INCREF(ude->reason); - - if (!PyBytes_Check(ude->object)) { + if (PyBytes_Check(object)) { + Py_INCREF(object); // make 'object' a strong reference + } + else { Py_buffer view; - if (PyObject_GetBuffer(ude->object, &view, PyBUF_SIMPLE) != 0) - goto error; - Py_XSETREF(ude->object, PyBytes_FromStringAndSize(view.buf, view.len)); + if (PyObject_GetBuffer(object, &view, PyBUF_SIMPLE) != 0) { + return -1; + } + // 'object' is borrowed, so we can re-use the variable + object = PyBytes_FromStringAndSize(view.buf, view.len); PyBuffer_Release(&view); - if (!ude->object) - goto error; + if (object == NULL) { + return -1; + } } - return 0; -error: - Py_CLEAR(ude->encoding); - Py_CLEAR(ude->object); - Py_CLEAR(ude->reason); - return -1; + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + Py_XSETREF(exc->encoding, Py_NewRef(encoding)); + Py_XSETREF(exc->object, object /* already a strong reference */); + exc->start = start; + exc->end = end; + Py_XSETREF(exc->reason, Py_NewRef(reason)); + return 0; } static PyObject * @@ -3192,25 +3223,24 @@ PyUnicodeDecodeError_Create( */ static int -UnicodeTranslateError_init(PyUnicodeErrorObject *self, PyObject *args, - PyObject *kwds) +UnicodeTranslateError_init(PyObject *self, PyObject *args, PyObject *kwds) { - if 
(BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) + if (BaseException_init((PyBaseExceptionObject *)self, args, kwds) == -1) { return -1; + } - Py_CLEAR(self->object); - Py_CLEAR(self->reason); + PyObject *object = NULL, *reason = NULL; // borrowed + Py_ssize_t start = -1, end = -1; - if (!PyArg_ParseTuple(args, "UnnU", - &self->object, - &self->start, &self->end, &self->reason)) { - self->object = self->reason = NULL; + if (!PyArg_ParseTuple(args, "UnnU", &object, &start, &end, &reason)) { return -1; } - Py_INCREF(self->object); - Py_INCREF(self->reason); - + PyUnicodeErrorObject *exc = (PyUnicodeErrorObject *)self; + Py_XSETREF(exc->object, Py_NewRef(object)); + exc->start = start; + exc->end = end; + Py_XSETREF(exc->reason, Py_NewRef(reason)); return 0; } From 6bc3e830a518112a4e242217807681e3908602f4 Mon Sep 17 00:00:00 2001 From: "RUANG (James Roy)" Date: Wed, 4 Dec 2024 21:30:38 +0800 Subject: [PATCH 35/76] gh-127481: Add `EPOLLWAKEUP` to the `select` module (GH-127482) --- Doc/library/select.rst | 6 ++++++ .../Library/2024-12-01-23-18-43.gh-issue-127481.K36AoP.rst | 1 + Modules/selectmodule.c | 4 ++++ 3 files changed, 11 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-12-01-23-18-43.gh-issue-127481.K36AoP.rst diff --git a/Doc/library/select.rst b/Doc/library/select.rst index f23a249f44b485..4fcff9198944a8 100644 --- a/Doc/library/select.rst +++ b/Doc/library/select.rst @@ -317,11 +317,17 @@ Edge and Level Trigger Polling (epoll) Objects +-------------------------+-----------------------------------------------+ | :const:`EPOLLMSG` | Ignored. | +-------------------------+-----------------------------------------------+ + | :const:`EPOLLWAKEUP` | Prevents sleep during event waiting. | + +-------------------------+-----------------------------------------------+ .. versionadded:: 3.6 :const:`EPOLLEXCLUSIVE` was added. It's only supported by Linux Kernel 4.5 or later. + .. versionadded:: next + :const:`EPOLLWAKEUP` was added. 
It's only supported by Linux Kernel 3.5 + or later. + .. method:: epoll.close() Close the control file descriptor of the epoll object. diff --git a/Misc/NEWS.d/next/Library/2024-12-01-23-18-43.gh-issue-127481.K36AoP.rst b/Misc/NEWS.d/next/Library/2024-12-01-23-18-43.gh-issue-127481.K36AoP.rst new file mode 100644 index 00000000000000..8ada0b57ddc257 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-01-23-18-43.gh-issue-127481.K36AoP.rst @@ -0,0 +1 @@ +Add the ``EPOLLWAKEUP`` constant to the :mod:`select` module. diff --git a/Modules/selectmodule.c b/Modules/selectmodule.c index 6ced71cca73178..e14e114a6dafd0 100644 --- a/Modules/selectmodule.c +++ b/Modules/selectmodule.c @@ -2715,6 +2715,10 @@ _select_exec(PyObject *m) #ifdef EPOLLMSG ADD_INT(EPOLLMSG); #endif +#ifdef EPOLLWAKEUP + /* Kernel 3.5+ */ + ADD_INT(EPOLLWAKEUP); +#endif #ifdef EPOLL_CLOEXEC ADD_INT(EPOLL_CLOEXEC); From 51cfa569e379f84b3418db0971a71b1ef575a42b Mon Sep 17 00:00:00 2001 From: Beomsoo Kim Date: Thu, 5 Dec 2024 03:30:51 +0900 Subject: [PATCH 36/76] =?UTF-8?q?gh-127552:=20Remove=20comment=20questioni?= =?UTF-8?q?ng=204-digit=20restriction=20for=20=E2=80=98Y=E2=80=99=20in=20d?= =?UTF-8?q?atetime.strptime=20patterns=20(#127590)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The code has required 4 digits for the year since its inclusion in the stdlib in 2002 (over 22 years ago as of this commit). --- Lib/_strptime.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/Lib/_strptime.py b/Lib/_strptime.py index 5f4d2475c0169b..e6e23596db6f99 100644 --- a/Lib/_strptime.py +++ b/Lib/_strptime.py @@ -301,8 +301,6 @@ def __init__(self, locale_time=None): 'V': r"(?P5[0-3]|0[1-9]|[1-4]\d|\d)", # W is set below by using 'U' 'y': r"(?P\d\d)", - #XXX: Does 'Y' need to worry about having less or more than - # 4 digits? 
'Y': r"(?P\d\d\d\d)", 'z': r"(?P[+-]\d\d:?[0-5]\d(:?[0-5]\d(\.\d{1,6})?)?|(?-i:Z))", 'A': self.__seqToRE(self.locale_time.f_weekday, 'A'), From e51da64ac3bc6cd45339864db32d05115af39ead Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Wed, 4 Dec 2024 19:12:15 +0000 Subject: [PATCH 37/76] gh-127536: Add missing locks in listobject.c (GH-127580) We were missing locks around some list operations in the free threading build. --- ...-12-03-21-07-06.gh-issue-127536.3jMMrT.rst | 2 + Objects/listobject.c | 50 +++++++++++++++---- 2 files changed, 42 insertions(+), 10 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-12-03-21-07-06.gh-issue-127536.3jMMrT.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-12-03-21-07-06.gh-issue-127536.3jMMrT.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-03-21-07-06.gh-issue-127536.3jMMrT.rst new file mode 100644 index 00000000000000..6e2b87fe38146b --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-03-21-07-06.gh-issue-127536.3jMMrT.rst @@ -0,0 +1,2 @@ +Add missing locks around some list assignment operations in the free +threading build. diff --git a/Objects/listobject.c b/Objects/listobject.c index 8abe9e8933420b..3832295600a0ab 100644 --- a/Objects/listobject.c +++ b/Objects/listobject.c @@ -3,6 +3,7 @@ #include "Python.h" #include "pycore_abstract.h" // _PyIndex_Check() #include "pycore_ceval.h" // _PyEval_GetBuiltin() +#include "pycore_critical_section.h" // _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED() #include "pycore_dict.h" // _PyDictViewObject #include "pycore_freelist.h" // _Py_FREELIST_FREE(), _Py_FREELIST_POP() #include "pycore_pyatomic_ft_wrappers.h" @@ -72,6 +73,11 @@ static void ensure_shared_on_resize(PyListObject *self) { #ifdef Py_GIL_DISABLED + // We can't use _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED here because + // the `CALL_LIST_APPEND` bytecode handler may lock the list without + // a critical section. 
+ assert(Py_REFCNT(self) == 1 || PyMutex_IsLocked(&_PyObject_CAST(self)->ob_mutex)); + // Ensure that the list array is freed using QSBR if we are not the // owning thread. if (!_Py_IsOwnedByCurrentThread((PyObject *)self) && @@ -957,10 +963,12 @@ list_ass_slice(PyListObject *a, Py_ssize_t ilow, Py_ssize_t ihigh, PyObject *v) Py_ssize_t n = PyList_GET_SIZE(a); PyObject *copy = list_slice_lock_held(a, 0, n); if (copy == NULL) { - return -1; + ret = -1; + } + else { + ret = list_ass_slice_lock_held(a, ilow, ihigh, copy); + Py_DECREF(copy); } - ret = list_ass_slice_lock_held(a, ilow, ihigh, copy); - Py_DECREF(copy); Py_END_CRITICAL_SECTION(); } else if (v != NULL && PyList_CheckExact(v)) { @@ -1437,7 +1445,9 @@ PyList_Clear(PyObject *self) PyErr_BadInternalCall(); return -1; } + Py_BEGIN_CRITICAL_SECTION(self); list_clear((PyListObject*)self); + Py_END_CRITICAL_SECTION(); return 0; } @@ -3410,7 +3420,9 @@ list___init___impl(PyListObject *self, PyObject *iterable) /* Empty previous contents */ if (self->ob_item != NULL) { + Py_BEGIN_CRITICAL_SECTION(self); list_clear(self); + Py_END_CRITICAL_SECTION(); } if (iterable != NULL) { if (_list_extend(self, iterable) < 0) { @@ -3583,8 +3595,10 @@ adjust_slice_indexes(PyListObject *lst, } static int -list_ass_subscript(PyObject* _self, PyObject* item, PyObject* value) +list_ass_subscript_lock_held(PyObject *_self, PyObject *item, PyObject *value) { + _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(_self); + PyListObject *self = (PyListObject *)_self; if (_PyIndex_Check(item)) { Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); @@ -3592,7 +3606,7 @@ list_ass_subscript(PyObject* _self, PyObject* item, PyObject* value) return -1; if (i < 0) i += PyList_GET_SIZE(self); - return list_ass_item((PyObject *)self, i, value); + return list_ass_item_lock_held(self, i, value); } else if (PySlice_Check(item)) { Py_ssize_t start, stop, step; @@ -3612,7 +3626,7 @@ list_ass_subscript(PyObject* _self, PyObject* item, PyObject* value) step); 
if (step == 1) - return list_ass_slice(self, start, stop, value); + return list_ass_slice_lock_held(self, start, stop, value); if (slicelength <= 0) return 0; @@ -3678,10 +3692,8 @@ list_ass_subscript(PyObject* _self, PyObject* item, PyObject* value) /* protect against a[::-1] = a */ if (self == (PyListObject*)value) { - Py_BEGIN_CRITICAL_SECTION(value); - seq = list_slice_lock_held((PyListObject*)value, 0, + seq = list_slice_lock_held((PyListObject *)value, 0, Py_SIZE(value)); - Py_END_CRITICAL_SECTION(); } else { seq = PySequence_Fast(value, @@ -3695,7 +3707,7 @@ list_ass_subscript(PyObject* _self, PyObject* item, PyObject* value) step); if (step == 1) { - int res = list_ass_slice(self, start, stop, seq); + int res = list_ass_slice_lock_held(self, start, stop, seq); Py_DECREF(seq); return res; } @@ -3751,6 +3763,24 @@ list_ass_subscript(PyObject* _self, PyObject* item, PyObject* value) } } +static int +list_ass_subscript(PyObject *self, PyObject *item, PyObject *value) +{ + int res; +#ifdef Py_GIL_DISABLED + if (PySlice_Check(item) && value != NULL && PyList_CheckExact(value)) { + Py_BEGIN_CRITICAL_SECTION2(self, value); + res = list_ass_subscript_lock_held(self, item, value); + Py_END_CRITICAL_SECTION2(); + return res; + } +#endif + Py_BEGIN_CRITICAL_SECTION(self); + res = list_ass_subscript_lock_held(self, item, value); + Py_END_CRITICAL_SECTION(); + return res; +} + static PyMappingMethods list_as_mapping = { list_length, list_subscript, From 7c5a6f67c726608a05a640e76fc62cfbae986a03 Mon Sep 17 00:00:00 2001 From: Diego Russo Date: Wed, 4 Dec 2024 22:12:06 +0000 Subject: [PATCH 38/76] Enable native AArch64 Ubuntu CI jobs (#127584) Co-authored-by: Brandt Bucher --- .github/workflows/build.yml | 9 +++ .github/workflows/jit.yml | 31 ++------- .github/workflows/reusable-ubuntu.yml | 16 ++--- Tools/jit/ignore-tests-emulated-linux.txt | 85 ----------------------- 4 files changed, 24 insertions(+), 117 deletions(-) delete mode 100644 
Tools/jit/ignore-tests-emulated-linux.txt diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 55effee0e1e393..9b2f19fd6bcf54 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -235,10 +235,19 @@ jobs: free-threading: - false - true + os: + - ubuntu-24.04 + - ubuntu-24.04-aarch64 + is-fork: # only used for the exclusion trick + - ${{ github.repository_owner != 'python' }} + exclude: + - os: ubuntu-24.04-aarch64 + is-fork: true uses: ./.github/workflows/reusable-ubuntu.yml with: config_hash: ${{ needs.check_source.outputs.config_hash }} free-threading: ${{ matrix.free-threading }} + os: ${{ matrix.os }} build_ubuntu_ssltests: name: 'Ubuntu SSL tests with OpenSSL' diff --git a/.github/workflows/jit.yml b/.github/workflows/jit.yml index 4ef543d7369734..ee30cf5786d55b 100644 --- a/.github/workflows/jit.yml +++ b/.github/workflows/jit.yml @@ -28,7 +28,7 @@ concurrency: jobs: interpreter: name: Interpreter (Debug) - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 timeout-minutes: 90 steps: - uses: actions/checkout@v4 @@ -78,10 +78,11 @@ jobs: runner: macos-14 - target: x86_64-unknown-linux-gnu/gcc architecture: x86_64 - runner: ubuntu-22.04 + runner: ubuntu-24.04 - target: aarch64-unknown-linux-gnu/gcc architecture: aarch64 - runner: ubuntu-22.04 + # Forks don't have access to our paid AArch64 runners. These jobs are skipped below: + runner: ${{ github.repository_owner == 'python' && 'ubuntu-24.04-aarch64' || 'ubuntu-24.04' }} steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -118,7 +119,8 @@ jobs: ./python.exe -m test --multiprocess 0 --timeout 4500 --verbose2 --verbose3 - name: Native Linux - if: runner.os == 'Linux' && matrix.architecture == 'x86_64' + # Forks don't have access to our paid AArch64 runners. 
Skip those: + if: runner.os == 'Linux' && (matrix.architecture == 'x86_64' || github.repository_owner == 'python') run: | sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ./llvm.sh ${{ matrix.llvm }} export PATH="$(llvm-config-${{ matrix.llvm }} --bindir):$PATH" @@ -126,29 +128,10 @@ jobs: make all --jobs 4 ./python -m test --multiprocess 0 --timeout 4500 --verbose2 --verbose3 - - name: Emulated Linux - if: runner.os == 'Linux' && matrix.architecture != 'x86_64' - # The --ignorefile on ./python -m test is used to exclude tests known to fail when running on an emulated Linux. - run: | - sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ./llvm.sh ${{ matrix.llvm }} - export PATH="$(llvm-config-${{ matrix.llvm }} --bindir):$PATH" - ./configure --prefix="$(pwd)/../build" - make install --jobs 4 - make clean --jobs 4 - export HOST=${{ matrix.architecture }}-linux-gnu - sudo apt install --yes "gcc-$HOST" qemu-user - export QEMU_LD_PREFIX="/usr/$HOST" - CC="$HOST-gcc" \ - CPP="$HOST-gcc --preprocess" \ - HOSTRUNNER=qemu-${{ matrix.architecture }} \ - ./configure --enable-experimental-jit ${{ matrix.debug && '--with-pydebug' || '' }} --build=x86_64-linux-gnu --host="$HOST" --with-build-python=../build/bin/python3 --with-pkg-config=no ac_cv_buggy_getaddrinfo=no ac_cv_file__dev_ptc=no ac_cv_file__dev_ptmx=yes - make all --jobs 4 - ./python -m test --ignorefile=Tools/jit/ignore-tests-emulated-linux.txt --multiprocess 0 --timeout 4500 --verbose2 --verbose3 - jit-with-disabled-gil: name: Free-Threaded (Debug) needs: interpreter - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 strategy: matrix: llvm: diff --git a/.github/workflows/reusable-ubuntu.yml b/.github/workflows/reusable-ubuntu.yml index 642354f8b4f61b..2869202c7910c9 100644 --- a/.github/workflows/reusable-ubuntu.yml +++ b/.github/workflows/reusable-ubuntu.yml @@ -11,16 +11,16 @@ on: required: false type: boolean default: false + os: + description: OS to run the job + required: true + type: string jobs: 
build_ubuntu_reusable: - name: 'build and test' + name: build and test (${{ inputs.os }}) timeout-minutes: 60 - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-24.04, ubuntu-24.04-aarch64] + runs-on: ${{ inputs.os }} env: FORCE_COLOR: 1 OPENSSL_VER: 3.0.15 @@ -42,7 +42,7 @@ jobs: uses: actions/cache@v4 with: path: ./multissl/openssl/${{ env.OPENSSL_VER }} - key: ${{ matrix.os }}-multissl-openssl-${{ env.OPENSSL_VER }} + key: ${{ inputs.os }}-multissl-openssl-${{ env.OPENSSL_VER }} - name: Install OpenSSL if: steps.cache-openssl.outputs.cache-hit != 'true' run: python3 Tools/ssl/multissltests.py --steps=library --base-directory "$MULTISSL_DIR" --openssl "$OPENSSL_VER" --system Linux @@ -84,7 +84,7 @@ jobs: working-directory: ${{ env.CPYTHON_BUILDDIR }} run: make -j - name: Build CPython out-of-tree (for compiler warning check) - if: ${{ !inputs.free-threading}} + if: ${{ !inputs.free-threading }} working-directory: ${{ env.CPYTHON_BUILDDIR }} run: set -o pipefail; make -j --output-sync 2>&1 | tee compiler_output_ubuntu.txt - name: Display build info diff --git a/Tools/jit/ignore-tests-emulated-linux.txt b/Tools/jit/ignore-tests-emulated-linux.txt deleted file mode 100644 index 080a569574470c..00000000000000 --- a/Tools/jit/ignore-tests-emulated-linux.txt +++ /dev/null @@ -1,85 +0,0 @@ -test_multiprocessing_fork -test_strftime_y2k -test.test_asyncio.test_unix_events.TestFork.test_fork_asyncio_run -test.test_asyncio.test_unix_events.TestFork.test_fork_asyncio_subprocess -test.test_asyncio.test_unix_events.TestFork.test_fork_signal_handling -test.test_cmd_line.CmdLineTest.test_no_std_streams -test.test_cmd_line.CmdLineTest.test_no_stdin -test.test_concurrent_futures.test_init.ProcessPoolForkFailingInitializerTest.test_initializer -test.test_concurrent_futures.test_process_pool.ProcessPoolForkProcessPoolExecutorTest.test_ressources_gced_in_workers -test.test_external_inspection.TestGetStackTrace.test_remote_stack_trace 
-test.test_external_inspection.TestGetStackTrace.test_self_trace -test.test_faulthandler.FaultHandlerTests.test_enable_fd -test.test_faulthandler.FaultHandlerTests.test_enable_file -test.test_init.ProcessPoolForkFailingInitializerTest.test_initializer -test.test_logging.ConfigDictTest.test_111615 -test.test_logging.ConfigDictTest.test_config_queue_handler -test.test_logging.ConfigDictTest.test_multiprocessing_queues -test.test_logging.ConfigDictTest.test_config_queue_handler_multiprocessing_context -test.test_os.ForkTests.test_fork_warns_when_non_python_thread_exists -test.test_os.TimerfdTests.test_timerfd_initval -test.test_os.TimerfdTests.test_timerfd_interval -test.test_os.TimerfdTests.test_timerfd_TFD_TIMER_ABSTIME -test.test_pathlib.PathSubclassTest.test_is_mount_root -test.test_pathlib.PathTest.test_is_mount_root -test.test_pathlib.PosixPathTest.test_is_mount_root -test.test_pathlib.test_pathlib.PathSubclassTest.test_is_mount_root -test.test_pathlib.test_pathlib.PathTest.test_is_mount_root -test.test_pathlib.test_pathlib.PosixPathTest.test_is_mount_root -test.test_posix.TestPosixSpawn.test_close_file -test.test_posix.TestPosixSpawnP.test_close_file -test.test_posixpath.PosixPathTest.test_ismount -test.test_signal.StressTest.test_stress_modifying_handlers -test.test_socket.BasicCANTest.testFilter -test.test_socket.BasicCANTest.testLoopback -test.test_socket.LinuxKernelCryptoAPI.test_aead_aes_gcm -test.test_socket.LinuxKernelCryptoAPI.test_aes_cbc -test.test_socket.RecvmsgIntoRFC3542AncillaryUDP6Test.testSecondCmsgTrunc1 -test.test_socket.RecvmsgIntoRFC3542AncillaryUDP6Test.testSecondCmsgTrunc2Int -test.test_socket.RecvmsgIntoRFC3542AncillaryUDP6Test.testSecondCmsgTruncInData -test.test_socket.RecvmsgIntoRFC3542AncillaryUDP6Test.testSecondCmsgTruncLen0Minus1 -test.test_socket.RecvmsgIntoRFC3542AncillaryUDP6Test.testSingleCmsgTruncInData -test.test_socket.RecvmsgIntoRFC3542AncillaryUDP6Test.testSingleCmsgTruncLen0Minus1 
-test.test_socket.RecvmsgIntoRFC3542AncillaryUDPLITE6Test.testSecondCmsgTrunc1 -test.test_socket.RecvmsgIntoRFC3542AncillaryUDPLITE6Test.testSecondCmsgTrunc2Int -test.test_socket.RecvmsgIntoRFC3542AncillaryUDPLITE6Test.testSecondCmsgTruncInData -test.test_socket.RecvmsgIntoRFC3542AncillaryUDPLITE6Test.testSecondCmsgTruncLen0Minus1 -test.test_socket.RecvmsgIntoRFC3542AncillaryUDPLITE6Test.testSingleCmsgTruncInData -test.test_socket.RecvmsgIntoRFC3542AncillaryUDPLITE6Test.testSingleCmsgTruncLen0Minus1 -test.test_socket.RecvmsgIntoSCMRightsStreamTest.testCmsgTruncLen0 -test.test_socket.RecvmsgIntoSCMRightsStreamTest.testCmsgTruncLen0Minus1 -test.test_socket.RecvmsgIntoSCMRightsStreamTest.testCmsgTruncLen0Plus1 -test.test_socket.RecvmsgIntoSCMRightsStreamTest.testCmsgTruncLen1 -test.test_socket.RecvmsgIntoSCMRightsStreamTest.testCmsgTruncLen2Minus1 -test.test_socket.RecvmsgRFC3542AncillaryUDP6Test.testSecondCmsgTrunc1 -test.test_socket.RecvmsgRFC3542AncillaryUDP6Test.testSecondCmsgTrunc2Int -test.test_socket.RecvmsgRFC3542AncillaryUDP6Test.testSecondCmsgTruncInData -test.test_socket.RecvmsgRFC3542AncillaryUDP6Test.testSecondCmsgTruncLen0Minus1 -test.test_socket.RecvmsgRFC3542AncillaryUDP6Test.testSingleCmsgTruncInData -test.test_socket.RecvmsgRFC3542AncillaryUDP6Test.testSingleCmsgTruncLen0Minus1 -test.test_socket.RecvmsgRFC3542AncillaryUDPLITE6Test.testSecondCmsgTrunc1 -test.test_socket.RecvmsgRFC3542AncillaryUDPLITE6Test.testSecondCmsgTrunc2Int -test.test_socket.RecvmsgRFC3542AncillaryUDPLITE6Test.testSecondCmsgTruncInData -test.test_socket.RecvmsgRFC3542AncillaryUDPLITE6Test.testSecondCmsgTruncLen0Minus1 -test.test_socket.RecvmsgRFC3542AncillaryUDPLITE6Test.testSingleCmsgTruncInData -test.test_socket.RecvmsgRFC3542AncillaryUDPLITE6Test.testSingleCmsgTruncLen0Minus1 -test.test_socket.RecvmsgRFC3542AncillaryUDPLITE6Test.testSingleCmsgTruncLen0Minus1 -test.test_socket.RecvmsgSCMRightsStreamTest.testCmsgTruncLen0 
-test.test_socket.RecvmsgSCMRightsStreamTest.testCmsgTruncLen0Minus1 -test.test_socket.RecvmsgSCMRightsStreamTest.testCmsgTruncLen0Plus1 -test.test_socket.RecvmsgSCMRightsStreamTest.testCmsgTruncLen1 -test.test_socket.RecvmsgSCMRightsStreamTest.testCmsgTruncLen2Minus1 -test.test_subprocess.POSIXProcessTestCase.test_exception_bad_args_0 -test.test_subprocess.POSIXProcessTestCase.test_exception_bad_executable -test.test_subprocess.POSIXProcessTestCase.test_vfork_used_when_expected -test.test_subprocess.ProcessTestCase.test_cwd_with_relative_arg -test.test_subprocess.ProcessTestCase.test_cwd_with_relative_executable -test.test_subprocess.ProcessTestCase.test_empty_env -test.test_subprocess.ProcessTestCase.test_file_not_found_includes_filename -test.test_subprocess.ProcessTestCase.test_one_environment_variable -test.test_subprocess.ProcessTestCaseNoPoll.test_cwd_with_relative_arg -test.test_subprocess.ProcessTestCaseNoPoll.test_cwd_with_relative_executable -test.test_subprocess.ProcessTestCaseNoPoll.test_empty_env -test.test_subprocess.ProcessTestCaseNoPoll.test_file_not_found_includes_filename -test.test_subprocess.ProcessTestCaseNoPoll.test_one_environment_variable -test.test_venv.BasicTest.test_zippath_from_non_installed_posix From 94b8f8b40943bf38cf5c454773a3fb8f4ff71e01 Mon Sep 17 00:00:00 2001 From: Brandt Bucher Date: Wed, 4 Dec 2024 15:01:28 -0800 Subject: [PATCH 39/76] GH-126795: Increase the JIT side-exit threshold from 64 to 4096 (GH-127155) --- Include/internal/pycore_backoff.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Include/internal/pycore_backoff.h b/Include/internal/pycore_backoff.h index 3e02728522828e..b5e33fa8b7abc0 100644 --- a/Include/internal/pycore_backoff.h +++ b/Include/internal/pycore_backoff.h @@ -115,10 +115,9 @@ initial_jump_backoff_counter(void) /* Initial exit temperature. 
* Must be larger than ADAPTIVE_COOLDOWN_VALUE, * otherwise when a side exit warms up we may construct - * a new trace before the Tier 1 code has properly re-specialized. - * Backoff sequence 64, 128, 256, 512, 1024, 2048, 4096. */ -#define SIDE_EXIT_INITIAL_VALUE 63 -#define SIDE_EXIT_INITIAL_BACKOFF 6 + * a new trace before the Tier 1 code has properly re-specialized. */ +#define SIDE_EXIT_INITIAL_VALUE 4095 +#define SIDE_EXIT_INITIAL_BACKOFF 12 static inline _Py_BackoffCounter initial_temperature_backoff_counter(void) From 2f1cee8477e22bfc36a704310e4c0f409357e7e9 Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Thu, 5 Dec 2024 01:25:06 +0100 Subject: [PATCH 40/76] gh-127111: Apply prettier formatter to Emscripten web example (#127551) Cleaned up formatting (and a stray closing tag) of the web example HTML and JS. --- Tools/wasm/emscripten/web_example/python.html | 782 ++++++++++-------- .../emscripten/web_example/python.worker.mjs | 175 ++-- 2 files changed, 511 insertions(+), 446 deletions(-) diff --git a/Tools/wasm/emscripten/web_example/python.html b/Tools/wasm/emscripten/web_example/python.html index fae1e9ad4e8acb..078f86eb764419 100644 --- a/Tools/wasm/emscripten/web_example/python.html +++ b/Tools/wasm/emscripten/web_example/python.html @@ -1,373 +1,433 @@ - + - - - - - - - wasm-python terminal - - - - - - -

Simple REPL for Python WASM

- -
- - - - -
-
-
- The simple REPL provides a limited Python experience in the browser. - - Tools/wasm/README.md contains a list of known limitations and - issues. Networking, subprocesses, and threading are not available. -
- + +
+ + + + +
+
+
+ The simple REPL provides a limited Python experience in the browser. + + Tools/wasm/README.md + + contains a list of known limitations and issues. Networking, + subprocesses, and threading are not available. +
+ diff --git a/Tools/wasm/emscripten/web_example/python.worker.mjs b/Tools/wasm/emscripten/web_example/python.worker.mjs index 42c2e1e08af24b..8043e419966743 100644 --- a/Tools/wasm/emscripten/web_example/python.worker.mjs +++ b/Tools/wasm/emscripten/web_example/python.worker.mjs @@ -1,104 +1,109 @@ import createEmscriptenModule from "./python.mjs"; class StdinBuffer { - constructor() { - this.sab = new SharedArrayBuffer(128 * Int32Array.BYTES_PER_ELEMENT) - this.buffer = new Int32Array(this.sab) - this.readIndex = 1; - this.numberOfCharacters = 0; - this.sentNull = true - } + constructor() { + this.sab = new SharedArrayBuffer(128 * Int32Array.BYTES_PER_ELEMENT); + this.buffer = new Int32Array(this.sab); + this.readIndex = 1; + this.numberOfCharacters = 0; + this.sentNull = true; + } - prompt() { - this.readIndex = 1 - Atomics.store(this.buffer, 0, -1) - postMessage({ - type: 'stdin', - buffer: this.sab - }) - Atomics.wait(this.buffer, 0, -1) - this.numberOfCharacters = this.buffer[0] - } + prompt() { + this.readIndex = 1; + Atomics.store(this.buffer, 0, -1); + postMessage({ + type: "stdin", + buffer: this.sab, + }); + Atomics.wait(this.buffer, 0, -1); + this.numberOfCharacters = this.buffer[0]; + } - stdin = () => { - while (this.numberOfCharacters + 1 === this.readIndex) { - if (!this.sentNull) { - // Must return null once to indicate we're done for now. - this.sentNull = true - return null - } - this.sentNull = false - // Prompt will reset this.readIndex to 1 - this.prompt() - } - const char = this.buffer[this.readIndex] - this.readIndex += 1 - return char + stdin = () => { + while (this.numberOfCharacters + 1 === this.readIndex) { + if (!this.sentNull) { + // Must return null once to indicate we're done for now. 
+ this.sentNull = true; + return null; + } + this.sentNull = false; + // Prompt will reset this.readIndex to 1 + this.prompt(); } + const char = this.buffer[this.readIndex]; + this.readIndex += 1; + return char; + }; } const stdout = (charCode) => { - if (charCode) { - postMessage({ - type: 'stdout', - stdout: charCode, - }) - } else { - console.log(typeof charCode, charCode) - } -} + if (charCode) { + postMessage({ + type: "stdout", + stdout: charCode, + }); + } else { + console.log(typeof charCode, charCode); + } +}; const stderr = (charCode) => { - if (charCode) { - postMessage({ - type: 'stderr', - stderr: charCode, - }) - } else { - console.log(typeof charCode, charCode) - } -} + if (charCode) { + postMessage({ + type: "stderr", + stderr: charCode, + }); + } else { + console.log(typeof charCode, charCode); + } +}; -const stdinBuffer = new StdinBuffer() +const stdinBuffer = new StdinBuffer(); const emscriptenSettings = { - noInitialRun: true, - stdin: stdinBuffer.stdin, - stdout: stdout, - stderr: stderr, - onRuntimeInitialized: () => { - postMessage({type: 'ready', stdinBuffer: stdinBuffer.sab}) - }, - async preRun(Module) { - const versionHex = Module.HEAPU32[Module._Py_Version/4].toString(16); - const versionTuple = versionHex.padStart(8, "0").match(/.{1,2}/g).map((x) => parseInt(x, 16)); - const [major, minor, ..._] = versionTuple; - // Prevent complaints about not finding exec-prefix by making a lib-dynload directory - Module.FS.mkdirTree(`/lib/python${major}.${minor}/lib-dynload/`); - Module.addRunDependency("install-stdlib"); - const resp = await fetch(`python${major}.${minor}.zip`); - const stdlibBuffer = await resp.arrayBuffer(); - Module.FS.writeFile(`/lib/python${major}${minor}.zip`, new Uint8Array(stdlibBuffer), { canOwn: true }); - Module.removeRunDependency("install-stdlib"); - } -} + noInitialRun: true, + stdin: stdinBuffer.stdin, + stdout: stdout, + stderr: stderr, + onRuntimeInitialized: () => { + postMessage({ type: "ready", stdinBuffer: 
stdinBuffer.sab }); + }, + async preRun(Module) { + const versionHex = Module.HEAPU32[Module._Py_Version / 4].toString(16); + const versionTuple = versionHex + .padStart(8, "0") + .match(/.{1,2}/g) + .map((x) => parseInt(x, 16)); + const [major, minor, ..._] = versionTuple; + // Prevent complaints about not finding exec-prefix by making a lib-dynload directory + Module.FS.mkdirTree(`/lib/python${major}.${minor}/lib-dynload/`); + Module.addRunDependency("install-stdlib"); + const resp = await fetch(`python${major}.${minor}.zip`); + const stdlibBuffer = await resp.arrayBuffer(); + Module.FS.writeFile( + `/lib/python${major}${minor}.zip`, + new Uint8Array(stdlibBuffer), + { canOwn: true }, + ); + Module.removeRunDependency("install-stdlib"); + }, +}; const modulePromise = createEmscriptenModule(emscriptenSettings); - onmessage = async (event) => { - if (event.data.type === 'run') { - const Module = await modulePromise; - if (event.data.files) { - for (const [filename, contents] of Object.entries(event.data.files)) { - Module.FS.writeFile(filename, contents) - } - } - const ret = Module.callMain(event.data.args); - postMessage({ - type: 'finished', - returnCode: ret - }) + if (event.data.type === "run") { + const Module = await modulePromise; + if (event.data.files) { + for (const [filename, contents] of Object.entries(event.data.files)) { + Module.FS.writeFile(filename, contents); + } } -} - + const ret = Module.callMain(event.data.args); + postMessage({ + type: "finished", + returnCode: ret, + }); + } +}; From 43634fc1fcc88b35171aa79258f767ba6477f764 Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Thu, 5 Dec 2024 01:26:25 +0100 Subject: [PATCH 41/76] gh-127146: Emscripten: Skip segfaults in test suite (#127151) Added skips for tests known to cause problems when running on Emscripten. These mostly relate to the limited stack depth on Emscripten. 
--- Lib/test/list_tests.py | 3 ++- Lib/test/mapping_tests.py | 3 ++- Lib/test/support/__init__.py | 3 +++ Lib/test/test_ast/test_ast.py | 5 ++++- Lib/test/test_call.py | 3 ++- Lib/test/test_capi/test_misc.py | 1 + Lib/test/test_class.py | 3 ++- Lib/test/test_compile.py | 2 ++ Lib/test/test_copy.py | 3 +++ Lib/test/test_descr.py | 3 +++ Lib/test/test_dict.py | 1 + Lib/test/test_dictviews.py | 3 ++- Lib/test/test_exception_group.py | 4 +++- Lib/test/test_functools.py | 2 ++ Lib/test/test_isinstance.py | 3 +++ Lib/test/test_json/test_recursion.py | 3 +++ Lib/test/test_pathlib/test_pathlib_abc.py | 4 +++- Lib/test/test_traceback.py | 2 ++ Lib/test/test_xml_etree_c.py | 1 + configure | 1 + configure.ac | 1 + 21 files changed, 46 insertions(+), 8 deletions(-) diff --git a/Lib/test/list_tests.py b/Lib/test/list_tests.py index dbc5ef4f9f2cd5..dbd9f27872962d 100644 --- a/Lib/test/list_tests.py +++ b/Lib/test/list_tests.py @@ -6,7 +6,7 @@ from functools import cmp_to_key from test import seq_tests -from test.support import ALWAYS_EQ, NEVER_EQ, get_c_recursion_limit +from test.support import ALWAYS_EQ, NEVER_EQ, get_c_recursion_limit, skip_emscripten_stack_overflow class CommonTest(seq_tests.CommonTest): @@ -59,6 +59,7 @@ def test_repr(self): self.assertEqual(str(a2), "[0, 1, 2, [...], 3]") self.assertEqual(repr(a2), "[0, 1, 2, [...], 3]") + @skip_emscripten_stack_overflow() def test_repr_deep(self): a = self.type2test([]) for i in range(get_c_recursion_limit() + 1): diff --git a/Lib/test/mapping_tests.py b/Lib/test/mapping_tests.py index ed89a81a6ea685..f249f0021e9c1c 100644 --- a/Lib/test/mapping_tests.py +++ b/Lib/test/mapping_tests.py @@ -1,7 +1,7 @@ # tests common to dict and UserDict import unittest import collections -from test.support import get_c_recursion_limit +from test.support import get_c_recursion_limit, skip_emscripten_stack_overflow class BasicTestMappingProtocol(unittest.TestCase): @@ -622,6 +622,7 @@ def __repr__(self): d = self._full_mapping({1: 
BadRepr()}) self.assertRaises(Exc, repr, d) + @skip_emscripten_stack_overflow() def test_repr_deep(self): d = self._empty_mapping() for i in range(get_c_recursion_limit() + 1): diff --git a/Lib/test/support/__init__.py b/Lib/test/support/__init__.py index 2ad267e3e08f0f..5c738ffaa27713 100644 --- a/Lib/test/support/__init__.py +++ b/Lib/test/support/__init__.py @@ -535,6 +535,9 @@ def skip_android_selinux(name): is_emscripten = sys.platform == "emscripten" is_wasi = sys.platform == "wasi" +def skip_emscripten_stack_overflow(): + return unittest.skipIf(is_emscripten, "Exhausts limited stack on Emscripten") + is_apple_mobile = sys.platform in {"ios", "tvos", "watchos"} is_apple = is_apple_mobile or sys.platform == "darwin" diff --git a/Lib/test/test_ast/test_ast.py b/Lib/test/test_ast/test_ast.py index 67ab8cf6baf657..c268a1f00f938e 100644 --- a/Lib/test/test_ast/test_ast.py +++ b/Lib/test/test_ast/test_ast.py @@ -18,7 +18,7 @@ _testinternalcapi = None from test import support -from test.support import os_helper, script_helper +from test.support import os_helper, script_helper, skip_emscripten_stack_overflow from test.support.ast_helper import ASTTestMixin from test.test_ast.utils import to_tuple from test.test_ast.snippets import ( @@ -745,6 +745,7 @@ def next(self): enum._test_simple_enum(_Precedence, ast._Precedence) @support.cpython_only + @skip_emscripten_stack_overflow() def test_ast_recursion_limit(self): fail_depth = support.exceeds_recursion_limit() crash_depth = 100_000 @@ -1661,6 +1662,7 @@ def test_level_as_none(self): exec(code, ns) self.assertIn('sleep', ns) + @skip_emscripten_stack_overflow() def test_recursion_direct(self): e = ast.UnaryOp(op=ast.Not(), lineno=0, col_offset=0, operand=ast.Constant(1)) e.operand = e @@ -1668,6 +1670,7 @@ def test_recursion_direct(self): with support.infinite_recursion(): compile(ast.Expression(e), "", "eval") + @skip_emscripten_stack_overflow() def test_recursion_indirect(self): e = ast.UnaryOp(op=ast.Not(), lineno=0, 
col_offset=0, operand=ast.Constant(1)) f = ast.UnaryOp(op=ast.Not(), lineno=0, col_offset=0, operand=ast.Constant(1)) diff --git a/Lib/test/test_call.py b/Lib/test/test_call.py index 9d5256b566b8af..78a706436aea0e 100644 --- a/Lib/test/test_call.py +++ b/Lib/test/test_call.py @@ -1,6 +1,6 @@ import unittest from test.support import (cpython_only, is_wasi, requires_limited_api, Py_DEBUG, - set_recursion_limit, skip_on_s390x) + set_recursion_limit, skip_on_s390x, skip_emscripten_stack_overflow) try: import _testcapi except ImportError: @@ -1038,6 +1038,7 @@ class TestRecursion(unittest.TestCase): @skip_on_s390x @unittest.skipIf(is_wasi and Py_DEBUG, "requires deep stack") @unittest.skipIf(_testcapi is None, "requires _testcapi") + @skip_emscripten_stack_overflow() def test_super_deep(self): def recurse(n): diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index 80e705a37c4c5e..8e0271919cc8a5 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -2137,6 +2137,7 @@ def test_py_config_isoloated_per_interpreter(self): # test fails, assume that the environment in this process may # be altered and suspect. + @requires_subinterpreters @unittest.skipUnless(hasattr(os, "pipe"), "requires os.pipe()") def test_configured_settings(self): """ diff --git a/Lib/test/test_class.py b/Lib/test/test_class.py index 7720cf157fa9ae..e20e59944e9ce9 100644 --- a/Lib/test/test_class.py +++ b/Lib/test/test_class.py @@ -1,7 +1,7 @@ "Test the functionality of Python classes implementing operators." 
import unittest -from test.support import cpython_only, import_helper, script_helper +from test.support import cpython_only, import_helper, script_helper, skip_emscripten_stack_overflow testmeths = [ @@ -554,6 +554,7 @@ class Custom: self.assertFalse(hasattr(o, "__call__")) self.assertFalse(hasattr(c, "__call__")) + @skip_emscripten_stack_overflow() def testSFBug532646(self): # Test for SF bug 532646 diff --git a/Lib/test/test_compile.py b/Lib/test/test_compile.py index f7ea923ef17672..b5cf2ad18fe60b 100644 --- a/Lib/test/test_compile.py +++ b/Lib/test/test_compile.py @@ -121,6 +121,7 @@ def __getitem__(self, key): self.assertEqual(d['z'], 12) @unittest.skipIf(support.is_wasi, "exhausts limited stack on WASI") + @support.skip_emscripten_stack_overflow() def test_extended_arg(self): repeat = int(get_c_recursion_limit() * 0.9) longexpr = 'x = x or ' + '-x' * repeat @@ -709,6 +710,7 @@ def test_yet_more_evil_still_undecodable(self): @support.cpython_only @unittest.skipIf(support.is_wasi, "exhausts limited stack on WASI") + @support.skip_emscripten_stack_overflow() def test_compiler_recursion_limit(self): # Expected limit is Py_C_RECURSION_LIMIT limit = get_c_recursion_limit() diff --git a/Lib/test/test_copy.py b/Lib/test/test_copy.py index 3dec64cc9a2414..d76341417e9bef 100644 --- a/Lib/test/test_copy.py +++ b/Lib/test/test_copy.py @@ -371,6 +371,7 @@ def test_deepcopy_list(self): self.assertIsNot(x, y) self.assertIsNot(x[0], y[0]) + @support.skip_emscripten_stack_overflow() def test_deepcopy_reflexive_list(self): x = [] x.append(x) @@ -398,6 +399,7 @@ def test_deepcopy_tuple_of_immutables(self): y = copy.deepcopy(x) self.assertIs(x, y) + @support.skip_emscripten_stack_overflow() def test_deepcopy_reflexive_tuple(self): x = ([],) x[0].append(x) @@ -415,6 +417,7 @@ def test_deepcopy_dict(self): self.assertIsNot(x, y) self.assertIsNot(x["foo"], y["foo"]) + @support.skip_emscripten_stack_overflow() def test_deepcopy_reflexive_dict(self): x = {} x['foo'] = x diff --git 
a/Lib/test/test_descr.py b/Lib/test/test_descr.py index aa801b9c4f7ad9..168b78a477ee9c 100644 --- a/Lib/test/test_descr.py +++ b/Lib/test/test_descr.py @@ -3663,6 +3663,7 @@ def f(a): return a encoding='latin1', errors='replace') self.assertEqual(ba, b'abc\xbd?') + @support.skip_emscripten_stack_overflow() def test_recursive_call(self): # Testing recursive __call__() by setting to instance of class... class A(object): @@ -3942,6 +3943,7 @@ def __del__(self): # it as a leak. del C.__del__ + @unittest.skipIf(support.is_emscripten, "Seems to works in Pyodide?") def test_slots_trash(self): # Testing slot trash... # Deallocating deeply nested slotted trash caused stack overflows @@ -4864,6 +4866,7 @@ class Thing: # CALL_METHOD_DESCRIPTOR_O deque.append(thing, thing) + @support.skip_emscripten_stack_overflow() def test_repr_as_str(self): # Issue #11603: crash or infinite loop when rebinding __str__ as # __repr__. diff --git a/Lib/test/test_dict.py b/Lib/test/test_dict.py index c94dc2df4f0a7f..86b2f22dee5347 100644 --- a/Lib/test/test_dict.py +++ b/Lib/test/test_dict.py @@ -594,6 +594,7 @@ def __repr__(self): d = {1: BadRepr()} self.assertRaises(Exc, repr, d) + @support.skip_emscripten_stack_overflow() def test_repr_deep(self): d = {} for i in range(get_c_recursion_limit() + 1): diff --git a/Lib/test/test_dictviews.py b/Lib/test/test_dictviews.py index d9881611c19c43..d6bf00eeeb0013 100644 --- a/Lib/test/test_dictviews.py +++ b/Lib/test/test_dictviews.py @@ -2,7 +2,7 @@ import copy import pickle import unittest -from test.support import get_c_recursion_limit +from test.support import get_c_recursion_limit, skip_emscripten_stack_overflow class DictSetTest(unittest.TestCase): @@ -277,6 +277,7 @@ def test_recursive_repr(self): # Again. 
self.assertIsInstance(r, str) + @skip_emscripten_stack_overflow() def test_deeply_nested_repr(self): d = {} for i in range(get_c_recursion_limit()//2 + 100): diff --git a/Lib/test/test_exception_group.py b/Lib/test/test_exception_group.py index b4fc290b1f32b6..53212529c27e28 100644 --- a/Lib/test/test_exception_group.py +++ b/Lib/test/test_exception_group.py @@ -1,7 +1,7 @@ import collections.abc import types import unittest -from test.support import get_c_recursion_limit +from test.support import get_c_recursion_limit, skip_emscripten_stack_overflow class TestExceptionGroupTypeHierarchy(unittest.TestCase): def test_exception_group_types(self): @@ -464,11 +464,13 @@ def make_deep_eg(self): e = ExceptionGroup('eg', [e]) return e + @skip_emscripten_stack_overflow() def test_deep_split(self): e = self.make_deep_eg() with self.assertRaises(RecursionError): e.split(TypeError) + @skip_emscripten_stack_overflow() def test_deep_subgroup(self): e = self.make_deep_eg() with self.assertRaises(RecursionError): diff --git a/Lib/test/test_functools.py b/Lib/test/test_functools.py index 6d60f6941c4c5d..ffd2adb8665b45 100644 --- a/Lib/test/test_functools.py +++ b/Lib/test/test_functools.py @@ -404,6 +404,7 @@ def test_setstate_subclasses(self): self.assertEqual(r, ((1, 2), {})) self.assertIs(type(r[0]), tuple) + @support.skip_emscripten_stack_overflow() def test_recursive_pickle(self): with replaced_module('functools', self.module): f = self.partial(capture) @@ -2054,6 +2055,7 @@ def orig(a, /, b, c=True): ... 
@support.skip_on_s390x @unittest.skipIf(support.is_wasi, "WASI has limited C stack") + @support.skip_emscripten_stack_overflow() def test_lru_recursion(self): @self.module.lru_cache diff --git a/Lib/test/test_isinstance.py b/Lib/test/test_isinstance.py index 95a119ba683e09..abc75c82375d98 100644 --- a/Lib/test/test_isinstance.py +++ b/Lib/test/test_isinstance.py @@ -263,12 +263,14 @@ def test_subclass_tuple(self): self.assertEqual(True, issubclass(int, (int, (float, int)))) self.assertEqual(True, issubclass(str, (str, (Child, str)))) + @support.skip_emscripten_stack_overflow() def test_subclass_recursion_limit(self): # make sure that issubclass raises RecursionError before the C stack is # blown with support.infinite_recursion(): self.assertRaises(RecursionError, blowstack, issubclass, str, str) + @support.skip_emscripten_stack_overflow() def test_isinstance_recursion_limit(self): # make sure that issubclass raises RecursionError before the C stack is # blown @@ -315,6 +317,7 @@ def __bases__(self): self.assertRaises(RecursionError, issubclass, int, X()) self.assertRaises(RecursionError, isinstance, 1, X()) + @support.skip_emscripten_stack_overflow() def test_infinite_recursion_via_bases_tuple(self): """Regression test for bpo-30570.""" class Failure(object): diff --git a/Lib/test/test_json/test_recursion.py b/Lib/test/test_json/test_recursion.py index 290207e9c15b88..663c0643579ac8 100644 --- a/Lib/test/test_json/test_recursion.py +++ b/Lib/test/test_json/test_recursion.py @@ -68,6 +68,7 @@ def default(self, o): self.fail("didn't raise ValueError on default recursion") + @support.skip_emscripten_stack_overflow() def test_highly_nested_objects_decoding(self): # test that loading highly-nested objects doesn't segfault when C # accelerations are used. 
See #12017 @@ -81,6 +82,7 @@ def test_highly_nested_objects_decoding(self): with support.infinite_recursion(): self.loads('[' * 100000 + '1' + ']' * 100000) + @support.skip_emscripten_stack_overflow() def test_highly_nested_objects_encoding(self): # See #12051 l, d = [], {} @@ -93,6 +95,7 @@ def test_highly_nested_objects_encoding(self): with support.infinite_recursion(5000): self.dumps(d) + @support.skip_emscripten_stack_overflow() def test_endless_recursion(self): # See #12051 class EndlessJSONEncoder(self.json.JSONEncoder): diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index af94ac039808f0..5fa2f550cefcf4 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -9,7 +9,7 @@ from pathlib._abc import UnsupportedOperation, ParserBase, PurePathBase, PathBase import posixpath -from test.support import is_wasi +from test.support import is_wasi, is_emscripten from test.support.os_helper import TESTFN @@ -2298,6 +2298,7 @@ def _check(path, pattern, case_sensitive, expected): _check(path, "dirb/file*", False, ["dirB/fileB"]) @needs_symlinks + @unittest.skipIf(is_emscripten, "Hangs") def test_glob_recurse_symlinks_common(self): def _check(path, glob, expected): actual = {path for path in path.glob(glob, recurse_symlinks=True) @@ -2393,6 +2394,7 @@ def test_rglob_windows(self): self.assertEqual(set(p.rglob("*\\")), { P(self.base, "dirC/dirD/") }) @needs_symlinks + @unittest.skipIf(is_emscripten, "Hangs") def test_rglob_recurse_symlinks_common(self): def _check(path, glob, expected): actual = {path for path in path.rglob(glob, recurse_symlinks=True) diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index ea8d9f2137aca5..31f0a61d6a9d59 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -2097,6 +2097,7 @@ def deep_eg(self): return e @cpython_only + @support.skip_emscripten_stack_overflow() def 
test_exception_group_deep_recursion_capi(self): from _testcapi import exception_print LIMIT = 75 @@ -2108,6 +2109,7 @@ def test_exception_group_deep_recursion_capi(self): self.assertIn('ExceptionGroup', output) self.assertLessEqual(output.count('ExceptionGroup'), LIMIT) + @support.skip_emscripten_stack_overflow() def test_exception_group_deep_recursion_traceback(self): LIMIT = 75 eg = self.deep_eg() diff --git a/Lib/test/test_xml_etree_c.py b/Lib/test/test_xml_etree_c.py index 3a0fc572f457ff..db19af419bdeab 100644 --- a/Lib/test/test_xml_etree_c.py +++ b/Lib/test/test_xml_etree_c.py @@ -57,6 +57,7 @@ def test_del_attribute(self): del element.attrib self.assertEqual(element.attrib, {'A': 'B', 'C': 'D'}) + @unittest.skipIf(support.is_emscripten, "segfaults") def test_trashcan(self): # If this test fails, it will most likely die via segfault. e = root = cET.Element('root') diff --git a/configure b/configure index 7efda041ae69d4..c6790777793566 100755 --- a/configure +++ b/configure @@ -9436,6 +9436,7 @@ fi as_fn_append LDFLAGS_NODIST " -sFORCE_FILESYSTEM -lidbfs.js -lnodefs.js -lproxyfs.js -lworkerfs.js" as_fn_append LDFLAGS_NODIST " -sEXPORTED_RUNTIME_METHODS=FS,callMain" as_fn_append LDFLAGS_NODIST " -sEXPORTED_FUNCTIONS=_main,_Py_Version" + as_fn_append LDFLAGS_NODIST " -sSTACK_SIZE=5MB" if test "x$enable_wasm_dynamic_linking" = xyes then : diff --git a/configure.ac b/configure.ac index 15f7d07f22473b..9648e438cc7424 100644 --- a/configure.ac +++ b/configure.ac @@ -2334,6 +2334,7 @@ AS_CASE([$ac_sys_system], AS_VAR_APPEND([LDFLAGS_NODIST], [" -sFORCE_FILESYSTEM -lidbfs.js -lnodefs.js -lproxyfs.js -lworkerfs.js"]) AS_VAR_APPEND([LDFLAGS_NODIST], [" -sEXPORTED_RUNTIME_METHODS=FS,callMain"]) AS_VAR_APPEND([LDFLAGS_NODIST], [" -sEXPORTED_FUNCTIONS=_main,_Py_Version"]) + AS_VAR_APPEND([LDFLAGS_NODIST], [" -sSTACK_SIZE=5MB"]) AS_VAR_IF([enable_wasm_dynamic_linking], [yes], [ AS_VAR_APPEND([LINKFORSHARED], [" -sMAIN_MODULE"]) From 87faf0a9c4aa7f8eb5b6b6c8f6e8f5f99b1e3d9b 
Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Thu, 5 Dec 2024 01:44:50 +0100 Subject: [PATCH 42/76] gh-127503: Emscripten make Python.sh function as proper Python CLI (#127506) Modifies the python.sh script to work on macOS, and adapt to recent emscripten changes. --- Tools/wasm/emscripten/__main__.py | 21 ++++++++++++-- Tools/wasm/emscripten/node_entry.mjs | 43 +++++++++++++++++++--------- configure | 2 +- configure.ac | 2 +- 4 files changed, 51 insertions(+), 17 deletions(-) diff --git a/Tools/wasm/emscripten/__main__.py b/Tools/wasm/emscripten/__main__.py index 9ce8dd6a364ad6..c998ed71309dad 100644 --- a/Tools/wasm/emscripten/__main__.py +++ b/Tools/wasm/emscripten/__main__.py @@ -218,9 +218,26 @@ def configure_emscripten_python(context, working_dir): f"""\ #!/bin/sh + # Macs come with FreeBSD coreutils which doesn't have the -s option + # so feature detect and work around it. + if which grealpath > /dev/null; then + # It has brew installed gnu core utils, use that + REALPATH="grealpath -s" + elif which realpath > /dev/null && realpath --version 2&>1 | grep GNU > /dev/null; then + # realpath points to GNU realpath so use it. + REALPATH="realpath -s" + else + # Shim for macs without GNU coreutils + abs_path () {{ + echo "$(cd "$(dirname "$1")" || exit; pwd)/$(basename "$1")" + }} + REALPATH=abs_path + fi + # We compute our own path, not following symlinks and pass it in so that # node_entry.mjs can set sys.executable correctly. - exec {host_runner} {node_entry} "$(realpath -s $0)" "$@" + # Intentionally allow word splitting on NODEFLAGS. 
+ exec {host_runner} $NODEFLAGS {node_entry} --this-program="$($REALPATH "$0")" "$@" """ ) ) @@ -233,7 +250,7 @@ def configure_emscripten_python(context, working_dir): def make_emscripten_python(context, working_dir): """Run `make` for the emscripten/host build.""" call( - ["make", "--jobs", str(cpu_count()), "commoninstall"], + ["make", "--jobs", str(cpu_count()), "all"], env=updated_env(), quiet=context.quiet, ) diff --git a/Tools/wasm/emscripten/node_entry.mjs b/Tools/wasm/emscripten/node_entry.mjs index cb1c6ff3cba6aa..40ab1515cf28c1 100644 --- a/Tools/wasm/emscripten/node_entry.mjs +++ b/Tools/wasm/emscripten/node_entry.mjs @@ -1,30 +1,47 @@ import EmscriptenModule from "./python.mjs"; -import { dirname } from 'node:path'; -import { fileURLToPath } from 'node:url'; +import fs from "node:fs"; if (process?.versions?.node) { const nodeVersion = Number(process.versions.node.split(".", 1)[0]); if (nodeVersion < 18) { - process.stderr.write( - `Node version must be >= 18, got version ${process.version}\n`, - ); - process.exit(1); + process.stderr.write( + `Node version must be >= 18, got version ${process.version}\n`, + ); + process.exit(1); } } +function rootDirsToMount(Module) { + return fs + .readdirSync("/") + .filter((dir) => !["dev", "lib", "proc"].includes(dir)) + .map((dir) => "/" + dir); +} + +function mountDirectories(Module) { + for (const dir of rootDirsToMount(Module)) { + Module.FS.mkdirTree(dir); + Module.FS.mount(Module.FS.filesystems.NODEFS, { root: dir }, dir); + } +} + +const thisProgram = "--this-program="; +const thisProgramIndex = process.argv.findIndex((x) => + x.startsWith(thisProgram), +); + const settings = { preRun(Module) { - const __dirname = dirname(fileURLToPath(import.meta.url)); - Module.FS.mkdirTree("/lib/"); - Module.FS.mount(Module.FS.filesystems.NODEFS, { root: __dirname + "/lib/" }, "/lib/"); + mountDirectories(Module); + Module.FS.chdir(process.cwd()); + Object.assign(Module.ENV, process.env); }, - // The first three arguments 
are: "node", path to this file, path to - // python.sh. After that come the arguments the user passed to python.sh. - arguments: process.argv.slice(3), // Ensure that sys.executable, sys._base_executable, etc point to python.sh // not to this file. To properly handle symlinks, python.sh needs to compute // its own path. - thisProgram: process.argv[2], + thisProgram: process.argv[thisProgramIndex], + // After python.sh come the arguments that the user passed to python.sh. + arguments: process.argv.slice(thisProgramIndex + 1), }; await EmscriptenModule(settings); diff --git a/configure b/configure index c6790777793566..2fa473b9fe32c0 100755 --- a/configure +++ b/configure @@ -9434,7 +9434,7 @@ fi as_fn_append LDFLAGS_NODIST " -sWASM_BIGINT" as_fn_append LDFLAGS_NODIST " -sFORCE_FILESYSTEM -lidbfs.js -lnodefs.js -lproxyfs.js -lworkerfs.js" - as_fn_append LDFLAGS_NODIST " -sEXPORTED_RUNTIME_METHODS=FS,callMain" + as_fn_append LDFLAGS_NODIST " -sEXPORTED_RUNTIME_METHODS=FS,callMain,ENV" as_fn_append LDFLAGS_NODIST " -sEXPORTED_FUNCTIONS=_main,_Py_Version" as_fn_append LDFLAGS_NODIST " -sSTACK_SIZE=5MB" diff --git a/configure.ac b/configure.ac index 9648e438cc7424..8ca8e0f7802742 100644 --- a/configure.ac +++ b/configure.ac @@ -2332,7 +2332,7 @@ AS_CASE([$ac_sys_system], dnl Include file system support AS_VAR_APPEND([LDFLAGS_NODIST], [" -sFORCE_FILESYSTEM -lidbfs.js -lnodefs.js -lproxyfs.js -lworkerfs.js"]) - AS_VAR_APPEND([LDFLAGS_NODIST], [" -sEXPORTED_RUNTIME_METHODS=FS,callMain"]) + AS_VAR_APPEND([LDFLAGS_NODIST], [" -sEXPORTED_RUNTIME_METHODS=FS,callMain,ENV"]) AS_VAR_APPEND([LDFLAGS_NODIST], [" -sEXPORTED_FUNCTIONS=_main,_Py_Version"]) AS_VAR_APPEND([LDFLAGS_NODIST], [" -sSTACK_SIZE=5MB"]) From 6cf77949fba7b44f6885794b2028f091f42f5d6c Mon Sep 17 00:00:00 2001 From: Feodor Fitsner Date: Wed, 4 Dec 2024 19:00:20 -0800 Subject: [PATCH 43/76] gh-127434: Fix iOS `xcrun --sdk` clang/ar scripts to allow arguments with spaces (#127575) Added shell escaping to ensure iOS
compiler shims can accept arguments with spaces. --- .../2024-12-04-09-52-08.gh-issue-127434.RjkGT_.rst | 1 + iOS/Resources/bin/arm64-apple-ios-ar | 2 +- iOS/Resources/bin/arm64-apple-ios-clang | 2 +- iOS/Resources/bin/arm64-apple-ios-clang++ | 2 +- iOS/Resources/bin/arm64-apple-ios-cpp | 2 +- iOS/Resources/bin/arm64-apple-ios-simulator-ar | 2 +- iOS/Resources/bin/arm64-apple-ios-simulator-clang | 2 +- iOS/Resources/bin/arm64-apple-ios-simulator-clang++ | 2 +- iOS/Resources/bin/arm64-apple-ios-simulator-cpp | 2 +- iOS/Resources/bin/x86_64-apple-ios-simulator-ar | 2 +- iOS/Resources/bin/x86_64-apple-ios-simulator-clang | 2 +- iOS/Resources/bin/x86_64-apple-ios-simulator-clang++ | 2 +- iOS/Resources/bin/x86_64-apple-ios-simulator-cpp | 2 +- 13 files changed, 13 insertions(+), 12 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-12-04-09-52-08.gh-issue-127434.RjkGT_.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-12-04-09-52-08.gh-issue-127434.RjkGT_.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-04-09-52-08.gh-issue-127434.RjkGT_.rst new file mode 100644 index 00000000000000..08b27a7890bb1c --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-04-09-52-08.gh-issue-127434.RjkGT_.rst @@ -0,0 +1 @@ +The iOS compiler shims can now accept arguments with spaces. 
diff --git a/iOS/Resources/bin/arm64-apple-ios-ar b/iOS/Resources/bin/arm64-apple-ios-ar index 8122332b9c1de0..3cf3eb218741fa 100755 --- a/iOS/Resources/bin/arm64-apple-ios-ar +++ b/iOS/Resources/bin/arm64-apple-ios-ar @@ -1,2 +1,2 @@ #!/bin/sh -xcrun --sdk iphoneos${IOS_SDK_VERSION} ar $@ +xcrun --sdk iphoneos${IOS_SDK_VERSION} ar "$@" diff --git a/iOS/Resources/bin/arm64-apple-ios-clang b/iOS/Resources/bin/arm64-apple-ios-clang index 4d525751eba798..c39519cd1f8c94 100755 --- a/iOS/Resources/bin/arm64-apple-ios-clang +++ b/iOS/Resources/bin/arm64-apple-ios-clang @@ -1,2 +1,2 @@ #!/bin/sh -xcrun --sdk iphoneos${IOS_SDK_VERSION} clang -target arm64-apple-ios $@ +xcrun --sdk iphoneos${IOS_SDK_VERSION} clang -target arm64-apple-ios "$@" diff --git a/iOS/Resources/bin/arm64-apple-ios-clang++ b/iOS/Resources/bin/arm64-apple-ios-clang++ index f24bec11268f7e..d9b12925f384b9 100755 --- a/iOS/Resources/bin/arm64-apple-ios-clang++ +++ b/iOS/Resources/bin/arm64-apple-ios-clang++ @@ -1,2 +1,2 @@ #!/bin/sh -xcrun --sdk iphoneos${IOS_SDK_VERSION} clang++ -target arm64-apple-ios $@ +xcrun --sdk iphoneos${IOS_SDK_VERSION} clang++ -target arm64-apple-ios "$@" diff --git a/iOS/Resources/bin/arm64-apple-ios-cpp b/iOS/Resources/bin/arm64-apple-ios-cpp index 891bb25bb4318c..24da23d3448ae0 100755 --- a/iOS/Resources/bin/arm64-apple-ios-cpp +++ b/iOS/Resources/bin/arm64-apple-ios-cpp @@ -1,2 +1,2 @@ #!/bin/sh -xcrun --sdk iphoneos${IOS_SDK_VERSION} clang -target arm64-apple-ios -E $@ +xcrun --sdk iphoneos${IOS_SDK_VERSION} clang -target arm64-apple-ios -E "$@" diff --git a/iOS/Resources/bin/arm64-apple-ios-simulator-ar b/iOS/Resources/bin/arm64-apple-ios-simulator-ar index 74ed3bc6df1c2b..b836b6db9025bb 100755 --- a/iOS/Resources/bin/arm64-apple-ios-simulator-ar +++ b/iOS/Resources/bin/arm64-apple-ios-simulator-ar @@ -1,2 +1,2 @@ #!/bin/sh -xcrun --sdk iphonesimulator${IOS_SDK_VERSION} ar $@ +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} ar "$@" diff --git 
a/iOS/Resources/bin/arm64-apple-ios-simulator-clang b/iOS/Resources/bin/arm64-apple-ios-simulator-clang index 32574cad284441..92e8d853d6ebc3 100755 --- a/iOS/Resources/bin/arm64-apple-ios-simulator-clang +++ b/iOS/Resources/bin/arm64-apple-ios-simulator-clang @@ -1,2 +1,2 @@ #!/bin/sh -xcrun --sdk iphonesimulator${IOS_SDK_VERSION} clang -target arm64-apple-ios-simulator $@ +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} clang -target arm64-apple-ios-simulator "$@" diff --git a/iOS/Resources/bin/arm64-apple-ios-simulator-clang++ b/iOS/Resources/bin/arm64-apple-ios-simulator-clang++ index ef37d05b512959..076469cc70cf98 100755 --- a/iOS/Resources/bin/arm64-apple-ios-simulator-clang++ +++ b/iOS/Resources/bin/arm64-apple-ios-simulator-clang++ @@ -1,2 +1,2 @@ #!/bin/sh -xcrun --sdk iphonesimulator${IOS_SDK_VERSION} clang++ -target arm64-apple-ios-simulator $@ +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} clang++ -target arm64-apple-ios-simulator "$@" diff --git a/iOS/Resources/bin/arm64-apple-ios-simulator-cpp b/iOS/Resources/bin/arm64-apple-ios-simulator-cpp index 6aaf6fbe188c32..c57f28cee5bcfe 100755 --- a/iOS/Resources/bin/arm64-apple-ios-simulator-cpp +++ b/iOS/Resources/bin/arm64-apple-ios-simulator-cpp @@ -1,2 +1,2 @@ #!/bin/sh -xcrun --sdk iphonesimulator${IOS_SDK_VERSION} clang -target arm64-apple-ios-simulator -E $@ +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} clang -target arm64-apple-ios-simulator -E "$@" diff --git a/iOS/Resources/bin/x86_64-apple-ios-simulator-ar b/iOS/Resources/bin/x86_64-apple-ios-simulator-ar index 74ed3bc6df1c2b..b836b6db9025bb 100755 --- a/iOS/Resources/bin/x86_64-apple-ios-simulator-ar +++ b/iOS/Resources/bin/x86_64-apple-ios-simulator-ar @@ -1,2 +1,2 @@ #!/bin/sh -xcrun --sdk iphonesimulator${IOS_SDK_VERSION} ar $@ +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} ar "$@" diff --git a/iOS/Resources/bin/x86_64-apple-ios-simulator-clang b/iOS/Resources/bin/x86_64-apple-ios-simulator-clang index bcbe91f6061e16..17cbe0c8a1e213 100755 --- 
a/iOS/Resources/bin/x86_64-apple-ios-simulator-clang +++ b/iOS/Resources/bin/x86_64-apple-ios-simulator-clang @@ -1,2 +1,2 @@ #!/bin/sh -xcrun --sdk iphonesimulator${IOS_SDK_VERSION} clang -target x86_64-apple-ios-simulator $@ +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} clang -target x86_64-apple-ios-simulator "$@" diff --git a/iOS/Resources/bin/x86_64-apple-ios-simulator-clang++ b/iOS/Resources/bin/x86_64-apple-ios-simulator-clang++ index 86f03ea32bc2fd..565d47b24c214b 100755 --- a/iOS/Resources/bin/x86_64-apple-ios-simulator-clang++ +++ b/iOS/Resources/bin/x86_64-apple-ios-simulator-clang++ @@ -1,2 +1,2 @@ #!/bin/sh -xcrun --sdk iphonesimulator${IOS_SDK_VERSION} clang++ -target x86_64-apple-ios-simulator $@ +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} clang++ -target x86_64-apple-ios-simulator "$@" diff --git a/iOS/Resources/bin/x86_64-apple-ios-simulator-cpp b/iOS/Resources/bin/x86_64-apple-ios-simulator-cpp index e6a42d9b85dec7..63fc8e8de2d38d 100755 --- a/iOS/Resources/bin/x86_64-apple-ios-simulator-cpp +++ b/iOS/Resources/bin/x86_64-apple-ios-simulator-cpp @@ -1,2 +1,2 @@ #!/bin/sh -xcrun --sdk iphonesimulator${IOS_SDK_VERSION} clang -target x86_64-apple-ios-simulator -E $@ +xcrun --sdk iphonesimulator${IOS_SDK_VERSION} clang -target x86_64-apple-ios-simulator -E "$@" From 1ef6e8ca3faf2c2b008fb170c7c44c38b86e874a Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 5 Dec 2024 10:37:14 +0100 Subject: [PATCH 44/76] gh-119182: Complete PyUnicodeWriter documentation (#127607) --- Doc/c-api/unicode.rst | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 59bd7661965d93..dcbc8804cd6b89 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -1588,6 +1588,11 @@ object. Create a Unicode writer instance. + *length* must be greater than or equal to ``0``. + + If *length* is greater than ``0``, preallocate an internal buffer of + *length* characters. 
+ Set an exception and return ``NULL`` on error. .. c:function:: PyObject* PyUnicodeWriter_Finish(PyUnicodeWriter *writer) @@ -1596,12 +1601,16 @@ object. Set an exception and return ``NULL`` on error. + The writer instance is invalid after this call. + .. c:function:: void PyUnicodeWriter_Discard(PyUnicodeWriter *writer) Discard the internal Unicode buffer and destroy the writer instance. If *writer* is ``NULL``, no operation is performed. + The writer instance is invalid after this call. + .. c:function:: int PyUnicodeWriter_WriteChar(PyUnicodeWriter *writer, Py_UCS4 ch) Write the single Unicode character *ch* into *writer*. From fcbe6ecdb6ed4dd93b2ee144f89a73af755e2634 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 5 Dec 2024 10:39:44 +0100 Subject: [PATCH 45/76] gh-93312: Include <sys/pidfd.h> to get PIDFD_NONBLOCK (#127593) --- .../Library/2024-12-04-11-01-16.gh-issue-93312.9sB-Qw.rst | 2 ++ Modules/posixmodule.c | 3 +++ configure | 6 ++++++ configure.ac | 2 +- pyconfig.h.in | 3 +++ 5 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-12-04-11-01-16.gh-issue-93312.9sB-Qw.rst diff --git a/Misc/NEWS.d/next/Library/2024-12-04-11-01-16.gh-issue-93312.9sB-Qw.rst b/Misc/NEWS.d/next/Library/2024-12-04-11-01-16.gh-issue-93312.9sB-Qw.rst new file mode 100644 index 00000000000000..e245fa2bdd00b4 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-04-11-01-16.gh-issue-93312.9sB-Qw.rst @@ -0,0 +1,2 @@ +Include ``<sys/pidfd.h>`` to get ``os.PIDFD_NONBLOCK`` constant. Patch by +Victor Stinner.
diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 6eb7054b566e3f..2c26fbeac9a1be 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -73,6 +73,9 @@ #ifdef HAVE_SYS_TIME_H # include // futimes() #endif +#ifdef HAVE_SYS_PIDFD_H +# include // PIDFD_NONBLOCK +#endif // SGI apparently needs this forward declaration diff --git a/configure b/configure index 2fa473b9fe32c0..5e9bcb602d884e 100755 --- a/configure +++ b/configure @@ -11178,6 +11178,12 @@ if test "x$ac_cv_header_sys_param_h" = xyes then : printf "%s\n" "#define HAVE_SYS_PARAM_H 1" >>confdefs.h +fi +ac_fn_c_check_header_compile "$LINENO" "sys/pidfd.h" "ac_cv_header_sys_pidfd_h" "$ac_includes_default" +if test "x$ac_cv_header_sys_pidfd_h" = xyes +then : + printf "%s\n" "#define HAVE_SYS_PIDFD_H 1" >>confdefs.h + fi ac_fn_c_check_header_compile "$LINENO" "sys/poll.h" "ac_cv_header_sys_poll_h" "$ac_includes_default" if test "x$ac_cv_header_sys_poll_h" = xyes diff --git a/configure.ac b/configure.ac index 8ca8e0f7802742..bf3685e1b1b209 100644 --- a/configure.ac +++ b/configure.ac @@ -2932,7 +2932,7 @@ AC_CHECK_HEADERS([ \ linux/tipc.h linux/wait.h netdb.h net/ethernet.h netinet/in.h netpacket/packet.h poll.h process.h pthread.h pty.h \ sched.h setjmp.h shadow.h signal.h spawn.h stropts.h sys/audioio.h sys/bsdtty.h sys/devpoll.h \ sys/endian.h sys/epoll.h sys/event.h sys/eventfd.h sys/file.h sys/ioctl.h sys/kern_control.h \ - sys/loadavg.h sys/lock.h sys/memfd.h sys/mkdev.h sys/mman.h sys/modem.h sys/param.h sys/poll.h \ + sys/loadavg.h sys/lock.h sys/memfd.h sys/mkdev.h sys/mman.h sys/modem.h sys/param.h sys/pidfd.h sys/poll.h \ sys/random.h sys/resource.h sys/select.h sys/sendfile.h sys/socket.h sys/soundcard.h sys/stat.h \ sys/statvfs.h sys/sys_domain.h sys/syscall.h sys/sysmacros.h sys/termio.h sys/time.h sys/times.h sys/timerfd.h \ sys/types.h sys/uio.h sys/un.h sys/utsname.h sys/wait.h sys/xattr.h sysexits.h syslog.h \ diff --git a/pyconfig.h.in b/pyconfig.h.in index 
924d86627b0e9b..6a1f1284650b9f 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -1388,6 +1388,9 @@ /* Define to 1 if you have the <sys/param.h> header file. */ #undef HAVE_SYS_PARAM_H +/* Define to 1 if you have the <sys/pidfd.h> header file. */ +#undef HAVE_SYS_PIDFD_H + /* Define to 1 if you have the <sys/poll.h> header file. */ #undef HAVE_SYS_POLL_H From 67b9a5331ae45aa126877d7f96a1e235600f9c4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Thu, 5 Dec 2024 16:01:59 +0100 Subject: [PATCH 46/76] gh-127413: allow to show specialized bytecode via `dis` CLI (#127414) --- Doc/library/dis.rst | 8 +++++++- Doc/whatsnew/3.14.rst | 6 ++++++ Lib/dis.py | 5 ++++- .../2024-11-29-14-45-26.gh-issue-127413.z11AUc.rst | 2 ++ 4 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-29-14-45-26.gh-issue-127413.z11AUc.rst diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst index e2926f2440af6d..f8f4188d27b472 100644 --- a/Doc/library/dis.rst +++ b/Doc/library/dis.rst @@ -60,6 +60,8 @@ interpreter. The :option:`-P <dis --show-positions>` command-line option and the ``show_positions`` argument were added. + The :option:`-S <dis --specialized>` command-line option is added. + Example: Given the function :func:`!myfunc`:: def myfunc(alist): @@ -89,7 +91,7 @@ The :mod:`dis` module can be invoked as a script from the command line: .. code-block:: sh - python -m dis [-h] [-C] [-O] [-P] [infile] + python -m dis [-h] [-C] [-O] [-P] [-S] [infile] The following options are accepted: @@ -111,6 +113,10 @@ The following options are accepted: Show positions of instructions in the source code. +.. cmdoption:: -S, --specialized + + Show specialized bytecode. + If :file:`infile` is specified, its disassembled code will be written to stdout. Otherwise, disassembly is performed on compiled source code received from stdin.
diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index 52a6d6e4340194..e83c509a025ab5 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -348,12 +348,18 @@ dis This feature is also exposed via :option:`dis --show-positions`. (Contributed by Bénédikt Tran in :gh:`123165`.) +* Add the :option:`dis --specialized` command-line option to + show specialized bytecode. + (Contributed by Bénédikt Tran in :gh:`127413`.) + + errno ----- * Add :data:`errno.EHWPOISON` error code. (Contributed by James Roy in :gh:`126585`.) + fractions --------- diff --git a/Lib/dis.py b/Lib/dis.py index 1718e39cceb638..6b3e9ef8399e1c 100644 --- a/Lib/dis.py +++ b/Lib/dis.py @@ -1125,6 +1125,8 @@ def main(): help='show instruction offsets') parser.add_argument('-P', '--show-positions', action='store_true', help='show instruction positions') + parser.add_argument('-S', '--specialized', action='store_true', + help='show specialized bytecode') parser.add_argument('infile', nargs='?', default='-') args = parser.parse_args() if args.infile == '-': @@ -1135,7 +1137,8 @@ def main(): with open(args.infile, 'rb') as infile: source = infile.read() code = compile(source, name, "exec") - dis(code, show_caches=args.show_caches, show_offsets=args.show_offsets, show_positions=args.show_positions) + dis(code, show_caches=args.show_caches, adaptive=args.specialized, + show_offsets=args.show_offsets, show_positions=args.show_positions) if __name__ == "__main__": main() diff --git a/Misc/NEWS.d/next/Library/2024-11-29-14-45-26.gh-issue-127413.z11AUc.rst b/Misc/NEWS.d/next/Library/2024-11-29-14-45-26.gh-issue-127413.z11AUc.rst new file mode 100644 index 00000000000000..2330fb66253265 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-29-14-45-26.gh-issue-127413.z11AUc.rst @@ -0,0 +1,2 @@ +Add the :option:`dis --specialized` command-line option to show specialized +bytecode. Patch by Bénédikt Tran. 
From 208b0fb645c0e14b0826c0014e74a0b70c58c9d6 Mon Sep 17 00:00:00 2001 From: Peter Bierma Date: Thu, 5 Dec 2024 11:07:38 -0500 Subject: [PATCH 47/76] gh-122431: Disallow negative values in `readline.append_history_file` (#122469) Co-authored-by: Victor Stinner --- Lib/test/test_readline.py | 8 ++++++++ .../2024-07-30-11-37-40.gh-issue-122431.lAzVtu.rst | 1 + Modules/readline.c | 6 ++++++ 3 files changed, 15 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-07-30-11-37-40.gh-issue-122431.lAzVtu.rst diff --git a/Lib/test/test_readline.py b/Lib/test/test_readline.py index 50e77cbbb6be13..8b8772c66ee654 100644 --- a/Lib/test/test_readline.py +++ b/Lib/test/test_readline.py @@ -114,6 +114,14 @@ def test_write_read_append(self): # write_history_file can create the target readline.write_history_file(hfilename) + # Negative values should be disallowed + with self.assertRaises(ValueError): + readline.append_history_file(-42, hfilename) + + # See gh-122431, using the minimum signed integer value caused a segfault + with self.assertRaises(ValueError): + readline.append_history_file(-2147483648, hfilename) + def test_nonascii_history(self): readline.clear_history() try: diff --git a/Misc/NEWS.d/next/Library/2024-07-30-11-37-40.gh-issue-122431.lAzVtu.rst b/Misc/NEWS.d/next/Library/2024-07-30-11-37-40.gh-issue-122431.lAzVtu.rst new file mode 100644 index 00000000000000..16ad75792aefa2 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-07-30-11-37-40.gh-issue-122431.lAzVtu.rst @@ -0,0 +1 @@ +:func:`readline.append_history_file` now raises a :exc:`ValueError` when given a negative value. 
diff --git a/Modules/readline.c b/Modules/readline.c index 35655c70a4618f..7d1f703f7dbdde 100644 --- a/Modules/readline.c +++ b/Modules/readline.c @@ -351,6 +351,12 @@ readline_append_history_file_impl(PyObject *module, int nelements, PyObject *filename_obj) /*[clinic end generated code: output=5df06fc9da56e4e4 input=784b774db3a4b7c5]*/ { + if (nelements < 0) + { + PyErr_SetString(PyExc_ValueError, "nelements must be positive"); + return NULL; + } + PyObject *filename_bytes; const char *filename; int err; From d958d9f4a1b71c6d30960bf6c53c41046ea94590 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Kul=C3=ADk?= Date: Thu, 5 Dec 2024 19:43:19 +0100 Subject: [PATCH 48/76] GH-126727: Fix test_era_nl_langinfo with Japanese ERAs on Solaris (GH-127327) Fix test_era_nl_langinfo with Japanese ERAs on Solaris --- Lib/test/test__locale.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Lib/test/test__locale.py b/Lib/test/test__locale.py index 2c751033ebb3e2..cef84fd9580c37 100644 --- a/Lib/test/test__locale.py +++ b/Lib/test/test__locale.py @@ -102,6 +102,11 @@ def accept(loc): # ps_AF doesn't work on Windows: see bpo-38324 (msg361830) del known_numerics['ps_AF'] +if sys.platform == 'sunos5': + # On Solaris, Japanese ERAs start with the year 1927, + # and thus there are fewer of them.
+ known_era['ja_JP'] = (5, '+:1:2019/05/01:2019/12/31:令和:%EC元年') + class _LocaleTests(unittest.TestCase): def setUp(self): From 23f2e8f13c4e4a34106cf96fad9329cbfbf8844d Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Date: Thu, 5 Dec 2024 21:10:46 +0200 Subject: [PATCH 49/76] gh-127221: Add colour to unittest output (#127223) Co-authored-by: Kirill Podoprigora --- Doc/conf.py | 7 ++ Doc/library/doctest.rst | 4 + Doc/library/traceback.rst | 4 + Doc/library/unittest.rst | 4 +- Doc/using/cmdline.rst | 8 -- Doc/whatsnew/3.13.rst | 9 -- Doc/whatsnew/3.14.rst | 7 ++ Lib/test/test_unittest/test_async_case.py | 2 + Lib/test/test_unittest/test_program.py | 6 + Lib/test/test_unittest/test_result.py | 16 ++- Lib/test/test_unittest/test_runner.py | 13 +++ Lib/test/test_unittest/test_skipping.py | 3 + Lib/unittest/result.py | 4 +- Lib/unittest/runner.py | 108 +++++++++++------- ...-11-23-00-17-29.gh-issue-127221.OSXdFE.rst | 1 + 15 files changed, 136 insertions(+), 60 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-23-00-17-29.gh-issue-127221.OSXdFE.rst diff --git a/Doc/conf.py b/Doc/conf.py index 738c9901eef06f..9cde394cbaed69 100644 --- a/Doc/conf.py +++ b/Doc/conf.py @@ -78,6 +78,13 @@ .. |python_version_literal| replace:: ``Python {version}`` .. |python_x_dot_y_literal| replace:: ``python{version}`` .. |usr_local_bin_python_x_dot_y_literal| replace:: ``/usr/local/bin/python{version}`` + +.. Apparently this how you hack together a formatted link: + (https://www.docutils.org/docs/ref/rst/directives.html#replacement-text) +.. |FORCE_COLOR| replace:: ``FORCE_COLOR`` +.. _FORCE_COLOR: https://force-color.org/ +.. |NO_COLOR| replace:: ``NO_COLOR`` +.. _NO_COLOR: https://no-color.org/ """ # There are two options for replacing |today|. 
Either, you set today to some diff --git a/Doc/library/doctest.rst b/Doc/library/doctest.rst index 6b0282eed49566..106b0a6c95b7be 100644 --- a/Doc/library/doctest.rst +++ b/Doc/library/doctest.rst @@ -136,6 +136,10 @@ examples of doctests in the standard Python test suite and libraries. Especially useful examples can be found in the standard test file :file:`Lib/test/test_doctest/test_doctest.py`. +.. versionadded:: 3.13 + Output is colorized by default and can be + :ref:`controlled using environment variables <using-on-controlling-color>`. + .. _doctest-simple-testmod: diff --git a/Doc/library/traceback.rst b/Doc/library/traceback.rst index 8f94fc448f2482..4899ed64ebad8d 100644 --- a/Doc/library/traceback.rst +++ b/Doc/library/traceback.rst @@ -44,6 +44,10 @@ The module's API can be divided into two parts: necessary for later formatting without holding references to actual exception and traceback objects. +.. versionadded:: 3.13 + Output is colorized by default and can be + :ref:`controlled using environment variables <using-on-controlling-color>`. + Module-Level Functions ---------------------- diff --git a/Doc/library/unittest.rst b/Doc/library/unittest.rst index 38bad9405597dd..7f8b710f611002 100644 --- a/Doc/library/unittest.rst +++ b/Doc/library/unittest.rst @@ -46,7 +46,6 @@ test runner a textual interface, or return a special value to indicate the results of executing the tests. - .. seealso:: Module :mod:`doctest` @@ -198,6 +197,9 @@ For a list of all the command-line options:: In earlier versions it was only possible to run individual test methods and not modules or classes. +.. versionadded:: 3.14 + Output is colorized by default and can be + :ref:`controlled using environment variables <using-on-controlling-color>`. Command-line options ~~~~~~~~~~~~~~~~~~~~ diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index 6cf42b27718022..7db2f4820f346a 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -663,14 +663,6 @@ output.
To control the color output only in the Python interpreter, the precedence over ``NO_COLOR``, which in turn takes precedence over ``FORCE_COLOR``. -.. Apparently this how you hack together a formatted link: - -.. |FORCE_COLOR| replace:: ``FORCE_COLOR`` -.. _FORCE_COLOR: https://force-color.org/ - -.. |NO_COLOR| replace:: ``NO_COLOR`` -.. _NO_COLOR: https://no-color.org/ - Options you shouldn't use ~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 664b1866172378..9f6d98b9950d19 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -252,15 +252,6 @@ Improved error messages the canonical |NO_COLOR|_ and |FORCE_COLOR|_ environment variables. (Contributed by Pablo Galindo Salgado in :gh:`112730`.) -.. Apparently this how you hack together a formatted link: - (https://www.docutils.org/docs/ref/rst/directives.html#replacement-text) - -.. |FORCE_COLOR| replace:: ``FORCE_COLOR`` -.. _FORCE_COLOR: https://force-color.org/ - -.. |NO_COLOR| replace:: ``NO_COLOR`` -.. _NO_COLOR: https://no-color.org/ - * A common mistake is to write a script with the same name as a standard library module. When this results in errors, we now display a more helpful error message: diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index e83c509a025ab5..db25c037e509b6 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -616,6 +616,13 @@ unicodedata unittest -------- +* :mod:`unittest` output is now colored by default. + This can be controlled via the :envvar:`PYTHON_COLORS` environment + variable as well as the canonical |NO_COLOR|_ + and |FORCE_COLOR|_ environment variables. + See also :ref:`using-on-controlling-color`. + (Contributed by Hugo van Kemenade in :gh:`127221`.) + * unittest discovery supports :term:`namespace package` as start directory again. It was removed in Python 3.11. (Contributed by Jacob Walls in :gh:`80958`.) 
diff --git a/Lib/test/test_unittest/test_async_case.py b/Lib/test/test_unittest/test_async_case.py index 00ef55bdf9bc83..8ea244bff05c5f 100644 --- a/Lib/test/test_unittest/test_async_case.py +++ b/Lib/test/test_unittest/test_async_case.py @@ -2,6 +2,7 @@ import contextvars import unittest from test import support +from test.support import force_not_colorized support.requires_working_socket(module=True) @@ -252,6 +253,7 @@ async def on_cleanup(self): test.doCleanups() self.assertEqual(events, ['asyncSetUp', 'test', 'asyncTearDown', 'cleanup']) + @force_not_colorized def test_exception_in_tear_clean_up(self): class Test(unittest.IsolatedAsyncioTestCase): async def asyncSetUp(self): diff --git a/Lib/test/test_unittest/test_program.py b/Lib/test/test_unittest/test_program.py index 7241cf59f73d4f..0b46f338ac77e1 100644 --- a/Lib/test/test_unittest/test_program.py +++ b/Lib/test/test_unittest/test_program.py @@ -4,6 +4,7 @@ from test import support import unittest import test.test_unittest +from test.support import force_not_colorized from test.test_unittest.test_result import BufferedWriter @@ -120,6 +121,7 @@ def run(self, test): self.assertEqual(['test.test_unittest', 'test.test_unittest2'], program.testNames) + @force_not_colorized def test_NonExit(self): stream = BufferedWriter() program = unittest.main(exit=False, @@ -135,6 +137,7 @@ def test_NonExit(self): 'expected failures=1, unexpected successes=1)\n') self.assertTrue(out.endswith(expected)) + @force_not_colorized def test_Exit(self): stream = BufferedWriter() with self.assertRaises(SystemExit) as cm: @@ -152,6 +155,7 @@ def test_Exit(self): 'expected failures=1, unexpected successes=1)\n') self.assertTrue(out.endswith(expected)) + @force_not_colorized def test_ExitAsDefault(self): stream = BufferedWriter() with self.assertRaises(SystemExit): @@ -167,6 +171,7 @@ def test_ExitAsDefault(self): 'expected failures=1, unexpected successes=1)\n') self.assertTrue(out.endswith(expected)) + @force_not_colorized def 
test_ExitSkippedSuite(self): stream = BufferedWriter() with self.assertRaises(SystemExit) as cm: @@ -179,6 +184,7 @@ def test_ExitSkippedSuite(self): expected = '\n\nOK (skipped=1)\n' self.assertTrue(out.endswith(expected)) + @force_not_colorized def test_ExitEmptySuite(self): stream = BufferedWriter() with self.assertRaises(SystemExit) as cm: diff --git a/Lib/test/test_unittest/test_result.py b/Lib/test/test_unittest/test_result.py index 4e5ec54e9c892a..746b9fa2677717 100644 --- a/Lib/test/test_unittest/test_result.py +++ b/Lib/test/test_unittest/test_result.py @@ -7,6 +7,7 @@ import traceback import unittest from unittest.util import strclass +from test.support import force_not_colorized from test.test_unittest.support import BufferedWriter @@ -14,7 +15,7 @@ class MockTraceback(object): class TracebackException: def __init__(self, *args, **kwargs): self.capture_locals = kwargs.get('capture_locals', False) - def format(self): + def format(self, **kwargs): result = ['A traceback'] if self.capture_locals: result.append('locals') @@ -205,6 +206,7 @@ def test_1(self): self.assertIs(test_case, test) self.assertIsInstance(formatted_exc, str) + @force_not_colorized def test_addFailure_filter_traceback_frames(self): class Foo(unittest.TestCase): def test_1(self): @@ -231,6 +233,7 @@ def get_exc_info(): self.assertEqual(len(dropped), 1) self.assertIn("raise self.failureException(msg)", dropped[0]) + @force_not_colorized def test_addFailure_filter_traceback_frames_context(self): class Foo(unittest.TestCase): def test_1(self): @@ -260,6 +263,7 @@ def get_exc_info(): self.assertEqual(len(dropped), 1) self.assertIn("raise self.failureException(msg)", dropped[0]) + @force_not_colorized def test_addFailure_filter_traceback_frames_chained_exception_self_loop(self): class Foo(unittest.TestCase): def test_1(self): @@ -285,6 +289,7 @@ def get_exc_info(): formatted_exc = result.failures[0][1] self.assertEqual(formatted_exc.count("Exception: Loop\n"), 1) + @force_not_colorized def 
test_addFailure_filter_traceback_frames_chained_exception_cycle(self): class Foo(unittest.TestCase): def test_1(self): @@ -446,6 +451,7 @@ def testFailFast(self): result.addUnexpectedSuccess(None) self.assertTrue(result.shouldStop) + @force_not_colorized def testFailFastSetByRunner(self): stream = BufferedWriter() runner = unittest.TextTestRunner(stream=stream, failfast=True) @@ -619,6 +625,7 @@ def _run_test(self, test_name, verbosity, tearDownError=None): test.run(result) return stream.getvalue() + @force_not_colorized def testDotsOutput(self): self.assertEqual(self._run_test('testSuccess', 1), '.') self.assertEqual(self._run_test('testSkip', 1), 's') @@ -627,6 +634,7 @@ def testDotsOutput(self): self.assertEqual(self._run_test('testExpectedFailure', 1), 'x') self.assertEqual(self._run_test('testUnexpectedSuccess', 1), 'u') + @force_not_colorized def testLongOutput(self): classname = f'{__name__}.{self.Test.__qualname__}' self.assertEqual(self._run_test('testSuccess', 2), @@ -642,17 +650,21 @@ def testLongOutput(self): self.assertEqual(self._run_test('testUnexpectedSuccess', 2), f'testUnexpectedSuccess ({classname}.testUnexpectedSuccess) ... unexpected success\n') + @force_not_colorized def testDotsOutputSubTestSuccess(self): self.assertEqual(self._run_test('testSubTestSuccess', 1), '.') + @force_not_colorized def testLongOutputSubTestSuccess(self): classname = f'{__name__}.{self.Test.__qualname__}' self.assertEqual(self._run_test('testSubTestSuccess', 2), f'testSubTestSuccess ({classname}.testSubTestSuccess) ... ok\n') + @force_not_colorized def testDotsOutputSubTestMixed(self): self.assertEqual(self._run_test('testSubTestMixed', 1), 'sFE') + @force_not_colorized def testLongOutputSubTestMixed(self): classname = f'{__name__}.{self.Test.__qualname__}' self.assertEqual(self._run_test('testSubTestMixed', 2), @@ -661,6 +673,7 @@ def testLongOutputSubTestMixed(self): f' testSubTestMixed ({classname}.testSubTestMixed) [fail] (c=3) ... 
FAIL\n' f' testSubTestMixed ({classname}.testSubTestMixed) [error] (d=4) ... ERROR\n') + @force_not_colorized def testDotsOutputTearDownFail(self): out = self._run_test('testSuccess', 1, AssertionError('fail')) self.assertEqual(out, 'F') @@ -671,6 +684,7 @@ def testDotsOutputTearDownFail(self): out = self._run_test('testSkip', 1, AssertionError('fail')) self.assertEqual(out, 'sF') + @force_not_colorized def testLongOutputTearDownFail(self): classname = f'{__name__}.{self.Test.__qualname__}' out = self._run_test('testSuccess', 2, AssertionError('fail')) diff --git a/Lib/test/test_unittest/test_runner.py b/Lib/test/test_unittest/test_runner.py index 1b9cef43e3f9c5..1131cd73128866 100644 --- a/Lib/test/test_unittest/test_runner.py +++ b/Lib/test/test_unittest/test_runner.py @@ -4,6 +4,7 @@ import pickle import subprocess from test import support +from test.support import force_not_colorized import unittest from unittest.case import _Outcome @@ -106,6 +107,7 @@ def cleanup2(*args, **kwargs): self.assertTrue(test.doCleanups()) self.assertEqual(cleanups, [(2, (), {}), (1, (1, 2, 3), dict(four='hello', five='goodbye'))]) + @force_not_colorized def testCleanUpWithErrors(self): class TestableTest(unittest.TestCase): def testNothing(self): @@ -416,6 +418,7 @@ def cleanup2(): self.assertIsInstance(e2[1], CustomError) self.assertEqual(str(e2[1]), 'cleanup1') + @force_not_colorized def test_with_errors_addCleanUp(self): ordering = [] class TestableTest(unittest.TestCase): @@ -439,6 +442,7 @@ def tearDownClass(cls): ['setUpClass', 'setUp', 'cleanup_exc', 'tearDownClass', 'cleanup_good']) + @force_not_colorized def test_run_with_errors_addClassCleanUp(self): ordering = [] class TestableTest(unittest.TestCase): @@ -462,6 +466,7 @@ def tearDownClass(cls): ['setUpClass', 'setUp', 'test', 'cleanup_good', 'tearDownClass', 'cleanup_exc']) + @force_not_colorized def test_with_errors_in_addClassCleanup_and_setUps(self): ordering = [] class_blow_up = False @@ -514,6 +519,7 @@ def 
tearDownClass(cls): ['setUpClass', 'setUp', 'tearDownClass', 'cleanup_exc']) + @force_not_colorized def test_with_errors_in_tearDownClass(self): ordering = [] class TestableTest(unittest.TestCase): @@ -590,6 +596,7 @@ def test(self): 'inner setup', 'inner test', 'inner cleanup', 'end outer test', 'outer cleanup']) + @force_not_colorized def test_run_empty_suite_error_message(self): class EmptyTest(unittest.TestCase): pass @@ -663,6 +670,7 @@ class Module(object): self.assertEqual(cleanups, [((1, 2), {'function': 'hello'})]) + @force_not_colorized def test_run_module_cleanUp(self): blowUp = True ordering = [] @@ -802,6 +810,7 @@ def tearDownClass(cls): 'tearDownClass', 'cleanup_good']) self.assertEqual(unittest.case._module_cleanups, []) + @force_not_colorized def test_run_module_cleanUp_when_teardown_exception(self): ordering = [] class Module(object): @@ -963,6 +972,7 @@ def testNothing(self): self.assertEqual(cleanups, [((1, 2), {'function': 3, 'self': 4})]) + @force_not_colorized def test_with_errors_in_addClassCleanup(self): ordering = [] @@ -996,6 +1006,7 @@ def tearDownClass(cls): ['setUpModule', 'setUpClass', 'test', 'tearDownClass', 'cleanup_exc', 'tearDownModule', 'cleanup_good']) + @force_not_colorized def test_with_errors_in_addCleanup(self): ordering = [] class Module(object): @@ -1026,6 +1037,7 @@ def tearDown(self): ['setUpModule', 'setUp', 'test', 'tearDown', 'cleanup_exc', 'tearDownModule', 'cleanup_good']) + @force_not_colorized def test_with_errors_in_addModuleCleanup_and_setUps(self): ordering = [] module_blow_up = False @@ -1318,6 +1330,7 @@ def MockResultClass(*args): expectedresult = (runner.stream, DESCRIPTIONS, VERBOSITY) self.assertEqual(runner._makeResult(), expectedresult) + @force_not_colorized @support.requires_subprocess() def test_warnings(self): """ diff --git a/Lib/test/test_unittest/test_skipping.py b/Lib/test/test_unittest/test_skipping.py index f146dcac18ecc0..f5cb860c60b156 100644 --- a/Lib/test/test_unittest/test_skipping.py 
+++ b/Lib/test/test_unittest/test_skipping.py @@ -1,5 +1,6 @@ import unittest +from test.support import force_not_colorized from test.test_unittest.support import LoggingResult @@ -293,6 +294,7 @@ def test_die(self): self.assertFalse(result.unexpectedSuccesses) self.assertTrue(result.wasSuccessful()) + @force_not_colorized def test_expected_failure_and_fail_in_cleanup(self): class Foo(unittest.TestCase): @unittest.expectedFailure @@ -372,6 +374,7 @@ def test_die(self): self.assertEqual(result.unexpectedSuccesses, [test]) self.assertFalse(result.wasSuccessful()) + @force_not_colorized def test_unexpected_success_and_fail_in_cleanup(self): class Foo(unittest.TestCase): @unittest.expectedFailure diff --git a/Lib/unittest/result.py b/Lib/unittest/result.py index 3ace0a5b7bf2ef..97262735aa8311 100644 --- a/Lib/unittest/result.py +++ b/Lib/unittest/result.py @@ -189,7 +189,9 @@ def _exc_info_to_string(self, err, test): tb_e = traceback.TracebackException( exctype, value, tb, capture_locals=self.tb_locals, compact=True) - msgLines = list(tb_e.format()) + from _colorize import can_colorize + + msgLines = list(tb_e.format(colorize=can_colorize())) if self.buffer: output = sys.stdout.getvalue() diff --git a/Lib/unittest/runner.py b/Lib/unittest/runner.py index 2bcadf0c998bd9..d60c295a1eddf7 100644 --- a/Lib/unittest/runner.py +++ b/Lib/unittest/runner.py @@ -4,6 +4,8 @@ import time import warnings +from _colorize import get_colors + from . 
import result from .case import _SubTest from .signals import registerResult @@ -13,18 +15,18 @@ class _WritelnDecorator(object): """Used to decorate file-like objects with a handy 'writeln' method""" - def __init__(self,stream): + def __init__(self, stream): self.stream = stream def __getattr__(self, attr): if attr in ('stream', '__getstate__'): raise AttributeError(attr) - return getattr(self.stream,attr) + return getattr(self.stream, attr) def writeln(self, arg=None): if arg: self.write(arg) - self.write('\n') # text-mode streams translate to \r\n if needed + self.write('\n') # text-mode streams translate to \r\n if needed class TextTestResult(result.TestResult): @@ -43,6 +45,7 @@ def __init__(self, stream, descriptions, verbosity, *, durations=None): self.showAll = verbosity > 1 self.dots = verbosity == 1 self.descriptions = descriptions + self._ansi = get_colors() self._newline = True self.durations = durations @@ -76,86 +79,102 @@ def _write_status(self, test, status): def addSubTest(self, test, subtest, err): if err is not None: + red, reset = self._ansi.RED, self._ansi.RESET if self.showAll: if issubclass(err[0], subtest.failureException): - self._write_status(subtest, "FAIL") + self._write_status(subtest, f"{red}FAIL{reset}") else: - self._write_status(subtest, "ERROR") + self._write_status(subtest, f"{red}ERROR{reset}") elif self.dots: if issubclass(err[0], subtest.failureException): - self.stream.write('F') + self.stream.write(f"{red}F{reset}") else: - self.stream.write('E') + self.stream.write(f"{red}E{reset}") self.stream.flush() super(TextTestResult, self).addSubTest(test, subtest, err) def addSuccess(self, test): super(TextTestResult, self).addSuccess(test) + green, reset = self._ansi.GREEN, self._ansi.RESET if self.showAll: - self._write_status(test, "ok") + self._write_status(test, f"{green}ok{reset}") elif self.dots: - self.stream.write('.') + self.stream.write(f"{green}.{reset}") self.stream.flush() def addError(self, test, err): 
super(TextTestResult, self).addError(test, err) + red, reset = self._ansi.RED, self._ansi.RESET if self.showAll: - self._write_status(test, "ERROR") + self._write_status(test, f"{red}ERROR{reset}") elif self.dots: - self.stream.write('E') + self.stream.write(f"{red}E{reset}") self.stream.flush() def addFailure(self, test, err): super(TextTestResult, self).addFailure(test, err) + red, reset = self._ansi.RED, self._ansi.RESET if self.showAll: - self._write_status(test, "FAIL") + self._write_status(test, f"{red}FAIL{reset}") elif self.dots: - self.stream.write('F') + self.stream.write(f"{red}F{reset}") self.stream.flush() def addSkip(self, test, reason): super(TextTestResult, self).addSkip(test, reason) + yellow, reset = self._ansi.YELLOW, self._ansi.RESET if self.showAll: - self._write_status(test, "skipped {0!r}".format(reason)) + self._write_status(test, f"{yellow}skipped{reset} {reason!r}") elif self.dots: - self.stream.write("s") + self.stream.write(f"{yellow}s{reset}") self.stream.flush() def addExpectedFailure(self, test, err): super(TextTestResult, self).addExpectedFailure(test, err) + yellow, reset = self._ansi.YELLOW, self._ansi.RESET if self.showAll: - self.stream.writeln("expected failure") + self.stream.writeln(f"{yellow}expected failure{reset}") self.stream.flush() elif self.dots: - self.stream.write("x") + self.stream.write(f"{yellow}x{reset}") self.stream.flush() def addUnexpectedSuccess(self, test): super(TextTestResult, self).addUnexpectedSuccess(test) + red, reset = self._ansi.RED, self._ansi.RESET if self.showAll: - self.stream.writeln("unexpected success") + self.stream.writeln(f"{red}unexpected success{reset}") self.stream.flush() elif self.dots: - self.stream.write("u") + self.stream.write(f"{red}u{reset}") self.stream.flush() def printErrors(self): + bold_red = self._ansi.BOLD_RED + red = self._ansi.RED + reset = self._ansi.RESET if self.dots or self.showAll: self.stream.writeln() self.stream.flush() - self.printErrorList('ERROR', self.errors) 
- self.printErrorList('FAIL', self.failures) - unexpectedSuccesses = getattr(self, 'unexpectedSuccesses', ()) + self.printErrorList(f"{red}ERROR{reset}", self.errors) + self.printErrorList(f"{red}FAIL{reset}", self.failures) + unexpectedSuccesses = getattr(self, "unexpectedSuccesses", ()) if unexpectedSuccesses: self.stream.writeln(self.separator1) for test in unexpectedSuccesses: - self.stream.writeln(f"UNEXPECTED SUCCESS: {self.getDescription(test)}") + self.stream.writeln( + f"{red}UNEXPECTED SUCCESS{bold_red}: " + f"{self.getDescription(test)}{reset}" + ) self.stream.flush() def printErrorList(self, flavour, errors): + bold_red, reset = self._ansi.BOLD_RED, self._ansi.RESET for test, err in errors: self.stream.writeln(self.separator1) - self.stream.writeln("%s: %s" % (flavour,self.getDescription(test))) + self.stream.writeln( + f"{flavour}{bold_red}: {self.getDescription(test)}{reset}" + ) self.stream.writeln(self.separator2) self.stream.writeln("%s" % err) self.stream.flush() @@ -232,7 +251,7 @@ def run(self, test): if self.warnings: # if self.warnings is set, use it to filter all the warnings warnings.simplefilter(self.warnings) - startTime = time.perf_counter() + start_time = time.perf_counter() startTestRun = getattr(result, 'startTestRun', None) if startTestRun is not None: startTestRun() @@ -242,8 +261,8 @@ def run(self, test): stopTestRun = getattr(result, 'stopTestRun', None) if stopTestRun is not None: stopTestRun() - stopTime = time.perf_counter() - timeTaken = stopTime - startTime + stop_time = time.perf_counter() + time_taken = stop_time - start_time result.printErrors() if self.durations is not None: self._printDurations(result) @@ -253,10 +272,10 @@ def run(self, test): run = result.testsRun self.stream.writeln("Ran %d test%s in %.3fs" % - (run, run != 1 and "s" or "", timeTaken)) + (run, run != 1 and "s" or "", time_taken)) self.stream.writeln() - expectedFails = unexpectedSuccesses = skipped = 0 + expected_fails = unexpected_successes = skipped 
= 0 try: results = map(len, (result.expectedFailures, result.unexpectedSuccesses, @@ -264,26 +283,35 @@ def run(self, test): except AttributeError: pass else: - expectedFails, unexpectedSuccesses, skipped = results + expected_fails, unexpected_successes, skipped = results infos = [] + ansi = get_colors() + bold_red = ansi.BOLD_RED + green = ansi.GREEN + red = ansi.RED + reset = ansi.RESET + yellow = ansi.YELLOW + if not result.wasSuccessful(): - self.stream.write("FAILED") + self.stream.write(f"{bold_red}FAILED{reset}") failed, errored = len(result.failures), len(result.errors) if failed: - infos.append("failures=%d" % failed) + infos.append(f"{bold_red}failures={failed}{reset}") if errored: - infos.append("errors=%d" % errored) + infos.append(f"{bold_red}errors={errored}{reset}") elif run == 0 and not skipped: - self.stream.write("NO TESTS RAN") + self.stream.write(f"{yellow}NO TESTS RAN{reset}") else: - self.stream.write("OK") + self.stream.write(f"{green}OK{reset}") if skipped: - infos.append("skipped=%d" % skipped) - if expectedFails: - infos.append("expected failures=%d" % expectedFails) - if unexpectedSuccesses: - infos.append("unexpected successes=%d" % unexpectedSuccesses) + infos.append(f"{yellow}skipped={skipped}{reset}") + if expected_fails: + infos.append(f"{yellow}expected failures={expected_fails}{reset}") + if unexpected_successes: + infos.append( + f"{red}unexpected successes={unexpected_successes}{reset}" + ) if infos: self.stream.writeln(" (%s)" % (", ".join(infos),)) else: diff --git a/Misc/NEWS.d/next/Library/2024-11-23-00-17-29.gh-issue-127221.OSXdFE.rst b/Misc/NEWS.d/next/Library/2024-11-23-00-17-29.gh-issue-127221.OSXdFE.rst new file mode 100644 index 00000000000000..0e4a03caf9f49d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-23-00-17-29.gh-issue-127221.OSXdFE.rst @@ -0,0 +1 @@ +Add colour to :mod:`unittest` output. Patch by Hugo van Kemenade. 
From 657d0e99aa8754372786120d6ec00c9d9970e775 Mon Sep 17 00:00:00 2001 From: Maciej Olko Date: Thu, 5 Dec 2024 21:52:58 +0100 Subject: [PATCH 50/76] [Docs] GDB howto: Fix block type of a cast example (#127621) --- Doc/howto/gdb_helpers.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/howto/gdb_helpers.rst b/Doc/howto/gdb_helpers.rst index 53bbf7ddaa2ab9..98ce813ca4ab02 100644 --- a/Doc/howto/gdb_helpers.rst +++ b/Doc/howto/gdb_helpers.rst @@ -180,7 +180,7 @@ regular machine-level integer:: (gdb) p some_python_integer $4 = 42 -The internal structure can be revealed with a cast to :c:expr:`PyLongObject *`: +The internal structure can be revealed with a cast to :c:expr:`PyLongObject *`:: (gdb) p *(PyLongObject*)some_python_integer $5 = {ob_base = {ob_base = {ob_refcnt = 8, ob_type = 0x3dad39f5e0}, ob_size = 1}, From f4f530804b9d8f089eba0f157ec2144c03b13651 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 5 Dec 2024 21:07:31 +0000 Subject: [PATCH 51/76] gh-127582: Make object resurrection thread-safe for free threading. (GH-127612) Objects may be temporarily "resurrected" in destructors when calling finalizers or watcher callbacks. We previously undid the resurrection by decrementing the reference count using `Py_SET_REFCNT`. This was not thread-safe because other threads might be accessing the object (modifying its reference count) if it was exposed by the finalizer, watcher callback, or temporarily accessed by a racy dictionary or list access. This adds internal-only thread-safe functions for temporary object resurrection during destructors. 
--- Include/internal/pycore_object.h | 44 +++++++++++++++++++ ...-12-05-19-25-00.gh-issue-127582.ogUY2a.rst | 2 + Objects/codeobject.c | 7 +-- Objects/dictobject.c | 7 +-- Objects/funcobject.c | 7 +-- Objects/object.c | 40 ++++++++++++++--- 6 files changed, 87 insertions(+), 20 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-12-05-19-25-00.gh-issue-127582.ogUY2a.rst diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index ce876b093b2522..6b0b464a6fdb96 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -697,8 +697,52 @@ _PyObject_SetMaybeWeakref(PyObject *op) } } +extern int _PyObject_ResurrectEndSlow(PyObject *op); #endif +// Temporarily resurrects an object during deallocation. The refcount is set +// to one. +static inline void +_PyObject_ResurrectStart(PyObject *op) +{ + assert(Py_REFCNT(op) == 0); +#ifdef Py_REF_DEBUG + _Py_IncRefTotal(_PyThreadState_GET()); +#endif +#ifdef Py_GIL_DISABLED + _Py_atomic_store_uintptr_relaxed(&op->ob_tid, _Py_ThreadId()); + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, 1); + _Py_atomic_store_ssize_relaxed(&op->ob_ref_shared, 0); +#else + Py_SET_REFCNT(op, 1); +#endif +} + +// Undoes an object resurrection by decrementing the refcount without calling +// _Py_Dealloc(). Returns 0 if the object is dead (the normal case), and +// deallocation should continue. Returns 1 if the object is still alive. 
+static inline int +_PyObject_ResurrectEnd(PyObject *op) +{ +#ifdef Py_REF_DEBUG + _Py_DecRefTotal(_PyThreadState_GET()); +#endif +#ifndef Py_GIL_DISABLED + Py_SET_REFCNT(op, Py_REFCNT(op) - 1); + return Py_REFCNT(op) != 0; +#else + uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local); + Py_ssize_t shared = _Py_atomic_load_ssize_acquire(&op->ob_ref_shared); + if (_Py_IsOwnedByCurrentThread(op) && local == 1 && shared == 0) { + // Fast-path: object has a single refcount and is owned by this thread + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, 0); + return 0; + } + // Slow-path: object has a shared refcount or is not owned by this thread + return _PyObject_ResurrectEndSlow(op); +#endif +} + /* Tries to incref op and returns 1 if successful or 0 otherwise. */ static inline int _Py_TryIncref(PyObject *op) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-12-05-19-25-00.gh-issue-127582.ogUY2a.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-05-19-25-00.gh-issue-127582.ogUY2a.rst new file mode 100644 index 00000000000000..59491feeb9bcfa --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-05-19-25-00.gh-issue-127582.ogUY2a.rst @@ -0,0 +1,2 @@ +Fix non-thread-safe object resurrection when calling finalizers and watcher +callbacks in the free threading build. 
diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 148350cc4b9195..eb8de136ee6432 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -1867,14 +1867,11 @@ free_monitoring_data(_PyCoMonitoringData *data) static void code_dealloc(PyCodeObject *co) { - assert(Py_REFCNT(co) == 0); - Py_SET_REFCNT(co, 1); + _PyObject_ResurrectStart((PyObject *)co); notify_code_watchers(PY_CODE_EVENT_DESTROY, co); - if (Py_REFCNT(co) > 1) { - Py_SET_REFCNT(co, Py_REFCNT(co) - 1); + if (_PyObject_ResurrectEnd((PyObject *)co)) { return; } - Py_SET_REFCNT(co, 0); #ifdef Py_GIL_DISABLED PyObject_GC_UnTrack(co); diff --git a/Objects/dictobject.c b/Objects/dictobject.c index a13d8084d14d66..1c9f86438dadc3 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -3162,14 +3162,11 @@ dict_dealloc(PyObject *self) { PyDictObject *mp = (PyDictObject *)self; PyInterpreterState *interp = _PyInterpreterState_GET(); - assert(Py_REFCNT(mp) == 0); - Py_SET_REFCNT(mp, 1); + _PyObject_ResurrectStart(self); _PyDict_NotifyEvent(interp, PyDict_EVENT_DEALLOCATED, mp, NULL, NULL); - if (Py_REFCNT(mp) > 1) { - Py_SET_REFCNT(mp, Py_REFCNT(mp) - 1); + if (_PyObject_ResurrectEnd(self)) { return; } - Py_SET_REFCNT(mp, 0); PyDictValues *values = mp->ma_values; PyDictKeysObject *keys = mp->ma_keys; Py_ssize_t i, n; diff --git a/Objects/funcobject.c b/Objects/funcobject.c index 4ba47285f7152f..cca7f01498013e 100644 --- a/Objects/funcobject.c +++ b/Objects/funcobject.c @@ -1092,14 +1092,11 @@ static void func_dealloc(PyObject *self) { PyFunctionObject *op = _PyFunction_CAST(self); - assert(Py_REFCNT(op) == 0); - Py_SET_REFCNT(op, 1); + _PyObject_ResurrectStart(self); handle_func_event(PyFunction_EVENT_DESTROY, op, NULL); - if (Py_REFCNT(op) > 1) { - Py_SET_REFCNT(op, Py_REFCNT(op) - 1); + if (_PyObject_ResurrectEnd(self)) { return; } - Py_SET_REFCNT(op, 0); _PyObject_GC_UNTRACK(op); if (op->func_weakreflist != NULL) { PyObject_ClearWeakRefs((PyObject *) op); diff --git a/Objects/object.c 
b/Objects/object.c index 8868fa29066404..74f47fa4239032 100644 --- a/Objects/object.c +++ b/Objects/object.c @@ -362,8 +362,10 @@ is_dead(PyObject *o) } # endif -void -_Py_DecRefSharedDebug(PyObject *o, const char *filename, int lineno) +// Decrement the shared reference count of an object. Return 1 if the object +// is dead and should be deallocated, 0 otherwise. +static int +_Py_DecRefSharedIsDead(PyObject *o, const char *filename, int lineno) { // Should we queue the object for the owning thread to merge? int should_queue; @@ -404,6 +406,15 @@ _Py_DecRefSharedDebug(PyObject *o, const char *filename, int lineno) } else if (new_shared == _Py_REF_MERGED) { // refcount is zero AND merged + return 1; + } + return 0; +} + +void +_Py_DecRefSharedDebug(PyObject *o, const char *filename, int lineno) +{ + if (_Py_DecRefSharedIsDead(o, filename, lineno)) { _Py_Dealloc(o); } } @@ -472,6 +483,26 @@ _Py_ExplicitMergeRefcount(PyObject *op, Py_ssize_t extra) &shared, new_shared)); return refcnt; } + +// The more complicated "slow" path for undoing the resurrection of an object. +int +_PyObject_ResurrectEndSlow(PyObject *op) +{ + if (_Py_IsImmortal(op)) { + return 1; + } + if (_Py_IsOwnedByCurrentThread(op)) { + // If the object is owned by the current thread, give up ownership and + // merge the refcount. This isn't necessary in all cases, but it + // simplifies the implementation. + Py_ssize_t refcount = _Py_ExplicitMergeRefcount(op, -1); + return refcount != 0; + } + int is_dead = _Py_DecRefSharedIsDead(op, NULL, 0); + return !is_dead; +} + + #endif /* Py_GIL_DISABLED */ @@ -550,7 +581,7 @@ PyObject_CallFinalizerFromDealloc(PyObject *self) } /* Temporarily resurrect the object. */ - Py_SET_REFCNT(self, 1); + _PyObject_ResurrectStart(self); PyObject_CallFinalizer(self); @@ -560,8 +591,7 @@ PyObject_CallFinalizerFromDealloc(PyObject *self) /* Undo the temporary resurrection; can't use DECREF here, it would * cause a recursive call. 
*/ - Py_SET_REFCNT(self, Py_REFCNT(self) - 1); - if (Py_REFCNT(self) == 0) { + if (!_PyObject_ResurrectEnd(self)) { return 0; /* this is the normal path out */ } From 8b3cccf3f9508572d85b0044519f2bd5715dacad Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Thu, 5 Dec 2024 21:39:43 +0000 Subject: [PATCH 52/76] GH-125413: Revert addition of `pathlib.Path.scandir()` method (#127377) Remove documentation for `pathlib.Path.scandir()`, and rename the method to `_scandir()`. In the private pathlib ABCs, make `iterdir()` abstract and call it from `_scandir()`. It's not worthwhile to add this method at the moment - see discussion: https://discuss.python.org/t/ergonomics-of-new-pathlib-path-scandir/71721 Co-authored-by: Steve Dower --- Doc/library/pathlib.rst | 29 ----------- Doc/whatsnew/3.14.rst | 6 --- Lib/pathlib/_abc.py | 15 +++--- Lib/pathlib/_local.py | 4 +- Lib/test/test_pathlib/test_pathlib_abc.py | 48 ++++--------------- Misc/NEWS.d/3.14.0a2.rst | 2 +- ...-11-29-00-15-59.gh-issue-125413.WCN0vv.rst | 3 ++ 7 files changed, 22 insertions(+), 85 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-29-00-15-59.gh-issue-125413.WCN0vv.rst diff --git a/Doc/library/pathlib.rst b/Doc/library/pathlib.rst index a42ac1f8bcdf71..4b48880d6d9a18 100644 --- a/Doc/library/pathlib.rst +++ b/Doc/library/pathlib.rst @@ -1289,35 +1289,6 @@ Reading directories raised. -.. method:: Path.scandir() - - When the path points to a directory, return an iterator of - :class:`os.DirEntry` objects corresponding to entries in the directory. The - returned iterator supports the :term:`context manager` protocol. It is - implemented using :func:`os.scandir` and gives the same guarantees. - - Using :meth:`~Path.scandir` instead of :meth:`~Path.iterdir` can - significantly increase the performance of code that also needs file type or - file attribute information, because :class:`os.DirEntry` objects expose - this information if the operating system provides it when scanning a - directory. 
- - The following example displays the names of subdirectories. The - ``entry.is_dir()`` check will generally not make an additional system call:: - - >>> p = Path('docs') - >>> with p.scandir() as entries: - ... for entry in entries: - ... if entry.is_dir(): - ... entry.name - ... - '_templates' - '_build' - '_static' - - .. versionadded:: 3.14 - - .. method:: Path.glob(pattern, *, case_sensitive=None, recurse_symlinks=False) Glob the given relative *pattern* in the directory represented by this path, diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index db25c037e509b6..b300e348679438 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -532,12 +532,6 @@ pathlib (Contributed by Barney Gale in :gh:`73991`.) -* Add :meth:`pathlib.Path.scandir` to scan a directory and return an iterator - of :class:`os.DirEntry` objects. This is exactly equivalent to calling - :func:`os.scandir` on a path object. - - (Contributed by Barney Gale in :gh:`125413`.) - pdb --- diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 2b314b6c9a16bf..86617ff2616f33 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -94,7 +94,7 @@ class PathGlobber(_GlobberBase): lexists = operator.methodcaller('exists', follow_symlinks=False) add_slash = operator.methodcaller('joinpath', '') - scandir = operator.methodcaller('scandir') + scandir = operator.methodcaller('_scandir') @staticmethod def concat_path(path, text): @@ -632,13 +632,14 @@ def write_text(self, data, encoding=None, errors=None, newline=None): with self.open(mode='w', encoding=encoding, errors=errors, newline=newline) as f: return f.write(data) - def scandir(self): - """Yield os.DirEntry objects of the directory contents. + def _scandir(self): + """Yield os.DirEntry-like objects of the directory contents. The children are yielded in arbitrary order, and the special entries '.' and '..' are not included. 
""" - raise UnsupportedOperation(self._unsupported_msg('scandir()')) + import contextlib + return contextlib.nullcontext(self.iterdir()) def iterdir(self): """Yield path objects of the directory contents. @@ -646,9 +647,7 @@ def iterdir(self): The children are yielded in arbitrary order, and the special entries '.' and '..' are not included. """ - with self.scandir() as entries: - names = [entry.name for entry in entries] - return map(self.joinpath, names) + raise UnsupportedOperation(self._unsupported_msg('iterdir()')) def _glob_selector(self, parts, case_sensitive, recurse_symlinks): if case_sensitive is None: @@ -698,7 +697,7 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False): if not top_down: paths.append((path, dirnames, filenames)) try: - with path.scandir() as entries: + with path._scandir() as entries: for entry in entries: name = entry.name try: diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index b5d9dc49f58463..bb8a252c0e94e2 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -634,8 +634,8 @@ def _filter_trailing_slash(self, paths): path_str = path_str[:-1] yield path_str - def scandir(self): - """Yield os.DirEntry objects of the directory contents. + def _scandir(self): + """Yield os.DirEntry-like objects of the directory contents. The children are yielded in arbitrary order, and the special entries '.' and '..' are not included. diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 5fa2f550cefcf4..7ba3fa823a30b9 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1,5 +1,4 @@ import collections -import contextlib import io import os import errno @@ -1418,24 +1417,6 @@ def close(self): 'st_mode st_ino st_dev st_nlink st_uid st_gid st_size st_atime st_mtime st_ctime') -class DummyDirEntry: - """ - Minimal os.DirEntry-like object. Returned from DummyPath.scandir(). 
- """ - __slots__ = ('name', '_is_symlink', '_is_dir') - - def __init__(self, name, is_symlink, is_dir): - self.name = name - self._is_symlink = is_symlink - self._is_dir = is_dir - - def is_symlink(self): - return self._is_symlink - - def is_dir(self, *, follow_symlinks=True): - return self._is_dir and (follow_symlinks or not self._is_symlink) - - class DummyPath(PathBase): """ Simple implementation of PathBase that keeps files and directories in @@ -1503,25 +1484,14 @@ def open(self, mode='r', buffering=-1, encoding=None, stream = io.TextIOWrapper(stream, encoding=encoding, errors=errors, newline=newline) return stream - @contextlib.contextmanager - def scandir(self): - path = self.resolve() - path_str = str(path) - if path_str in self._files: - raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path_str) - elif path_str in self._directories: - yield iter([path.joinpath(name)._dir_entry for name in self._directories[path_str]]) + def iterdir(self): + path = str(self.resolve()) + if path in self._files: + raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path) + elif path in self._directories: + return iter([self / name for name in self._directories[path]]) else: - raise FileNotFoundError(errno.ENOENT, "File not found", path_str) - - @property - def _dir_entry(self): - path_str = str(self) - is_symlink = path_str in self._symlinks - is_directory = (path_str in self._directories - if not is_symlink - else self._symlinks[path_str][1]) - return DummyDirEntry(self.name, is_symlink, is_directory) + raise FileNotFoundError(errno.ENOENT, "File not found", path) def mkdir(self, mode=0o777, parents=False, exist_ok=False): path = str(self.parent.resolve() / self.name) @@ -2214,9 +2184,9 @@ def test_iterdir_nodir(self): def test_scandir(self): p = self.cls(self.base) - with p.scandir() as entries: + with p._scandir() as entries: self.assertTrue(list(entries)) - with p.scandir() as entries: + with p._scandir() as entries: for entry in entries: child = p / 
entry.name self.assertIsNotNone(entry) diff --git a/Misc/NEWS.d/3.14.0a2.rst b/Misc/NEWS.d/3.14.0a2.rst index 7384ce54cb8914..d82ec98b7a3c87 100644 --- a/Misc/NEWS.d/3.14.0a2.rst +++ b/Misc/NEWS.d/3.14.0a2.rst @@ -597,7 +597,7 @@ TypeError is now raised instead of ValueError for some logical errors. .. nonce: Jat5kq .. section: Library -Add :meth:`pathlib.Path.scandir` method to efficiently fetch directory +Add :meth:`!pathlib.Path.scandir` method to efficiently fetch directory children and their file attributes. This is a trivial wrapper of :func:`os.scandir`. diff --git a/Misc/NEWS.d/next/Library/2024-11-29-00-15-59.gh-issue-125413.WCN0vv.rst b/Misc/NEWS.d/next/Library/2024-11-29-00-15-59.gh-issue-125413.WCN0vv.rst new file mode 100644 index 00000000000000..b56a77b4294ace --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-29-00-15-59.gh-issue-125413.WCN0vv.rst @@ -0,0 +1,3 @@ +Revert addition of :meth:`!pathlib.Path.scandir`. This method was added in +3.14.0a2. The optimizations remain for file system paths, but other +subclasses should only have to implement :meth:`pathlib.Path.iterdir`. From 25eee578c8e369b027da6d9d2725f29df6ef1cbd Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Fri, 6 Dec 2024 03:47:51 +0100 Subject: [PATCH 53/76] gh-127627: Add `posix._emscripten_debugger` function (#127628) Add a posix._emscripten_debugger function to add an emscripten breakpoint. 
--- ...-12-05-10-14-52.gh-issue-127627.fgCHOZ.rst | 2 ++ Modules/clinic/posixmodule.c.h | 28 ++++++++++++++++++- Modules/posixmodule.c | 22 ++++++++++++++- 3 files changed, 50 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-12-05-10-14-52.gh-issue-127627.fgCHOZ.rst diff --git a/Misc/NEWS.d/next/Library/2024-12-05-10-14-52.gh-issue-127627.fgCHOZ.rst b/Misc/NEWS.d/next/Library/2024-12-05-10-14-52.gh-issue-127627.fgCHOZ.rst new file mode 100644 index 00000000000000..48a6c7d30b4a26 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-05-10-14-52.gh-issue-127627.fgCHOZ.rst @@ -0,0 +1,2 @@ +Added ``posix._emscripten_debugger()`` to help with debugging the test suite on +the Emscripten target. diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index cd0c4faeac83d1..554299b8598299 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -12447,6 +12447,28 @@ os__create_environ(PyObject *module, PyObject *Py_UNUSED(ignored)) return os__create_environ_impl(module); } +#if defined(__EMSCRIPTEN__) + +PyDoc_STRVAR(os__emscripten_debugger__doc__, +"_emscripten_debugger($module, /)\n" +"--\n" +"\n" +"Create a breakpoint for the JavaScript debugger. 
Emscripten only."); + +#define OS__EMSCRIPTEN_DEBUGGER_METHODDEF \ + {"_emscripten_debugger", (PyCFunction)os__emscripten_debugger, METH_NOARGS, os__emscripten_debugger__doc__}, + +static PyObject * +os__emscripten_debugger_impl(PyObject *module); + +static PyObject * +os__emscripten_debugger(PyObject *module, PyObject *Py_UNUSED(ignored)) +{ + return os__emscripten_debugger_impl(module); +} + +#endif /* defined(__EMSCRIPTEN__) */ + #ifndef OS_TTYNAME_METHODDEF #define OS_TTYNAME_METHODDEF #endif /* !defined(OS_TTYNAME_METHODDEF) */ @@ -13114,4 +13136,8 @@ os__create_environ(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #define OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #endif /* !defined(OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF) */ -/*[clinic end generated code: output=7ee14f5e880092f5 input=a9049054013a1b77]*/ + +#ifndef OS__EMSCRIPTEN_DEBUGGER_METHODDEF + #define OS__EMSCRIPTEN_DEBUGGER_METHODDEF +#endif /* !defined(OS__EMSCRIPTEN_DEBUGGER_METHODDEF) */ +/*[clinic end generated code: output=9c2ca1dbf986c62c input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 2c26fbeac9a1be..2045c6065b8e7a 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -84,6 +84,9 @@ extern char * _getpty(int *, int, mode_t, int); #endif +#ifdef __EMSCRIPTEN__ +#include "emscripten.h" // emscripten_debugger() +#endif /* * A number of APIs are available on macOS from a certain macOS version. @@ -16845,8 +16848,24 @@ os__create_environ_impl(PyObject *module) } -static PyMethodDef posix_methods[] = { +#ifdef __EMSCRIPTEN__ +/*[clinic input] +os._emscripten_debugger + +Create a breakpoint for the JavaScript debugger. Emscripten only. 
+[clinic start generated code]*/ + +static PyObject * +os__emscripten_debugger_impl(PyObject *module) +/*[clinic end generated code: output=ad47dc3bf0661343 input=d814b1877fb6083a]*/ +{ + emscripten_debugger(); + Py_RETURN_NONE; +} +#endif /* __EMSCRIPTEN__ */ + +static PyMethodDef posix_methods[] = { OS_STAT_METHODDEF OS_ACCESS_METHODDEF OS_TTYNAME_METHODDEF @@ -17060,6 +17079,7 @@ static PyMethodDef posix_methods[] = { OS__INPUTHOOK_METHODDEF OS__IS_INPUTHOOK_INSTALLED_METHODDEF OS__CREATE_ENVIRON_METHODDEF + OS__EMSCRIPTEN_DEBUGGER_METHODDEF {NULL, NULL} /* Sentinel */ }; From e991ac8f2037d78140e417cc9a9486223eb3e786 Mon Sep 17 00:00:00 2001 From: "J. Nick Koston" Date: Thu, 5 Dec 2024 22:33:03 -0600 Subject: [PATCH 54/76] gh-127655: Ensure `_SelectorSocketTransport.writelines` pauses the protocol if needed (#127656) Ensure `_SelectorSocketTransport.writelines` pauses the protocol if it reaches the high water mark as needed. Co-authored-by: Kumar Aditya --- Lib/asyncio/selector_events.py | 1 + Lib/test/test_asyncio/test_selector_events.py | 12 ++++++++++++ .../2024-12-05-21-35-19.gh-issue-127655.xpPoOf.rst | 1 + 3 files changed, 14 insertions(+) create mode 100644 Misc/NEWS.d/next/Security/2024-12-05-21-35-19.gh-issue-127655.xpPoOf.rst diff --git a/Lib/asyncio/selector_events.py b/Lib/asyncio/selector_events.py index f94bf10b4225e7..f1ab9b12d69a5d 100644 --- a/Lib/asyncio/selector_events.py +++ b/Lib/asyncio/selector_events.py @@ -1175,6 +1175,7 @@ def writelines(self, list_of_data): # If the entire buffer couldn't be written, register a write handler if self._buffer: self._loop._add_writer(self._sock_fd, self._write_ready) + self._maybe_pause_protocol() def can_write_eof(self): return True diff --git a/Lib/test/test_asyncio/test_selector_events.py b/Lib/test/test_asyncio/test_selector_events.py index aaeda33dd0c677..efca30f37414f9 100644 --- a/Lib/test/test_asyncio/test_selector_events.py +++ b/Lib/test/test_asyncio/test_selector_events.py @@ -805,6 +805,18 @@ 
def test_writelines_send_partial(self): self.assertTrue(self.sock.send.called) self.assertTrue(self.loop.writers) + def test_writelines_pauses_protocol(self): + data = memoryview(b'data') + self.sock.send.return_value = 2 + self.sock.send.fileno.return_value = 7 + + transport = self.socket_transport() + transport._high_water = 1 + transport.writelines([data]) + self.assertTrue(self.protocol.pause_writing.called) + self.assertTrue(self.sock.send.called) + self.assertTrue(self.loop.writers) + @unittest.skipUnless(selector_events._HAS_SENDMSG, 'no sendmsg') def test_write_sendmsg_full(self): data = memoryview(b'data') diff --git a/Misc/NEWS.d/next/Security/2024-12-05-21-35-19.gh-issue-127655.xpPoOf.rst b/Misc/NEWS.d/next/Security/2024-12-05-21-35-19.gh-issue-127655.xpPoOf.rst new file mode 100644 index 00000000000000..76cfc58121d3bd --- /dev/null +++ b/Misc/NEWS.d/next/Security/2024-12-05-21-35-19.gh-issue-127655.xpPoOf.rst @@ -0,0 +1 @@ +Fixed the :class:`!asyncio.selector_events._SelectorSocketTransport` transport not pausing writes for the protocol when the buffer reaches the high water mark when using :meth:`asyncio.WriteTransport.writelines`. From 8b7c194c7bf7e547e4f6317528f0dcb9344c18c7 Mon Sep 17 00:00:00 2001 From: Sergey B Kirpichev Date: Fri, 6 Dec 2024 13:28:32 +0300 Subject: [PATCH 55/76] gh-120010: Fix invalid (nan+nanj) results in _Py_c_prod() (GH-120287) In some cases, previously computed as (nan+nanj), we could recover meaningful component values in the result, see e.g. 
the C11, Annex G.5.1, routine _Cmultd(): >>> z = 1e300+1j >>> z*(nan+infj) # was (nan+nanj) (-inf+infj) That also fixes some complex powers for small integer exponents, computed with the optimized algorithm (by squaring): >>> z**5 # was (nan+nanj) Traceback (most recent call last): File "", line 1, in z**5 ~^^~ OverflowError: complex exponentiation --- Lib/test/test_complex.py | 17 ++++++ ...-06-04-08-26-25.gh-issue-120010._z-AWz.rst | 2 + Objects/complexobject.c | 60 +++++++++++++++++-- 3 files changed, 75 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-06-04-08-26-25.gh-issue-120010._z-AWz.rst diff --git a/Lib/test/test_complex.py b/Lib/test/test_complex.py index 179556f57e884f..fd002fb00ac338 100644 --- a/Lib/test/test_complex.py +++ b/Lib/test/test_complex.py @@ -299,6 +299,22 @@ def test_mul(self): self.assertRaises(TypeError, operator.mul, 1j, None) self.assertRaises(TypeError, operator.mul, None, 1j) + for z, w, r in [(1e300+1j, complex(INF, INF), complex(NAN, INF)), + (1e300+1j, complex(NAN, INF), complex(-INF, INF)), + (1e300+1j, complex(INF, NAN), complex(INF, INF)), + (complex(INF, 1), complex(NAN, INF), complex(NAN, INF)), + (complex(INF, 1), complex(INF, NAN), complex(INF, NAN)), + (complex(NAN, 1), complex(1, INF), complex(-INF, NAN)), + (complex(1, NAN), complex(1, INF), complex(NAN, INF)), + (complex(1e200, NAN), complex(1e200, NAN), complex(INF, NAN)), + (complex(1e200, NAN), complex(NAN, 1e200), complex(NAN, INF)), + (complex(NAN, 1e200), complex(1e200, NAN), complex(NAN, INF)), + (complex(NAN, 1e200), complex(NAN, 1e200), complex(-INF, NAN)), + (complex(NAN, NAN), complex(NAN, NAN), complex(NAN, NAN))]: + with self.subTest(z=z, w=w, r=r): + self.assertComplexesAreIdentical(z * w, r) + self.assertComplexesAreIdentical(w * z, r) + def test_mod(self): # % is no longer supported on complex numbers with self.assertRaises(TypeError): @@ -340,6 +356,7 @@ def test_pow(self): self.assertAlmostEqual(pow(1j, 200), 1)
self.assertRaises(ValueError, pow, 1+1j, 1+1j, 1+1j) self.assertRaises(OverflowError, pow, 1e200+1j, 1e200+1j) + self.assertRaises(OverflowError, pow, 1e200+1j, 5) self.assertRaises(TypeError, pow, 1j, None) self.assertRaises(TypeError, pow, None, 1j) self.assertAlmostEqual(pow(1j, 0.5), 0.7071067811865476+0.7071067811865475j) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-06-04-08-26-25.gh-issue-120010._z-AWz.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-06-04-08-26-25.gh-issue-120010._z-AWz.rst new file mode 100644 index 00000000000000..7954c7f5927397 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-06-04-08-26-25.gh-issue-120010._z-AWz.rst @@ -0,0 +1,2 @@ +Correct invalid corner cases which resulted in ``(nan+nanj)`` output in complex +multiplication, e.g., ``(1e300+1j)*(nan+infj)``. Patch by Sergey B Kirpichev. diff --git a/Objects/complexobject.c b/Objects/complexobject.c index 8fbca3cb02d80a..bf6187efac941f 100644 --- a/Objects/complexobject.c +++ b/Objects/complexobject.c @@ -85,11 +85,63 @@ _Py_c_neg(Py_complex a) } Py_complex -_Py_c_prod(Py_complex a, Py_complex b) +_Py_c_prod(Py_complex z, Py_complex w) { - Py_complex r; - r.real = a.real*b.real - a.imag*b.imag; - r.imag = a.real*b.imag + a.imag*b.real; + double a = z.real, b = z.imag, c = w.real, d = w.imag; + double ac = a*c, bd = b*d, ad = a*d, bc = b*c; + Py_complex r = {ac - bd, ad + bc}; + + /* Recover infinities that computed as nan+nanj. See e.g. the C11, + Annex G.5.1, routine _Cmultd(). */ + if (isnan(r.real) && isnan(r.imag)) { + int recalc = 0; + + if (isinf(a) || isinf(b)) { /* z is infinite */ + /* "Box" the infinity and change nans in the other factor to 0 */ + a = copysign(isinf(a) ? 1.0 : 0.0, a); + b = copysign(isinf(b) ? 
1.0 : 0.0, b); + if (isnan(c)) { + c = copysign(0.0, c); + } + if (isnan(d)) { + d = copysign(0.0, d); + } + recalc = 1; + } + if (isinf(c) || isinf(d)) { /* w is infinite */ + /* "Box" the infinity and change nans in the other factor to 0 */ + c = copysign(isinf(c) ? 1.0 : 0.0, c); + d = copysign(isinf(d) ? 1.0 : 0.0, d); + if (isnan(a)) { + a = copysign(0.0, a); + } + if (isnan(b)) { + b = copysign(0.0, b); + } + recalc = 1; + } + if (!recalc && (isinf(ac) || isinf(bd) || isinf(ad) || isinf(bc))) { + /* Recover infinities from overflow by changing nans to 0 */ + if (isnan(a)) { + a = copysign(0.0, a); + } + if (isnan(b)) { + b = copysign(0.0, b); + } + if (isnan(c)) { + c = copysign(0.0, c); + } + if (isnan(d)) { + d = copysign(0.0, d); + } + recalc = 1; + } + if (recalc) { + r.real = Py_INFINITY*(a*c - b*d); + r.imag = Py_INFINITY*(a*d + b*c); + } + } + return r; } From 023b7d2141467017abc27de864f3f44677768cb3 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 6 Dec 2024 10:46:59 +0000 Subject: [PATCH 56/76] GH-126491: Lower heap size limit with faster marking (GH-127519) * Faster marking of reachable objects * Changes calculation of work to do and work done. * Merges transitive closure calculations --- InternalDocs/garbage_collector.md | 50 ++++- Lib/test/test_gc.py | 14 +- Objects/dictobject.c | 4 +- Objects/genobject.c | 69 +------ Objects/typeobject.c | 13 ++ Python/gc.c | 301 ++++++++++++++---------------- 6 files changed, 208 insertions(+), 243 deletions(-) diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index 08db080a200ea4..4761f78f3593e3 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -199,22 +199,22 @@ unreachable: ```pycon >>> import gc ->>> +>>> >>> class Link: ... def __init__(self, next_link=None): ... self.next_link = next_link -... +... 
>>> link_3 = Link() >>> link_2 = Link(link_3) >>> link_1 = Link(link_2) >>> link_3.next_link = link_1 >>> A = link_1 >>> del link_1, link_2, link_3 ->>> +>>> >>> link_4 = Link() >>> link_4.next_link = link_4 >>> del link_4 ->>> +>>> >>> # Collect the unreachable Link object (and its .__dict__ dict). >>> gc.collect() 2 @@ -459,11 +459,11 @@ specifically in a generation by calling `gc.collect(generation=NUM)`. >>> # Create a reference cycle. >>> x = MyObj() >>> x.self = x ->>> +>>> >>> # Initially the object is in the young generation. >>> gc.get_objects(generation=0) [..., <__main__.MyObj object at 0x7fbcc12a3400>, ...] ->>> +>>> >>> # After a collection of the youngest generation the object >>> # moves to the old generation. >>> gc.collect(generation=0) @@ -515,6 +515,44 @@ increment. All objects directly referred to from those stack frames are added to the working set. Then the above algorithm is repeated, starting from step 2. +Determining how much work to do +------------------------------- + +We need to do a certain amount of work to ensure that garbage is collected, +but doing too much work slows down execution. + +To work out how much work we need to do, consider a heap with `L` live objects +and `G0` garbage objects at the start of a full scavenge and `G1` garbage objects +at the end of the scavenge. We don't want the amount of garbage to grow, `G1 ≤ G0`, and +we don't want too much garbage (say 1/3 of the heap maximum), `G0 ≤ L/2`. +For each full scavenge we must visit all objects, `T == L + G0 + G1`, during which +`G1` garbage objects are created. + +The number of new objects created `N` must be at least the new garbage created, `N ≥ G1`, +assuming that the number of live objects remains roughly constant. +If we set `T == 4*N` we get `T > 4*G1` and `T = L + G0 + G1` => `L + G0 > 3G1` +For a steady state heap (`G0 == G1`) we get `L > 2G0` and the desired garbage ratio.
+ +In other words, to keep the garbage fraction to 1/3 or less we need to visit +4 times as many objects as are newly created. + +We can do better than this though. Not all new objects will be garbage. +Consider the heap at the end of the scavenge with `L1` live objects and `G1` +garbage. Also, note that `T == M + I` where `M` is the number of objects marked +as reachable and `I` is the number of objects visited in increments. +Everything in `M` is live, so `I ≥ G0` and in practice `I` is closer to `G0 + G1`. + +If we choose the amount of work done such that `2*M + I == 6N` then we can do +less work in most cases, but are still guaranteed to keep up. +Since `I ≳ G0 + G1` (not strictly true, but close enough) +`T == M + I == (6N + I)/2` and `(6N + I)/2 ≳ 4G`, so we can keep up. + +The reason that this improves performance is that `M` is usually much larger +than `I`. If `M == 10I`, then `T ≅ 3N`. + +Finally, instead of using a fixed multiple of 8, we gradually increase it as the +heap grows. This avoids wasting work for small heaps and during startup. + Optimization: reusing fields to save memory =========================================== diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index b5140057a69d36..baf8e95dffdfce 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1161,27 +1161,19 @@ def make_ll(depth): return head head = make_ll(1000) - count = 1000 - - # There will be some objects we aren't counting, - # e.g. the gc stats dicts. This test checks - # that the counts don't grow, so we try to - # correct for the uncounted objects - # This is just an estimate. 
- CORRECTION = 20 enabled = gc.isenabled() gc.enable() olds = [] initial_heap_size = _testinternalcapi.get_tracked_heap_size() - for i in range(20_000): + iterations = max(20_000, initial_heap_size) + for i in range(iterations): newhead = make_ll(20) - count += 20 newhead.surprise = head olds.append(newhead) if len(olds) == 20: new_objects = _testinternalcapi.get_tracked_heap_size() - initial_heap_size - self.assertLess(new_objects, 27_000, f"Heap growing. Reached limit after {i} iterations") + self.assertLess(new_objects, initial_heap_size/2, f"Heap growing. Reached limit after {i} iterations") del olds[:] if not enabled: gc.disable() diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 1c9f86438dadc3..de518b8dc5024b 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -7064,9 +7064,7 @@ int PyObject_VisitManagedDict(PyObject *obj, visitproc visit, void *arg) { PyTypeObject *tp = Py_TYPE(obj); - if((tp->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0) { - return 0; - } + assert(tp->tp_flags & Py_TPFLAGS_MANAGED_DICT); if (tp->tp_flags & Py_TPFLAGS_INLINE_VALUES) { PyDictValues *values = _PyObject_InlineValues(obj); if (values->valid) { diff --git a/Objects/genobject.c b/Objects/genobject.c index e87f199c2504ba..33679afecb420f 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -882,25 +882,7 @@ PyTypeObject PyGen_Type = { gen_methods, /* tp_methods */ gen_memberlist, /* tp_members */ gen_getsetlist, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - 0, /* tp_new */ - 0, /* tp_free */ - 0, /* tp_is_gc */ - 0, /* tp_bases */ - 0, /* tp_mro */ - 0, /* tp_cache */ - 0, /* tp_subclasses */ - 0, /* tp_weaklist */ - 0, /* tp_del */ - 0, /* tp_version_tag */ - _PyGen_Finalize, /* tp_finalize */ + .tp_finalize = _PyGen_Finalize, }; static PyObject * @@ -1242,24 +1224,7 @@ PyTypeObject PyCoro_Type = { coro_methods, /* tp_methods */ 
coro_memberlist, /* tp_members */ coro_getsetlist, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - 0, /* tp_new */ - 0, /* tp_free */ - 0, /* tp_is_gc */ - 0, /* tp_bases */ - 0, /* tp_mro */ - 0, /* tp_cache */ - 0, /* tp_subclasses */ - 0, /* tp_weaklist */ - 0, /* tp_del */ - 0, /* tp_version_tag */ - _PyGen_Finalize, /* tp_finalize */ + .tp_finalize = _PyGen_Finalize, }; static void @@ -1464,7 +1429,6 @@ typedef struct _PyAsyncGenWrappedValue { (assert(_PyAsyncGenWrappedValue_CheckExact(op)), \ _Py_CAST(_PyAsyncGenWrappedValue*, (op))) - static int async_gen_traverse(PyObject *self, visitproc visit, void *arg) { @@ -1673,24 +1637,7 @@ PyTypeObject PyAsyncGen_Type = { async_gen_methods, /* tp_methods */ async_gen_memberlist, /* tp_members */ async_gen_getsetlist, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - 0, /* tp_new */ - 0, /* tp_free */ - 0, /* tp_is_gc */ - 0, /* tp_bases */ - 0, /* tp_mro */ - 0, /* tp_cache */ - 0, /* tp_subclasses */ - 0, /* tp_weaklist */ - 0, /* tp_del */ - 0, /* tp_version_tag */ - _PyGen_Finalize, /* tp_finalize */ + .tp_finalize = _PyGen_Finalize, }; @@ -1935,16 +1882,6 @@ PyTypeObject _PyAsyncGenASend_Type = { PyObject_SelfIter, /* tp_iter */ async_gen_asend_iternext, /* tp_iternext */ async_gen_asend_methods, /* tp_methods */ - 0, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - 0, /* tp_new */ .tp_finalize = async_gen_asend_finalize, }; diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 2068d6aa9be52b..cc95b9857e3f2d 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -2355,6 +2355,16 @@ subtype_traverse(PyObject *self, 
visitproc visit, void *arg) return 0; } + +static int +plain_object_traverse(PyObject *self, visitproc visit, void *arg) +{ + PyTypeObject *type = Py_TYPE(self); + assert(type->tp_flags & Py_TPFLAGS_MANAGED_DICT); + Py_VISIT(type); + return PyObject_VisitManagedDict(self, visit, arg); +} + static void clear_slots(PyTypeObject *type, PyObject *self) { @@ -4147,6 +4157,9 @@ type_new_descriptors(const type_new_ctx *ctx, PyTypeObject *type) assert((type->tp_flags & Py_TPFLAGS_MANAGED_DICT) == 0); type->tp_flags |= Py_TPFLAGS_MANAGED_DICT; type->tp_dictoffset = -1; + if (type->tp_basicsize == sizeof(PyObject)) { + type->tp_traverse = plain_object_traverse; + } } type->tp_basicsize = slotoffset; diff --git a/Python/gc.c b/Python/gc.c index 5b9588c8741b97..fd29a48518e71b 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1277,18 +1277,13 @@ gc_list_set_space(PyGC_Head *list, int space) * space faster than objects are added to the old space. * * Each young or incremental collection adds a number of - * objects, S (for survivors) to the old space, and - * incremental collectors scan I objects from the old space. - * I > S must be true. We also want I > S * N to be where - * N > 1. Higher values of N mean that the old space is + * new objects (N) to the heap, and incremental collectors + * scan I objects from the old space. + * I > N must be true. We also want I > N * K to be where + * K > 2. Higher values of K mean that the old space is * scanned more rapidly. 
- * The default incremental threshold of 10 translates to - * N == 1.4 (1 + 4/threshold) */ - -/* Divide by 10, so that the default incremental threshold of 10 - * scans objects at 1% of the heap size */ -#define SCAN_RATE_DIVISOR 10 +#define SCAN_RATE_DIVISOR 5 static void add_stats(GCState *gcstate, int gen, struct gc_collection_stats *stats) @@ -1330,69 +1325,76 @@ gc_collect_young(PyThreadState *tstate, validate_spaces(gcstate); } -#ifndef NDEBUG -static inline int -IS_IN_VISITED(PyGC_Head *gc, int visited_space) +typedef struct work_stack { + PyGC_Head *top; + int visited_space; +} WorkStack; + +/* Remove gc from the list it is currently in and push it to the stack */ +static inline void +push_to_stack(PyGC_Head *gc, WorkStack *stack) { - assert(visited_space == 0 || other_space(visited_space) == 0); - return gc_old_space(gc) == visited_space; + PyGC_Head *prev = GC_PREV(gc); + PyGC_Head *next = GC_NEXT(gc); + _PyGCHead_SET_NEXT(prev, next); + _PyGCHead_SET_PREV(next, prev); + _PyGCHead_SET_PREV(gc, stack->top); + stack->top = gc; } -#endif -struct container_and_flag { - PyGC_Head *container; - int visited_space; - intptr_t size; -}; +static inline PyGC_Head * +pop_from_stack(WorkStack *stack) +{ + PyGC_Head *gc = stack->top; + stack->top = _PyGCHead_PREV(gc); + return gc; +} -/* A traversal callback for adding to container) */ -static int -visit_add_to_container(PyObject *op, void *arg) +/* append list `from` to `stack`; `from` becomes an empty list */ +static void +move_list_to_stack(PyGC_Head *from, WorkStack *stack) { - OBJECT_STAT_INC(object_visits); - struct container_and_flag *cf = (struct container_and_flag *)arg; - int visited = cf->visited_space; - assert(visited == get_gc_state()->visited_space); - if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { + if (!gc_list_is_empty(from)) { + PyGC_Head *from_head = GC_NEXT(from); + PyGC_Head *from_tail = GC_PREV(from); + _PyGCHead_SET_PREV(from_head, stack->top); + stack->top = from_tail; + gc_list_init(from); 
+ } +} + +static inline void +move_to_stack(PyObject *op, WorkStack *stack, int visited_space) +{ + assert(op != NULL); + if (_PyObject_IS_GC(op)) { PyGC_Head *gc = AS_GC(op); if (_PyObject_GC_IS_TRACKED(op) && - gc_old_space(gc) != visited) { + gc_old_space(gc) != visited_space) { + assert(!_Py_IsImmortal(op)); gc_flip_old_space(gc); - gc_list_move(gc, cf->container); - cf->size++; + push_to_stack(gc, stack); } } - return 0; } -static intptr_t -expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCState *gcstate) -{ - struct container_and_flag arg = { - .container = container, - .visited_space = gcstate->visited_space, - .size = 0 - }; - assert(GC_NEXT(gc) == container); - while (gc != container) { - /* Survivors will be moved to visited space, so they should - * have been marked as visited */ - assert(IS_IN_VISITED(gc, gcstate->visited_space)); - PyObject *op = FROM_GC(gc); - assert(_PyObject_GC_IS_TRACKED(op)); - if (_Py_IsImmortal(op)) { - PyGC_Head *next = GC_NEXT(gc); - gc_list_move(gc, &get_gc_state()->permanent_generation.head); - gc = next; - continue; - } - traverseproc traverse = Py_TYPE(op)->tp_traverse; - (void) traverse(op, - visit_add_to_container, - &arg); - gc = GC_NEXT(gc); - } - return arg.size; +static void +move_unvisited(PyObject *op, WorkStack *stack, int visited_space) +{ + move_to_stack(op, stack, visited_space); +} + +#define MOVE_UNVISITED(O, T, V) if ((O) != NULL) move_unvisited((O), (T), (V)) + +/* A traversal callback for adding to container */ +static int +visit_add_to_container(PyObject *op, void *arg) +{ + OBJECT_STAT_INC(object_visits); + WorkStack *stack = (WorkStack *)arg; + assert(stack->visited_space == get_gc_state()->visited_space); + move_to_stack(op, stack, stack->visited_space); + return 0; } /* Do bookkeeping for a completed GC cycle */ @@ -1420,54 +1422,62 @@ completed_scavenge(GCState *gcstate) gc_list_set_space(&gcstate->old[not_visited].head, not_visited); } 
assert(gc_list_is_empty(&gcstate->old[visited].head)); - gcstate->work_to_do = 0; gcstate->phase = GC_PHASE_MARK; } -static intptr_t -move_to_reachable(PyObject *op, PyGC_Head *reachable, int visited_space) -{ - if (op != NULL && !_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { - PyGC_Head *gc = AS_GC(op); - if (_PyObject_GC_IS_TRACKED(op) && - gc_old_space(gc) != visited_space) { - gc_flip_old_space(gc); - gc_list_move(gc, reachable); - return 1; +static void +frame_move_unvisited(_PyInterpreterFrame *frame, WorkStack *stack, int visited_space) +{ + _PyStackRef *locals = frame->localsplus; + _PyStackRef *sp = frame->stackpointer; + if (frame->f_locals != NULL) { + move_unvisited(frame->f_locals, stack, visited_space); + } + PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj); + move_unvisited(func, stack, visited_space); + while (sp > locals) { + sp--; + _PyStackRef ref = *sp; + if (!PyStackRef_IsNull(ref)) { + PyObject *op = PyStackRef_AsPyObjectBorrow(ref); + if (!_Py_IsImmortal(op)) { + move_unvisited(op, stack, visited_space); + } } } - return 0; } -static intptr_t -mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space) +static Py_ssize_t +move_all_transitively_reachable(WorkStack *stack, PyGC_Head *visited, int visited_space) { // Transitively traverse all objects from reachable, until empty - struct container_and_flag arg = { - .container = reachable, - .visited_space = visited_space, - .size = 0 - }; - while (!gc_list_is_empty(reachable)) { - PyGC_Head *gc = _PyGCHead_NEXT(reachable); + Py_ssize_t objects_marked = 0; + while (stack->top != NULL) { + PyGC_Head *gc = pop_from_stack(stack); assert(gc_old_space(gc) == visited_space); - gc_list_move(gc, visited); + gc_list_append(gc, visited); + objects_marked++; PyObject *op = FROM_GC(gc); - traverseproc traverse = Py_TYPE(op)->tp_traverse; - (void) traverse(op, - visit_add_to_container, - &arg); + assert(PyObject_IS_GC(op)); + assert(_PyObject_GC_IS_TRACKED(op)); + if 
(_Py_IsImmortal(op)) { + _PyObject_GC_UNTRACK(op); + } + else { + traverseproc traverse = Py_TYPE(op)->tp_traverse; + (void) traverse(op, visit_add_to_container, stack); + } } gc_list_validate_space(visited, visited_space); - return arg.size; + return objects_marked; } static intptr_t mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, bool start) { - PyGC_Head reachable; - gc_list_init(&reachable); - Py_ssize_t objects_marked = 0; + WorkStack stack; + stack.top = NULL; + stack.visited_space = visited_space; // Move all objects on stacks to reachable _PyRuntimeState *runtime = &_PyRuntime; HEAD_LOCK(runtime); @@ -1480,27 +1490,7 @@ mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, b frame = frame->previous; continue; } - _PyStackRef *locals = frame->localsplus; - _PyStackRef *sp = frame->stackpointer; - objects_marked += move_to_reachable(frame->f_locals, &reachable, visited_space); - PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj); - objects_marked += move_to_reachable(func, &reachable, visited_space); - while (sp > locals) { - sp--; - if (PyStackRef_IsNull(*sp)) { - continue; - } - PyObject *op = PyStackRef_AsPyObjectBorrow(*sp); - if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { - PyGC_Head *gc = AS_GC(op); - if (_PyObject_GC_IS_TRACKED(op) && - gc_old_space(gc) != visited_space) { - gc_flip_old_space(gc); - objects_marked++; - gc_list_move(gc, &reachable); - } - } - } + frame_move_unvisited(frame, &stack, visited_space); if (!start && frame->visited) { // If this frame has already been visited, then the lower frames // will have already been visited and will not have changed @@ -1513,31 +1503,31 @@ mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, b ts = PyThreadState_Next(ts); HEAD_UNLOCK(runtime); } - objects_marked += mark_all_reachable(&reachable, visited, visited_space); - assert(gc_list_is_empty(&reachable)); + Py_ssize_t objects_marked = 
move_all_transitively_reachable(&stack, visited, visited_space); + assert(stack.top == NULL); return objects_marked; } static intptr_t mark_global_roots(PyInterpreterState *interp, PyGC_Head *visited, int visited_space) { - PyGC_Head reachable; - gc_list_init(&reachable); - Py_ssize_t objects_marked = 0; - objects_marked += move_to_reachable(interp->sysdict, &reachable, visited_space); - objects_marked += move_to_reachable(interp->builtins, &reachable, visited_space); - objects_marked += move_to_reachable(interp->dict, &reachable, visited_space); + WorkStack stack; + stack.top = NULL; + stack.visited_space = visited_space; + MOVE_UNVISITED(interp->sysdict, &stack, visited_space); + MOVE_UNVISITED(interp->builtins, &stack, visited_space); + MOVE_UNVISITED(interp->dict, &stack, visited_space); struct types_state *types = &interp->types; for (int i = 0; i < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; i++) { - objects_marked += move_to_reachable(types->builtins.initialized[i].tp_dict, &reachable, visited_space); - objects_marked += move_to_reachable(types->builtins.initialized[i].tp_subclasses, &reachable, visited_space); + MOVE_UNVISITED(types->builtins.initialized[i].tp_dict, &stack, visited_space); + MOVE_UNVISITED(types->builtins.initialized[i].tp_subclasses, &stack, visited_space); } for (int i = 0; i < _Py_MAX_MANAGED_STATIC_EXT_TYPES; i++) { - objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_dict, &reachable, visited_space); - objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_subclasses, &reachable, visited_space); + MOVE_UNVISITED(types->for_extensions.initialized[i].tp_dict, &stack, visited_space); + MOVE_UNVISITED(types->for_extensions.initialized[i].tp_subclasses, &stack, visited_space); } - objects_marked += mark_all_reachable(&reachable, visited, visited_space); - assert(gc_list_is_empty(&reachable)); + Py_ssize_t objects_marked = move_all_transitively_reachable(&stack, visited, visited_space); + 
assert(stack.top == NULL); return objects_marked; } @@ -1549,39 +1539,35 @@ mark_at_start(PyThreadState *tstate) PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; Py_ssize_t objects_marked = mark_global_roots(tstate->interp, visited, gcstate->visited_space); objects_marked += mark_stacks(tstate->interp, visited, gcstate->visited_space, true); - gcstate->work_to_do -= objects_marked; gcstate->phase = GC_PHASE_COLLECT; validate_spaces(gcstate); return objects_marked; } + +/* See InternalDocs/garbage_collector.md for more details. */ +#define MAX_HEAP_PORTION_MULTIPLIER 5 +#define MARKING_PROGRESS_MULTIPLIER 2 + static intptr_t assess_work_to_do(GCState *gcstate) { - /* The amount of work we want to do depends on three things. + /* The amount of work we want to do depends on two things. * 1. The number of new objects created - * 2. The growth in heap size since the last collection - * 3. The heap size (up to the number of new objects, to avoid quadratic effects) - * - * For a steady state heap, the amount of work to do is three times the number - * of new objects added to the heap. This ensures that we stay ahead in the - * worst case of all new objects being garbage. - * - * This could be improved by tracking survival rates, but it is still a - * large improvement on the non-marking approach. + * 2. 
The heap size (up to a multiple of the number of new objects, to avoid quadratic effects) */ intptr_t scale_factor = gcstate->old[0].threshold; if (scale_factor < 2) { scale_factor = 2; } intptr_t new_objects = gcstate->young.count; - intptr_t max_heap_fraction = new_objects*3/2; - intptr_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; - if (heap_fraction > max_heap_fraction) { - heap_fraction = max_heap_fraction; + intptr_t max_heap_portion = new_objects * MAX_HEAP_PORTION_MULTIPLIER; + intptr_t heap_portion = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; + if (heap_portion > max_heap_portion) { + heap_portion = max_heap_portion; } gcstate->young.count = 0; - return new_objects + heap_fraction; + return new_objects + heap_portion; } static void @@ -1594,36 +1580,37 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) if (gcstate->phase == GC_PHASE_MARK) { Py_ssize_t objects_marked = mark_at_start(tstate); GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); - gcstate->work_to_do -= objects_marked; + gcstate->work_to_do -= objects_marked * MARKING_PROGRESS_MULTIPLIER; validate_spaces(gcstate); return; } PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; - PyGC_Head increment; - gc_list_init(&increment); - int scale_factor = gcstate->old[0].threshold; - if (scale_factor < 2) { - scale_factor = 2; - } intptr_t objects_marked = mark_stacks(tstate->interp, visited, gcstate->visited_space, false); GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); - gcstate->work_to_do -= objects_marked; + gcstate->work_to_do -= objects_marked * MARKING_PROGRESS_MULTIPLIER; gc_list_set_space(&gcstate->young.head, gcstate->visited_space); - gc_list_merge(&gcstate->young.head, &increment); + PyGC_Head increment; + gc_list_init(&increment); + WorkStack working; + working.top = 0; + working.visited_space = 
gcstate->visited_space; + move_list_to_stack(&gcstate->young.head, &working); + Py_ssize_t increment_size = move_all_transitively_reachable(&working, &increment, gcstate->visited_space); gc_list_validate_space(&increment, gcstate->visited_space); - Py_ssize_t increment_size = gc_list_size(&increment); + assert(working.top == NULL); while (increment_size < gcstate->work_to_do) { if (gc_list_is_empty(not_visited)) { break; } PyGC_Head *gc = _PyGCHead_NEXT(not_visited); - gc_list_move(gc, &increment); - increment_size++; - assert(!_Py_IsImmortal(FROM_GC(gc))); gc_set_old_space(gc, gcstate->visited_space); - increment_size += expand_region_transitively_reachable(&increment, gc, gcstate); + push_to_stack(gc, &working); + assert(!_Py_IsImmortal(FROM_GC(gc))); + increment_size += move_all_transitively_reachable(&working, &increment, gcstate->visited_space); + assert(working.top == NULL); } + assert(increment_size == gc_list_size(&increment)); GC_STAT_ADD(1, objects_not_transitively_reachable, increment_size); validate_list(&increment, collecting_clear_unreachable_clear); gc_list_validate_space(&increment, gcstate->visited_space); @@ -1632,7 +1619,6 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) gc_collect_region(tstate, &increment, &survivors, stats); gc_list_merge(&survivors, visited); assert(gc_list_is_empty(&increment)); - gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; gcstate->work_to_do -= increment_size; add_stats(gcstate, 1, stats); @@ -1668,6 +1654,7 @@ gc_collect_full(PyThreadState *tstate, gcstate->old[0].count = 0; gcstate->old[1].count = 0; completed_scavenge(gcstate); + gcstate->work_to_do = -gcstate->young.threshold; _PyGC_ClearAllFreeLists(tstate->interp); validate_spaces(gcstate); add_stats(gcstate, 2, stats); From 77a61c0465c27c1c4ba7cddf4638d9ed75259671 Mon Sep 17 00:00:00 2001 From: Yuki Kobayashi Date: Fri, 6 Dec 2024 23:09:20 +0900 Subject: [PATCH 57/76] gh-101100: amend references 
starting with `!~` in gh-127054 (#127684) --- Doc/tutorial/datastructures.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Doc/tutorial/datastructures.rst b/Doc/tutorial/datastructures.rst index 263b0c2e2815a1..cbe780e075baf5 100644 --- a/Doc/tutorial/datastructures.rst +++ b/Doc/tutorial/datastructures.rst @@ -142,8 +142,8 @@ Using Lists as Stacks The list methods make it very easy to use a list as a stack, where the last element added is the first element retrieved ("last-in, first-out"). To add an -item to the top of the stack, use :meth:`!~list.append`. To retrieve an item from the -top of the stack, use :meth:`!~list.pop` without an explicit index. For example:: +item to the top of the stack, use :meth:`!append`. To retrieve an item from the +top of the stack, use :meth:`!pop` without an explicit index. For example:: >>> stack = [3, 4, 5] >>> stack.append(6) @@ -340,7 +340,7 @@ The :keyword:`!del` statement ============================= There is a way to remove an item from a list given its index instead of its -value: the :keyword:`del` statement. This differs from the :meth:`!~list.pop` method +value: the :keyword:`del` statement. This differs from the :meth:`!pop` method which returns a value. The :keyword:`!del` statement can also be used to remove slices from a list or clear the entire list (which we did earlier by assignment of an empty list to the slice). For example:: @@ -500,8 +500,8 @@ any immutable type; strings and numbers can always be keys. Tuples can be used as keys if they contain only strings, numbers, or tuples; if a tuple contains any mutable object either directly or indirectly, it cannot be used as a key. You can't use lists as keys, since lists can be modified in place using index -assignments, slice assignments, or methods like :meth:`!~list.append` and -:meth:`!~list.extend`. +assignments, slice assignments, or methods like :meth:`!append` and +:meth:`!extend`. 
It is best to think of a dictionary as a set of *key: value* pairs, with the requirement that the keys are unique (within one dictionary). A pair of From 36c6178d372b075e9c74b786cfb5e47702976b1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Fri, 6 Dec 2024 15:31:30 +0100 Subject: [PATCH 58/76] gh-126024: fix UBSan failure in `unicodeobject.c:find_first_nonascii` (GH-127566) --- Objects/unicodeobject.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 463da06445984b..33c4747bbef488 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -5083,12 +5083,9 @@ find_first_nonascii(const unsigned char *start, const unsigned char *end) const unsigned char *p2 = _Py_ALIGN_UP(p, SIZEOF_SIZE_T); #if PY_LITTLE_ENDIAN && HAVE_CTZ if (p < p2) { -#if defined(_M_AMD64) || defined(_M_IX86) || defined(__x86_64__) || defined(__i386__) - // x86 and amd64 are little endian and can load unaligned memory. - size_t u = *(const size_t*)p & ASCII_CHAR_MASK; -#else - size_t u = load_unaligned(p, p2 - p) & ASCII_CHAR_MASK; -#endif + size_t u; + memcpy(&u, p, sizeof(size_t)); + u &= ASCII_CHAR_MASK; if (u) { return (ctz(u) - 7) / 8; } From a353455fca1b8f468ff3ffbb4b5e316510b4fd43 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Fri, 6 Dec 2024 15:48:24 +0000 Subject: [PATCH 59/76] gh-125610: Fix `STORE_ATTR_INSTANCE_VALUE` specialization check (GH-125612) The `STORE_ATTR_INSTANCE_VALUE` opcode doesn't support objects with non-NULL managed dictionaries, so don't specialize to that op in that case. 
--- Python/specialize.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Python/specialize.c b/Python/specialize.c index ec2cd7025e5054..d3fea717243847 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -947,7 +947,10 @@ specialize_dict_access( return 0; } _PyAttrCache *cache = (_PyAttrCache *)(instr + 1); - if (type->tp_flags & Py_TPFLAGS_INLINE_VALUES && _PyObject_InlineValues(owner)->valid) { + if (type->tp_flags & Py_TPFLAGS_INLINE_VALUES && + _PyObject_InlineValues(owner)->valid && + !(base_op == STORE_ATTR && _PyObject_GetManagedDict(owner) != NULL)) + { PyDictKeysObject *keys = ((PyHeapTypeObject *)type)->ht_cached_keys; assert(PyUnicode_CheckExact(name)); Py_ssize_t index = _PyDictKeys_StringLookup(keys, name); From 12680ec5bd45c85b6daebe0739d30ef45f089efa Mon Sep 17 00:00:00 2001 From: Peter Bierma Date: Fri, 6 Dec 2024 10:58:19 -0500 Subject: [PATCH 60/76] gh-127314: Don't mention the GIL when calling without a thread state on the free-threaded build (#127315) Co-authored-by: Victor Stinner --- Include/internal/pycore_pystate.h | 8 ++++++++ Lib/test/test_capi/test_mem.py | 9 +++++++-- Lib/test/test_capi/test_misc.py | 17 ++++++++++++----- ...24-11-26-22-06-10.gh-issue-127314.SsRrIu.rst | 2 ++ Objects/obmalloc.c | 7 +++++++ 5 files changed, 36 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/C_API/2024-11-26-22-06-10.gh-issue-127314.SsRrIu.rst diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h index 54d8803bc0bdb6..1e73e541ef8de0 100644 --- a/Include/internal/pycore_pystate.h +++ b/Include/internal/pycore_pystate.h @@ -190,10 +190,18 @@ static inline void _Py_EnsureFuncTstateNotNULL(const char *func, PyThreadState *tstate) { if (tstate == NULL) { +#ifndef Py_GIL_DISABLED _Py_FatalErrorFunc(func, "the function must be called with the GIL held, " "after Python initialization and before Python finalization, " "but the GIL is released (the current Python thread state is NULL)"); 
+#else + _Py_FatalErrorFunc(func, + "the function must be called with an active thread state, " + "after Python initialization and before Python finalization, " + "but it was called without an active thread state. " + "Are you trying to call the C API inside of a Py_BEGIN_ALLOW_THREADS block?"); +#endif } } diff --git a/Lib/test/test_capi/test_mem.py b/Lib/test/test_capi/test_mem.py index 6ab7b685c2e18b..5035b2b4829bf6 100644 --- a/Lib/test/test_capi/test_mem.py +++ b/Lib/test/test_capi/test_mem.py @@ -68,8 +68,13 @@ def test_api_misuse(self): def check_malloc_without_gil(self, code): out = self.check(code) - expected = ('Fatal Python error: _PyMem_DebugMalloc: ' - 'Python memory allocator called without holding the GIL') + if not support.Py_GIL_DISABLED: + expected = ('Fatal Python error: _PyMem_DebugMalloc: ' + 'Python memory allocator called without holding the GIL') + else: + expected = ('Fatal Python error: _PyMem_DebugMalloc: ' + 'Python memory allocator called without an active thread state. ' + 'Are you trying to call it inside of a Py_BEGIN_ALLOW_THREADS block?') self.assertIn(expected, out) def test_pymem_malloc_without_gil(self): diff --git a/Lib/test/test_capi/test_misc.py b/Lib/test/test_capi/test_misc.py index 8e0271919cc8a5..61512e610f46f2 100644 --- a/Lib/test/test_capi/test_misc.py +++ b/Lib/test/test_capi/test_misc.py @@ -100,11 +100,18 @@ def test_no_FatalError_infinite_loop(self): _rc, out, err = run_result self.assertEqual(out, b'') # This used to cause an infinite loop. 
- msg = ("Fatal Python error: PyThreadState_Get: " - "the function must be called with the GIL held, " - "after Python initialization and before Python finalization, " - "but the GIL is released " - "(the current Python thread state is NULL)").encode() + if not support.Py_GIL_DISABLED: + msg = ("Fatal Python error: PyThreadState_Get: " + "the function must be called with the GIL held, " + "after Python initialization and before Python finalization, " + "but the GIL is released " + "(the current Python thread state is NULL)").encode() + else: + msg = ("Fatal Python error: PyThreadState_Get: " + "the function must be called with an active thread state, " + "after Python initialization and before Python finalization, " + "but it was called without an active thread state. " + "Are you trying to call the C API inside of a Py_BEGIN_ALLOW_THREADS block?").encode() self.assertTrue(err.rstrip().startswith(msg), err) diff --git a/Misc/NEWS.d/next/C_API/2024-11-26-22-06-10.gh-issue-127314.SsRrIu.rst b/Misc/NEWS.d/next/C_API/2024-11-26-22-06-10.gh-issue-127314.SsRrIu.rst new file mode 100644 index 00000000000000..8ea3c4ee2a2c53 --- /dev/null +++ b/Misc/NEWS.d/next/C_API/2024-11-26-22-06-10.gh-issue-127314.SsRrIu.rst @@ -0,0 +1,2 @@ +Improve error message when calling the C API without an active thread state +on the :term:`free-threaded <free threading>` build. diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 2cc0377f68f990..b103deb01ca712 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -2910,9 +2910,16 @@ static inline void _PyMem_DebugCheckGIL(const char *func) { if (!PyGILState_Check()) { +#ifndef Py_GIL_DISABLED _Py_FatalErrorFunc(func, "Python memory allocator called " "without holding the GIL"); +#else + _Py_FatalErrorFunc(func, + "Python memory allocator called " + "without an active thread state. 
" + "Are you trying to call it inside of a Py_BEGIN_ALLOW_THREADS block?"); +#endif } } From 67b18a18b66b89e253f38895057ef9f6bae92e7b Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Fri, 6 Dec 2024 17:27:12 +0100 Subject: [PATCH 61/76] gh-59705: Add _thread.set_name() function (#127338) On Linux, threading.Thread now sets the thread name to the operating system. * configure now checks if pthread_getname_np() and pthread_setname_np() functions are available. * Add PYTHREAD_NAME_MAXLEN macro. * Add _thread._NAME_MAXLEN constant for test_threading. Co-authored-by: Serhiy Storchaka --- Lib/test/test_threading.py | 60 ++++++++++ Lib/threading.py | 9 ++ ...4-11-27-17-04-38.gh-issue-59705.sAGyvs.rst | 2 + Modules/_threadmodule.c | 108 ++++++++++++++++++ Modules/clinic/_threadmodule.c.h | 104 +++++++++++++++++ configure | 30 +++++ configure.ac | 22 +++- pyconfig.h.in | 9 ++ 8 files changed, 342 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-27-17-04-38.gh-issue-59705.sAGyvs.rst create mode 100644 Modules/clinic/_threadmodule.c.h diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py index fe225558fc4f0b..d05161f46f1034 100644 --- a/Lib/test/test_threading.py +++ b/Lib/test/test_threading.py @@ -2104,6 +2104,66 @@ def test__all__(self): support.check__all__(self, threading, ('threading', '_thread'), extra=extra, not_exported=not_exported) + @unittest.skipUnless(hasattr(_thread, 'set_name'), "missing _thread.set_name") + @unittest.skipUnless(hasattr(_thread, '_get_name'), "missing _thread._get_name") + def test_set_name(self): + # set_name() limit in bytes + truncate = getattr(_thread, "_NAME_MAXLEN", None) + limit = truncate or 100 + + tests = [ + # test short ASCII name + "CustomName", + + # test short non-ASCII name + "namé€", + + # embedded null character: name is truncated + # at the first null character + "embed\0null", + + # Test long ASCII names (not truncated) + "x" * limit, + + # Test long ASCII names (truncated) 
+ "x" * (limit + 10), + + # Test long non-ASCII name (truncated) + "x" * (limit - 1) + "é€", + ] + if os_helper.FS_NONASCII: + tests.append(f"nonascii:{os_helper.FS_NONASCII}") + if os_helper.TESTFN_UNENCODABLE: + tests.append(os_helper.TESTFN_UNENCODABLE) + + if sys.platform.startswith("solaris"): + encoding = "utf-8" + else: + encoding = sys.getfilesystemencoding() + + def work(): + nonlocal work_name + work_name = _thread._get_name() + + for name in tests: + encoded = name.encode(encoding, "replace") + if b'\0' in encoded: + encoded = encoded.split(b'\0', 1)[0] + if truncate is not None: + encoded = encoded[:truncate] + if sys.platform.startswith("solaris"): + expected = encoded.decode("utf-8", "surrogateescape") + else: + expected = os.fsdecode(encoded) + + with self.subTest(name=name, expected=expected): + work_name = None + thread = threading.Thread(target=work, name=name) + thread.start() + thread.join() + self.assertEqual(work_name, expected, + f"{len(work_name)=} and {len(expected)=}") + class InterruptMainTests(unittest.TestCase): def check_interrupt_main_with_signal_handler(self, signum): diff --git a/Lib/threading.py b/Lib/threading.py index 94ea2f08178369..3abd22a2aa1b72 100644 --- a/Lib/threading.py +++ b/Lib/threading.py @@ -48,6 +48,10 @@ __all__.append('get_native_id') except AttributeError: _HAVE_THREAD_NATIVE_ID = False +try: + _set_name = _thread.set_name +except AttributeError: + _set_name = None ThreadError = _thread.error try: _CRLock = _thread.RLock @@ -1027,6 +1031,11 @@ def _bootstrap_inner(self): self._set_ident() if _HAVE_THREAD_NATIVE_ID: self._set_native_id() + if _set_name is not None and self._name: + try: + _set_name(self._name) + except OSError: + pass self._started.set() with _active_limbo_lock: _active[self._ident] = self diff --git a/Misc/NEWS.d/next/Library/2024-11-27-17-04-38.gh-issue-59705.sAGyvs.rst b/Misc/NEWS.d/next/Library/2024-11-27-17-04-38.gh-issue-59705.sAGyvs.rst new file mode 100644 index 
00000000000000..a8c7b3d00755e6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-27-17-04-38.gh-issue-59705.sAGyvs.rst @@ -0,0 +1,2 @@ +On Linux, :class:`threading.Thread` now sets the thread name to the +operating system. Patch by Victor Stinner. diff --git a/Modules/_threadmodule.c b/Modules/_threadmodule.c index 4a45445e2f62db..35c032fbeaa94f 100644 --- a/Modules/_threadmodule.c +++ b/Modules/_threadmodule.c @@ -17,6 +17,8 @@ # include <signal.h> // SIGINT #endif +#include "clinic/_threadmodule.c.h" + // ThreadError is just an alias to PyExc_RuntimeError #define ThreadError PyExc_RuntimeError @@ -44,6 +46,13 @@ get_thread_state(PyObject *module) return (thread_module_state *)state; } + +/*[clinic input] +module _thread +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=be8dbe5cc4b16df7]*/ + + // _ThreadHandle type // Handles state transitions according to the following diagram: @@ -2354,6 +2363,96 @@ PyDoc_STRVAR(thread__get_main_thread_ident_doc, Internal only. Return a non-zero integer that uniquely identifies the main thread\n\ of the main interpreter."); + +#ifdef HAVE_PTHREAD_GETNAME_NP +/*[clinic input] +_thread._get_name + +Get the name of the current thread. +[clinic start generated code]*/ + +static PyObject * +_thread__get_name_impl(PyObject *module) +/*[clinic end generated code: output=20026e7ee3da3dd7 input=35cec676833d04c8]*/ +{ + // Linux and macOS are limited to respectively 16 and 64 bytes + char name[100]; + pthread_t thread = pthread_self(); + int rc = pthread_getname_np(thread, name, Py_ARRAY_LENGTH(name)); + if (rc) { + errno = rc; + return PyErr_SetFromErrno(PyExc_OSError); + } + +#ifdef __sun + return PyUnicode_DecodeUTF8(name, strlen(name), "surrogateescape"); +#else + return PyUnicode_DecodeFSDefault(name); +#endif +} +#endif // HAVE_PTHREAD_GETNAME_NP + + +#ifdef HAVE_PTHREAD_SETNAME_NP +/*[clinic input] +_thread.set_name + + name as name_obj: unicode + +Set the name of the current thread. 
+[clinic start generated code]*/ + +static PyObject * +_thread_set_name_impl(PyObject *module, PyObject *name_obj) +/*[clinic end generated code: output=402b0c68e0c0daed input=7e7acd98261be82f]*/ +{ +#ifdef __sun + // Solaris always uses UTF-8 + const char *encoding = "utf-8"; +#else + // Encode the thread name to the filesystem encoding using the "replace" + // error handler + PyInterpreterState *interp = _PyInterpreterState_GET(); + const char *encoding = interp->unicode.fs_codec.encoding; +#endif + PyObject *name_encoded; + name_encoded = PyUnicode_AsEncodedString(name_obj, encoding, "replace"); + if (name_encoded == NULL) { + return NULL; + } + +#ifdef PYTHREAD_NAME_MAXLEN + // Truncate to PYTHREAD_NAME_MAXLEN bytes + the NUL byte if needed + size_t len = PyBytes_GET_SIZE(name_encoded); + if (len > PYTHREAD_NAME_MAXLEN) { + PyObject *truncated; + truncated = PyBytes_FromStringAndSize(PyBytes_AS_STRING(name_encoded), + PYTHREAD_NAME_MAXLEN); + if (truncated == NULL) { + Py_DECREF(name_encoded); + return NULL; + } + Py_SETREF(name_encoded, truncated); + } +#endif + + const char *name = PyBytes_AS_STRING(name_encoded); +#ifdef __APPLE__ + int rc = pthread_setname_np(name); +#else + pthread_t thread = pthread_self(); + int rc = pthread_setname_np(thread, name); +#endif + Py_DECREF(name_encoded); + if (rc) { + errno = rc; + return PyErr_SetFromErrno(PyExc_OSError); + } + Py_RETURN_NONE; +} +#endif // HAVE_PTHREAD_SETNAME_NP + + static PyMethodDef thread_methods[] = { {"start_new_thread", (PyCFunction)thread_PyThread_start_new_thread, METH_VARARGS, start_new_thread_doc}, @@ -2393,6 +2492,8 @@ static PyMethodDef thread_methods[] = { METH_O, thread__make_thread_handle_doc}, {"_get_main_thread_ident", thread__get_main_thread_ident, METH_NOARGS, thread__get_main_thread_ident_doc}, + _THREAD_SET_NAME_METHODDEF + _THREAD__GET_NAME_METHODDEF {NULL, NULL} /* sentinel */ }; @@ -2484,6 +2585,13 @@ thread_module_exec(PyObject *module) llist_init(&state->shutdown_handles); 
+#ifdef PYTHREAD_NAME_MAXLEN + if (PyModule_AddIntConstant(module, "_NAME_MAXLEN", + PYTHREAD_NAME_MAXLEN) < 0) { + return -1; + } +#endif + return 0; } diff --git a/Modules/clinic/_threadmodule.c.h b/Modules/clinic/_threadmodule.c.h new file mode 100644 index 00000000000000..8f0507d40285b3 --- /dev/null +++ b/Modules/clinic/_threadmodule.c.h @@ -0,0 +1,104 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +# include "pycore_gc.h" // PyGC_Head +# include "pycore_runtime.h" // _Py_ID() +#endif +#include "pycore_modsupport.h" // _PyArg_UnpackKeywords() + +#if defined(HAVE_PTHREAD_GETNAME_NP) + +PyDoc_STRVAR(_thread__get_name__doc__, +"_get_name($module, /)\n" +"--\n" +"\n" +"Get the name of the current thread."); + +#define _THREAD__GET_NAME_METHODDEF \ + {"_get_name", (PyCFunction)_thread__get_name, METH_NOARGS, _thread__get_name__doc__}, + +static PyObject * +_thread__get_name_impl(PyObject *module); + +static PyObject * +_thread__get_name(PyObject *module, PyObject *Py_UNUSED(ignored)) +{ + return _thread__get_name_impl(module); +} + +#endif /* defined(HAVE_PTHREAD_GETNAME_NP) */ + +#if defined(HAVE_PTHREAD_SETNAME_NP) + +PyDoc_STRVAR(_thread_set_name__doc__, +"set_name($module, /, name)\n" +"--\n" +"\n" +"Set the name of the current thread."); + +#define _THREAD_SET_NAME_METHODDEF \ + {"set_name", _PyCFunction_CAST(_thread_set_name), METH_FASTCALL|METH_KEYWORDS, _thread_set_name__doc__}, + +static PyObject * +_thread_set_name_impl(PyObject *module, PyObject *name_obj); + +static PyObject * +_thread_set_name(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, 
NUM_KEYWORDS) + .ob_item = { &_Py_ID(name), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"name", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "set_name", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + PyObject *name_obj; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + if (!PyUnicode_Check(args[0])) { + _PyArg_BadArgument("set_name", "argument 'name'", "str", args[0]); + goto exit; + } + name_obj = args[0]; + return_value = _thread_set_name_impl(module, name_obj); + +exit: + return return_value; +} + +#endif /* defined(HAVE_PTHREAD_SETNAME_NP) */ + +#ifndef _THREAD__GET_NAME_METHODDEF + #define _THREAD__GET_NAME_METHODDEF +#endif /* !defined(_THREAD__GET_NAME_METHODDEF) */ + +#ifndef _THREAD_SET_NAME_METHODDEF + #define _THREAD_SET_NAME_METHODDEF +#endif /* !defined(_THREAD_SET_NAME_METHODDEF) */ +/*[clinic end generated code: output=b5cb85aaccc45bf6 input=a9049054013a1b77]*/ diff --git a/configure b/configure index 5e9bcb602d884e..bcbab8dfcff190 100755 --- a/configure +++ b/configure @@ -821,6 +821,7 @@ MODULE_TIME_TRUE MODULE__IO_FALSE MODULE__IO_TRUE MODULE_BUILDTYPE +PYTHREAD_NAME_MAXLEN TEST_MODULES OPENSSL_LDFLAGS OPENSSL_LIBS @@ -18841,6 +18842,18 @@ if test "x$ac_cv_func_pthread_kill" = xyes then : printf "%s\n" "#define HAVE_PTHREAD_KILL 1" >>confdefs.h +fi +ac_fn_c_check_func "$LINENO" "pthread_getname_np" "ac_cv_func_pthread_getname_np" +if test "x$ac_cv_func_pthread_getname_np" = xyes +then : + printf "%s\n" "#define HAVE_PTHREAD_GETNAME_NP 1" >>confdefs.h + +fi +ac_fn_c_check_func "$LINENO" "pthread_setname_np" "ac_cv_func_pthread_setname_np" +if test "x$ac_cv_func_pthread_setname_np" = xyes +then : + printf "%s\n" "#define 
HAVE_PTHREAD_SETNAME_NP 1" >>confdefs.h + fi ac_fn_c_check_func "$LINENO" "ptsname" "ac_cv_func_ptsname" if test "x$ac_cv_func_ptsname" = xyes @@ -29081,6 +29094,23 @@ fi CPPFLAGS=$save_CPPFLAGS +# gh-59705: Maximum length in bytes of a thread name +case "$ac_sys_system" in + Linux*) PYTHREAD_NAME_MAXLEN=15;; # Linux and Android + SunOS*) PYTHREAD_NAME_MAXLEN=31;; + Darwin) PYTHREAD_NAME_MAXLEN=63;; + iOS) PYTHREAD_NAME_MAXLEN=63;; + FreeBSD*) PYTHREAD_NAME_MAXLEN=98;; + *) PYTHREAD_NAME_MAXLEN=;; +esac +if test -n "$PYTHREAD_NAME_MAXLEN"; then + +printf "%s\n" "#define PYTHREAD_NAME_MAXLEN $PYTHREAD_NAME_MAXLEN" >>confdefs.h + +fi + + + # stdlib diff --git a/configure.ac b/configure.ac index bf3685e1b1b209..922a125ea9608e 100644 --- a/configure.ac +++ b/configure.ac @@ -5110,8 +5110,10 @@ AC_CHECK_FUNCS([ \ mknod mknodat mktime mmap mremap nice openat opendir pathconf pause pipe \ pipe2 plock poll posix_fadvise posix_fallocate posix_openpt posix_spawn posix_spawnp \ posix_spawn_file_actions_addclosefrom_np \ - pread preadv preadv2 process_vm_readv pthread_cond_timedwait_relative_np pthread_condattr_setclock pthread_init \ - pthread_kill ptsname ptsname_r pwrite pwritev pwritev2 readlink readlinkat readv realpath renameat \ + pread preadv preadv2 process_vm_readv \ + pthread_cond_timedwait_relative_np pthread_condattr_setclock pthread_init \ + pthread_kill pthread_getname_np pthread_setname_np \ + ptsname ptsname_r pwrite pwritev pwritev2 readlink readlinkat readv realpath renameat \ rtpSpawn sched_get_priority_max sched_rr_get_interval sched_setaffinity \ sched_setparam sched_setscheduler sem_clockwait sem_getvalue sem_open \ sem_timedwait sem_unlink sendfile setegid seteuid setgid sethostname \ @@ -7498,6 +7500,22 @@ AS_VAR_IF([ac_cv_libatomic_needed], [yes], _RESTORE_VAR([CPPFLAGS]) +# gh-59705: Maximum length in bytes of a thread name +case "$ac_sys_system" in + Linux*) PYTHREAD_NAME_MAXLEN=15;; # Linux and Android + SunOS*) PYTHREAD_NAME_MAXLEN=31;; + Darwin) 
PYTHREAD_NAME_MAXLEN=63;; + iOS) PYTHREAD_NAME_MAXLEN=63;; + FreeBSD*) PYTHREAD_NAME_MAXLEN=98;; + *) PYTHREAD_NAME_MAXLEN=;; +esac +if test -n "$PYTHREAD_NAME_MAXLEN"; then + AC_DEFINE_UNQUOTED([PYTHREAD_NAME_MAXLEN], [$PYTHREAD_NAME_MAXLEN], + [Maximum length in bytes of a thread name]) +fi +AC_SUBST([PYTHREAD_NAME_MAXLEN]) + + # stdlib AC_DEFUN([PY_STDLIB_MOD_SET_NA], [ m4_foreach([mod], [$@], [ diff --git a/pyconfig.h.in b/pyconfig.h.in index 6a1f1284650b9f..166c195a8c66fc 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -981,6 +981,9 @@ /* Define to 1 if you have the `pthread_getcpuclockid' function. */ #undef HAVE_PTHREAD_GETCPUCLOCKID +/* Define to 1 if you have the `pthread_getname_np' function. */ +#undef HAVE_PTHREAD_GETNAME_NP + /* Define to 1 if you have the header file. */ #undef HAVE_PTHREAD_H @@ -990,6 +993,9 @@ /* Define to 1 if you have the `pthread_kill' function. */ #undef HAVE_PTHREAD_KILL +/* Define to 1 if you have the `pthread_setname_np' function. */ +#undef HAVE_PTHREAD_SETNAME_NP + /* Define to 1 if you have the `pthread_sigmask' function. */ #undef HAVE_PTHREAD_SIGMASK @@ -1650,6 +1656,9 @@ /* Define as the preferred size in bits of long digits */ #undef PYLONG_BITS_IN_DIGIT +/* Maximum length in bytes of a thread name */ +#undef PYTHREAD_NAME_MAXLEN + /* enabled builtin hash modules */ #undef PY_BUILTIN_HASHLIB_HASHES From 89fa7ec74e531870a8f495d5e32ec0b00dbcd32b Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Fri, 6 Dec 2024 16:36:06 +0000 Subject: [PATCH 62/76] gh-119786: Add jit.md. Move adaptive.md to a section of interpreter.md. 
(#127175) --- InternalDocs/README.md | 4 +- InternalDocs/adaptive.md | 146 ------------------------ InternalDocs/code_objects.md | 5 + InternalDocs/compiler.md | 10 -- InternalDocs/interpreter.md | 210 ++++++++++++++++++++++++++++++----- InternalDocs/jit.md | 134 ++++++++++++++++++++++ 6 files changed, 322 insertions(+), 187 deletions(-) delete mode 100644 InternalDocs/adaptive.md create mode 100644 InternalDocs/jit.md diff --git a/InternalDocs/README.md b/InternalDocs/README.md index 8cdd06d189f362..794b4f3c6aad42 100644 --- a/InternalDocs/README.md +++ b/InternalDocs/README.md @@ -34,9 +34,9 @@ Runtime Objects Program Execution --- -- [The Interpreter](interpreter.md) +- [The Bytecode Interpreter](interpreter.md) -- [Adaptive Instruction Families](adaptive.md) +- [The JIT](jit.md) - [Garbage Collector Design](garbage_collector.md) diff --git a/InternalDocs/adaptive.md b/InternalDocs/adaptive.md deleted file mode 100644 index 7cfa8e52310460..00000000000000 --- a/InternalDocs/adaptive.md +++ /dev/null @@ -1,146 +0,0 @@ -# Adding or extending a family of adaptive instructions. - -## Families of instructions - -The core part of [PEP 659](https://peps.python.org/pep-0659/) -(specializing adaptive interpreter) is the families of -instructions that perform the adaptive specialization. - -A family of instructions has the following fundamental properties: - -* It corresponds to a single instruction in the code - generated by the bytecode compiler. -* It has a single adaptive instruction that records an execution count and, - at regular intervals, attempts to specialize itself. If not specializing, - it executes the base implementation. -* It has at least one specialized form of the instruction that is tailored - for a particular value or set of values at runtime. -* All members of the family must have the same number of inline cache entries, - to ensure correct execution. 
- Individual family members do not need to use all of the entries, - but must skip over any unused entries when executing. - -The current implementation also requires the following, -although these are not fundamental and may change: - -* All families use one or more inline cache entries, - the first entry is always the counter. -* All instruction names should start with the name of the adaptive - instruction. -* Specialized forms should have names describing their specialization. - -## Example family - -The `LOAD_GLOBAL` instruction (in [Python/bytecodes.c](../Python/bytecodes.c)) -already has an adaptive family that serves as a relatively simple example. - -The `LOAD_GLOBAL` instruction performs adaptive specialization, -calling `_Py_Specialize_LoadGlobal()` when the counter reaches zero. - -There are two specialized instructions in the family, `LOAD_GLOBAL_MODULE` -which is specialized for global variables in the module, and -`LOAD_GLOBAL_BUILTIN` which is specialized for builtin variables. - -## Performance analysis - -The benefit of a specialization can be assessed with the following formula: -`Tbase/Tadaptive`. - -Where `Tbase` is the mean time to execute the base instruction, -and `Tadaptive` is the mean time to execute the specialized and adaptive forms. - -`Tadaptive = (sum(Ti*Ni) + Tmiss*Nmiss)/(sum(Ni)+Nmiss)` - -`Ti` is the time to execute the `i`th instruction in the family and `Ni` is -the number of times that instruction is executed. -`Tmiss` is the time to process a miss, including de-optimzation -and the time to execute the base instruction. - -The ideal situation is where misses are rare and the specialized -forms are much faster than the base instruction. -`LOAD_GLOBAL` is near ideal, `Nmiss/sum(Ni) ≈ 0`. -In which case we have `Tadaptive ≈ sum(Ti*Ni)`. 
-Since we can expect the specialized forms `LOAD_GLOBAL_MODULE` and -`LOAD_GLOBAL_BUILTIN` to be much faster than the adaptive base instruction, -we would expect the specialization of `LOAD_GLOBAL` to be profitable. - -## Design considerations - -While `LOAD_GLOBAL` may be ideal, instructions like `LOAD_ATTR` and -`CALL_FUNCTION` are not. For maximum performance we want to keep `Ti` -low for all specialized instructions and `Nmiss` as low as possible. - -Keeping `Nmiss` low means that there should be specializations for almost -all values seen by the base instruction. Keeping `sum(Ti*Ni)` low means -keeping `Ti` low which means minimizing branches and dependent memory -accesses (pointer chasing). These two objectives may be in conflict, -requiring judgement and experimentation to design the family of instructions. - -The size of the inline cache should as small as possible, -without impairing performance, to reduce the number of -`EXTENDED_ARG` jumps, and to reduce pressure on the CPU's data cache. - -### Gathering data - -Before choosing how to specialize an instruction, it is important to gather -some data. What are the patterns of usage of the base instruction? -Data can best be gathered by instrumenting the interpreter. Since a -specialization function and adaptive instruction are going to be required, -instrumentation can most easily be added in the specialization function. - -### Choice of specializations - -The performance of the specializing adaptive interpreter relies on the -quality of specialization and keeping the overhead of specialization low. - -Specialized instructions must be fast. In order to be fast, -specialized instructions should be tailored for a particular -set of values that allows them to: - -1. Verify that incoming value is part of that set with low overhead. -2. Perform the operation quickly. 
- -This requires that the set of values is chosen such that membership can be -tested quickly and that membership is sufficient to allow the operation to -performed quickly. - -For example, `LOAD_GLOBAL_MODULE` is specialized for `globals()` -dictionaries that have a keys with the expected version. - -This can be tested quickly: - -* `globals->keys->dk_version == expected_version` - -and the operation can be performed quickly: - -* `value = entries[cache->index].me_value;`. - -Because it is impossible to measure the performance of an instruction without -also measuring unrelated factors, the assessment of the quality of a -specialization will require some judgement. - -As a general rule, specialized instructions should be much faster than the -base instruction. - -### Implementation of specialized instructions - -In general, specialized instructions should be implemented in two parts: - -1. A sequence of guards, each of the form - `DEOPT_IF(guard-condition-is-false, BASE_NAME)`. -2. The operation, which should ideally have no branches and - a minimum number of dependent memory accesses. - -In practice, the parts may overlap, as data required for guards -can be re-used in the operation. - -If there are branches in the operation, then consider further specialization -to eliminate the branches. - -### Maintaining stats - -Finally, take care that stats are gather correctly. -After the last `DEOPT_IF` has passed, a hit should be recorded with -`STAT_INC(BASE_INSTRUCTION, hit)`. -After an optimization has been deferred in the adaptive instruction, -that should be recorded with `STAT_INC(BASE_INSTRUCTION, deferred)`. 
diff --git a/InternalDocs/code_objects.md b/InternalDocs/code_objects.md index d4e28c6b238b48..a91a7043c1b8d4 100644 --- a/InternalDocs/code_objects.md +++ b/InternalDocs/code_objects.md @@ -18,6 +18,11 @@ Code objects are typically produced by the bytecode [compiler](compiler.md), although they are often written to disk by one process and read back in by another. The disk version of a code object is serialized using the [marshal](https://docs.python.org/dev/library/marshal.html) protocol. +When a `CodeObject` is created, the function `_PyCode_Quicken()` from +[`Python/specialize.c`](../Python/specialize.c) is called to initialize +the caches of all adaptive instructions. This is required because the +on-disk format is a sequence of bytes, and some of the caches need to be +initialized with 16-bit values. Code objects are nominally immutable. Some fields (including `co_code_adaptive` and fields for runtime diff --git a/InternalDocs/compiler.md b/InternalDocs/compiler.md index 9e99f348acbd8f..c257bfd9faf78f 100644 --- a/InternalDocs/compiler.md +++ b/InternalDocs/compiler.md @@ -595,16 +595,6 @@ Objects * [Exception Handling](exception_handling.md): Describes the exception table -Specializing Adaptive Interpreter -================================= - -Adding a specializing, adaptive interpreter to CPython will bring significant -performance improvements. These documents provide more information: - -* [PEP 659: Specializing Adaptive Interpreter](https://peps.python.org/pep-0659/). 
-* [Adding or extending a family of adaptive instructions](adaptive.md) - - References ========== diff --git a/InternalDocs/interpreter.md b/InternalDocs/interpreter.md index ab149e43471072..fa4a54fdc54fac 100644 --- a/InternalDocs/interpreter.md +++ b/InternalDocs/interpreter.md @@ -1,8 +1,4 @@ -The bytecode interpreter -======================== - -Overview --------- +# The bytecode interpreter This document describes the workings and implementation of the bytecode interpreter, the part of python that executes compiled Python code. Its @@ -47,8 +43,7 @@ simply calls [`_PyEval_EvalFrameDefault()`] to execute the frame. However, as pe `_PyEval_EvalFrameDefault()`. -Instruction decoding --------------------- +## Instruction decoding The first task of the interpreter is to decode the bytecode instructions. Bytecode is stored as an array of 16-bit code units (`_Py_CODEUNIT`). @@ -110,8 +105,7 @@ snippet decode a complete instruction: For various reasons we'll get to later (mostly efficiency, given that `EXTENDED_ARG` is rare) the actual code is different. -Jumps -===== +## Jumps Note that when the `switch` statement is reached, `next_instr` (the "instruction offset") already points to the next instruction. @@ -120,25 +114,26 @@ Thus, jump instructions can be implemented by manipulating `next_instr`: - A jump forward (`JUMP_FORWARD`) sets `next_instr += oparg`. - A jump backward sets `next_instr -= oparg`. -Inline cache entries -==================== +## Inline cache entries Some (specialized or specializable) instructions have an associated "inline cache". The inline cache consists of one or more two-byte entries included in the bytecode array as additional words following the `opcode`/`oparg` pair. The size of the inline cache for a particular instruction is fixed by its `opcode`. 
Moreover, the inline cache size for all instructions in a -[family of specialized/specializable instructions](adaptive.md) +[family of specialized/specializable instructions](#Specialization) (for example, `LOAD_ATTR`, `LOAD_ATTR_SLOT`, `LOAD_ATTR_MODULE`) must all be the same. Cache entries are reserved by the compiler and initialized with zeros. Although they are represented by code units, cache entries do not conform to the `opcode` / `oparg` format. -If an instruction has an inline cache, the layout of its cache is described by -a `struct` definition in (`pycore_code.h`)[../Include/internal/pycore_code.h]. -This allows us to access the cache by casting `next_instr` to a pointer to this `struct`. -The size of such a `struct` must be independent of the machine architecture, word size -and alignment requirements. For a 32-bit field, the `struct` should use `_Py_CODEUNIT field[2]`. +If an instruction has an inline cache, the layout of its cache is described in +the instruction's definition in [`Python/bytecodes.c`](../Python/bytecodes.c). +The structs defined in [`pycore_code.h`](../Include/internal/pycore_code.h) +allow us to access the cache by casting `next_instr` to a pointer to the relevant +`struct`. The size of such a `struct` must be independent of the machine +architecture, word size and alignment requirements. For a 32-bit field, the +`struct` should use `_Py_CODEUNIT field[2]`. The instruction implementation is responsible for advancing `next_instr` past the inline cache. For example, if an instruction's inline cache is four bytes (that is, two code units) in size, @@ -153,8 +148,7 @@ Serializing non-zero cache entries would present a problem because the serializa More information about the use of inline caches can be found in [PEP 659](https://peps.python.org/pep-0659/#ancillary-data). -The evaluation stack --------------------- +## The evaluation stack Most instructions read or write some data in the form of object references (`PyObject *`). 
The CPython bytecode interpreter is a stack machine, meaning that its instructions operate @@ -193,16 +187,14 @@ For example, the following sequence is illegal, because it keeps pushing items o > Do not confuse the evaluation stack with the call stack, which is used to implement calling > and returning from functions. -Error handling --------------- +## Error handling When the implementation of an opcode raises an exception, it jumps to the `exception_unwind` label in [Python/ceval.c](../Python/ceval.c). The exception is then handled as described in the [`exception handling documentation`](exception_handling.md#handling-exceptions). -Python-to-Python calls ----------------------- +## Python-to-Python calls The `_PyEval_EvalFrameDefault()` function is recursive, because sometimes the interpreter calls some C function that calls back into the interpreter. @@ -227,8 +219,7 @@ returns from `_PyEval_EvalFrameDefault()` altogether, to a C caller. A similar check is performed when an unhandled exception occurs. -The call stack --------------- +## The call stack Up through 3.10, the call stack was implemented as a singly-linked list of [frame objects](frames.md). This was expensive because each call would require a @@ -262,8 +253,7 @@ See also the [generators](generators.md) section. -Introducing a new bytecode instruction --------------------------------------- +## Introducing a new bytecode instruction It is occasionally necessary to add a new opcode in order to implement a new feature or change the way that existing features are compiled. @@ -355,6 +344,169 @@ new bytecode properly. Run `make regen-importlib` for updating the bytecode of frozen importlib files. You have to run `make` again after this to recompile the generated C files. +## Specialization + +Bytecode specialization, which was introduced in +[PEP 659](https://peps.python.org/pep-0659/), speeds up program execution by +rewriting instructions based on runtime information. 
This is done by replacing +a generic instruction with a faster version that works for the case that this +program encounters. Each specializable instruction is responsible for rewriting +itself, using its [inline caches](#inline-cache-entries) for +bookkeeping. + +When an adaptive instruction executes, it may attempt to specialize itself, +depending on the argument and the contents of its cache. This is done +by calling one of the `_Py_Specialize_XXX` functions in +[`Python/specialize.c`](../Python/specialize.c). + + +The specialized instructions are responsible for checking that the special-case +assumptions still apply, and de-optimizing back to the generic version if not. + +## Families of instructions + +A *family* of instructions consists of an adaptive instruction along with the +specialized instructions that it can be replaced by. +It has the following fundamental properties: + +* It corresponds to a single instruction in the code + generated by the bytecode compiler. +* It has a single adaptive instruction that records an execution count and, + at regular intervals, attempts to specialize itself. If not specializing, + it executes the base implementation. +* It has at least one specialized form of the instruction that is tailored + for a particular value or set of values at runtime. +* All members of the family must have the same number of inline cache entries, + to ensure correct execution. + Individual family members do not need to use all of the entries, + but must skip over any unused entries when executing. + +The current implementation also requires the following, +although these are not fundamental and may change: + +* All families use one or more inline cache entries, + the first entry is always the counter. +* All instruction names should start with the name of the adaptive + instruction. +* Specialized forms should have names describing their specialization. 
+ +## Example family + +The `LOAD_GLOBAL` instruction (in [Python/bytecodes.c](../Python/bytecodes.c)) +already has an adaptive family that serves as a relatively simple example. + +The `LOAD_GLOBAL` instruction performs adaptive specialization, +calling `_Py_Specialize_LoadGlobal()` when the counter reaches zero. + +There are two specialized instructions in the family, `LOAD_GLOBAL_MODULE` +which is specialized for global variables in the module, and +`LOAD_GLOBAL_BUILTIN` which is specialized for builtin variables. + +## Performance analysis + +The benefit of a specialization can be assessed with the following formula: +`Tbase/Tadaptive`. + +Where `Tbase` is the mean time to execute the base instruction, +and `Tadaptive` is the mean time to execute the specialized and adaptive forms. + +`Tadaptive = (sum(Ti*Ni) + Tmiss*Nmiss)/(sum(Ni)+Nmiss)` + +`Ti` is the time to execute the `i`th instruction in the family and `Ni` is +the number of times that instruction is executed. +`Tmiss` is the time to process a miss, including de-optimization +and the time to execute the base instruction. + +The ideal situation is where misses are rare and the specialized +forms are much faster than the base instruction. +`LOAD_GLOBAL` is near ideal, `Nmiss/sum(Ni) ≈ 0`. +In which case we have `Tadaptive ≈ sum(Ti*Ni)/sum(Ni)`. +Since we can expect the specialized forms `LOAD_GLOBAL_MODULE` and +`LOAD_GLOBAL_BUILTIN` to be much faster than the adaptive base instruction, +we would expect the specialization of `LOAD_GLOBAL` to be profitable. + +## Design considerations + +While `LOAD_GLOBAL` may be ideal, instructions like `LOAD_ATTR` and +`CALL_FUNCTION` are not. For maximum performance we want to keep `Ti` +low for all specialized instructions and `Nmiss` as low as possible. + +Keeping `Nmiss` low means that there should be specializations for almost +all values seen by the base instruction.
Keeping `sum(Ti*Ni)` low means +keeping `Ti` low which means minimizing branches and dependent memory +accesses (pointer chasing). These two objectives may be in conflict, +requiring judgement and experimentation to design the family of instructions. + +The size of the inline cache should be as small as possible, +without impairing performance, to reduce the number of +`EXTENDED_ARG` jumps, and to reduce pressure on the CPU's data cache. + +### Gathering data + +Before choosing how to specialize an instruction, it is important to gather +some data. What are the patterns of usage of the base instruction? +Data can best be gathered by instrumenting the interpreter. Since a +specialization function and adaptive instruction are going to be required, +instrumentation can most easily be added in the specialization function. + +### Choice of specializations + +The performance of the specializing adaptive interpreter relies on the +quality of specialization and keeping the overhead of specialization low. + +Specialized instructions must be fast. In order to be fast, +specialized instructions should be tailored for a particular +set of values that allows them to: + +1. Verify that the incoming value is part of that set with low overhead. +2. Perform the operation quickly. + +This requires that the set of values is chosen such that membership can be +tested quickly and that membership is sufficient to allow the operation to +be performed quickly. + +For example, `LOAD_GLOBAL_MODULE` is specialized for `globals()` +dictionaries that have keys with the expected version. + +This can be tested quickly: + +* `globals->keys->dk_version == expected_version` + +and the operation can be performed quickly: + +* `value = entries[cache->index].me_value;`. + +Because it is impossible to measure the performance of an instruction without +also measuring unrelated factors, the assessment of the quality of a +specialization will require some judgement.
+ +As a general rule, specialized instructions should be much faster than the +base instruction. + +### Implementation of specialized instructions + +In general, specialized instructions should be implemented in two parts: + +1. A sequence of guards, each of the form + `DEOPT_IF(guard-condition-is-false, BASE_NAME)`. +2. The operation, which should ideally have no branches and + a minimum number of dependent memory accesses. + +In practice, the parts may overlap, as data required for guards +can be re-used in the operation. + +If there are branches in the operation, then consider further specialization +to eliminate the branches. + +### Maintaining stats + +Finally, take care that stats are gathered correctly. +After the last `DEOPT_IF` has passed, a hit should be recorded with +`STAT_INC(BASE_INSTRUCTION, hit)`. +After an optimization has been deferred in the adaptive instruction, +that should be recorded with `STAT_INC(BASE_INSTRUCTION, deferred)`. + + Additional resources -------------------- diff --git a/InternalDocs/jit.md b/InternalDocs/jit.md new file mode 100644 index 00000000000000..1e9f385d5f87fa --- /dev/null +++ b/InternalDocs/jit.md @@ -0,0 +1,134 @@ +# The JIT + +The [adaptive interpreter](interpreter.md) consists of a main loop that +executes the bytecode instructions generated by the +[bytecode compiler](compiler.md) and their +[specializations](interpreter.md#Specialization). Runtime optimization in +this interpreter can only be done for one instruction at a time. The JIT +is based on a mechanism to replace an entire sequence of bytecode instructions, +and this enables optimizations that span multiple instructions. + +Historically, the adaptive interpreter was referred to as `tier 1` and +the JIT as `tier 2`. You will see remnants of this in the code. 
+## The Optimizer and Executors + +The program begins running on the adaptive interpreter, until a `JUMP_BACKWARD` +instruction determines that it is "hot" because the counter in its +[inline cache](interpreter.md#inline-cache-entries) indicates that it +executed more than some threshold number of times (see +[`backoff_counter_triggers`](../Include/internal/pycore_backoff.h)). +It then calls the function `_PyOptimizer_Optimize()` in +[`Python/optimizer.c`](../Python/optimizer.c), passing it the current +[frame](frames.md) and instruction pointer. `_PyOptimizer_Optimize()` +constructs an object of type +[`_PyExecutorObject`](../Include/internal/pycore_optimizer.h) which implements +an optimized version of the instruction trace beginning at this jump. + +The optimizer determines where the trace ends, and the executor is set up +to either return to the adaptive interpreter and resume execution, or +transfer control to another executor (see `_PyExitData` in +Include/internal/pycore_optimizer.h). + +The executor is stored on the [`code object`](code_objects.md) of the frame, +in the `co_executors` field which is an array of executors. The start +instruction of the trace (the `JUMP_BACKWARD`) is replaced by an +`ENTER_EXECUTOR` instruction whose `oparg` is equal to the index of the +executor in `co_executors`. + +## The micro-op optimizer + +The optimizer that `_PyOptimizer_Optimize()` runs is configurable via the +`_Py_SetTier2Optimizer()` function (this is used in tests via +`_testinternalcapi.set_optimizer()`). + +The micro-op (abbreviated `uop` to approximate `μop`) optimizer is defined in +[`Python/optimizer.c`](../Python/optimizer.c) as the type `_PyUOpOptimizer_Type`.
+It translates an instruction trace into a sequence of micro-ops by replacing +each bytecode by an equivalent sequence of micro-ops (see +`_PyOpcode_macro_expansion` in +[pycore_opcode_metadata.h](../Include/internal/pycore_opcode_metadata.h) +which is generated from [`Python/bytecodes.c`](../Python/bytecodes.c)). +The micro-op sequence is then optimized by +`_Py_uop_analyze_and_optimize` in +[`Python/optimizer_analysis.c`](../Python/optimizer_analysis.c) +and an instance of `_PyUOpExecutor_Type` is created to contain it. + +## The JIT interpreter + +After a `JUMP_BACKWARD` instruction invokes the uop optimizer to create a uop +executor, it transfers control to this executor via the `GOTO_TIER_TWO` macro. + +CPython implements two executors. Here we describe the JIT interpreter, +which is the simpler of them and is therefore useful for debugging and analyzing +the uops generation and optimization stages. To run it, we configure the +JIT to run on its interpreter (i.e., python is configured with +[`--enable-experimental-jit=interpreter`](https://docs.python.org/dev/using/configure.html#cmdoption-enable-experimental-jit)). + +When invoked, the executor jumps to the `tier2_dispatch:` label in +[`Python/ceval.c`](../Python/ceval.c), where there is a loop that +executes the micro-ops. The body of this loop is a switch statement over +the uops IDs, resembling the one used in the adaptive interpreter. + +The switch implementing the uops is in [`Python/executor_cases.c.h`](../Python/executor_cases.c.h), +which is generated by the build script +[`Tools/cases_generator/tier2_generator.py`](../Tools/cases_generator/tier2_generator.py) +from the bytecode definitions in +[`Python/bytecodes.c`](../Python/bytecodes.c). + +When an `_EXIT_TRACE` or `_DEOPT` uop is reached, the uop interpreter exits +and execution returns to the adaptive interpreter.
+ +## Invalidating Executors + +In addition to being stored on the code object, each executor is also +inserted into a list of all executors, which is stored in the interpreter +state's `executor_list_head` field. This list is used when it is necessary +to invalidate executors because values they used in their construction may +have changed. + +## The JIT + +When the full JIT is enabled (python was configured with +[`--enable-experimental-jit`](https://docs.python.org/dev/using/configure.html#cmdoption-enable-experimental-jit)), +the uop executor's `jit_code` field is populated with a pointer to a compiled +C function that implements the executor logic. This function's signature is +defined by `jit_func` in [`pycore_jit.h`](../Include/internal/pycore_jit.h). +When the executor is invoked by `ENTER_EXECUTOR`, instead of jumping to +the uop interpreter at `tier2_dispatch`, the executor runs the function +that `jit_code` points to. This function returns the instruction pointer +of the next Tier 1 instruction that needs to execute. + +The generation of the jitted functions uses the copy-and-patch technique +which is described in +[Haoran Xu's article](https://sillycross.github.io/2023/05/12/2023-05-12/). +At its core are statically generated `stencils` for the implementation +of the micro ops, which are completed with runtime information while +the jitted code is constructed for an executor by +[`_PyJIT_Compile`](../Python/jit.c). + +The stencils are generated at build time under the Makefile target `regen-jit` +by the scripts in [`/Tools/jit`](/Tools/jit). This script reads +[`Python/executor_cases.c.h`](../Python/executor_cases.c.h) (which is +generated from [`Python/bytecodes.c`](../Python/bytecodes.c)). For +each opcode, it constructs a `.c` file that contains a function for +implementing this opcode, with some runtime information injected. 
+This is done by replacing `CASE` by the bytecode definition in the +template file [`Tools/jit/template.c`](../Tools/jit/template.c). + +Each of the `.c` files is compiled by LLVM, to produce an object file +that contains a function that executes the opcode. These compiled +functions are used to generate the file +[`jit_stencils.h`](../jit_stencils.h), which contains the functions +that the JIT can use to emit code for each of the bytecodes. + +For Python maintainers this means that changes to the bytecodes and +their implementations do not require changes related to the stencils, +because everything is automatically generated from +[`Python/bytecodes.c`](../Python/bytecodes.c) at build time. + +See Also: + +* [Copy-and-Patch Compilation: A fast compilation algorithm for high-level languages and bytecode](https://arxiv.org/abs/2011.13127) + +* [PyCon 2024: Building a JIT compiler for CPython](https://www.youtube.com/watch?v=kMO3Ju0QCDo) From e59caf67cdb8dae26470f00599ea8dbb00968a73 Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Fri, 6 Dec 2024 17:50:58 +0000 Subject: [PATCH 63/76] Fix typo in `Lib/_android_support.py` (#127699) --- Lib/_android_support.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/_android_support.py b/Lib/_android_support.py index 7572745c851847..ae506f6a4b57b8 100644 --- a/Lib/_android_support.py +++ b/Lib/_android_support.py @@ -6,7 +6,7 @@ # The maximum length of a log message in bytes, including the level marker and # tag, is defined as LOGGER_ENTRY_MAX_PAYLOAD at # https://cs.android.com/android/platform/superproject/+/android-14.0.0_r1:system/logging/liblog/include/log/log.h;l=71. -# Messages longer than this will be be truncated by logcat. This limit has already +# Messages longer than this will be truncated by logcat. 
This limit has already # been reduced at least once in the history of Android (from 4076 to 4068 between # API level 23 and 26), so leave some headroom. MAX_BYTES_PER_WRITE = 4000 From 5b6635f772d187d6049a56bfea76855644cd4ca1 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Fri, 6 Dec 2024 18:10:00 +0000 Subject: [PATCH 64/76] GH-127381: pathlib ABCs: remove `PathBase.rename()` and `replace()` (#127658) These methods are obviated by `PathBase.move()`, which can move directories and supports any `PathBase` object as a target. --- Lib/pathlib/_abc.py | 37 +---------------------- Lib/pathlib/_local.py | 17 +++++++++++ Lib/test/test_pathlib/test_pathlib_abc.py | 2 -- 3 files changed, 18 insertions(+), 38 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 86617ff2616f33..11a11ecc4c8203 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -14,7 +14,7 @@ import functools import operator import posixpath -from errno import EINVAL, EXDEV +from errno import EINVAL from glob import _GlobberBase, _no_recurse_symlinks from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO from pathlib._os import copyfileobj @@ -902,45 +902,10 @@ def copy_into(self, target_dir, *, follow_symlinks=True, dirs_exist_ok=dirs_exist_ok, preserve_metadata=preserve_metadata) - def rename(self, target): - """ - Rename this path to the target path. - - The target path may be absolute or relative. Relative paths are - interpreted relative to the current working directory, *not* the - directory of the Path object. - - Returns the new Path instance pointing to the target path. - """ - raise UnsupportedOperation(self._unsupported_msg('rename()')) - - def replace(self, target): - """ - Rename this path to the target path, overwriting if that path exists. - - The target path may be absolute or relative. Relative paths are - interpreted relative to the current working directory, *not* the - directory of the Path object. 
- - Returns the new Path instance pointing to the target path. - """ - raise UnsupportedOperation(self._unsupported_msg('replace()')) - def move(self, target): """ Recursively move this file or directory tree to the given destination. """ - self._ensure_different_file(target) - try: - return self.replace(target) - except UnsupportedOperation: - pass - except TypeError: - if not isinstance(target, PathBase): - raise - except OSError as err: - if err.errno != EXDEV: - raise target = self.copy(target, follow_symlinks=False, preserve_metadata=True) self._delete() return target diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index bb8a252c0e94e2..250bc12956f5bc 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -4,6 +4,7 @@ import os import posixpath import sys +from errno import EXDEV from glob import _StringGlobber from itertools import chain from _collections_abc import Sequence @@ -876,6 +877,22 @@ def replace(self, target): os.replace(self, target) return self.with_segments(target) + def move(self, target): + """ + Recursively move this file or directory tree to the given destination. + """ + self._ensure_different_file(target) + try: + return self.replace(target) + except TypeError: + if not isinstance(target, PathBase): + raise + except OSError as err: + if err.errno != EXDEV: + raise + # Fall back to copy+delete. 
+ return PathBase.move(self, target) + if hasattr(os, "symlink"): def symlink_to(self, target, target_is_directory=False): """ diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 7ba3fa823a30b9..00153e3f5e997e 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1376,8 +1376,6 @@ def test_unsupported_operation(self): self.assertRaises(e, p.hardlink_to, 'foo') self.assertRaises(e, p.mkdir) self.assertRaises(e, p.touch) - self.assertRaises(e, p.rename, 'foo') - self.assertRaises(e, p.replace, 'foo') self.assertRaises(e, p.chmod, 0o755) self.assertRaises(e, p.lchmod, 0o755) self.assertRaises(e, p.unlink) From 0fc4063747c96223575f6f5a0562eddf2ed0ed62 Mon Sep 17 00:00:00 2001 From: Brett Cannon Date: Fri, 6 Dec 2024 10:42:05 -0800 Subject: [PATCH 65/76] GH-127652: stop using `--wasi preview2` in `wasi.py` (GH-127704) It's only to use WASI 0.2 code to back preview1 APIs and is considered experimental anyway. --- Tools/wasm/wasi.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/Tools/wasm/wasi.py b/Tools/wasm/wasi.py index ac36d55587a38f..da847c4ff86215 100644 --- a/Tools/wasm/wasi.py +++ b/Tools/wasm/wasi.py @@ -297,8 +297,6 @@ def main(): # build. # Use 16 MiB stack. "--wasm max-wasm-stack=16777216 " - # Use WASI 0.2 primitives. - "--wasi preview2 " # Enable thread support; causes use of preview1. #"--wasm threads=y --wasi threads=y " # Map the checkout to / to load the stdlib from /Lib. From 31c9f3ced293492b38e784c17c4befe425da5dab Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Fri, 6 Dec 2024 21:39:45 +0000 Subject: [PATCH 66/76] GH-127381: pathlib ABCs: remove `PathBase.resolve()` and `absolute()` (#127707) Remove our implementation of POSIX path resolution in `PathBase.resolve()`. This functionality is rather fragile and isn't necessary in most cases. It depends on `PathBase.stat()`, which we're looking to remove. Also remove `PathBase.absolute()`. 
Many legitimate virtual filesystems lack the notion of a 'current directory', so it's wrong to include in the basic interface. --- Lib/pathlib/_abc.py | 64 +- Lib/test/test_pathlib/test_pathlib.py | 586 ++++++++++++++++++- Lib/test/test_pathlib/test_pathlib_abc.py | 680 +--------------------- 3 files changed, 599 insertions(+), 731 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 11a11ecc4c8203..820970fcd5889b 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -13,7 +13,6 @@ import functools import operator -import posixpath from errno import EINVAL from glob import _GlobberBase, _no_recurse_symlinks from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO @@ -115,11 +114,6 @@ class PurePathBase: # The `_raw_paths` slot stores unjoined string paths. This is set in # the `__init__()` method. '_raw_paths', - - # The '_resolving' slot stores a boolean indicating whether the path - # is being processed by `PathBase.resolve()`. This prevents duplicate - # work from occurring when `resolve()` calls `stat()` or `readlink()`. - '_resolving', ) parser = ParserBase() _globber = PathGlobber @@ -130,7 +124,6 @@ def __init__(self, *args): raise TypeError( f"argument should be a str, not {type(arg).__name__!r}") self._raw_paths = list(args) - self._resolving = False def with_segments(self, *pathsegments): """Construct a new path object from any number of path-like objects. 
@@ -339,9 +332,7 @@ def parent(self): path = str(self) parent = self.parser.split(path)[0] if path != parent: - parent = self.with_segments(parent) - parent._resolving = self._resolving - return parent + return self.with_segments(parent) return self @property @@ -424,9 +415,6 @@ class PathBase(PurePathBase): """ __slots__ = () - # Maximum number of symlinks to follow in resolve() - _max_symlinks = 40 - @classmethod def _unsupported_msg(cls, attribute): return f"{cls.__name__}.{attribute} is unsupported" @@ -720,20 +708,6 @@ def walk(self, top_down=True, on_error=None, follow_symlinks=False): yield path, dirnames, filenames paths += [path.joinpath(d) for d in reversed(dirnames)] - def absolute(self): - """Return an absolute version of this path - No normalization or symlink resolution is performed. - - Use resolve() to resolve symlinks and remove '..' segments. - """ - if self.is_absolute(): - return self - elif self.parser is not posixpath: - raise UnsupportedOperation(self._unsupported_msg('absolute()')) - else: - # Treat the root directory as the current working directory. - return self.with_segments('/', *self._raw_paths) - def expanduser(self): """ Return a new path with expanded ~ and ~user constructs (as returned by os.path.expanduser) @@ -745,42 +719,6 @@ def readlink(self): Return the path to which the symbolic link points. """ raise UnsupportedOperation(self._unsupported_msg('readlink()')) - readlink._supported = False - - def resolve(self, strict=False): - """ - Make the path absolute, resolving all symlinks on the way and also - normalizing it. 
- """ - if self._resolving: - return self - elif self.parser is not posixpath: - raise UnsupportedOperation(self._unsupported_msg('resolve()')) - - def raise_error(*args): - raise OSError("Unsupported operation.") - - getcwd = raise_error - if strict or getattr(self.readlink, '_supported', True): - def lstat(path_str): - path = self.with_segments(path_str) - path._resolving = True - return path.stat(follow_symlinks=False) - - def readlink(path_str): - path = self.with_segments(path_str) - path._resolving = True - return str(path.readlink()) - else: - # If the user has *not* overridden the `readlink()` method, then - # symlinks are unsupported and (in non-strict mode) we can improve - # performance by not calling `path.lstat()`. - lstat = readlink = raise_error - - return self.with_segments(posixpath._realpath( - str(self.absolute()), strict, self.parser.sep, - getcwd=getcwd, lstat=lstat, readlink=readlink, - maxlinks=self._max_symlinks)) def symlink_to(self, target, target_is_directory=False): """ diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 2c48eeeda145d0..8c9049f15d5bf9 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -1,3 +1,4 @@ +import collections import contextlib import io import os @@ -21,7 +22,7 @@ from test.support import os_helper from test.support.os_helper import TESTFN, FakePath from test.test_pathlib import test_pathlib_abc -from test.test_pathlib.test_pathlib_abc import needs_posix, needs_windows, needs_symlinks +from test.test_pathlib.test_pathlib_abc import needs_posix, needs_windows try: import fcntl @@ -55,6 +56,13 @@ def new_test(self): self.cls.replace = old_replace return new_test + +_tests_needing_symlinks = set() +def needs_symlinks(fn): + """Decorator that marks a test as requiring a path class that supports symlinks.""" + _tests_needing_symlinks.add(fn.__name__) + return fn + # # Tests for the pure classes. 
# @@ -533,6 +541,9 @@ class PathTest(test_pathlib_abc.DummyPathTest, PurePathTest): can_symlink = os_helper.can_symlink() def setUp(self): + name = self.id().split('.')[-1] + if name in _tests_needing_symlinks and not self.can_symlink: + self.skipTest('requires symlinks') super().setUp() os.chmod(self.parser.join(self.base, 'dirE'), 0) @@ -693,6 +704,34 @@ def test_copy_file_preserve_metadata(self): if hasattr(source_st, 'st_flags'): self.assertEqual(source_st.st_flags, target_st.st_flags) + @needs_symlinks + def test_copy_file_to_existing_symlink(self): + base = self.cls(self.base) + source = base / 'dirB' / 'fileB' + target = base / 'linkA' + real_target = base / 'fileA' + result = source.copy(target) + self.assertEqual(result, target) + self.assertTrue(target.exists()) + self.assertTrue(target.is_symlink()) + self.assertTrue(real_target.exists()) + self.assertFalse(real_target.is_symlink()) + self.assertEqual(source.read_text(), real_target.read_text()) + + @needs_symlinks + def test_copy_file_to_existing_symlink_follow_symlinks_false(self): + base = self.cls(self.base) + source = base / 'dirB' / 'fileB' + target = base / 'linkA' + real_target = base / 'fileA' + result = source.copy(target, follow_symlinks=False) + self.assertEqual(result, target) + self.assertTrue(target.exists()) + self.assertTrue(target.is_symlink()) + self.assertTrue(real_target.exists()) + self.assertFalse(real_target.is_symlink()) + self.assertEqual(source.read_text(), real_target.read_text()) + @os_helper.skip_unless_xattr def test_copy_file_preserve_metadata_xattrs(self): base = self.cls(self.base) @@ -702,6 +741,118 @@ def test_copy_file_preserve_metadata_xattrs(self): source.copy(target, preserve_metadata=True) self.assertEqual(os.getxattr(target, b'user.foo'), b'42') + @needs_symlinks + def test_copy_symlink_follow_symlinks_true(self): + base = self.cls(self.base) + source = base / 'linkA' + target = base / 'copyA' + result = source.copy(target) + self.assertEqual(result, target) + 
self.assertTrue(target.exists()) + self.assertFalse(target.is_symlink()) + self.assertEqual(source.read_text(), target.read_text()) + + @needs_symlinks + def test_copy_symlink_follow_symlinks_false(self): + base = self.cls(self.base) + source = base / 'linkA' + target = base / 'copyA' + result = source.copy(target, follow_symlinks=False) + self.assertEqual(result, target) + self.assertTrue(target.exists()) + self.assertTrue(target.is_symlink()) + self.assertEqual(source.readlink(), target.readlink()) + + @needs_symlinks + def test_copy_symlink_to_itself(self): + base = self.cls(self.base) + source = base / 'linkA' + self.assertRaises(OSError, source.copy, source) + + @needs_symlinks + def test_copy_symlink_to_existing_symlink(self): + base = self.cls(self.base) + source = base / 'copySource' + target = base / 'copyTarget' + source.symlink_to(base / 'fileA') + target.symlink_to(base / 'dirC') + self.assertRaises(OSError, source.copy, target) + self.assertRaises(OSError, source.copy, target, follow_symlinks=False) + + @needs_symlinks + def test_copy_symlink_to_existing_directory_symlink(self): + base = self.cls(self.base) + source = base / 'copySource' + target = base / 'copyTarget' + source.symlink_to(base / 'fileA') + target.symlink_to(base / 'dirC') + self.assertRaises(OSError, source.copy, target) + self.assertRaises(OSError, source.copy, target, follow_symlinks=False) + + @needs_symlinks + def test_copy_directory_symlink_follow_symlinks_false(self): + base = self.cls(self.base) + source = base / 'linkB' + target = base / 'copyA' + result = source.copy(target, follow_symlinks=False) + self.assertEqual(result, target) + self.assertTrue(target.exists()) + self.assertTrue(target.is_symlink()) + self.assertEqual(source.readlink(), target.readlink()) + + @needs_symlinks + def test_copy_directory_symlink_to_itself(self): + base = self.cls(self.base) + source = base / 'linkB' + self.assertRaises(OSError, source.copy, source) + self.assertRaises(OSError, source.copy, 
source, follow_symlinks=False) + + @needs_symlinks + def test_copy_directory_symlink_into_itself(self): + base = self.cls(self.base) + source = base / 'linkB' + target = base / 'linkB' / 'copyB' + self.assertRaises(OSError, source.copy, target) + self.assertRaises(OSError, source.copy, target, follow_symlinks=False) + self.assertFalse(target.exists()) + + @needs_symlinks + def test_copy_directory_symlink_to_existing_symlink(self): + base = self.cls(self.base) + source = base / 'copySource' + target = base / 'copyTarget' + source.symlink_to(base / 'dirC') + target.symlink_to(base / 'fileA') + self.assertRaises(FileExistsError, source.copy, target) + self.assertRaises(FileExistsError, source.copy, target, follow_symlinks=False) + + @needs_symlinks + def test_copy_directory_symlink_to_existing_directory_symlink(self): + base = self.cls(self.base) + source = base / 'copySource' + target = base / 'copyTarget' + source.symlink_to(base / 'dirC' / 'dirD') + target.symlink_to(base / 'dirC') + self.assertRaises(FileExistsError, source.copy, target) + self.assertRaises(FileExistsError, source.copy, target, follow_symlinks=False) + + @needs_symlinks + def test_copy_dangling_symlink(self): + base = self.cls(self.base) + source = base / 'source' + target = base / 'target' + + source.mkdir() + source.joinpath('link').symlink_to('nonexistent') + + self.assertRaises(FileNotFoundError, source.copy, target) + + target2 = base / 'target2' + result = source.copy(target2, follow_symlinks=False) + self.assertEqual(result, target2) + self.assertTrue(target2.joinpath('link').is_symlink()) + self.assertEqual(target2.joinpath('link').readlink(), self.cls('nonexistent')) + @needs_symlinks def test_copy_link_preserve_metadata(self): base = self.cls(self.base) @@ -801,6 +952,54 @@ def test_copy_dir_preserve_metadata_xattrs(self): target_file = target.joinpath('dirD', 'fileD') self.assertEqual(os.getxattr(target_file, b'user.foo'), b'42') + @needs_symlinks + def test_move_file_symlink(self): + 
base = self.cls(self.base) + source = base / 'linkA' + source_readlink = source.readlink() + target = base / 'linkA_moved' + result = source.move(target) + self.assertEqual(result, target) + self.assertFalse(source.exists()) + self.assertTrue(target.is_symlink()) + self.assertEqual(source_readlink, target.readlink()) + + @needs_symlinks + def test_move_file_symlink_to_itself(self): + base = self.cls(self.base) + source = base / 'linkA' + self.assertRaises(OSError, source.move, source) + + @needs_symlinks + def test_move_dir_symlink(self): + base = self.cls(self.base) + source = base / 'linkB' + source_readlink = source.readlink() + target = base / 'linkB_moved' + result = source.move(target) + self.assertEqual(result, target) + self.assertFalse(source.exists()) + self.assertTrue(target.is_symlink()) + self.assertEqual(source_readlink, target.readlink()) + + @needs_symlinks + def test_move_dir_symlink_to_itself(self): + base = self.cls(self.base) + source = base / 'linkB' + self.assertRaises(OSError, source.move, source) + + @needs_symlinks + def test_move_dangling_symlink(self): + base = self.cls(self.base) + source = base / 'brokenLink' + source_readlink = source.readlink() + target = base / 'brokenLink_moved' + result = source.move(target) + self.assertEqual(result, target) + self.assertFalse(source.exists()) + self.assertTrue(target.is_symlink()) + self.assertEqual(source_readlink, target.readlink()) + @patch_replace def test_move_file_other_fs(self): self.test_move_file() @@ -858,9 +1057,41 @@ def test_move_into_other_os(self): def test_move_into_empty_name_other_os(self): self.test_move_into_empty_name() + @needs_symlinks + def test_complex_symlinks_absolute(self): + self._check_complex_symlinks(self.base) + + @needs_symlinks + def test_complex_symlinks_relative(self): + self._check_complex_symlinks('.') + + @needs_symlinks + def test_complex_symlinks_relative_dot_dot(self): + self._check_complex_symlinks(self.parser.join('dirA', '..')) + def 
_check_complex_symlinks(self, link0_target): - super()._check_complex_symlinks(link0_target) + # Test solving a non-looping chain of symlinks (issue #19887). + parser = self.parser P = self.cls(self.base) + P.joinpath('link1').symlink_to(parser.join('link0', 'link0'), target_is_directory=True) + P.joinpath('link2').symlink_to(parser.join('link1', 'link1'), target_is_directory=True) + P.joinpath('link3').symlink_to(parser.join('link2', 'link2'), target_is_directory=True) + P.joinpath('link0').symlink_to(link0_target, target_is_directory=True) + + # Resolve absolute paths. + p = (P / 'link0').resolve() + self.assertEqual(p, P) + self.assertEqualNormCase(str(p), self.base) + p = (P / 'link1').resolve() + self.assertEqual(p, P) + self.assertEqualNormCase(str(p), self.base) + p = (P / 'link2').resolve() + self.assertEqual(p, P) + self.assertEqualNormCase(str(p), self.base) + p = (P / 'link3').resolve() + self.assertEqual(p, P) + self.assertEqualNormCase(str(p), self.base) + # Resolve relative paths. old_path = os.getcwd() os.chdir(self.base) @@ -880,6 +1111,118 @@ def _check_complex_symlinks(self, link0_target): finally: os.chdir(old_path) + def _check_resolve(self, p, expected, strict=True): + q = p.resolve(strict) + self.assertEqual(q, expected) + + # This can be used to check both relative and absolute resolutions. 
+ _check_resolve_relative = _check_resolve_absolute = _check_resolve + + @needs_symlinks + def test_resolve_common(self): + P = self.cls + p = P(self.base, 'foo') + with self.assertRaises(OSError) as cm: + p.resolve(strict=True) + self.assertEqual(cm.exception.errno, errno.ENOENT) + # Non-strict + parser = self.parser + self.assertEqualNormCase(str(p.resolve(strict=False)), + parser.join(self.base, 'foo')) + p = P(self.base, 'foo', 'in', 'spam') + self.assertEqualNormCase(str(p.resolve(strict=False)), + parser.join(self.base, 'foo', 'in', 'spam')) + p = P(self.base, '..', 'foo', 'in', 'spam') + self.assertEqualNormCase(str(p.resolve(strict=False)), + parser.join(parser.dirname(self.base), 'foo', 'in', 'spam')) + # These are all relative symlinks. + p = P(self.base, 'dirB', 'fileB') + self._check_resolve_relative(p, p) + p = P(self.base, 'linkA') + self._check_resolve_relative(p, P(self.base, 'fileA')) + p = P(self.base, 'dirA', 'linkC', 'fileB') + self._check_resolve_relative(p, P(self.base, 'dirB', 'fileB')) + p = P(self.base, 'dirB', 'linkD', 'fileB') + self._check_resolve_relative(p, P(self.base, 'dirB', 'fileB')) + # Non-strict + p = P(self.base, 'dirA', 'linkC', 'fileB', 'foo', 'in', 'spam') + self._check_resolve_relative(p, P(self.base, 'dirB', 'fileB', 'foo', 'in', + 'spam'), False) + p = P(self.base, 'dirA', 'linkC', '..', 'foo', 'in', 'spam') + if self.cls.parser is not posixpath: + # In Windows, if linkY points to dirB, 'dirA\linkY\..' + # resolves to 'dirA' without resolving linkY first. + self._check_resolve_relative(p, P(self.base, 'dirA', 'foo', 'in', + 'spam'), False) + else: + # In Posix, if linkY points to dirB, 'dirA/linkY/..' + # resolves to 'dirB/..' first before resolving to parent of dirB. + self._check_resolve_relative(p, P(self.base, 'foo', 'in', 'spam'), False) + # Now create absolute symlinks. 
+ d = self.tempdir() + P(self.base, 'dirA', 'linkX').symlink_to(d) + P(self.base, str(d), 'linkY').symlink_to(self.parser.join(self.base, 'dirB')) + p = P(self.base, 'dirA', 'linkX', 'linkY', 'fileB') + self._check_resolve_absolute(p, P(self.base, 'dirB', 'fileB')) + # Non-strict + p = P(self.base, 'dirA', 'linkX', 'linkY', 'foo', 'in', 'spam') + self._check_resolve_relative(p, P(self.base, 'dirB', 'foo', 'in', 'spam'), + False) + p = P(self.base, 'dirA', 'linkX', 'linkY', '..', 'foo', 'in', 'spam') + if self.cls.parser is not posixpath: + # In Windows, if linkY points to dirB, 'dirA\linkY\..' + # resolves to 'dirA' without resolving linkY first. + self._check_resolve_relative(p, P(d, 'foo', 'in', 'spam'), False) + else: + # In Posix, if linkY points to dirB, 'dirA/linkY/..' + # resolves to 'dirB/..' first before resolving to parent of dirB. + self._check_resolve_relative(p, P(self.base, 'foo', 'in', 'spam'), False) + + @needs_symlinks + def test_resolve_dot(self): + # See http://web.archive.org/web/20200623062557/https://bitbucket.org/pitrou/pathlib/issues/9/ + parser = self.parser + p = self.cls(self.base) + p.joinpath('0').symlink_to('.', target_is_directory=True) + p.joinpath('1').symlink_to(parser.join('0', '0'), target_is_directory=True) + p.joinpath('2').symlink_to(parser.join('1', '1'), target_is_directory=True) + q = p / '2' + self.assertEqual(q.resolve(strict=True), p) + r = q / '3' / '4' + self.assertRaises(FileNotFoundError, r.resolve, strict=True) + # Non-strict + self.assertEqual(r.resolve(strict=False), p / '3' / '4') + + def _check_symlink_loop(self, *args): + path = self.cls(*args) + with self.assertRaises(OSError) as cm: + path.resolve(strict=True) + self.assertEqual(cm.exception.errno, errno.ELOOP) + + @needs_posix + @needs_symlinks + def test_resolve_loop(self): + # Loops with relative symlinks. 
+ self.cls(self.base, 'linkX').symlink_to('linkX/inside') + self._check_symlink_loop(self.base, 'linkX') + self.cls(self.base, 'linkY').symlink_to('linkY') + self._check_symlink_loop(self.base, 'linkY') + self.cls(self.base, 'linkZ').symlink_to('linkZ/../linkZ') + self._check_symlink_loop(self.base, 'linkZ') + # Non-strict + p = self.cls(self.base, 'linkZ', 'foo') + self.assertEqual(p.resolve(strict=False), p) + # Loops with absolute symlinks. + self.cls(self.base, 'linkU').symlink_to(self.parser.join(self.base, 'linkU/inside')) + self._check_symlink_loop(self.base, 'linkU') + self.cls(self.base, 'linkV').symlink_to(self.parser.join(self.base, 'linkV')) + self._check_symlink_loop(self.base, 'linkV') + self.cls(self.base, 'linkW').symlink_to(self.parser.join(self.base, 'linkW/../linkW')) + self._check_symlink_loop(self.base, 'linkW') + # Non-strict + q = self.cls(self.base, 'linkW', 'foo') + self.assertEqual(q.resolve(strict=False), q) + def test_resolve_nonexist_relative_issue38671(self): p = self.cls('non', 'exist') @@ -890,6 +1233,24 @@ def test_resolve_nonexist_relative_issue38671(self): finally: os.chdir(old_cwd) + @needs_symlinks + def test_readlink(self): + P = self.cls(self.base) + self.assertEqual((P / 'linkA').readlink(), self.cls('fileA')) + self.assertEqual((P / 'brokenLink').readlink(), + self.cls('non-existing')) + self.assertEqual((P / 'linkB').readlink(), self.cls('dirB')) + self.assertEqual((P / 'linkB' / 'linkD').readlink(), self.cls('../dirB')) + with self.assertRaises(OSError): + (P / 'fileA').readlink() + + @unittest.skipIf(hasattr(os, "readlink"), "os.readlink() is present") + def test_readlink_unsupported(self): + P = self.cls(self.base) + p = P / 'fileA' + with self.assertRaises(pathlib.UnsupportedOperation): + q.readlink(p) + @os_helper.skip_unless_working_chmod def test_chmod(self): p = self.cls(self.base) / 'fileA' @@ -991,6 +1352,41 @@ def test_group_no_follow_symlinks(self): self.assertEqual(expected_gid, gid_2) 
self.assertEqual(expected_name, link.group(follow_symlinks=False)) + @needs_symlinks + def test_delete_symlink(self): + tmp = self.cls(self.base, 'delete') + tmp.mkdir() + dir_ = tmp / 'dir' + dir_.mkdir() + link = tmp / 'link' + link.symlink_to(dir_) + link._delete() + self.assertTrue(dir_.exists()) + self.assertFalse(link.exists(follow_symlinks=False)) + + @needs_symlinks + def test_delete_inner_symlink(self): + tmp = self.cls(self.base, 'delete') + tmp.mkdir() + dir1 = tmp / 'dir1' + dir2 = dir1 / 'dir2' + dir3 = tmp / 'dir3' + for d in dir1, dir2, dir3: + d.mkdir() + file1 = tmp / 'file1' + file1.write_text('foo') + link1 = dir1 / 'link1' + link1.symlink_to(dir2) + link2 = dir1 / 'link2' + link2.symlink_to(dir3) + link3 = dir1 / 'link3' + link3.symlink_to(file1) + # make sure symlinks are removed but not followed + dir1._delete() + self.assertFalse(dir1.exists()) + self.assertTrue(dir3.exists()) + self.assertTrue(file1.exists()) + @unittest.skipIf(sys.platform[:6] == 'cygwin', "This test can't be run on Cygwin (issue #1071513).") @os_helper.skip_if_dac_override @@ -1354,6 +1750,12 @@ def test_symlink_to_unsupported(self): with self.assertRaises(pathlib.UnsupportedOperation): q.symlink_to(p) + @needs_symlinks + def test_stat_no_follow_symlinks(self): + p = self.cls(self.base) / 'linkA' + st = p.stat() + self.assertNotEqual(st, p.stat(follow_symlinks=False)) + @needs_symlinks def test_lstat(self): p = self.cls(self.base)/ 'linkA' @@ -1433,6 +1835,15 @@ def test_passing_kwargs_errors(self): with self.assertRaises(TypeError): self.cls(foo="bar") + @needs_symlinks + def test_iterdir_symlink(self): + # __iter__ on a symlink to a directory. 
+ P = self.cls + p = P(self.base, 'linkB') + paths = set(p.iterdir()) + expected = { P(self.base, 'linkB', q) for q in ['fileB', 'linkD'] } + self.assertEqual(paths, expected) + def test_glob_empty_pattern(self): p = self.cls('') with self.assertRaisesRegex(ValueError, 'Unacceptable pattern'): @@ -1493,6 +1904,25 @@ def test_glob_dot(self): self.assertEqual( set(P('.').glob('**/*/*')), {P("dirD/fileD")}) + # See https://github.com/WebAssembly/wasi-filesystem/issues/26 + @unittest.skipIf(is_wasi, "WASI resolution of '..' parts doesn't match POSIX") + def test_glob_dotdot(self): + # ".." is not special in globs. + P = self.cls + p = P(self.base) + self.assertEqual(set(p.glob("..")), { P(self.base, "..") }) + self.assertEqual(set(p.glob("../..")), { P(self.base, "..", "..") }) + self.assertEqual(set(p.glob("dirA/..")), { P(self.base, "dirA", "..") }) + self.assertEqual(set(p.glob("dirA/../file*")), { P(self.base, "dirA/../fileA") }) + self.assertEqual(set(p.glob("dirA/../file*/..")), set()) + self.assertEqual(set(p.glob("../xyzzy")), set()) + if self.cls.parser is posixpath: + self.assertEqual(set(p.glob("xyzzy/..")), set()) + else: + # ".." segments are normalized first on Windows, so this path is stat()able. + self.assertEqual(set(p.glob("xyzzy/..")), { P(self.base, "xyzzy", "..") }) + self.assertEqual(set(p.glob("/".join([".."] * 50))), { P(self.base, *[".."] * 50)}) + def test_glob_inaccessible(self): P = self.cls p = P(self.base, "mydir1", "mydir2") @@ -1508,6 +1938,124 @@ def test_rglob_pathlike(self): self.assertEqual(expect, set(p.rglob(P(pattern)))) self.assertEqual(expect, set(p.rglob(FakePath(pattern)))) + @needs_symlinks + @unittest.skipIf(is_emscripten, "Hangs") + def test_glob_recurse_symlinks_common(self): + def _check(path, glob, expected): + actual = {path for path in path.glob(glob, recurse_symlinks=True) + if path.parts.count("linkD") <= 1} # exclude symlink loop. 
+ self.assertEqual(actual, { P(self.base, q) for q in expected }) + P = self.cls + p = P(self.base) + _check(p, "fileB", []) + _check(p, "dir*/file*", ["dirB/fileB", "dirC/fileC"]) + _check(p, "*A", ["dirA", "fileA", "linkA"]) + _check(p, "*B/*", ["dirB/fileB", "dirB/linkD", "linkB/fileB", "linkB/linkD"]) + _check(p, "*/fileB", ["dirB/fileB", "linkB/fileB"]) + _check(p, "*/", ["dirA/", "dirB/", "dirC/", "dirE/", "linkB/"]) + _check(p, "dir*/*/..", ["dirC/dirD/..", "dirA/linkC/..", "dirB/linkD/.."]) + _check(p, "dir*/**", [ + "dirA/", "dirA/linkC", "dirA/linkC/fileB", "dirA/linkC/linkD", "dirA/linkC/linkD/fileB", + "dirB/", "dirB/fileB", "dirB/linkD", "dirB/linkD/fileB", + "dirC/", "dirC/fileC", "dirC/dirD", "dirC/dirD/fileD", "dirC/novel.txt", + "dirE/"]) + _check(p, "dir*/**/", ["dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", + "dirC/", "dirC/dirD/", "dirE/"]) + _check(p, "dir*/**/..", ["dirA/..", "dirA/linkC/..", "dirB/..", + "dirB/linkD/..", "dirA/linkC/linkD/..", + "dirC/..", "dirC/dirD/..", "dirE/.."]) + _check(p, "dir*/*/**", [ + "dirA/linkC/", "dirA/linkC/linkD", "dirA/linkC/fileB", "dirA/linkC/linkD/fileB", + "dirB/linkD/", "dirB/linkD/fileB", + "dirC/dirD/", "dirC/dirD/fileD"]) + _check(p, "dir*/*/**/", ["dirA/linkC/", "dirA/linkC/linkD/", "dirB/linkD/", "dirC/dirD/"]) + _check(p, "dir*/*/**/..", ["dirA/linkC/..", "dirA/linkC/linkD/..", + "dirB/linkD/..", "dirC/dirD/.."]) + _check(p, "dir*/**/fileC", ["dirC/fileC"]) + _check(p, "dir*/*/../dirD/**/", ["dirC/dirD/../dirD/"]) + _check(p, "*/dirD/**", ["dirC/dirD/", "dirC/dirD/fileD"]) + _check(p, "*/dirD/**/", ["dirC/dirD/"]) + + @needs_symlinks + @unittest.skipIf(is_emscripten, "Hangs") + def test_rglob_recurse_symlinks_common(self): + def _check(path, glob, expected): + actual = {path for path in path.rglob(glob, recurse_symlinks=True) + if path.parts.count("linkD") <= 1} # exclude symlink loop. 
+ self.assertEqual(actual, { P(self.base, q) for q in expected }) + P = self.cls + p = P(self.base) + _check(p, "fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB", + "dirA/linkC/linkD/fileB", "dirB/linkD/fileB", "linkB/linkD/fileB"]) + _check(p, "*/fileA", []) + _check(p, "*/fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB", + "dirA/linkC/linkD/fileB", "dirB/linkD/fileB", "linkB/linkD/fileB"]) + _check(p, "file*", ["fileA", "dirA/linkC/fileB", "dirB/fileB", + "dirA/linkC/linkD/fileB", "dirB/linkD/fileB", "linkB/linkD/fileB", + "dirC/fileC", "dirC/dirD/fileD", "linkB/fileB"]) + _check(p, "*/", ["dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", + "dirC/", "dirC/dirD/", "dirE/", "linkB/", "linkB/linkD/"]) + _check(p, "", ["", "dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", + "dirC/", "dirE/", "dirC/dirD/", "linkB/", "linkB/linkD/"]) + + p = P(self.base, "dirC") + _check(p, "*", ["dirC/fileC", "dirC/novel.txt", + "dirC/dirD", "dirC/dirD/fileD"]) + _check(p, "file*", ["dirC/fileC", "dirC/dirD/fileD"]) + _check(p, "*/*", ["dirC/dirD/fileD"]) + _check(p, "*/", ["dirC/dirD/"]) + _check(p, "", ["dirC/", "dirC/dirD/"]) + # gh-91616, a re module regression + _check(p, "*.txt", ["dirC/novel.txt"]) + _check(p, "*.*", ["dirC/novel.txt"]) + + @needs_symlinks + def test_rglob_symlink_loop(self): + # Don't get fooled by symlink loops (Issue #26012). 
+ P = self.cls + p = P(self.base) + given = set(p.rglob('*', recurse_symlinks=False)) + expect = {'brokenLink', + 'dirA', 'dirA/linkC', + 'dirB', 'dirB/fileB', 'dirB/linkD', + 'dirC', 'dirC/dirD', 'dirC/dirD/fileD', + 'dirC/fileC', 'dirC/novel.txt', + 'dirE', + 'fileA', + 'linkA', + 'linkB', + 'brokenLinkLoop', + } + self.assertEqual(given, {p / x for x in expect}) + + @needs_symlinks + def test_glob_permissions(self): + # See bpo-38894 + P = self.cls + base = P(self.base) / 'permissions' + base.mkdir() + + for i in range(100): + link = base / f"link{i}" + if i % 2: + link.symlink_to(P(self.base, "dirE", "nonexistent")) + else: + link.symlink_to(P(self.base, "dirC"), target_is_directory=True) + + self.assertEqual(len(set(base.glob("*"))), 100) + self.assertEqual(len(set(base.glob("*/"))), 50) + self.assertEqual(len(set(base.glob("*/fileC"))), 50) + self.assertEqual(len(set(base.glob("*/file*"))), 50) + + @needs_symlinks + def test_glob_long_symlink(self): + # See gh-87695 + base = self.cls(self.base) / 'long_symlink' + base.mkdir() + bad_link = base / 'bad_link' + bad_link.symlink_to("bad" * 200) + self.assertEqual(sorted(base.glob('**/*')), [bad_link]) + @needs_posix def test_absolute_posix(self): P = self.cls @@ -1822,6 +2370,9 @@ class PathWalkTest(test_pathlib_abc.DummyPathWalkTest): can_symlink = PathTest.can_symlink def setUp(self): + name = self.id().split('.')[-1] + if name in _tests_needing_symlinks and not self.can_symlink: + self.skipTest('requires symlinks') super().setUp() sub21_path= self.sub2_path / "SUB21" tmp5_path = sub21_path / "tmp3" @@ -1903,6 +2454,37 @@ def test_walk_above_recursion_limit(self): list(base.walk()) list(base.walk(top_down=False)) + @needs_symlinks + def test_walk_follow_symlinks(self): + walk_it = self.walk_path.walk(follow_symlinks=True) + for root, dirs, files in walk_it: + if root == self.link_path: + self.assertEqual(dirs, []) + self.assertEqual(files, ["tmp4"]) + break + else: + self.fail("Didn't follow symlink with 
follow_symlinks=True") + + @needs_symlinks + def test_walk_symlink_location(self): + # Tests whether symlinks end up in filenames or dirnames depending + # on the `follow_symlinks` argument. + walk_it = self.walk_path.walk(follow_symlinks=False) + for root, dirs, files in walk_it: + if root == self.sub2_path: + self.assertIn("link", files) + break + else: + self.fail("symlink not found") + + walk_it = self.walk_path.walk(follow_symlinks=True) + for root, dirs, files in walk_it: + if root == self.sub2_path: + self.assertIn("link", dirs) + break + else: + self.fail("symlink not found") + @unittest.skipIf(os.name == 'nt', 'test requires a POSIX-compatible system') class PosixPathTest(PathTest, PurePosixPathTest): diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 00153e3f5e997e..bf9ae6cc8a2433 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -8,13 +8,11 @@ from pathlib._abc import UnsupportedOperation, ParserBase, PurePathBase, PathBase import posixpath -from test.support import is_wasi, is_emscripten from test.support.os_helper import TESTFN _tests_needing_posix = set() _tests_needing_windows = set() -_tests_needing_symlinks = set() def needs_posix(fn): @@ -27,11 +25,6 @@ def needs_windows(fn): _tests_needing_windows.add(fn.__name__) return fn -def needs_symlinks(fn): - """Decorator that marks a test as requiring a path class that supports symlinks.""" - _tests_needing_symlinks.add(fn.__name__) - return fn - class UnsupportedOperationTest(unittest.TestCase): def test_is_notimplemented(self): @@ -1369,7 +1362,6 @@ def test_unsupported_operation(self): self.assertRaises(e, p.glob, '*') self.assertRaises(e, p.rglob, '*') self.assertRaises(e, lambda: list(p.walk())) - self.assertRaises(e, p.absolute) self.assertRaises(e, p.expanduser) self.assertRaises(e, p.readlink) self.assertRaises(e, p.symlink_to, 'foo') @@ -1425,7 +1417,6 @@ class DummyPath(PathBase): 
_files = {} _directories = {} - _symlinks = {} def __eq__(self, other): if not isinstance(other, DummyPath): @@ -1439,16 +1430,11 @@ def __repr__(self): return "{}({!r})".format(self.__class__.__name__, self.as_posix()) def stat(self, *, follow_symlinks=True): - if follow_symlinks or self.name in ('', '.', '..'): - path = str(self.resolve(strict=True)) - else: - path = str(self.parent.resolve(strict=True) / self.name) + path = str(self).rstrip('/') if path in self._files: st_mode = stat.S_IFREG elif path in self._directories: st_mode = stat.S_IFDIR - elif path in self._symlinks: - st_mode = stat.S_IFLNK else: raise FileNotFoundError(errno.ENOENT, "Not found", str(self)) return DummyPathStatResult(st_mode, hash(str(self)), 0, 0, 0, 0, 0, 0, 0, 0) @@ -1457,10 +1443,7 @@ def open(self, mode='r', buffering=-1, encoding=None, errors=None, newline=None): if buffering != -1 and not (buffering == 0 and 'b' in mode): raise NotImplementedError - path_obj = self.resolve() - path = str(path_obj) - name = path_obj.name - parent = str(path_obj.parent) + path = str(self) if path in self._directories: raise IsADirectoryError(errno.EISDIR, "Is a directory", path) @@ -1471,6 +1454,7 @@ def open(self, mode='r', buffering=-1, encoding=None, raise FileNotFoundError(errno.ENOENT, "File not found", path) stream = io.BytesIO(self._files[path]) elif mode == 'w': + parent, name = posixpath.split(path) if parent not in self._directories: raise FileNotFoundError(errno.ENOENT, "File not found", parent) stream = DummyPathIO(self._files, path) @@ -1483,7 +1467,7 @@ def open(self, mode='r', buffering=-1, encoding=None, return stream def iterdir(self): - path = str(self.resolve()) + path = str(self).rstrip('/') if path in self._files: raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path) elif path in self._directories: @@ -1492,9 +1476,9 @@ def iterdir(self): raise FileNotFoundError(errno.ENOENT, "File not found", path) def mkdir(self, mode=0o777, parents=False, exist_ok=False): - path 
= str(self.parent.resolve() / self.name) - parent = str(self.parent.resolve()) - if path in self._directories or path in self._symlinks: + path = str(self) + parent = str(self.parent) + if path in self._directories: if exist_ok: return else: @@ -1510,33 +1494,28 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False): self.mkdir(mode, parents=False, exist_ok=exist_ok) def unlink(self, missing_ok=False): - path_obj = self.parent.resolve(strict=True) / self.name - path = str(path_obj) - name = path_obj.name - parent = str(path_obj.parent) + path = str(self) + name = self.name + parent = str(self.parent) if path in self._directories: raise IsADirectoryError(errno.EISDIR, "Is a directory", path) elif path in self._files: self._directories[parent].remove(name) del self._files[path] - elif path in self._symlinks: - self._directories[parent].remove(name) - del self._symlinks[path] elif not missing_ok: raise FileNotFoundError(errno.ENOENT, "File not found", path) def rmdir(self): - path_obj = self.parent.resolve(strict=True) / self.name - path = str(path_obj) - if path in self._files or path in self._symlinks: + path = str(self) + if path in self._files: raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path) elif path not in self._directories: raise FileNotFoundError(errno.ENOENT, "File not found", path) elif self._directories[path]: raise OSError(errno.ENOTEMPTY, "Directory not empty", path) else: - name = path_obj.name - parent = str(path_obj.parent) + name = self.name + parent = str(self.parent) self._directories[parent].remove(name) del self._directories[path] @@ -1569,9 +1548,6 @@ class DummyPathTest(DummyPurePathTest): def setUp(self): super().setUp() - name = self.id().split('.')[-1] - if name in _tests_needing_symlinks and not self.can_symlink: - self.skipTest('requires symlinks') parser = self.cls.parser p = self.cls(self.base) p.mkdir(parents=True) @@ -1604,7 +1580,6 @@ def tearDown(self): cls = self.cls cls._files.clear() cls._directories.clear() 
- cls._symlinks.clear() def tempdir(self): path = self.cls(self.base).with_name('tmp-dirD') @@ -1730,101 +1705,6 @@ def test_copy_file(self): self.assertTrue(target.exists()) self.assertEqual(source.read_text(), target.read_text()) - @needs_symlinks - def test_copy_symlink_follow_symlinks_true(self): - base = self.cls(self.base) - source = base / 'linkA' - target = base / 'copyA' - result = source.copy(target) - self.assertEqual(result, target) - self.assertTrue(target.exists()) - self.assertFalse(target.is_symlink()) - self.assertEqual(source.read_text(), target.read_text()) - - @needs_symlinks - def test_copy_symlink_follow_symlinks_false(self): - base = self.cls(self.base) - source = base / 'linkA' - target = base / 'copyA' - result = source.copy(target, follow_symlinks=False) - self.assertEqual(result, target) - self.assertTrue(target.exists()) - self.assertTrue(target.is_symlink()) - self.assertEqual(source.readlink(), target.readlink()) - - @needs_symlinks - def test_copy_symlink_to_itself(self): - base = self.cls(self.base) - source = base / 'linkA' - self.assertRaises(OSError, source.copy, source) - - @needs_symlinks - def test_copy_symlink_to_existing_symlink(self): - base = self.cls(self.base) - source = base / 'copySource' - target = base / 'copyTarget' - source.symlink_to(base / 'fileA') - target.symlink_to(base / 'dirC') - self.assertRaises(OSError, source.copy, target) - self.assertRaises(OSError, source.copy, target, follow_symlinks=False) - - @needs_symlinks - def test_copy_symlink_to_existing_directory_symlink(self): - base = self.cls(self.base) - source = base / 'copySource' - target = base / 'copyTarget' - source.symlink_to(base / 'fileA') - target.symlink_to(base / 'dirC') - self.assertRaises(OSError, source.copy, target) - self.assertRaises(OSError, source.copy, target, follow_symlinks=False) - - @needs_symlinks - def test_copy_directory_symlink_follow_symlinks_false(self): - base = self.cls(self.base) - source = base / 'linkB' - target = base 
/ 'copyA' - result = source.copy(target, follow_symlinks=False) - self.assertEqual(result, target) - self.assertTrue(target.exists()) - self.assertTrue(target.is_symlink()) - self.assertEqual(source.readlink(), target.readlink()) - - @needs_symlinks - def test_copy_directory_symlink_to_itself(self): - base = self.cls(self.base) - source = base / 'linkB' - self.assertRaises(OSError, source.copy, source) - self.assertRaises(OSError, source.copy, source, follow_symlinks=False) - - @needs_symlinks - def test_copy_directory_symlink_into_itself(self): - base = self.cls(self.base) - source = base / 'linkB' - target = base / 'linkB' / 'copyB' - self.assertRaises(OSError, source.copy, target) - self.assertRaises(OSError, source.copy, target, follow_symlinks=False) - self.assertFalse(target.exists()) - - @needs_symlinks - def test_copy_directory_symlink_to_existing_symlink(self): - base = self.cls(self.base) - source = base / 'copySource' - target = base / 'copyTarget' - source.symlink_to(base / 'dirC') - target.symlink_to(base / 'fileA') - self.assertRaises(FileExistsError, source.copy, target) - self.assertRaises(FileExistsError, source.copy, target, follow_symlinks=False) - - @needs_symlinks - def test_copy_directory_symlink_to_existing_directory_symlink(self): - base = self.cls(self.base) - source = base / 'copySource' - target = base / 'copyTarget' - source.symlink_to(base / 'dirC' / 'dirD') - target.symlink_to(base / 'dirC') - self.assertRaises(FileExistsError, source.copy, target) - self.assertRaises(FileExistsError, source.copy, target, follow_symlinks=False) - def test_copy_file_to_existing_file(self): base = self.cls(self.base) source = base / 'fileA' @@ -1840,34 +1720,6 @@ def test_copy_file_to_existing_directory(self): target = base / 'dirA' self.assertRaises(OSError, source.copy, target) - @needs_symlinks - def test_copy_file_to_existing_symlink(self): - base = self.cls(self.base) - source = base / 'dirB' / 'fileB' - target = base / 'linkA' - real_target = base 
/ 'fileA' - result = source.copy(target) - self.assertEqual(result, target) - self.assertTrue(target.exists()) - self.assertTrue(target.is_symlink()) - self.assertTrue(real_target.exists()) - self.assertFalse(real_target.is_symlink()) - self.assertEqual(source.read_text(), real_target.read_text()) - - @needs_symlinks - def test_copy_file_to_existing_symlink_follow_symlinks_false(self): - base = self.cls(self.base) - source = base / 'dirB' / 'fileB' - target = base / 'linkA' - real_target = base / 'fileA' - result = source.copy(target, follow_symlinks=False) - self.assertEqual(result, target) - self.assertTrue(target.exists()) - self.assertTrue(target.is_symlink()) - self.assertTrue(real_target.exists()) - self.assertFalse(real_target.is_symlink()) - self.assertEqual(source.read_text(), real_target.read_text()) - def test_copy_file_empty(self): base = self.cls(self.base) source = base / 'empty' @@ -1985,23 +1837,6 @@ def test_copy_dir_into_itself(self): self.assertRaises(OSError, source.copy, target, follow_symlinks=False) self.assertFalse(target.exists()) - @needs_symlinks - def test_copy_dangling_symlink(self): - base = self.cls(self.base) - source = base / 'source' - target = base / 'target' - - source.mkdir() - source.joinpath('link').symlink_to('nonexistent') - - self.assertRaises(FileNotFoundError, source.copy, target) - - target2 = base / 'target2' - result = source.copy(target2, follow_symlinks=False) - self.assertEqual(result, target2) - self.assertTrue(target2.joinpath('link').is_symlink()) - self.assertEqual(target2.joinpath('link').readlink(), self.cls('nonexistent')) - def test_copy_into(self): base = self.cls(self.base) source = base / 'fileA' @@ -2087,54 +1922,6 @@ def test_move_dir_into_itself(self): self.assertTrue(source.exists()) self.assertFalse(target.exists()) - @needs_symlinks - def test_move_file_symlink(self): - base = self.cls(self.base) - source = base / 'linkA' - source_readlink = source.readlink() - target = base / 'linkA_moved' - result 
= source.move(target) - self.assertEqual(result, target) - self.assertFalse(source.exists()) - self.assertTrue(target.is_symlink()) - self.assertEqual(source_readlink, target.readlink()) - - @needs_symlinks - def test_move_file_symlink_to_itself(self): - base = self.cls(self.base) - source = base / 'linkA' - self.assertRaises(OSError, source.move, source) - - @needs_symlinks - def test_move_dir_symlink(self): - base = self.cls(self.base) - source = base / 'linkB' - source_readlink = source.readlink() - target = base / 'linkB_moved' - result = source.move(target) - self.assertEqual(result, target) - self.assertFalse(source.exists()) - self.assertTrue(target.is_symlink()) - self.assertEqual(source_readlink, target.readlink()) - - @needs_symlinks - def test_move_dir_symlink_to_itself(self): - base = self.cls(self.base) - source = base / 'linkB' - self.assertRaises(OSError, source.move, source) - - @needs_symlinks - def test_move_dangling_symlink(self): - base = self.cls(self.base) - source = base / 'brokenLink' - source_readlink = source.readlink() - target = base / 'brokenLink_moved' - result = source.move(target) - self.assertEqual(result, target) - self.assertFalse(source.exists()) - self.assertTrue(target.is_symlink()) - self.assertEqual(source_readlink, target.readlink()) - def test_move_into(self): base = self.cls(self.base) source = base / 'fileA' @@ -2161,15 +1948,6 @@ def test_iterdir(self): expected += ['linkA', 'linkB', 'brokenLink', 'brokenLinkLoop'] self.assertEqual(paths, { P(self.base, q) for q in expected }) - @needs_symlinks - def test_iterdir_symlink(self): - # __iter__ on a symlink to a directory. - P = self.cls - p = P(self.base, 'linkB') - paths = set(p.iterdir()) - expected = { P(self.base, 'linkB', q) for q in ['fileB', 'linkD'] } - self.assertEqual(paths, expected) - def test_iterdir_nodir(self): # __iter__ on something that is not a directory. 
p = self.cls(self.base, 'fileA') @@ -2196,7 +1974,6 @@ def test_scandir(self): if entry.name != 'brokenLinkLoop': self.assertEqual(entry.is_dir(), child.is_dir()) - def test_glob_common(self): def _check(glob, expected): self.assertEqual(set(glob), { P(self.base, q) for q in expected }) @@ -2250,8 +2027,6 @@ def test_glob_empty_pattern(self): P = self.cls p = P(self.base) self.assertEqual(list(p.glob("")), [p]) - self.assertEqual(list(p.glob(".")), [p / "."]) - self.assertEqual(list(p.glob("./")), [p / "./"]) def test_glob_case_sensitive(self): P = self.cls @@ -2265,44 +2040,6 @@ def _check(path, pattern, case_sensitive, expected): _check(path, "dirb/file*", True, []) _check(path, "dirb/file*", False, ["dirB/fileB"]) - @needs_symlinks - @unittest.skipIf(is_emscripten, "Hangs") - def test_glob_recurse_symlinks_common(self): - def _check(path, glob, expected): - actual = {path for path in path.glob(glob, recurse_symlinks=True) - if path.parts.count("linkD") <= 1} # exclude symlink loop. - self.assertEqual(actual, { P(self.base, q) for q in expected }) - P = self.cls - p = P(self.base) - _check(p, "fileB", []) - _check(p, "dir*/file*", ["dirB/fileB", "dirC/fileC"]) - _check(p, "*A", ["dirA", "fileA", "linkA"]) - _check(p, "*B/*", ["dirB/fileB", "dirB/linkD", "linkB/fileB", "linkB/linkD"]) - _check(p, "*/fileB", ["dirB/fileB", "linkB/fileB"]) - _check(p, "*/", ["dirA/", "dirB/", "dirC/", "dirE/", "linkB/"]) - _check(p, "dir*/*/..", ["dirC/dirD/..", "dirA/linkC/..", "dirB/linkD/.."]) - _check(p, "dir*/**", [ - "dirA/", "dirA/linkC", "dirA/linkC/fileB", "dirA/linkC/linkD", "dirA/linkC/linkD/fileB", - "dirB/", "dirB/fileB", "dirB/linkD", "dirB/linkD/fileB", - "dirC/", "dirC/fileC", "dirC/dirD", "dirC/dirD/fileD", "dirC/novel.txt", - "dirE/"]) - _check(p, "dir*/**/", ["dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", - "dirC/", "dirC/dirD/", "dirE/"]) - _check(p, "dir*/**/..", ["dirA/..", "dirA/linkC/..", "dirB/..", - "dirB/linkD/..", 
"dirA/linkC/linkD/..", - "dirC/..", "dirC/dirD/..", "dirE/.."]) - _check(p, "dir*/*/**", [ - "dirA/linkC/", "dirA/linkC/linkD", "dirA/linkC/fileB", "dirA/linkC/linkD/fileB", - "dirB/linkD/", "dirB/linkD/fileB", - "dirC/dirD/", "dirC/dirD/fileD"]) - _check(p, "dir*/*/**/", ["dirA/linkC/", "dirA/linkC/linkD/", "dirB/linkD/", "dirC/dirD/"]) - _check(p, "dir*/*/**/..", ["dirA/linkC/..", "dirA/linkC/linkD/..", - "dirB/linkD/..", "dirC/dirD/.."]) - _check(p, "dir*/**/fileC", ["dirC/fileC"]) - _check(p, "dir*/*/../dirD/**/", ["dirC/dirD/../dirD/"]) - _check(p, "*/dirD/**", ["dirC/dirD/", "dirC/dirD/fileD"]) - _check(p, "*/dirD/**/", ["dirC/dirD/"]) - def test_rglob_recurse_symlinks_false(self): def _check(path, glob, expected): actual = set(path.rglob(glob, recurse_symlinks=False)) @@ -2361,252 +2098,6 @@ def test_rglob_windows(self): self.assertEqual(set(p.rglob("FILEd")), { P(self.base, "dirC/dirD/fileD") }) self.assertEqual(set(p.rglob("*\\")), { P(self.base, "dirC/dirD/") }) - @needs_symlinks - @unittest.skipIf(is_emscripten, "Hangs") - def test_rglob_recurse_symlinks_common(self): - def _check(path, glob, expected): - actual = {path for path in path.rglob(glob, recurse_symlinks=True) - if path.parts.count("linkD") <= 1} # exclude symlink loop. 
- self.assertEqual(actual, { P(self.base, q) for q in expected }) - P = self.cls - p = P(self.base) - _check(p, "fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB", - "dirA/linkC/linkD/fileB", "dirB/linkD/fileB", "linkB/linkD/fileB"]) - _check(p, "*/fileA", []) - _check(p, "*/fileB", ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB", - "dirA/linkC/linkD/fileB", "dirB/linkD/fileB", "linkB/linkD/fileB"]) - _check(p, "file*", ["fileA", "dirA/linkC/fileB", "dirB/fileB", - "dirA/linkC/linkD/fileB", "dirB/linkD/fileB", "linkB/linkD/fileB", - "dirC/fileC", "dirC/dirD/fileD", "linkB/fileB"]) - _check(p, "*/", ["dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", - "dirC/", "dirC/dirD/", "dirE/", "linkB/", "linkB/linkD/"]) - _check(p, "", ["", "dirA/", "dirA/linkC/", "dirA/linkC/linkD/", "dirB/", "dirB/linkD/", - "dirC/", "dirE/", "dirC/dirD/", "linkB/", "linkB/linkD/"]) - - p = P(self.base, "dirC") - _check(p, "*", ["dirC/fileC", "dirC/novel.txt", - "dirC/dirD", "dirC/dirD/fileD"]) - _check(p, "file*", ["dirC/fileC", "dirC/dirD/fileD"]) - _check(p, "*/*", ["dirC/dirD/fileD"]) - _check(p, "*/", ["dirC/dirD/"]) - _check(p, "", ["dirC/", "dirC/dirD/"]) - # gh-91616, a re module regression - _check(p, "*.txt", ["dirC/novel.txt"]) - _check(p, "*.*", ["dirC/novel.txt"]) - - @needs_symlinks - def test_rglob_symlink_loop(self): - # Don't get fooled by symlink loops (Issue #26012). - P = self.cls - p = P(self.base) - given = set(p.rglob('*', recurse_symlinks=False)) - expect = {'brokenLink', - 'dirA', 'dirA/linkC', - 'dirB', 'dirB/fileB', 'dirB/linkD', - 'dirC', 'dirC/dirD', 'dirC/dirD/fileD', - 'dirC/fileC', 'dirC/novel.txt', - 'dirE', - 'fileA', - 'linkA', - 'linkB', - 'brokenLinkLoop', - } - self.assertEqual(given, {p / x for x in expect}) - - # See https://github.com/WebAssembly/wasi-filesystem/issues/26 - @unittest.skipIf(is_wasi, "WASI resolution of '..' parts doesn't match POSIX") - def test_glob_dotdot(self): - # ".." is not special in globs. 
- P = self.cls - p = P(self.base) - self.assertEqual(set(p.glob("..")), { P(self.base, "..") }) - self.assertEqual(set(p.glob("../..")), { P(self.base, "..", "..") }) - self.assertEqual(set(p.glob("dirA/..")), { P(self.base, "dirA", "..") }) - self.assertEqual(set(p.glob("dirA/../file*")), { P(self.base, "dirA/../fileA") }) - self.assertEqual(set(p.glob("dirA/../file*/..")), set()) - self.assertEqual(set(p.glob("../xyzzy")), set()) - if self.cls.parser is posixpath: - self.assertEqual(set(p.glob("xyzzy/..")), set()) - else: - # ".." segments are normalized first on Windows, so this path is stat()able. - self.assertEqual(set(p.glob("xyzzy/..")), { P(self.base, "xyzzy", "..") }) - self.assertEqual(set(p.glob("/".join([".."] * 50))), { P(self.base, *[".."] * 50)}) - - @needs_symlinks - def test_glob_permissions(self): - # See bpo-38894 - P = self.cls - base = P(self.base) / 'permissions' - base.mkdir() - - for i in range(100): - link = base / f"link{i}" - if i % 2: - link.symlink_to(P(self.base, "dirE", "nonexistent")) - else: - link.symlink_to(P(self.base, "dirC"), target_is_directory=True) - - self.assertEqual(len(set(base.glob("*"))), 100) - self.assertEqual(len(set(base.glob("*/"))), 50) - self.assertEqual(len(set(base.glob("*/fileC"))), 50) - self.assertEqual(len(set(base.glob("*/file*"))), 50) - - @needs_symlinks - def test_glob_long_symlink(self): - # See gh-87695 - base = self.cls(self.base) / 'long_symlink' - base.mkdir() - bad_link = base / 'bad_link' - bad_link.symlink_to("bad" * 200) - self.assertEqual(sorted(base.glob('**/*')), [bad_link]) - - @needs_posix - def test_absolute_posix(self): - P = self.cls - # The default implementation uses '/' as the current directory - self.assertEqual(str(P('').absolute()), '/') - self.assertEqual(str(P('a').absolute()), '/a') - self.assertEqual(str(P('a/b').absolute()), '/a/b') - - self.assertEqual(str(P('/').absolute()), '/') - self.assertEqual(str(P('/a').absolute()), '/a') - 
self.assertEqual(str(P('/a/b').absolute()), '/a/b') - - # '//'-prefixed absolute path (supported by POSIX). - self.assertEqual(str(P('//').absolute()), '//') - self.assertEqual(str(P('//a').absolute()), '//a') - self.assertEqual(str(P('//a/b').absolute()), '//a/b') - - @needs_symlinks - def test_readlink(self): - P = self.cls(self.base) - self.assertEqual((P / 'linkA').readlink(), self.cls('fileA')) - self.assertEqual((P / 'brokenLink').readlink(), - self.cls('non-existing')) - self.assertEqual((P / 'linkB').readlink(), self.cls('dirB')) - self.assertEqual((P / 'linkB' / 'linkD').readlink(), self.cls('../dirB')) - with self.assertRaises(OSError): - (P / 'fileA').readlink() - - @unittest.skipIf(hasattr(os, "readlink"), "os.readlink() is present") - def test_readlink_unsupported(self): - P = self.cls(self.base) - p = P / 'fileA' - with self.assertRaises(UnsupportedOperation): - q.readlink(p) - - def _check_resolve(self, p, expected, strict=True): - q = p.resolve(strict) - self.assertEqual(q, expected) - - # This can be used to check both relative and absolute resolutions. - _check_resolve_relative = _check_resolve_absolute = _check_resolve - - @needs_symlinks - def test_resolve_common(self): - P = self.cls - p = P(self.base, 'foo') - with self.assertRaises(OSError) as cm: - p.resolve(strict=True) - self.assertEqual(cm.exception.errno, errno.ENOENT) - # Non-strict - parser = self.parser - self.assertEqualNormCase(str(p.resolve(strict=False)), - parser.join(self.base, 'foo')) - p = P(self.base, 'foo', 'in', 'spam') - self.assertEqualNormCase(str(p.resolve(strict=False)), - parser.join(self.base, 'foo', 'in', 'spam')) - p = P(self.base, '..', 'foo', 'in', 'spam') - self.assertEqualNormCase(str(p.resolve(strict=False)), - parser.join(parser.dirname(self.base), 'foo', 'in', 'spam')) - # These are all relative symlinks. 
- p = P(self.base, 'dirB', 'fileB') - self._check_resolve_relative(p, p) - p = P(self.base, 'linkA') - self._check_resolve_relative(p, P(self.base, 'fileA')) - p = P(self.base, 'dirA', 'linkC', 'fileB') - self._check_resolve_relative(p, P(self.base, 'dirB', 'fileB')) - p = P(self.base, 'dirB', 'linkD', 'fileB') - self._check_resolve_relative(p, P(self.base, 'dirB', 'fileB')) - # Non-strict - p = P(self.base, 'dirA', 'linkC', 'fileB', 'foo', 'in', 'spam') - self._check_resolve_relative(p, P(self.base, 'dirB', 'fileB', 'foo', 'in', - 'spam'), False) - p = P(self.base, 'dirA', 'linkC', '..', 'foo', 'in', 'spam') - if self.cls.parser is not posixpath: - # In Windows, if linkY points to dirB, 'dirA\linkY\..' - # resolves to 'dirA' without resolving linkY first. - self._check_resolve_relative(p, P(self.base, 'dirA', 'foo', 'in', - 'spam'), False) - else: - # In Posix, if linkY points to dirB, 'dirA/linkY/..' - # resolves to 'dirB/..' first before resolving to parent of dirB. - self._check_resolve_relative(p, P(self.base, 'foo', 'in', 'spam'), False) - # Now create absolute symlinks. - d = self.tempdir() - P(self.base, 'dirA', 'linkX').symlink_to(d) - P(self.base, str(d), 'linkY').symlink_to(self.parser.join(self.base, 'dirB')) - p = P(self.base, 'dirA', 'linkX', 'linkY', 'fileB') - self._check_resolve_absolute(p, P(self.base, 'dirB', 'fileB')) - # Non-strict - p = P(self.base, 'dirA', 'linkX', 'linkY', 'foo', 'in', 'spam') - self._check_resolve_relative(p, P(self.base, 'dirB', 'foo', 'in', 'spam'), - False) - p = P(self.base, 'dirA', 'linkX', 'linkY', '..', 'foo', 'in', 'spam') - if self.cls.parser is not posixpath: - # In Windows, if linkY points to dirB, 'dirA\linkY\..' - # resolves to 'dirA' without resolving linkY first. - self._check_resolve_relative(p, P(d, 'foo', 'in', 'spam'), False) - else: - # In Posix, if linkY points to dirB, 'dirA/linkY/..' - # resolves to 'dirB/..' first before resolving to parent of dirB. 
- self._check_resolve_relative(p, P(self.base, 'foo', 'in', 'spam'), False) - - @needs_symlinks - def test_resolve_dot(self): - # See http://web.archive.org/web/20200623062557/https://bitbucket.org/pitrou/pathlib/issues/9/ - parser = self.parser - p = self.cls(self.base) - p.joinpath('0').symlink_to('.', target_is_directory=True) - p.joinpath('1').symlink_to(parser.join('0', '0'), target_is_directory=True) - p.joinpath('2').symlink_to(parser.join('1', '1'), target_is_directory=True) - q = p / '2' - self.assertEqual(q.resolve(strict=True), p) - r = q / '3' / '4' - self.assertRaises(FileNotFoundError, r.resolve, strict=True) - # Non-strict - self.assertEqual(r.resolve(strict=False), p / '3' / '4') - - def _check_symlink_loop(self, *args): - path = self.cls(*args) - with self.assertRaises(OSError) as cm: - path.resolve(strict=True) - self.assertEqual(cm.exception.errno, errno.ELOOP) - - @needs_posix - @needs_symlinks - def test_resolve_loop(self): - # Loops with relative symlinks. - self.cls(self.base, 'linkX').symlink_to('linkX/inside') - self._check_symlink_loop(self.base, 'linkX') - self.cls(self.base, 'linkY').symlink_to('linkY') - self._check_symlink_loop(self.base, 'linkY') - self.cls(self.base, 'linkZ').symlink_to('linkZ/../linkZ') - self._check_symlink_loop(self.base, 'linkZ') - # Non-strict - p = self.cls(self.base, 'linkZ', 'foo') - self.assertEqual(p.resolve(strict=False), p) - # Loops with absolute symlinks. 
- self.cls(self.base, 'linkU').symlink_to(self.parser.join(self.base, 'linkU/inside')) - self._check_symlink_loop(self.base, 'linkU') - self.cls(self.base, 'linkV').symlink_to(self.parser.join(self.base, 'linkV')) - self._check_symlink_loop(self.base, 'linkV') - self.cls(self.base, 'linkW').symlink_to(self.parser.join(self.base, 'linkW/../linkW')) - self._check_symlink_loop(self.base, 'linkW') - # Non-strict - q = self.cls(self.base, 'linkW', 'foo') - self.assertEqual(q.resolve(strict=False), q) - def test_stat(self): statA = self.cls(self.base).joinpath('fileA').stat() statB = self.cls(self.base).joinpath('dirB', 'fileB').stat() @@ -2627,12 +2118,6 @@ def test_stat(self): self.assertEqual(statA.st_dev, statC.st_dev) # other attributes not used by pathlib. - @needs_symlinks - def test_stat_no_follow_symlinks(self): - p = self.cls(self.base) / 'linkA' - st = p.stat() - self.assertNotEqual(st, p.stat(follow_symlinks=False)) - def test_stat_no_follow_symlinks_nosymlink(self): p = self.cls(self.base) / 'fileA' st = p.stat() @@ -2760,41 +2245,6 @@ def test_is_char_device_false(self): self.assertIs((P / 'fileA\udfff').is_char_device(), False) self.assertIs((P / 'fileA\x00').is_char_device(), False) - def _check_complex_symlinks(self, link0_target): - # Test solving a non-looping chain of symlinks (issue #19887). - parser = self.parser - P = self.cls(self.base) - P.joinpath('link1').symlink_to(parser.join('link0', 'link0'), target_is_directory=True) - P.joinpath('link2').symlink_to(parser.join('link1', 'link1'), target_is_directory=True) - P.joinpath('link3').symlink_to(parser.join('link2', 'link2'), target_is_directory=True) - P.joinpath('link0').symlink_to(link0_target, target_is_directory=True) - - # Resolve absolute paths. 
- p = (P / 'link0').resolve() - self.assertEqual(p, P) - self.assertEqualNormCase(str(p), self.base) - p = (P / 'link1').resolve() - self.assertEqual(p, P) - self.assertEqualNormCase(str(p), self.base) - p = (P / 'link2').resolve() - self.assertEqual(p, P) - self.assertEqualNormCase(str(p), self.base) - p = (P / 'link3').resolve() - self.assertEqual(p, P) - self.assertEqualNormCase(str(p), self.base) - - @needs_symlinks - def test_complex_symlinks_absolute(self): - self._check_complex_symlinks(self.base) - - @needs_symlinks - def test_complex_symlinks_relative(self): - self._check_complex_symlinks('.') - - @needs_symlinks - def test_complex_symlinks_relative_dot_dot(self): - self._check_complex_symlinks(self.parser.join('dirA', '..')) - def test_unlink(self): p = self.cls(self.base) / 'fileA' p.unlink() @@ -2838,41 +2288,6 @@ def test_delete_dir(self): self.assertRaises(FileNotFoundError, base.joinpath('dirC', 'fileC').stat) self.assertRaises(FileNotFoundError, base.joinpath('dirC', 'novel.txt').stat) - @needs_symlinks - def test_delete_symlink(self): - tmp = self.cls(self.base, 'delete') - tmp.mkdir() - dir_ = tmp / 'dir' - dir_.mkdir() - link = tmp / 'link' - link.symlink_to(dir_) - link._delete() - self.assertTrue(dir_.exists()) - self.assertFalse(link.exists(follow_symlinks=False)) - - @needs_symlinks - def test_delete_inner_symlink(self): - tmp = self.cls(self.base, 'delete') - tmp.mkdir() - dir1 = tmp / 'dir1' - dir2 = dir1 / 'dir2' - dir3 = tmp / 'dir3' - for d in dir1, dir2, dir3: - d.mkdir() - file1 = tmp / 'file1' - file1.write_text('foo') - link1 = dir1 / 'link1' - link1.symlink_to(dir2) - link2 = dir1 / 'link2' - link2.symlink_to(dir3) - link3 = dir1 / 'link3' - link3.symlink_to(file1) - # make sure symlinks are removed but not followed - dir1._delete() - self.assertFalse(dir1.exists()) - self.assertTrue(dir3.exists()) - self.assertTrue(file1.exists()) - def test_delete_missing(self): tmp = self.cls(self.base, 'delete') tmp.mkdir() @@ -2887,9 +2302,6 @@ 
class DummyPathWalkTest(unittest.TestCase): can_symlink = False def setUp(self): - name = self.id().split('.')[-1] - if name in _tests_needing_symlinks and not self.can_symlink: - self.skipTest('requires symlinks') # Build: # TESTFN/ # TEST1/ a file kid and two directory kids @@ -3002,70 +2414,6 @@ def test_walk_bottom_up(self): raise AssertionError(f"Unexpected path: {path}") self.assertTrue(seen_testfn) - @needs_symlinks - def test_walk_follow_symlinks(self): - walk_it = self.walk_path.walk(follow_symlinks=True) - for root, dirs, files in walk_it: - if root == self.link_path: - self.assertEqual(dirs, []) - self.assertEqual(files, ["tmp4"]) - break - else: - self.fail("Didn't follow symlink with follow_symlinks=True") - - @needs_symlinks - def test_walk_symlink_location(self): - # Tests whether symlinks end up in filenames or dirnames depending - # on the `follow_symlinks` argument. - walk_it = self.walk_path.walk(follow_symlinks=False) - for root, dirs, files in walk_it: - if root == self.sub2_path: - self.assertIn("link", files) - break - else: - self.fail("symlink not found") - - walk_it = self.walk_path.walk(follow_symlinks=True) - for root, dirs, files in walk_it: - if root == self.sub2_path: - self.assertIn("link", dirs) - break - else: - self.fail("symlink not found") - - -class DummyPathWithSymlinks(DummyPath): - __slots__ = () - - # Reduce symlink traversal limit to make tests run faster. 
- _max_symlinks = 20 - - def readlink(self): - path = str(self.parent.resolve() / self.name) - if path in self._symlinks: - return self.with_segments(self._symlinks[path][0]) - elif path in self._files or path in self._directories: - raise OSError(errno.EINVAL, "Not a symlink", path) - else: - raise FileNotFoundError(errno.ENOENT, "File not found", path) - - def symlink_to(self, target, target_is_directory=False): - path = str(self.parent.resolve() / self.name) - parent = str(self.parent.resolve()) - if path in self._symlinks: - raise FileExistsError(errno.EEXIST, "File exists", path) - self._directories[parent].add(self.name) - self._symlinks[path] = str(target), target_is_directory - - -class DummyPathWithSymlinksTest(DummyPathTest): - cls = DummyPathWithSymlinks - can_symlink = True - - -class DummyPathWithSymlinksWalkTest(DummyPathWalkTest): - cls = DummyPathWithSymlinks - can_symlink = True if __name__ == "__main__": From 72dca6c4eda0d63ee35a0aa619ae931ab226bef9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Srinivas=20Reddy=20Thatiparthy=20=28=E0=B0=A4=E0=B0=BE?= =?UTF-8?q?=E0=B0=9F=E0=B0=BF=E0=B0=AA=E0=B0=B0=E0=B1=8D=E0=B0=A4=E0=B0=BF?= =?UTF-8?q?=20=E0=B0=B6=E0=B1=8D=E0=B0=B0=E0=B1=80=E0=B0=A8=E0=B0=BF?= =?UTF-8?q?=E0=B0=B5=E0=B0=BE=E0=B0=B8=E0=B1=8D=20=20=E0=B0=B0=E0=B1=86?= =?UTF-8?q?=E0=B0=A1=E0=B1=8D=E0=B0=A1=E0=B0=BF=29?= Date: Sat, 7 Dec 2024 15:42:45 +0530 Subject: [PATCH 67/76] gh-119786: fix typo in `InternalDocs/garbage_collector.md` (#127687) --- InternalDocs/garbage_collector.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index 4761f78f3593e3..394e4ef075f55e 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -518,7 +518,7 @@ Then the above algorithm is repeated, starting from step 2. 
Determining how much work to do ------------------------------- -We need to do a certain amount of work to enusre that garbage is collected, +We need to do a certain amount of work to ensure that garbage is collected, but doing too much work slows down execution. To work out how much work we need to do, consider a heap with `L` live objects From 27d0d2141319d82709eb09ba20065df3e1714fab Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Sat, 7 Dec 2024 16:13:49 +0000 Subject: [PATCH 68/76] Give `poplib.POP3.rpop` a proper docstring (#127370) Previously `poplib.POP3.rpop` had a "Not sure what this does" docstring, now it has been fixed. --- Lib/poplib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/poplib.py b/Lib/poplib.py index 1a1629d175b6d9..beb93a0d57cf93 100644 --- a/Lib/poplib.py +++ b/Lib/poplib.py @@ -309,7 +309,7 @@ def close(self): # optional commands: def rpop(self, user): - """Not sure what this does.""" + """Send RPOP command to access the mailbox with an alternate user.""" return self._shortcmd('RPOP %s' % user) From 79b7cab50a3292a1c01466cf0e69fb7b4e56cfb1 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sat, 7 Dec 2024 17:58:42 +0000 Subject: [PATCH 69/76] GH-127090: Fix `urllib.response.addinfourl.url` value for opened `file:` URIs (#127091) The canonical `file:` URL (as generated by `pathname2url()`) is now used as the `url` attribute of the returned `addinfourl` object. The `addinfourl.url` attribute reflects the resolved URL for both `file:` or `http[s]:` URLs now. 
--- Lib/test/test_urllib.py | 11 ++++--- Lib/test/test_urllib2.py | 31 ++++++++----------- Lib/test/test_urllib2net.py | 3 +- Lib/urllib/request.py | 5 +-- ...-11-21-06-03-46.gh-issue-127090.yUYwdh.rst | 3 ++ 5 files changed, 25 insertions(+), 28 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-21-06-03-46.gh-issue-127090.yUYwdh.rst diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index 00e46990c406ac..042d3b35b77022 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -156,7 +156,7 @@ def test_headers(self): self.assertIsInstance(self.returned_obj.headers, email.message.Message) def test_url(self): - self.assertEqual(self.returned_obj.url, "file://" + self.quoted_pathname) + self.assertEqual(self.returned_obj.url, "file:" + self.quoted_pathname) def test_status(self): self.assertIsNone(self.returned_obj.status) @@ -165,7 +165,7 @@ def test_info(self): self.assertIsInstance(self.returned_obj.info(), email.message.Message) def test_geturl(self): - self.assertEqual(self.returned_obj.geturl(), "file://" + self.quoted_pathname) + self.assertEqual(self.returned_obj.geturl(), "file:" + self.quoted_pathname) def test_getcode(self): self.assertIsNone(self.returned_obj.getcode()) @@ -471,11 +471,14 @@ def test_missing_localfile(self): def test_file_notexists(self): fd, tmp_file = tempfile.mkstemp() - tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/') + tmp_file_canon_url = 'file:' + urllib.request.pathname2url(tmp_file) + parsed = urllib.parse.urlsplit(tmp_file_canon_url) + tmp_fileurl = parsed._replace(netloc='localhost').geturl() try: self.assertTrue(os.path.exists(tmp_file)) with urllib.request.urlopen(tmp_fileurl) as fobj: self.assertTrue(fobj) + self.assertEqual(fobj.url, tmp_file_canon_url) finally: os.close(fd) os.unlink(tmp_file) @@ -609,7 +612,7 @@ def tearDown(self): def constructLocalFileUrl(self, filePath): filePath = os.path.abspath(filePath) - return "file://%s" % 
urllib.request.pathname2url(filePath) + return "file:" + urllib.request.pathname2url(filePath) def createNewTempFile(self, data=b""): """Creates a new temporary file containing the specified data, diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index 99ad11cf0552eb..4a9e653515be5b 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -23,7 +23,7 @@ _proxy_bypass_winreg_override, _proxy_bypass_macosx_sysconf, AbstractDigestAuthHandler) -from urllib.parse import urlparse +from urllib.parse import urlsplit import urllib.error import http.client @@ -717,14 +717,6 @@ def test_processors(self): self.assertIsInstance(args[1], MockResponse) -def sanepathname2url(path): - urlpath = urllib.request.pathname2url(path) - if os.name == "nt" and urlpath.startswith("///"): - urlpath = urlpath[2:] - # XXX don't ask me about the mac... - return urlpath - - class HandlerTests(unittest.TestCase): def test_ftp(self): @@ -818,19 +810,22 @@ def test_file(self): o = h.parent = MockOpener() TESTFN = os_helper.TESTFN - urlpath = sanepathname2url(os.path.abspath(TESTFN)) towrite = b"hello, world\n" + canonurl = 'file:' + urllib.request.pathname2url(os.path.abspath(TESTFN)) + parsed = urlsplit(canonurl) + if parsed.netloc: + raise unittest.SkipTest("non-local working directory") urls = [ - "file://localhost%s" % urlpath, - "file://%s" % urlpath, - "file://%s%s" % (socket.gethostbyname('localhost'), urlpath), + canonurl, + parsed._replace(netloc='localhost').geturl(), + parsed._replace(netloc=socket.gethostbyname('localhost')).geturl(), ] try: localaddr = socket.gethostbyname(socket.gethostname()) except socket.gaierror: localaddr = '' if localaddr: - urls.append("file://%s%s" % (localaddr, urlpath)) + urls.append(parsed._replace(netloc=localaddr).geturl()) for url in urls: f = open(TESTFN, "wb") @@ -855,10 +850,10 @@ def test_file(self): self.assertEqual(headers["Content-type"], "text/plain") self.assertEqual(headers["Content-length"], "13") 
self.assertEqual(headers["Last-modified"], modified) - self.assertEqual(respurl, url) + self.assertEqual(respurl, canonurl) for url in [ - "file://localhost:80%s" % urlpath, + parsed._replace(netloc='localhost:80').geturl(), "file:///file_does_not_exist.txt", "file://not-a-local-host.com//dir/file.txt", "file://%s:80%s/%s" % (socket.gethostbyname('localhost'), @@ -1156,13 +1151,13 @@ def test_full_url_setter(self): r = Request('http://example.com') for url in urls: r.full_url = url - parsed = urlparse(url) + parsed = urlsplit(url) self.assertEqual(r.get_full_url(), url) # full_url setter uses splittag to split into components. # splittag sets the fragment as None while urlparse sets it to '' self.assertEqual(r.fragment or '', parsed.fragment) - self.assertEqual(urlparse(r.get_full_url()).query, parsed.query) + self.assertEqual(urlsplit(r.get_full_url()).query, parsed.query) def test_full_url_deleter(self): r = Request('http://www.example.com') diff --git a/Lib/test/test_urllib2net.py b/Lib/test/test_urllib2net.py index f0874d8d3ce463..b84290a7368c29 100644 --- a/Lib/test/test_urllib2net.py +++ b/Lib/test/test_urllib2net.py @@ -4,7 +4,6 @@ from test.support import os_helper from test.support import socket_helper from test.support import ResourceDenied -from test.test_urllib2 import sanepathname2url import os import socket @@ -151,7 +150,7 @@ def test_file(self): f.write('hi there\n') f.close() urls = [ - 'file:' + sanepathname2url(os.path.abspath(TESTFN)), + 'file:' + urllib.request.pathname2url(os.path.abspath(TESTFN)), ('file:///nonsensename/etc/passwd', None, urllib.error.URLError), ] diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 1fcaa89188188d..7ef85431b718ad 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -1488,10 +1488,7 @@ def open_local_file(self, req): host, port = _splitport(host) if not host or \ (not port and _safe_gethostbyname(host) in self.get_names()): - if host: - origurl = 'file://' + host + filename - else: - 
origurl = 'file://' + filename + origurl = 'file:' + pathname2url(localfile) return addinfourl(open(localfile, 'rb'), headers, origurl) except OSError as exp: raise URLError(exp, exp.filename) diff --git a/Misc/NEWS.d/next/Library/2024-11-21-06-03-46.gh-issue-127090.yUYwdh.rst b/Misc/NEWS.d/next/Library/2024-11-21-06-03-46.gh-issue-127090.yUYwdh.rst new file mode 100644 index 00000000000000..8efe563f443774 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-21-06-03-46.gh-issue-127090.yUYwdh.rst @@ -0,0 +1,3 @@ +Fix value of :attr:`urllib.response.addinfourl.url` for ``file:`` URLs that +express relative paths and absolute Windows paths. The canonical URL generated +by :func:`urllib.request.pathname2url` is now used. From 70154855cf698560dd9a5e484a649839cd68dc7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Filipe=20La=C3=ADns=20=F0=9F=87=B5=F0=9F=87=B8?= Date: Sun, 8 Dec 2024 05:57:22 +0000 Subject: [PATCH 70/76] GH-126789: fix some sysconfig data on late site initializations (#127729) --- Lib/sysconfig/__init__.py | 18 +++++++++---- Lib/test/test_sysconfig.py | 25 +++++++++++++++++++ ...-12-07-23-06-44.gh-issue-126789.4dqfV1.rst | 5 ++++ 3 files changed, 43 insertions(+), 5 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-12-07-23-06-44.gh-issue-126789.4dqfV1.rst diff --git a/Lib/sysconfig/__init__.py b/Lib/sysconfig/__init__.py index ad86609016e478..ed7b6a335d01d4 100644 --- a/Lib/sysconfig/__init__.py +++ b/Lib/sysconfig/__init__.py @@ -173,9 +173,7 @@ def joinuser(*args): _PY_VERSION = sys.version.split()[0] _PY_VERSION_SHORT = f'{sys.version_info[0]}.{sys.version_info[1]}' _PY_VERSION_SHORT_NO_DOT = f'{sys.version_info[0]}{sys.version_info[1]}' -_PREFIX = os.path.normpath(sys.prefix) _BASE_PREFIX = os.path.normpath(sys.base_prefix) -_EXEC_PREFIX = os.path.normpath(sys.exec_prefix) _BASE_EXEC_PREFIX = os.path.normpath(sys.base_exec_prefix) # Mutex guarding initialization of _CONFIG_VARS. 
_CONFIG_VARS_LOCK = threading.RLock() @@ -473,8 +471,8 @@ def _init_config_vars(): global _CONFIG_VARS _CONFIG_VARS = {} - prefix = _PREFIX - exec_prefix = _EXEC_PREFIX + prefix = os.path.normpath(sys.prefix) + exec_prefix = os.path.normpath(sys.exec_prefix) base_prefix = _BASE_PREFIX base_exec_prefix = _BASE_EXEC_PREFIX @@ -564,9 +562,19 @@ def get_config_vars(*args): With arguments, return a list of values that result from looking up each argument in the configuration variable dictionary. """ + global _CONFIG_VARS_INITIALIZED # Avoid claiming the lock once initialization is complete. - if not _CONFIG_VARS_INITIALIZED: + if _CONFIG_VARS_INITIALIZED: + # GH-126789: If sys.prefix or sys.exec_prefix were updated, invalidate the cache. + prefix = os.path.normpath(sys.prefix) + exec_prefix = os.path.normpath(sys.exec_prefix) + if _CONFIG_VARS['prefix'] != prefix or _CONFIG_VARS['exec_prefix'] != exec_prefix: + with _CONFIG_VARS_LOCK: + _CONFIG_VARS_INITIALIZED = False + _init_config_vars() + else: + # Initialize the config_vars cache. with _CONFIG_VARS_LOCK: # Test again with the lock held to avoid races. 
Note that # we test _CONFIG_VARS here, not _CONFIG_VARS_INITIALIZED, diff --git a/Lib/test/test_sysconfig.py b/Lib/test/test_sysconfig.py index 0df1a67ea2b720..ce504dc21af85f 100644 --- a/Lib/test/test_sysconfig.py +++ b/Lib/test/test_sysconfig.py @@ -53,6 +53,8 @@ def setUp(self): os.uname = self._get_uname # saving the environment self.name = os.name + self.prefix = sys.prefix + self.exec_prefix = sys.exec_prefix self.platform = sys.platform self.version = sys.version self._framework = sys._framework @@ -77,6 +79,8 @@ def tearDown(self): else: del os.uname os.name = self.name + sys.prefix = self.prefix + sys.exec_prefix = self.exec_prefix sys.platform = self.platform sys.version = self.version sys._framework = self._framework @@ -653,6 +657,27 @@ def test_sysconfigdata_json(self): self.assertEqual(system_config_vars, json_config_vars) + def test_sysconfig_config_vars_no_prefix_cache(self): + sys.prefix = 'prefix-AAA' + sys.exec_prefix = 'exec-prefix-AAA' + + config_vars = sysconfig.get_config_vars() + + self.assertEqual(config_vars['prefix'], sys.prefix) + self.assertEqual(config_vars['base'], sys.prefix) + self.assertEqual(config_vars['exec_prefix'], sys.exec_prefix) + self.assertEqual(config_vars['platbase'], sys.exec_prefix) + + sys.prefix = 'prefix-BBB' + sys.exec_prefix = 'exec-prefix-BBB' + + config_vars = sysconfig.get_config_vars() + + self.assertEqual(config_vars['prefix'], sys.prefix) + self.assertEqual(config_vars['base'], sys.prefix) + self.assertEqual(config_vars['exec_prefix'], sys.exec_prefix) + self.assertEqual(config_vars['platbase'], sys.exec_prefix) + class MakefileTests(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2024-12-07-23-06-44.gh-issue-126789.4dqfV1.rst b/Misc/NEWS.d/next/Library/2024-12-07-23-06-44.gh-issue-126789.4dqfV1.rst new file mode 100644 index 00000000000000..417e9ac986f27a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-07-23-06-44.gh-issue-126789.4dqfV1.rst @@ -0,0 +1,5 @@ +Fixed 
:func:`sysconfig.get_config_vars`, :func:`sysconfig.get_paths`, and +siblings, returning outdated cached data if the value of :data:`sys.prefix` +or :data:`sys.exec_prefix` changes. Overwriting :data:`sys.prefix` or +:data:`sys.exec_prefix` still is discouraged, as that might break other +parts of the code. From 1503fc8f88d4903e61f76a78a30bcd581b0ee0cd Mon Sep 17 00:00:00 2001 From: Apostol Fet <90645107+ApostolFet@users.noreply.github.com> Date: Sun, 8 Dec 2024 13:05:15 +0300 Subject: [PATCH 71/76] gh-127610: Added validation for more than one var-positional and var-keyword parameters in inspect.Signature (GH-127657) --- Lib/inspect.py | 8 ++++++++ Lib/test/test_inspect/test_inspect.py | 11 +++++++++++ Misc/ACKS | 1 + .../2024-12-06-17-28-55.gh-issue-127610.ctv_NP.rst | 3 +++ 4 files changed, 23 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-12-06-17-28-55.gh-issue-127610.ctv_NP.rst diff --git a/Lib/inspect.py b/Lib/inspect.py index e3f74e9f047eaf..b7d8271f8a471f 100644 --- a/Lib/inspect.py +++ b/Lib/inspect.py @@ -2943,11 +2943,19 @@ def __init__(self, parameters=None, *, return_annotation=_empty, params = OrderedDict() top_kind = _POSITIONAL_ONLY seen_default = False + seen_var_parameters = set() for param in parameters: kind = param.kind name = param.name + if kind in (_VAR_POSITIONAL, _VAR_KEYWORD): + if kind in seen_var_parameters: + msg = f'more than one {kind.description} parameter' + raise ValueError(msg) + + seen_var_parameters.add(kind) + if kind < top_kind: msg = ( 'wrong parameter order: {} parameter before {} ' diff --git a/Lib/test/test_inspect/test_inspect.py b/Lib/test/test_inspect/test_inspect.py index a92627a4d60f68..1ecf18bf49fa7e 100644 --- a/Lib/test/test_inspect/test_inspect.py +++ b/Lib/test/test_inspect/test_inspect.py @@ -2992,6 +2992,17 @@ def test2(pod=42, /): with self.assertRaisesRegex(ValueError, 'follows default argument'): S((pkd, pk)) + second_args = args.replace(name="second_args") + with 
self.assertRaisesRegex(ValueError, 'more than one variadic positional parameter'): + S((args, second_args)) + + with self.assertRaisesRegex(ValueError, 'more than one variadic positional parameter'): + S((args, ko, second_args)) + + second_kwargs = kwargs.replace(name="second_kwargs") + with self.assertRaisesRegex(ValueError, 'more than one variadic keyword parameter'): + S((kwargs, second_kwargs)) + def test_signature_object_pickle(self): def foo(a, b, *, c:1={}, **kw) -> {42:'ham'}: pass foo_partial = functools.partial(foo, a=1) diff --git a/Misc/ACKS b/Misc/ACKS index 913f7c8ecf5f1e..086930666822ad 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -24,6 +24,7 @@ Eitan Adler Anton Afanasyev Ali Afshar Nitika Agarwal +Maxim Ageev Anjani Agrawal Pablo S. Blum de Aguiar Jim Ahlstrom diff --git a/Misc/NEWS.d/next/Library/2024-12-06-17-28-55.gh-issue-127610.ctv_NP.rst b/Misc/NEWS.d/next/Library/2024-12-06-17-28-55.gh-issue-127610.ctv_NP.rst new file mode 100644 index 00000000000000..58769029d79977 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-12-06-17-28-55.gh-issue-127610.ctv_NP.rst @@ -0,0 +1,3 @@ +Added validation for more than one var-positional or +var-keyword parameters in :class:`inspect.Signature`. +Patch by Maxim Ageev. 
From 8fa5ecec01337215bc7baa62c9c16488ecd854fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 8 Dec 2024 14:47:22 +0100 Subject: [PATCH 72/76] gh-123378: fix post-merge typos in comments and NEWS (#127739) --- .../C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst | 2 +- Objects/exceptions.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst b/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst index 2cfb8b8a1e245a..7254a04f61843d 100644 --- a/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst +++ b/Misc/NEWS.d/next/C_API/2024-08-27-09-07-56.gh-issue-123378.JJ6n_u.rst @@ -1,5 +1,5 @@ Ensure that the value of :attr:`UnicodeEncodeError.start ` -retrieved by :c:func:`PyUnicodeEncodeError_GetStart` lie in +retrieved by :c:func:`PyUnicodeEncodeError_GetStart` lies in ``[0, max(0, objlen - 1)]`` where *objlen* is the length of :attr:`UnicodeEncodeError.object `. Similar arguments apply to :exc:`UnicodeDecodeError` and :exc:`UnicodeTranslateError` diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 124b591ee3a13f..287cbc25305964 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2712,7 +2712,7 @@ set_unicodefromstring(PyObject **attr, const char *value) * Adjust the (inclusive) 'start' value of a UnicodeError object. * * The 'start' can be negative or not, but when adjusting the value, - * we clip it in [0, max(0, objlen - 1)] but do not intepret it as + * we clip it in [0, max(0, objlen - 1)] and do not interpret it as * a relative offset. */ static inline Py_ssize_t @@ -2732,8 +2732,8 @@ unicode_error_adjust_start(Py_ssize_t start, Py_ssize_t objlen) * Adjust the (exclusive) 'end' value of a UnicodeError object. 
* * The 'end' can be negative or not, but when adjusting the value, - * we clip it in [min(1, objlen), max(min(1, objlen), objlen)] but - * do not intepret it as a relative offset. + * we clip it in [min(1, objlen), max(min(1, objlen), objlen)] and + * do not interpret it as a relative offset. */ static inline Py_ssize_t unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen) From 3b78409878c39d5afa344f7284b57104f7e765c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?B=C3=A9n=C3=A9dikt=20Tran?= <10796600+picnixz@users.noreply.github.com> Date: Sun, 8 Dec 2024 18:31:10 +0100 Subject: [PATCH 73/76] gh-87138: convert SHA-3 object type to heap type (GH-127670) --- Modules/sha3module.c | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/Modules/sha3module.c b/Modules/sha3module.c index ca839dc55e0519..b13e6a9de10114 100644 --- a/Modules/sha3module.c +++ b/Modules/sha3module.c @@ -71,13 +71,13 @@ typedef struct { static SHA3object * newSHA3object(PyTypeObject *type) { - SHA3object *newobj; - newobj = (SHA3object *)PyObject_New(SHA3object, type); + SHA3object *newobj = PyObject_GC_New(SHA3object, type); if (newobj == NULL) { return NULL; } HASHLIB_INIT_MUTEX(newobj); + PyObject_GC_Track(newobj); return newobj; } @@ -166,15 +166,32 @@ py_sha3_new_impl(PyTypeObject *type, PyObject *data, int usedforsecurity) /* Internal methods for a hash object */ +static int +SHA3_clear(SHA3object *self) +{ + if (self->hash_state != NULL) { + Hacl_Hash_SHA3_free(self->hash_state); + self->hash_state = NULL; + } + return 0; +} + static void SHA3_dealloc(SHA3object *self) { - Hacl_Hash_SHA3_free(self->hash_state); PyTypeObject *tp = Py_TYPE(self); - PyObject_Free(self); + PyObject_GC_UnTrack(self); + (void)SHA3_clear(self); + tp->tp_free(self); Py_DECREF(tp); } +static int +SHA3_traverse(PyObject *self, visitproc visit, void *arg) +{ + Py_VISIT(Py_TYPE(self)); + return 0; +} /* External methods for a hash object */ @@ -335,6 +352,7 @@ 
static PyObject * SHA3_get_capacity_bits(SHA3object *self, void *closure) { uint32_t rate = Hacl_Hash_SHA3_block_len(self->hash_state) * 8; + assert(rate <= 1600); int capacity = 1600 - rate; return PyLong_FromLong(capacity); } @@ -366,12 +384,14 @@ static PyGetSetDef SHA3_getseters[] = { #define SHA3_TYPE_SLOTS(type_slots_obj, type_doc, type_methods, type_getseters) \ static PyType_Slot type_slots_obj[] = { \ + {Py_tp_clear, SHA3_clear}, \ {Py_tp_dealloc, SHA3_dealloc}, \ + {Py_tp_traverse, SHA3_traverse}, \ {Py_tp_doc, (char*)type_doc}, \ {Py_tp_methods, type_methods}, \ {Py_tp_getset, type_getseters}, \ {Py_tp_new, py_sha3_new}, \ - {0,0} \ + {0, NULL} \ } // Using _PyType_GetModuleState() on these types is safe since they @@ -380,7 +400,8 @@ static PyGetSetDef SHA3_getseters[] = { static PyType_Spec type_spec_obj = { \ .name = "_sha3." type_name, \ .basicsize = sizeof(SHA3object), \ - .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE, \ + .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE \ + | Py_TPFLAGS_HAVE_GC, \ .slots = type_slots \ } @@ -444,9 +465,7 @@ _SHAKE_digest(SHA3object *self, unsigned long digestlen, int hex) result = PyBytes_FromStringAndSize((const char *)digest, digestlen); } - if (digest != NULL) { - PyMem_Free(digest); - } + PyMem_Free(digest); return result; } @@ -563,7 +582,7 @@ _sha3_clear(PyObject *module) static void _sha3_free(void *module) { - _sha3_clear((PyObject *)module); + (void)_sha3_clear((PyObject *)module); } static int From 2367759212f609b8ddf3218003b3ccd8e72849ae Mon Sep 17 00:00:00 2001 From: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> Date: Sun, 8 Dec 2024 18:01:55 +0000 Subject: [PATCH 74/76] [doc] Fix typos in `interpreter_definition.md` (#127742) --- Tools/cases_generator/interpreter_definition.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Tools/cases_generator/interpreter_definition.md b/Tools/cases_generator/interpreter_definition.md index 
203286834e3e3f..d50c420307852f 100644 --- a/Tools/cases_generator/interpreter_definition.md +++ b/Tools/cases_generator/interpreter_definition.md @@ -309,7 +309,7 @@ This might become (if it was an instruction): ### More examples -For explanations see "Generating the interpreter" below.) +For explanations see "Generating the interpreter" below. ```C op ( CHECK_HAS_INSTANCE_VALUES, (owner -- owner) ) { PyDictOrValues dorv = *_PyObject_DictOrValuesPointer(owner); @@ -371,7 +371,7 @@ For explanations see "Generating the interpreter" below.) A _family_ maps a specializable instruction to its specializations. -Example: These opcodes all share the same instruction format): +Example: These opcodes all share the same instruction format: ```C family(load_attr) = { LOAD_ATTR, LOAD_ATTR_INSTANCE_VALUE, LOAD_SLOT }; ``` @@ -393,7 +393,7 @@ which can be easily inserted. What is more complex is ensuring the correct stack and not generating excess pops and pushes. For example, in `CHECK_HAS_INSTANCE_VALUES`, `owner` occurs in the input, so it cannot be -redefined. Thus it doesn't need to written and can be read without adjusting the stack pointer. +redefined. Thus it doesn't need to be written and can be read without adjusting the stack pointer. The C code generated for `CHECK_HAS_INSTANCE_VALUES` would look something like: ```C From 7f8ec523021427a5c1ab3ce0cdd6e4bb909f1dc5 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sun, 8 Dec 2024 18:45:09 +0000 Subject: [PATCH 75/76] GH-127381: pathlib ABCs: remove `PathBase.unlink()` and `rmdir()` (#127736) Virtual filesystems don't always make a distinction between deleting files and empty directories, and sometimes support deleting non-empty directories in a single operation. Here we remove `PathBase.unlink()` and `rmdir()`, leaving `_delete()` as the sole deletion method, now made abstract. I hope to drop the underscore prefix later on. 
--- Lib/pathlib/_abc.py | 43 +++-------------- Lib/pathlib/_local.py | 16 +++++-- Lib/test/test_pathlib/test_pathlib.py | 19 ++++++++ Lib/test/test_pathlib/test_pathlib_abc.py | 56 +++++------------------ 4 files changed, 48 insertions(+), 86 deletions(-) diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index 820970fcd5889b..309eab2ff855c3 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -840,6 +840,12 @@ def copy_into(self, target_dir, *, follow_symlinks=True, dirs_exist_ok=dirs_exist_ok, preserve_metadata=preserve_metadata) + def _delete(self): + """ + Delete this file or directory (including all sub-directories). + """ + raise UnsupportedOperation(self._unsupported_msg('_delete()')) + def move(self, target): """ Recursively move this file or directory tree to the given destination. @@ -874,43 +880,6 @@ def lchmod(self, mode): """ self.chmod(mode, follow_symlinks=False) - def unlink(self, missing_ok=False): - """ - Remove this file or link. - If the path is a directory, use rmdir() instead. - """ - raise UnsupportedOperation(self._unsupported_msg('unlink()')) - - def rmdir(self): - """ - Remove this directory. The directory must be empty. - """ - raise UnsupportedOperation(self._unsupported_msg('rmdir()')) - - def _delete(self): - """ - Delete this file or directory (including all sub-directories). - """ - if self.is_symlink() or self.is_junction(): - self.unlink() - elif self.is_dir(): - self._rmtree() - else: - self.unlink() - - def _rmtree(self): - def on_error(err): - raise err - results = self.walk( - on_error=on_error, - top_down=False, # So we rmdir() empty directories. - follow_symlinks=False) - for dirpath, _, filenames in results: - for filename in filenames: - filepath = dirpath / filename - filepath.unlink() - dirpath.rmdir() - def owner(self, *, follow_symlinks=True): """ Return the login name of the file owner. 
diff --git a/Lib/pathlib/_local.py b/Lib/pathlib/_local.py index 250bc12956f5bc..f87069ce70a2de 100644 --- a/Lib/pathlib/_local.py +++ b/Lib/pathlib/_local.py @@ -846,10 +846,18 @@ def rmdir(self): """ os.rmdir(self) - def _rmtree(self): - # Lazy import to improve module import time - import shutil - shutil.rmtree(self) + def _delete(self): + """ + Delete this file or directory (including all sub-directories). + """ + if self.is_symlink() or self.is_junction(): + self.unlink() + elif self.is_dir(): + # Lazy import to improve module import time + import shutil + shutil.rmtree(self) + else: + self.unlink() def rename(self, target): """ diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 8c9049f15d5bf9..ce0f4748c860b1 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -1352,6 +1352,25 @@ def test_group_no_follow_symlinks(self): self.assertEqual(expected_gid, gid_2) self.assertEqual(expected_name, link.group(follow_symlinks=False)) + def test_unlink(self): + p = self.cls(self.base) / 'fileA' + p.unlink() + self.assertFileNotFound(p.stat) + self.assertFileNotFound(p.unlink) + + def test_unlink_missing_ok(self): + p = self.cls(self.base) / 'fileAAA' + self.assertFileNotFound(p.unlink) + p.unlink(missing_ok=True) + + def test_rmdir(self): + p = self.cls(self.base) / 'dirA' + for q in p.iterdir(): + q.unlink() + p.rmdir() + self.assertFileNotFound(p.stat) + self.assertFileNotFound(p.unlink) + @needs_symlinks def test_delete_symlink(self): tmp = self.cls(self.base, 'delete') diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index bf9ae6cc8a2433..675abf30a9f13c 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -1370,8 +1370,6 @@ def test_unsupported_operation(self): self.assertRaises(e, p.touch) self.assertRaises(e, p.chmod, 0o755) self.assertRaises(e, p.lchmod, 0o755) - 
self.assertRaises(e, p.unlink) - self.assertRaises(e, p.rmdir) self.assertRaises(e, p.owner) self.assertRaises(e, p.group) self.assertRaises(e, p.as_uri) @@ -1493,31 +1491,18 @@ def mkdir(self, mode=0o777, parents=False, exist_ok=False): self.parent.mkdir(parents=True, exist_ok=True) self.mkdir(mode, parents=False, exist_ok=exist_ok) - def unlink(self, missing_ok=False): - path = str(self) - name = self.name - parent = str(self.parent) - if path in self._directories: - raise IsADirectoryError(errno.EISDIR, "Is a directory", path) - elif path in self._files: - self._directories[parent].remove(name) - del self._files[path] - elif not missing_ok: - raise FileNotFoundError(errno.ENOENT, "File not found", path) - - def rmdir(self): + def _delete(self): path = str(self) if path in self._files: - raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path) - elif path not in self._directories: - raise FileNotFoundError(errno.ENOENT, "File not found", path) - elif self._directories[path]: - raise OSError(errno.ENOTEMPTY, "Directory not empty", path) - else: - name = self.name - parent = str(self.parent) - self._directories[parent].remove(name) + del self._files[path] + elif path in self._directories: + for name in list(self._directories[path]): + self.joinpath(name)._delete() del self._directories[path] + else: + raise FileNotFoundError(errno.ENOENT, "File not found", path) + parent = str(self.parent) + self._directories[parent].remove(self.name) class DummyPathTest(DummyPurePathTest): @@ -2245,30 +2230,11 @@ def test_is_char_device_false(self): self.assertIs((P / 'fileA\udfff').is_char_device(), False) self.assertIs((P / 'fileA\x00').is_char_device(), False) - def test_unlink(self): - p = self.cls(self.base) / 'fileA' - p.unlink() - self.assertFileNotFound(p.stat) - self.assertFileNotFound(p.unlink) - - def test_unlink_missing_ok(self): - p = self.cls(self.base) / 'fileAAA' - self.assertFileNotFound(p.unlink) - p.unlink(missing_ok=True) - - def test_rmdir(self): - p = 
self.cls(self.base) / 'dirA' - for q in p.iterdir(): - q.unlink() - p.rmdir() - self.assertFileNotFound(p.stat) - self.assertFileNotFound(p.unlink) - def test_delete_file(self): p = self.cls(self.base) / 'fileA' p._delete() self.assertFileNotFound(p.stat) - self.assertFileNotFound(p.unlink) + self.assertFileNotFound(p._delete) def test_delete_dir(self): base = self.cls(self.base) @@ -2347,7 +2313,7 @@ def setUp(self): def tearDown(self): base = self.cls(self.base) - base._rmtree() + base._delete() def test_walk_topdown(self): walker = self.walk_path.walk() From a03efb533a58fd13fb0cc7f4a5c02c8406a407bd Mon Sep 17 00:00:00 2001 From: Stephen Morton Date: Sun, 8 Dec 2024 10:46:34 -0800 Subject: [PATCH 76/76] gh-127734: improve signature of `urllib.request.HTTPPasswordMgrWithPriorAuth.__init__` (#127735) improve signature of urllib.request.HTTPPasswordMgrWithPriorAuth.__init__ --- Lib/urllib/request.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 7ef85431b718ad..c5a6a18a32bba1 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -876,9 +876,9 @@ def find_user_password(self, realm, authuri): class HTTPPasswordMgrWithPriorAuth(HTTPPasswordMgrWithDefaultRealm): - def __init__(self, *args, **kwargs): + def __init__(self): self.authenticated = {} - super().__init__(*args, **kwargs) + super().__init__() def add_password(self, realm, uri, user, passwd, is_authenticated=False): self.update_authenticated(uri, is_authenticated)