From 2303fb98830df908322edce751717c51ef82062d Mon Sep 17 00:00:00 2001 From: Mats Wichmann Date: Sat, 15 Jun 2024 12:53:50 -0600 Subject: [PATCH 1/3] Update to current fastest env setitem Updated and re-ran benchmark tests for SubstitutionEnvironment.__setitem__. Added test for new (Python 3.0) string.isidentifier() method. That's actually exactly what we want, and it times out as the fastest method when combined with a membership test if the variable is already defined. Tweaked some comments about this performance consideration, and did other updates in bench/. Signed-off-by: Mats Wichmann --- CHANGES.txt | 3 ++ RELEASE.txt | 3 ++ SCons/Environment.py | 37 ++++++++-------- SCons/Util/envs.py | 5 ++- SCons/Variables/__init__.py | 2 +- bench/bench.py | 20 ++++----- bench/env.__setitem__.py | 32 +++++++++++--- bench/is_types.py | 85 ++++++++++++------------------------- bench/lvars-gvars.py | 10 ++--- 9 files changed, 97 insertions(+), 100 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index d0eb245f54..5efbe84c90 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -99,6 +99,9 @@ RELEASE VERSION/DATE TO BE FILLED IN LATER SCons.Environment to SCons.Util to avoid the chance of import loops. Variables and Environment both use the routine and Environment() uses a Variables() object so better to move to a safer location. + - Performance tweak: the __setitem__ method of an Environment, used for + setting construction variables, now uses the string method isidentifier + to validate the name (updated from microbenchmark results). RELEASE 4.7.0 - Sun, 17 Mar 2024 17:22:20 -0700 diff --git a/RELEASE.txt b/RELEASE.txt index 8955e465d0..df6f7c1963 100644 --- a/RELEASE.txt +++ b/RELEASE.txt @@ -69,6 +69,9 @@ IMPROVEMENTS - Make the testing framework a little more resilient: the temporary directory for tests now includes a component named "scons" which can be given to antivirus software to exclude. +- Performance tweak: the __setitem__ method of an Environment, used for + setting construction variables, now uses the string method isidentifier + to validate the name (updated from microbenchmark results). PACKAGING --------- diff --git a/SCons/Environment.py b/SCons/Environment.py index 6669bf8ada..9322450627 100644 --- a/SCons/Environment.py +++ b/SCons/Environment.py @@ -76,7 +76,6 @@ to_String_for_subst, uniquer_hashables, ) -from SCons.Util.envs import is_valid_construction_var from SCons.Util.sctyping import ExecutorType class _Null: @@ -581,26 +580,20 @@ def __getitem__(self, key): return self._dict[key] def __setitem__(self, key, value): - # This is heavily used. This implementation is the best we have - # according to the timings in bench/env.__setitem__.py. - # - # The "key in self._special_set_keys" test here seems to perform - # pretty well for the number of keys we have. A hard-coded - # list worked a little better in Python 2.5, but that has the - # disadvantage of maybe getting out of sync if we ever add more - # variable names. - # So right now it seems like a good trade-off, but feel free to - # revisit this with bench/env.__setitem__.py as needed (and - # as newer versions of Python come out). if key in self._special_set_keys: self._special_set[key](self, key, value) else: - # If we already have the entry, then it's obviously a valid - # key and we don't need to check. If we do check, using a - # global, pre-compiled regular expression directly is more - # efficient than calling another function or a method. - if key not in self._dict and not is_valid_construction_var(key): - raise UserError("Illegal construction variable `%s'" % key) + # Performance: since this is heavily used, try to avoid checking + # if the variable is valid unless necessary. bench/__setitem__.py + # times a bunch of different approaches. Based the most recent + # run, against Python 3.6-3.13(beta), the best we can do across + # different combinations of actions is to use a membership test + # to see if we already have the variable, if so it must already + # have been checked, so skip; if we do check, "isidentifier()" + # (new in Python 3 so wasn't in benchmark until recently) + # on the key is the best. + if key not in self._dict and not key.isidentifier(): + raise UserError(f"Illegal construction variable {key!r}") self._dict[key] = value def get(self, key, default=None): @@ -2579,8 +2572,12 @@ def __getitem__(self, key): return self.__dict__['__subject'].__getitem__(key) def __setitem__(self, key, value): - if not is_valid_construction_var(key): - raise UserError("Illegal construction variable `%s'" % key) + # This doesn't have the same performance equation as a "real" + # environment: in an override you're basically just writing + # new stuff; it's not a common case to be changing values already + # set in the override dict, so don't spend time checking for existance. + if not key.isidentifier(): + raise UserError(f"Illegal construction variable {key!r}") self.__dict__['overrides'][key] = value def __delitem__(self, key): diff --git a/SCons/Util/envs.py b/SCons/Util/envs.py index 68a40488c6..2640ef5c19 100644 --- a/SCons/Util/envs.py +++ b/SCons/Util/envs.py @@ -330,10 +330,13 @@ def AddMethod(obj, function: Callable, name: Optional[str] = None) -> None: setattr(obj, name, method) +# This routine is used to validate that a construction var name can be used +# as a Python identifier, which we require. However, Python 3 introduced an +# isidentifier() string method so there's really not any need for it now. _is_valid_var_re = re.compile(r'[_a-zA-Z]\w*$') def is_valid_construction_var(varstr: str) -> bool: - """Return True if *varstr* is a legitimate construction variable.""" + """Return True if *varstr* is a legitimate name of a construction variable.""" return bool(_is_valid_var_re.match(varstr)) # Local Variables: diff --git a/SCons/Variables/__init__.py b/SCons/Variables/__init__.py index 1c41130025..1337d14f6e 100644 --- a/SCons/Variables/__init__.py +++ b/SCons/Variables/__init__.py @@ -131,7 +131,7 @@ def _do_add( option.key = key # TODO: normalize to not include key in aliases. Currently breaks tests. option.aliases = [key,] - if not SCons.Util.is_valid_construction_var(option.key): + if not option.key.isidentifier(): raise SCons.Errors.UserError(f"Illegal Variables key {option.key!r}") option.help = help option.default = default diff --git a/bench/bench.py b/bench/bench.py index 5adac230cd..bafe5a29d4 100644 --- a/bench/bench.py +++ b/bench/bench.py @@ -62,10 +62,10 @@ -h, --help Display this help and exit -i ITER, --iterations ITER Run each code snippet ITER times --time Use the time.time function - -r RUNS, --runs RUNS Average times for RUNS invocations of + -r RUNS, --runs RUNS Average times for RUNS invocations of """ -# How many times each snippet of code will be (or should be) run by the +# How many times each snippet of code will be (or should be) run by the # functions under test to gather the time (the "inner loop"). Iterations = 1000 @@ -191,19 +191,17 @@ def display(func, result_label, results): # pprint(results_dict) -# breakpoint() + tests = [label for label, args, kw in Data] -columns = ['Python Version', 'Implementation', 'Test'] + tests +columns = ['Python Version', 'Implementation'] + tests with open(results_filename, 'a') as r: - print("Python Version,%s" % ".".join(columns), file=r) - # print("Python Version,%s" % ".".join(columns)) + print(",".join(columns), file=r) - for implementation in results_dict.keys(): + for implementation in results_dict: + print(f'{py_ver_string},"{implementation}"', file=r, end='') for test in tests: - print(f'{py_ver_string},"{implementation}","{test}",%8.3f' % results_dict[implementation][test], file=r) - # print(f'{py_ver_string},"{implementation}","{test}",%8.3f' % results_dict[implementation][test]) - - + print(',%8.3f' % results_dict[implementation][test], file=r, end='') + print(file=r) # Local Variables: # tab-width:4 diff --git a/bench/env.__setitem__.py b/bench/env.__setitem__.py index 2cd0da826f..e915e46338 100644 --- a/bench/env.__setitem__.py +++ b/bench/env.__setitem__.py @@ -57,14 +57,14 @@ def times(num=1000000, init='', title='Results:', **statements): script_dir = os.path.split(filename)[0] if script_dir: script_dir = script_dir + '/' -sys.path = [os.path.abspath(script_dir + '../src/engine')] + sys.path +sys.path = [os.path.abspath(script_dir + '..')] + sys.path import SCons.Errors import SCons.Environment import SCons.Util is_valid_construction_var = SCons.Util.is_valid_construction_var -global_valid_var = re.compile(r'[_a-zA-Z]\w*$') +global_valid_var = SCons.Util.envs._is_valid_var_re # The classes with different __setitem__() implementations that we're # going to horse-race. @@ -105,7 +105,7 @@ class Environment: 'SOURCES' : None, } _special_set_keys = list(_special_set.keys()) - _valid_var = re.compile(r'[_a-zA-Z]\w*$') + _valid_var = global_valid_var def __init__(self, **kw): self._dict = kw @@ -247,8 +247,8 @@ def __setitem__(self, key, value): raise SCons.Errors.UserError("Illegal construction variable `%s'" % key) self._dict[key] = value -class env_Best_has_key(Environment): - """Best __setitem__(), with has_key""" +class env_Best_has_key_global_regex(Environment): + """Best __setitem__(), with membership test and global regex""" def __setitem__(self, key, value): if key in self._special_set: self._special_set[key](self, key, value) @@ -258,6 +258,18 @@ def __setitem__(self, key, value): raise SCons.Errors.UserError("Illegal construction variable `%s'" % key) self._dict[key] = value +class env_Best_has_key_function(Environment): + """Best __setitem__(), with membership_test and validator function""" + def __setitem__(self, key, value): + if key in self._special_set: + self._special_set[key](self, key, value) + else: + if key not in self._dict \ + and not is_valid_construction_var(key): + raise SCons.Errors.UserError("Illegal construction variable `%s'" % key) + self._dict[key] = value + + class env_Best_list(Environment): """Best __setitem__(), with a list""" def __setitem__(self, key, value): @@ -284,6 +296,16 @@ def __setitem__(self, key, value): raise SCons.Errors.UserError("Illegal construction variable `%s'" % key) self._dict[key] = value +class env_Best_isidentifier(Environment): + """Best __setitem__(), with membership test and isidentifier""" + def __setitem__(self, key, value): + if key in self._special_set: + self._special_set[key](self, key, value) + else: + if key not in self._dict and not key.isidentifier(): + raise SCons.Errors.UserError("Illegal construction variable `%s'" % key) + self._dict[key] = value + # We'll use the names of all the env_* classes we find later to build # the dictionary of statements to be timed, and the import statement # that the timer will use to get at these classes. diff --git a/bench/is_types.py b/bench/is_types.py index 01db768ca9..257fa915c4 100644 --- a/bench/is_types.py +++ b/bench/is_types.py @@ -2,10 +2,11 @@ # # Benchmarks for testing various possible implementations # of the is_Dict(), is_List() and is_String() functions in -# src/engine/SCons/Util.py. +# SCons/Util.py. import types -from collections import UserDict, UserList, UserString +from collections import UserDict, UserList, UserString, deque +from collections.abc import Iterable, MappingView, MutableMapping, MutableSequence DictType = dict ListType = list @@ -28,19 +29,20 @@ def original_is_String(e): # New candidates that explicitly check for whether the object is an # InstanceType before calling isinstance() on the corresponding User* -# type. -# InstanceType was only for old-style classes, so absent in Python 3 -# this this is no different than the previous +# type. Update: meaningless in Python 3, InstanceType was only for +# old-style classes, so these are just removed. +# XXX -def checkInstanceType_is_Dict(e): - return isinstance(e, (dict, UserDict)) +# New candidates that try more generic names from collections: -def checkInstanceType_is_List(e): - return isinstance(e, (list, UserList)) +def new_is_Dict(e): + return isinstance(e, MutableMapping) -def checkInstanceType_is_String(e): - return isinstance(e, (str, UserString)) +def new_is_List(e): + return isinstance(e, MutableSequence) +def new_is_String(e): + return isinstance(e, (str, UserString)) # Improved candidates that cache the type(e) result in a variable # before doing any checks. @@ -51,7 +53,7 @@ def cache_type_e_is_Dict(e): def cache_type_e_is_List(e): t = type(e) - return t is list or isinstance(e, UserList) + return t is list or isinstance(e, UserList or isinstance(e, deque)) def cache_type_e_is_String(e): t = type(e) @@ -68,7 +70,7 @@ def global_cache_type_e_is_Dict(e): def global_cache_type_e_is_List(e): t = type(e) - return t is ListType or isinstance(e, UserList) + return t is ListType or isinstance(e, (UserList, deque)) def global_cache_type_e_is_String(e): t = type(e) @@ -77,30 +79,10 @@ def global_cache_type_e_is_String(e): # Alternative that uses a myType() function to map the User* objects # to their corresponding underlying types. - -instanceTypeMap = { - UserDict : dict, - UserList : list, - UserString : str, -} - -def myType(obj): - t = type(obj) - if t is types.InstanceType: - t = instanceTypeMap.get(obj.__class__, t) - return t - -def myType_is_Dict(e): - return myType(e) is dict - -def myType_is_List(e): - return myType(e) is list - -def myType_is_String(e): - return myType(e) is str - +# Again, since this used InstanceType, it's not useful for Python 3. +# These are the actual test entry points def Func01(obj): """original_is_String""" @@ -118,19 +100,19 @@ def Func03(obj): original_is_Dict(obj) def Func04(obj): - """checkInstanceType_is_String""" + """new_is_String""" for i in IterationList: - checkInstanceType_is_String(obj) + new_is_String(obj) def Func05(obj): - """checkInstanceType_is_List""" + """new_is_List""" for i in IterationList: - checkInstanceType_is_List(obj) + new_is_List(obj) def Func06(obj): - """checkInstanceType_is_Dict""" + """new_is_Dict""" for i in IterationList: - checkInstanceType_is_Dict(obj) + new_is_Dict(obj) def Func07(obj): """cache_type_e_is_String""" @@ -162,22 +144,6 @@ def Func12(obj): for i in IterationList: global_cache_type_e_is_Dict(obj) -#def Func13(obj): -# """myType_is_String""" -# for i in IterationList: -# myType_is_String(obj) -# -#def Func14(obj): -# """myType_is_List""" -# for i in IterationList: -# myType_is_List(obj) -# -#def Func15(obj): -# """myType_is_Dict""" -# for i in IterationList: -# myType_is_Dict(obj) - - # Data to pass to the functions on each run. Each entry is a # three-element tuple: @@ -217,6 +183,11 @@ class A: (UserList([]),), {}, ), + ( + "deque", + (deque([]),), + {}, + ), ( "UserDict", (UserDict({}),), diff --git a/bench/lvars-gvars.py b/bench/lvars-gvars.py index 0a81337581..91d76a0c43 100644 --- a/bench/lvars-gvars.py +++ b/bench/lvars-gvars.py @@ -23,9 +23,9 @@ """ Functions and data for timing different idioms for fetching a keyword -value from a pair of dictionaries for localand global values. This was +value from a pair of dictionaries for local and global values. This was used to select how to most efficiently expand single $KEYWORD strings -in src/engine/SCons/Subst.py. +in SCons/Subst.py (StringSubber and ListSubber). """ def Func1(var, gvars, lvars): @@ -40,7 +40,7 @@ def Func1(var, gvars, lvars): x = '' def Func2(var, gvars, lvars): - """lvars has_key(), gvars try:-except:""" + """lvars membership test, gvars try:-except:""" for i in IterationList: if var in lvars: x = lvars[var] @@ -51,7 +51,7 @@ def Func2(var, gvars, lvars): x = '' def Func3(var, gvars, lvars): - """lvars has_key(), gvars has_key()""" + """lvars membership test, gvars membership test)""" for i in IterationList: if var in lvars: x = lvars[var] @@ -71,7 +71,7 @@ def Func4(var, gvars, lvars): def Func5(var, gvars, lvars): """Chained get with default values""" for i in IterationList: - x = lvars.get(var,gvars.get(var,'')) + x = lvars.get(var, gvars.get(var, '')) # Data to pass to the functions on each run. Each entry is a From 78a26bb2cd5c9e794dd491821c11d7ba33c452ed Mon Sep 17 00:00:00 2001 From: Mats Wichmann Date: Sat, 15 Jun 2024 13:26:35 -0600 Subject: [PATCH 2/3] Fix test for bad construction var Didn't notice that the changes for env.__setitem__ caused a quote mark to flip in the error output, failing one test (a back-quote went to a single regular quote). Updated the test, and snuck in the tools-performance-hack. Signed-off-by: Mats Wichmann --- test/bad-variables.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/test/bad-variables.py b/test/bad-variables.py index 11f2be1af7..06f106447c 100644 --- a/test/bad-variables.py +++ b/test/bad-variables.py @@ -1,6 +1,8 @@ #!/usr/bin/env python # -# __COPYRIGHT__ +# MIT License +# +# Copyright The SCons Foundation # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -20,9 +22,6 @@ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -# - -__revision__ = "__FILE__ __REVISION__ __DATE__ __DEVELOPER__" """ Test that setting illegal construction variables fails in ways that are @@ -37,13 +36,14 @@ SConscript_path = test.workpath('SConscript') test.write(SConstruct_path, """\ -env = Environment() +_ = DefaultEnvironment(tools=[]) +env = Environment(tools=[]) env['foo-bar'] = 1 """) expect_stderr = """ -scons: *** Illegal construction variable `foo-bar' -""" + test.python_file_line(SConstruct_path, 2) +scons: *** Illegal construction variable 'foo-bar' +""" + test.python_file_line(SConstruct_path, 3) test.run(arguments='.', status=2, stderr=expect_stderr) @@ -54,14 +54,15 @@ """) test.write('SConscript', """\ -env = Environment() +_ = DefaultEnvironment(tools=[]) +env = Environment(tools=[]) env['foo(bar)'] = 1 """) expect_stderr = """ -scons: *** Illegal construction variable `foo(bar)' -""" + test.python_file_line(SConscript_path, 2) +scons: *** Illegal construction variable 'foo(bar)' +""" + test.python_file_line(SConscript_path, 3) test.run(arguments='.', status=2, stderr=expect_stderr) From 88f7f9ab27d2d953597eab9cc467f580dfa5f846 Mon Sep 17 00:00:00 2001 From: Mats Wichmann Date: Fri, 28 Jun 2024 07:33:34 -0600 Subject: [PATCH 3/3] bench: fix bas isinstance sequence in one test Signed-off-by: Mats Wichmann --- bench/is_types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bench/is_types.py b/bench/is_types.py index 257fa915c4..ecc0edbdb0 100644 --- a/bench/is_types.py +++ b/bench/is_types.py @@ -53,7 +53,7 @@ def cache_type_e_is_Dict(e): def cache_type_e_is_List(e): t = type(e) - return t is list or isinstance(e, UserList or isinstance(e, deque)) + return t is list or isinstance(e, (UserList, deque)) def cache_type_e_is_String(e): t = type(e)