From 2303fb98830df908322edce751717c51ef82062d Mon Sep 17 00:00:00 2001
From: Mats Wichmann <mats@linux.com>
Date: Sat, 15 Jun 2024 12:53:50 -0600
Subject: [PATCH 1/3] Update to current fastest env setitem

Updated and re-ran benchmark tests for SubstitutionEnvironment.__setitem__.
Added test for new (Python 3.0) string.isidentifier() method.
That's actually exactly what we want, and it times out as the fastest
method when combined with a membership test if the variable is
already defined.

Tweaked some comments about this performance consideration,
and did other updates in bench/.

Signed-off-by: Mats Wichmann <mats@linux.com>
---
 CHANGES.txt                 |  3 ++
 RELEASE.txt                 |  3 ++
 SCons/Environment.py        | 37 ++++++++--------
 SCons/Util/envs.py          |  5 ++-
 SCons/Variables/__init__.py |  2 +-
 bench/bench.py              | 20 ++++-----
 bench/env.__setitem__.py    | 32 +++++++++++---
 bench/is_types.py           | 85 ++++++++++++-------------------------
 bench/lvars-gvars.py        | 10 ++---
 9 files changed, 97 insertions(+), 100 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index d0eb245f54..5efbe84c90 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -99,6 +99,9 @@ RELEASE  VERSION/DATE TO BE FILLED IN LATER
       SCons.Environment to SCons.Util to avoid the chance of import loops. Variables
       and Environment both use the routine and Environment() uses a Variables()
       object so better to move to a safer location.
+    - Performance tweak: the __setitem__ method of an Environment, used for
+      setting construction variables, now uses the string method isidentifier
+      to validate the name (updated from microbenchmark results).
 
 
 RELEASE 4.7.0 -  Sun, 17 Mar 2024 17:22:20 -0700
diff --git a/RELEASE.txt b/RELEASE.txt
index 8955e465d0..df6f7c1963 100644
--- a/RELEASE.txt
+++ b/RELEASE.txt
@@ -69,6 +69,9 @@ IMPROVEMENTS
 - Make the testing framework a little more resilient: the temporary
   directory for tests now includes a component named "scons" which can
   be given to antivirus software to exclude.
+- Performance tweak: the __setitem__ method of an Environment, used for
+  setting construction variables, now uses the string method isidentifier
+  to validate the name (updated from microbenchmark results).
 
 PACKAGING
 ---------
diff --git a/SCons/Environment.py b/SCons/Environment.py
index 6669bf8ada..9322450627 100644
--- a/SCons/Environment.py
+++ b/SCons/Environment.py
@@ -76,7 +76,6 @@
     to_String_for_subst,
     uniquer_hashables,
 )
-from SCons.Util.envs import is_valid_construction_var
 from SCons.Util.sctyping import ExecutorType
 
 class _Null:
@@ -581,26 +580,20 @@ def __getitem__(self, key):
         return self._dict[key]
 
     def __setitem__(self, key, value):
-        # This is heavily used.  This implementation is the best we have
-        # according to the timings in bench/env.__setitem__.py.
-        #
-        # The "key in self._special_set_keys" test here seems to perform
-        # pretty well for the number of keys we have.  A hard-coded
-        # list worked a little better in Python 2.5, but that has the
-        # disadvantage of maybe getting out of sync if we ever add more
-        # variable names.
-        # So right now it seems like a good trade-off, but feel free to
-        # revisit this with bench/env.__setitem__.py as needed (and
-        # as newer versions of Python come out).
         if key in self._special_set_keys:
             self._special_set[key](self, key, value)
         else:
-            # If we already have the entry, then it's obviously a valid
-            # key and we don't need to check.  If we do check, using a
-            # global, pre-compiled regular expression directly is more
-            # efficient than calling another function or a method.
-            if key not in self._dict and not is_valid_construction_var(key):
-                raise UserError("Illegal construction variable `%s'" % key)
+            # Performance: since this is heavily used, try to avoid checking
+            # if the variable is valid unless necessary. bench/__setitem__.py
+            # times a bunch of different approaches. Based the most recent
+            # run, against Python 3.6-3.13(beta), the best we can do across
+            # different combinations of actions is to use a membership test
+            # to see if we already have the variable, if so it must already
+            # have been checked, so skip; if we do check, "isidentifier()"
+            # (new in Python 3 so wasn't in benchmark until recently)
+            # on the key is the best.
+            if key not in self._dict and not key.isidentifier():
+                raise UserError(f"Illegal construction variable {key!r}")
             self._dict[key] = value
 
     def get(self, key, default=None):
@@ -2579,8 +2572,12 @@ def __getitem__(self, key):
             return self.__dict__['__subject'].__getitem__(key)
 
     def __setitem__(self, key, value):
-        if not is_valid_construction_var(key):
-            raise UserError("Illegal construction variable `%s'" % key)
+        # This doesn't have the same performance equation as a "real"
+        # environment: in an override you're basically just writing
+        # new stuff; it's not a common case to be changing values already
+        # set in the override dict, so don't spend time checking for existance.
+        if not key.isidentifier():
+            raise UserError(f"Illegal construction variable {key!r}")
         self.__dict__['overrides'][key] = value
 
     def __delitem__(self, key):
diff --git a/SCons/Util/envs.py b/SCons/Util/envs.py
index 68a40488c6..2640ef5c19 100644
--- a/SCons/Util/envs.py
+++ b/SCons/Util/envs.py
@@ -330,10 +330,13 @@ def AddMethod(obj, function: Callable, name: Optional[str] = None) -> None:
     setattr(obj, name, method)
 
 
+# This routine is used to validate that a construction var name can be used
+# as a Python identifier, which we require. However, Python 3 introduced an
+# isidentifier() string method so there's really not any need for it now.
 _is_valid_var_re = re.compile(r'[_a-zA-Z]\w*$')
 
 def is_valid_construction_var(varstr: str) -> bool:
-    """Return True if *varstr* is a legitimate construction variable."""
+    """Return True if *varstr* is a legitimate name of a construction variable."""
     return bool(_is_valid_var_re.match(varstr))
 
 # Local Variables:
diff --git a/SCons/Variables/__init__.py b/SCons/Variables/__init__.py
index 1c41130025..1337d14f6e 100644
--- a/SCons/Variables/__init__.py
+++ b/SCons/Variables/__init__.py
@@ -131,7 +131,7 @@ def _do_add(
             option.key = key
             # TODO: normalize to not include key in aliases. Currently breaks tests.
             option.aliases = [key,]
-        if not SCons.Util.is_valid_construction_var(option.key):
+        if not option.key.isidentifier():
             raise SCons.Errors.UserError(f"Illegal Variables key {option.key!r}")
         option.help = help
         option.default = default
diff --git a/bench/bench.py b/bench/bench.py
index 5adac230cd..bafe5a29d4 100644
--- a/bench/bench.py
+++ b/bench/bench.py
@@ -62,10 +62,10 @@
   -h, --help                    Display this help and exit
   -i ITER, --iterations ITER    Run each code snippet ITER times
   --time                        Use the time.time function
-  -r RUNS, --runs RUNS          Average times for RUNS invocations of 
+  -r RUNS, --runs RUNS          Average times for RUNS invocations of
 """
 
-# How many times each snippet of code will be (or should be) run by the 
+# How many times each snippet of code will be (or should be) run by the
 # functions under test to gather the time (the "inner loop").
 
 Iterations = 1000
@@ -191,19 +191,17 @@ def display(func, result_label, results):
 
 
 # pprint(results_dict)
-# breakpoint()
+
 tests = [label for label, args, kw in Data]
-columns = ['Python Version', 'Implementation', 'Test'] + tests
+columns = ['Python Version', 'Implementation'] + tests
 with open(results_filename, 'a') as r:
-    print("Python Version,%s" % ".".join(columns), file=r)
-    # print("Python Version,%s" % ".".join(columns))
+    print(",".join(columns), file=r)
 
-    for implementation in results_dict.keys():
+    for implementation in results_dict:
+        print(f'{py_ver_string},"{implementation}"', file=r, end='')
         for test in tests:
-            print(f'{py_ver_string},"{implementation}","{test}",%8.3f' % results_dict[implementation][test], file=r)
-            # print(f'{py_ver_string},"{implementation}","{test}",%8.3f' % results_dict[implementation][test])
-
-
+            print(',%8.3f' % results_dict[implementation][test], file=r, end='')
+        print(file=r)
 
 # Local Variables:
 # tab-width:4
diff --git a/bench/env.__setitem__.py b/bench/env.__setitem__.py
index 2cd0da826f..e915e46338 100644
--- a/bench/env.__setitem__.py
+++ b/bench/env.__setitem__.py
@@ -57,14 +57,14 @@ def times(num=1000000, init='', title='Results:', **statements):
 script_dir = os.path.split(filename)[0]
 if script_dir:
     script_dir = script_dir + '/'
-sys.path = [os.path.abspath(script_dir + '../src/engine')] + sys.path
+sys.path = [os.path.abspath(script_dir + '..')] + sys.path
 
 import SCons.Errors
 import SCons.Environment
 import SCons.Util
 
 is_valid_construction_var = SCons.Util.is_valid_construction_var
-global_valid_var = re.compile(r'[_a-zA-Z]\w*$')
+global_valid_var = SCons.Util.envs._is_valid_var_re
 
 # The classes with different __setitem__() implementations that we're
 # going to horse-race.
@@ -105,7 +105,7 @@ class Environment:
         'SOURCES'  : None,
     }
     _special_set_keys = list(_special_set.keys())
-    _valid_var = re.compile(r'[_a-zA-Z]\w*$')
+    _valid_var = global_valid_var
     def __init__(self, **kw):
         self._dict = kw
 
@@ -247,8 +247,8 @@ def __setitem__(self, key, value):
                     raise SCons.Errors.UserError("Illegal construction variable `%s'" % key)
             self._dict[key] = value
 
-class env_Best_has_key(Environment):
-    """Best __setitem__(), with has_key"""
+class env_Best_has_key_global_regex(Environment):
+    """Best __setitem__(), with membership test and global regex"""
     def __setitem__(self, key, value):
         if key in self._special_set:
             self._special_set[key](self, key, value)
@@ -258,6 +258,18 @@ def __setitem__(self, key, value):
                     raise SCons.Errors.UserError("Illegal construction variable `%s'" % key)
             self._dict[key] = value
 
+class env_Best_has_key_function(Environment):
+    """Best __setitem__(), with membership_test and validator function"""
+    def __setitem__(self, key, value):
+        if key in self._special_set:
+            self._special_set[key](self, key, value)
+        else:
+            if key not in self._dict \
+               and not is_valid_construction_var(key):
+                    raise SCons.Errors.UserError("Illegal construction variable `%s'" % key)
+            self._dict[key] = value
+
+
 class env_Best_list(Environment):
     """Best __setitem__(), with a list"""
     def __setitem__(self, key, value):
@@ -284,6 +296,16 @@ def __setitem__(self, key, value):
                     raise SCons.Errors.UserError("Illegal construction variable `%s'" % key)
                 self._dict[key] = value
 
+class env_Best_isidentifier(Environment):
+    """Best __setitem__(), with membership test and isidentifier"""
+    def __setitem__(self, key, value):
+        if key in self._special_set:
+            self._special_set[key](self, key, value)
+        else:
+            if key not in self._dict and not key.isidentifier():
+                raise SCons.Errors.UserError("Illegal construction variable `%s'" % key)
+            self._dict[key] = value
+
 # We'll use the names of all the env_* classes we find later to build
 # the dictionary of statements to be timed, and the import statement
 # that the timer will use to get at these classes.
diff --git a/bench/is_types.py b/bench/is_types.py
index 01db768ca9..257fa915c4 100644
--- a/bench/is_types.py
+++ b/bench/is_types.py
@@ -2,10 +2,11 @@
 #
 # Benchmarks for testing various possible implementations
 # of the is_Dict(), is_List() and is_String() functions in
-# src/engine/SCons/Util.py.
+# SCons/Util.py.
 
 import types
-from collections import UserDict, UserList, UserString
+from collections import UserDict, UserList, UserString, deque
+from collections.abc import Iterable, MappingView, MutableMapping, MutableSequence
 
 DictType = dict
 ListType = list
@@ -28,19 +29,20 @@ def original_is_String(e):
 
 # New candidates that explicitly check for whether the object is an
 # InstanceType before calling isinstance() on the corresponding User*
-# type.
-# InstanceType was only for old-style classes, so absent in Python 3
-# this this is no different than the previous
+# type. Update: meaningless in Python 3, InstanceType was only for
+# old-style classes, so these are just removed.
+# XXX
 
-def checkInstanceType_is_Dict(e):
-    return isinstance(e, (dict, UserDict))
+# New candidates that try more generic names from collections:
 
-def checkInstanceType_is_List(e):
-    return isinstance(e, (list, UserList))
+def new_is_Dict(e):
+    return isinstance(e, MutableMapping)
 
-def checkInstanceType_is_String(e):
-    return isinstance(e, (str, UserString))
+def new_is_List(e):
+    return isinstance(e, MutableSequence)
 
+def new_is_String(e):
+    return isinstance(e, (str, UserString))
 
 # Improved candidates that cache the type(e) result in a variable
 # before doing any checks.
@@ -51,7 +53,7 @@ def cache_type_e_is_Dict(e):
 
 def cache_type_e_is_List(e):
     t = type(e)
-    return t is list or isinstance(e, UserList)
+    return t is list or isinstance(e, UserList or isinstance(e, deque))
 
 def cache_type_e_is_String(e):
     t = type(e)
@@ -68,7 +70,7 @@ def global_cache_type_e_is_Dict(e):
 
 def global_cache_type_e_is_List(e):
     t = type(e)
-    return t is ListType or isinstance(e, UserList)
+    return t is ListType or isinstance(e, (UserList, deque))
 
 def global_cache_type_e_is_String(e):
     t = type(e)
@@ -77,30 +79,10 @@ def global_cache_type_e_is_String(e):
 
 # Alternative that uses a myType() function to map the User* objects
 # to their corresponding underlying types.
-
-instanceTypeMap = {
-    UserDict : dict,
-    UserList : list,
-    UserString : str,
-}
-
-def myType(obj):
-    t = type(obj)
-    if t is types.InstanceType:
-        t = instanceTypeMap.get(obj.__class__, t)
-    return t
-
-def myType_is_Dict(e):
-    return myType(e) is dict
-
-def myType_is_List(e):
-    return myType(e) is list
-
-def myType_is_String(e):
-    return myType(e) is str
-
+# Again, since this used InstanceType, it's not useful for Python 3.
 
 
+# These are the actual test entry points
 
 def Func01(obj):
     """original_is_String"""
@@ -118,19 +100,19 @@ def Func03(obj):
         original_is_Dict(obj)
 
 def Func04(obj):
-    """checkInstanceType_is_String"""
+    """new_is_String"""
     for i in IterationList:
-        checkInstanceType_is_String(obj)
+        new_is_String(obj)
 
 def Func05(obj):
-    """checkInstanceType_is_List"""
+    """new_is_List"""
     for i in IterationList:
-        checkInstanceType_is_List(obj)
+        new_is_List(obj)
 
 def Func06(obj):
-    """checkInstanceType_is_Dict"""
+    """new_is_Dict"""
     for i in IterationList:
-        checkInstanceType_is_Dict(obj)
+        new_is_Dict(obj)
 
 def Func07(obj):
     """cache_type_e_is_String"""
@@ -162,22 +144,6 @@ def Func12(obj):
     for i in IterationList:
         global_cache_type_e_is_Dict(obj)
 
-#def Func13(obj):
-#    """myType_is_String"""
-#    for i in IterationList:
-#        myType_is_String(obj)
-#
-#def Func14(obj):
-#    """myType_is_List"""
-#    for i in IterationList:
-#        myType_is_List(obj)
-#
-#def Func15(obj):
-#    """myType_is_Dict"""
-#    for i in IterationList:
-#        myType_is_Dict(obj)
-
-
 
 # Data to pass to the functions on each run.  Each entry is a
 # three-element tuple:
@@ -217,6 +183,11 @@ class A:
         (UserList([]),),
         {},
     ),
+    (
+        "deque",
+        (deque([]),),
+        {},
+    ),
     (
         "UserDict",
         (UserDict({}),),
diff --git a/bench/lvars-gvars.py b/bench/lvars-gvars.py
index 0a81337581..91d76a0c43 100644
--- a/bench/lvars-gvars.py
+++ b/bench/lvars-gvars.py
@@ -23,9 +23,9 @@
 
 """
 Functions and data for timing different idioms for fetching a keyword
-value from a pair of dictionaries for localand global values.  This was
+value from a pair of dictionaries for local and global values.  This was
 used to select how to most efficiently expand single $KEYWORD strings
-in src/engine/SCons/Subst.py.
+in SCons/Subst.py (StringSubber and ListSubber).
 """
 
 def Func1(var, gvars, lvars):
@@ -40,7 +40,7 @@ def Func1(var, gvars, lvars):
                 x = ''
 
 def Func2(var, gvars, lvars):
-    """lvars has_key(), gvars try:-except:"""
+    """lvars membership test, gvars try:-except:"""
     for i in IterationList:
         if var in lvars:
             x = lvars[var]
@@ -51,7 +51,7 @@ def Func2(var, gvars, lvars):
                 x = ''
 
 def Func3(var, gvars, lvars):
-    """lvars has_key(), gvars has_key()"""
+    """lvars membership test, gvars membership test)"""
     for i in IterationList:
         if var in lvars:
             x = lvars[var]
@@ -71,7 +71,7 @@ def Func4(var, gvars, lvars):
 def Func5(var, gvars, lvars):
     """Chained get with default values"""
     for i in IterationList:
-        x = lvars.get(var,gvars.get(var,''))
+        x = lvars.get(var, gvars.get(var, ''))
 
 
 # Data to pass to the functions on each run.  Each entry is a

From 78a26bb2cd5c9e794dd491821c11d7ba33c452ed Mon Sep 17 00:00:00 2001
From: Mats Wichmann <mats@linux.com>
Date: Sat, 15 Jun 2024 13:26:35 -0600
Subject: [PATCH 2/3] Fix test for bad construction var

Didn't notice that the changes for env.__setitem__ caused a quote
mark to flip in the error output, failing one test (a back-quote
went to a single regular quote). Updated the test, and snuck in
the tools-performance-hack.

Signed-off-by: Mats Wichmann <mats@linux.com>
---
 test/bad-variables.py | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/test/bad-variables.py b/test/bad-variables.py
index 11f2be1af7..06f106447c 100644
--- a/test/bad-variables.py
+++ b/test/bad-variables.py
@@ -1,6 +1,8 @@
 #!/usr/bin/env python
 #
-# __COPYRIGHT__
+# MIT License
+#
+# Copyright The SCons Foundation
 #
 # Permission is hereby granted, free of charge, to any person obtaining
 # a copy of this software and associated documentation files (the
@@ -20,9 +22,6 @@
 # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-#
-
-__revision__ = "__FILE__ __REVISION__ __DATE__ __DEVELOPER__"
 
 """
 Test that setting illegal construction variables fails in ways that are
@@ -37,13 +36,14 @@
 SConscript_path = test.workpath('SConscript')
 
 test.write(SConstruct_path, """\
-env = Environment()
+_ = DefaultEnvironment(tools=[])
+env = Environment(tools=[])
 env['foo-bar'] = 1
 """)
 
 expect_stderr = """
-scons: *** Illegal construction variable `foo-bar'
-""" + test.python_file_line(SConstruct_path, 2)
+scons: *** Illegal construction variable 'foo-bar'
+""" + test.python_file_line(SConstruct_path, 3)
 
 test.run(arguments='.', status=2, stderr=expect_stderr)
 
@@ -54,14 +54,15 @@
 """)
 
 test.write('SConscript', """\
-env = Environment()
+_ = DefaultEnvironment(tools=[])
+env = Environment(tools=[])
 env['foo(bar)'] = 1
 """)
 
 
 expect_stderr = """
-scons: *** Illegal construction variable `foo(bar)'
-""" + test.python_file_line(SConscript_path, 2)
+scons: *** Illegal construction variable 'foo(bar)'
+""" + test.python_file_line(SConscript_path, 3)
 
 test.run(arguments='.', status=2, stderr=expect_stderr)
 

From 88f7f9ab27d2d953597eab9cc467f580dfa5f846 Mon Sep 17 00:00:00 2001
From: Mats Wichmann <mats@linux.com>
Date: Fri, 28 Jun 2024 07:33:34 -0600
Subject: [PATCH 3/3] bench: fix bas isinstance sequence in one test

Signed-off-by: Mats Wichmann <mats@linux.com>
---
 bench/is_types.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bench/is_types.py b/bench/is_types.py
index 257fa915c4..ecc0edbdb0 100644
--- a/bench/is_types.py
+++ b/bench/is_types.py
@@ -53,7 +53,7 @@ def cache_type_e_is_Dict(e):
 
 def cache_type_e_is_List(e):
     t = type(e)
-    return t is list or isinstance(e, UserList or isinstance(e, deque))
+    return t is list or isinstance(e, (UserList, deque))
 
 def cache_type_e_is_String(e):
     t = type(e)