Skip to content

Commit

Permalink
Merge pull request SCons#4556 from mwichmann/perf/env-setitem
Browse files Browse the repository at this point in the history
Update to current fastest env setitem
  • Loading branch information
bdbaddog authored Jun 28, 2024
2 parents 80e5b0c + 88f7f9a commit dc42f22
Show file tree
Hide file tree
Showing 10 changed files with 108 additions and 110 deletions.
3 changes: 3 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,9 @@ RELEASE VERSION/DATE TO BE FILLED IN LATER
SCons.Environment to SCons.Util to avoid the chance of import loops. Variables
and Environment both use the routine and Environment() uses a Variables()
object so better to move to a safer location.
- Performance tweak: the __setitem__ method of an Environment, used for
setting construction variables, now uses the string method isidentifier
to validate the name (updated from microbenchmark results).
- AddOption and the internal add_local_option which AddOption calls now
recognize a "settable" keyword argument to indicate a project-added
option can also be modified using SetOption. Fixes #3983.
Expand Down
3 changes: 3 additions & 0 deletions RELEASE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ IMPROVEMENTS
- Make the testing framework a little more resilient: the temporary
directory for tests now includes a component named "scons" which can
be given to antivirus software to exclude.
- Performance tweak: the __setitem__ method of an Environment, used for
setting construction variables, now uses the string method isidentifier
to validate the name (updated from microbenchmark results).

PACKAGING
---------
Expand Down
37 changes: 17 additions & 20 deletions SCons/Environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@
to_String_for_subst,
uniquer_hashables,
)
from SCons.Util.envs import is_valid_construction_var
from SCons.Util.sctyping import ExecutorType

class _Null:
Expand Down Expand Up @@ -581,26 +580,20 @@ def __getitem__(self, key):
return self._dict[key]

def __setitem__(self, key, value):
# This is heavily used. This implementation is the best we have
# according to the timings in bench/env.__setitem__.py.
#
# The "key in self._special_set_keys" test here seems to perform
# pretty well for the number of keys we have. A hard-coded
# list worked a little better in Python 2.5, but that has the
# disadvantage of maybe getting out of sync if we ever add more
# variable names.
# So right now it seems like a good trade-off, but feel free to
# revisit this with bench/env.__setitem__.py as needed (and
# as newer versions of Python come out).
if key in self._special_set_keys:
self._special_set[key](self, key, value)
else:
# If we already have the entry, then it's obviously a valid
# key and we don't need to check. If we do check, using a
# global, pre-compiled regular expression directly is more
# efficient than calling another function or a method.
if key not in self._dict and not is_valid_construction_var(key):
raise UserError("Illegal construction variable `%s'" % key)
# Performance: since this is heavily used, try to avoid checking
# if the variable is valid unless necessary. bench/__setitem__.py
# times a bunch of different approaches. Based the most recent
# run, against Python 3.6-3.13(beta), the best we can do across
# different combinations of actions is to use a membership test
# to see if we already have the variable, if so it must already
# have been checked, so skip; if we do check, "isidentifier()"
# (new in Python 3 so wasn't in benchmark until recently)
# on the key is the best.
if key not in self._dict and not key.isidentifier():
raise UserError(f"Illegal construction variable {key!r}")
self._dict[key] = value

def get(self, key, default=None):
Expand Down Expand Up @@ -2579,8 +2572,12 @@ def __getitem__(self, key):
return self.__dict__['__subject'].__getitem__(key)

def __setitem__(self, key, value):
if not is_valid_construction_var(key):
raise UserError("Illegal construction variable `%s'" % key)
# This doesn't have the same performance equation as a "real"
# environment: in an override you're basically just writing
# new stuff; it's not a common case to be changing values already
# set in the override dict, so don't spend time checking for existance.
if not key.isidentifier():
raise UserError(f"Illegal construction variable {key!r}")
self.__dict__['overrides'][key] = value

def __delitem__(self, key):
Expand Down
5 changes: 4 additions & 1 deletion SCons/Util/envs.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,10 +330,13 @@ def AddMethod(obj, function: Callable, name: Optional[str] = None) -> None:
setattr(obj, name, method)


# This routine is used to validate that a construction var name can be used
# as a Python identifier, which we require. However, Python 3 introduced an
# isidentifier() string method so there's really not any need for it now.
_is_valid_var_re = re.compile(r'[_a-zA-Z]\w*$')

def is_valid_construction_var(varstr: str) -> bool:
"""Return True if *varstr* is a legitimate construction variable."""
"""Return True if *varstr* is a legitimate name of a construction variable."""
return bool(_is_valid_var_re.match(varstr))

# Local Variables:
Expand Down
2 changes: 1 addition & 1 deletion SCons/Variables/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def _do_add(
option.key = key
# TODO: normalize to not include key in aliases. Currently breaks tests.
option.aliases = [key,]
if not SCons.Util.is_valid_construction_var(option.key):
if not option.key.isidentifier():
raise SCons.Errors.UserError(f"Illegal Variables key {option.key!r}")
option.help = help
option.default = default
Expand Down
20 changes: 9 additions & 11 deletions bench/bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,10 @@
-h, --help Display this help and exit
-i ITER, --iterations ITER Run each code snippet ITER times
--time Use the time.time function
-r RUNS, --runs RUNS Average times for RUNS invocations of
-r RUNS, --runs RUNS Average times for RUNS invocations of
"""

# How many times each snippet of code will be (or should be) run by the
# How many times each snippet of code will be (or should be) run by the
# functions under test to gather the time (the "inner loop").

Iterations = 1000
Expand Down Expand Up @@ -191,19 +191,17 @@ def display(func, result_label, results):


# pprint(results_dict)
# breakpoint()

tests = [label for label, args, kw in Data]
columns = ['Python Version', 'Implementation', 'Test'] + tests
columns = ['Python Version', 'Implementation'] + tests
with open(results_filename, 'a') as r:
print("Python Version,%s" % ".".join(columns), file=r)
# print("Python Version,%s" % ".".join(columns))
print(",".join(columns), file=r)

for implementation in results_dict.keys():
for implementation in results_dict:
print(f'{py_ver_string},"{implementation}"', file=r, end='')
for test in tests:
print(f'{py_ver_string},"{implementation}","{test}",%8.3f' % results_dict[implementation][test], file=r)
# print(f'{py_ver_string},"{implementation}","{test}",%8.3f' % results_dict[implementation][test])


print(',%8.3f' % results_dict[implementation][test], file=r, end='')
print(file=r)

# Local Variables:
# tab-width:4
Expand Down
32 changes: 27 additions & 5 deletions bench/env.__setitem__.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,14 @@ def times(num=1000000, init='', title='Results:', **statements):
script_dir = os.path.split(filename)[0]
if script_dir:
script_dir = script_dir + '/'
sys.path = [os.path.abspath(script_dir + '../src/engine')] + sys.path
sys.path = [os.path.abspath(script_dir + '..')] + sys.path

import SCons.Errors
import SCons.Environment
import SCons.Util

is_valid_construction_var = SCons.Util.is_valid_construction_var
global_valid_var = re.compile(r'[_a-zA-Z]\w*$')
global_valid_var = SCons.Util.envs._is_valid_var_re

# The classes with different __setitem__() implementations that we're
# going to horse-race.
Expand Down Expand Up @@ -105,7 +105,7 @@ class Environment:
'SOURCES' : None,
}
_special_set_keys = list(_special_set.keys())
_valid_var = re.compile(r'[_a-zA-Z]\w*$')
_valid_var = global_valid_var
def __init__(self, **kw):
self._dict = kw

Expand Down Expand Up @@ -247,8 +247,8 @@ def __setitem__(self, key, value):
raise SCons.Errors.UserError("Illegal construction variable `%s'" % key)
self._dict[key] = value

class env_Best_has_key(Environment):
"""Best __setitem__(), with has_key"""
class env_Best_has_key_global_regex(Environment):
"""Best __setitem__(), with membership test and global regex"""
def __setitem__(self, key, value):
if key in self._special_set:
self._special_set[key](self, key, value)
Expand All @@ -258,6 +258,18 @@ def __setitem__(self, key, value):
raise SCons.Errors.UserError("Illegal construction variable `%s'" % key)
self._dict[key] = value

class env_Best_has_key_function(Environment):
"""Best __setitem__(), with membership_test and validator function"""
def __setitem__(self, key, value):
if key in self._special_set:
self._special_set[key](self, key, value)
else:
if key not in self._dict \
and not is_valid_construction_var(key):
raise SCons.Errors.UserError("Illegal construction variable `%s'" % key)
self._dict[key] = value


class env_Best_list(Environment):
"""Best __setitem__(), with a list"""
def __setitem__(self, key, value):
Expand All @@ -284,6 +296,16 @@ def __setitem__(self, key, value):
raise SCons.Errors.UserError("Illegal construction variable `%s'" % key)
self._dict[key] = value

class env_Best_isidentifier(Environment):
"""Best __setitem__(), with membership test and isidentifier"""
def __setitem__(self, key, value):
if key in self._special_set:
self._special_set[key](self, key, value)
else:
if key not in self._dict and not key.isidentifier():
raise SCons.Errors.UserError("Illegal construction variable `%s'" % key)
self._dict[key] = value

# We'll use the names of all the env_* classes we find later to build
# the dictionary of statements to be timed, and the import statement
# that the timer will use to get at these classes.
Expand Down
85 changes: 28 additions & 57 deletions bench/is_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
#
# Benchmarks for testing various possible implementations
# of the is_Dict(), is_List() and is_String() functions in
# src/engine/SCons/Util.py.
# SCons/Util.py.

import types
from collections import UserDict, UserList, UserString
from collections import UserDict, UserList, UserString, deque
from collections.abc import Iterable, MappingView, MutableMapping, MutableSequence

DictType = dict
ListType = list
Expand All @@ -28,19 +29,20 @@ def original_is_String(e):

# New candidates that explicitly check for whether the object is an
# InstanceType before calling isinstance() on the corresponding User*
# type.
# InstanceType was only for old-style classes, so absent in Python 3
# this this is no different than the previous
# type. Update: meaningless in Python 3, InstanceType was only for
# old-style classes, so these are just removed.
# XXX

def checkInstanceType_is_Dict(e):
return isinstance(e, (dict, UserDict))
# New candidates that try more generic names from collections:

def checkInstanceType_is_List(e):
return isinstance(e, (list, UserList))
def new_is_Dict(e):
return isinstance(e, MutableMapping)

def checkInstanceType_is_String(e):
return isinstance(e, (str, UserString))
def new_is_List(e):
return isinstance(e, MutableSequence)

def new_is_String(e):
return isinstance(e, (str, UserString))

# Improved candidates that cache the type(e) result in a variable
# before doing any checks.
Expand All @@ -51,7 +53,7 @@ def cache_type_e_is_Dict(e):

def cache_type_e_is_List(e):
t = type(e)
return t is list or isinstance(e, UserList)
return t is list or isinstance(e, (UserList, deque))

def cache_type_e_is_String(e):
t = type(e)
Expand All @@ -68,7 +70,7 @@ def global_cache_type_e_is_Dict(e):

def global_cache_type_e_is_List(e):
t = type(e)
return t is ListType or isinstance(e, UserList)
return t is ListType or isinstance(e, (UserList, deque))

def global_cache_type_e_is_String(e):
t = type(e)
Expand All @@ -77,30 +79,10 @@ def global_cache_type_e_is_String(e):

# Alternative that uses a myType() function to map the User* objects
# to their corresponding underlying types.

instanceTypeMap = {
UserDict : dict,
UserList : list,
UserString : str,
}

def myType(obj):
t = type(obj)
if t is types.InstanceType:
t = instanceTypeMap.get(obj.__class__, t)
return t

def myType_is_Dict(e):
return myType(e) is dict

def myType_is_List(e):
return myType(e) is list

def myType_is_String(e):
return myType(e) is str

# Again, since this used InstanceType, it's not useful for Python 3.


# These are the actual test entry points

def Func01(obj):
"""original_is_String"""
Expand All @@ -118,19 +100,19 @@ def Func03(obj):
original_is_Dict(obj)

def Func04(obj):
"""checkInstanceType_is_String"""
"""new_is_String"""
for i in IterationList:
checkInstanceType_is_String(obj)
new_is_String(obj)

def Func05(obj):
"""checkInstanceType_is_List"""
"""new_is_List"""
for i in IterationList:
checkInstanceType_is_List(obj)
new_is_List(obj)

def Func06(obj):
"""checkInstanceType_is_Dict"""
"""new_is_Dict"""
for i in IterationList:
checkInstanceType_is_Dict(obj)
new_is_Dict(obj)

def Func07(obj):
"""cache_type_e_is_String"""
Expand Down Expand Up @@ -162,22 +144,6 @@ def Func12(obj):
for i in IterationList:
global_cache_type_e_is_Dict(obj)

#def Func13(obj):
# """myType_is_String"""
# for i in IterationList:
# myType_is_String(obj)
#
#def Func14(obj):
# """myType_is_List"""
# for i in IterationList:
# myType_is_List(obj)
#
#def Func15(obj):
# """myType_is_Dict"""
# for i in IterationList:
# myType_is_Dict(obj)



# Data to pass to the functions on each run. Each entry is a
# three-element tuple:
Expand Down Expand Up @@ -217,6 +183,11 @@ class A:
(UserList([]),),
{},
),
(
"deque",
(deque([]),),
{},
),
(
"UserDict",
(UserDict({}),),
Expand Down
Loading

0 comments on commit dc42f22

Please sign in to comment.