Skip to content

Commit

Permalink
Merge pull request ESMCI#4667 from jedwards4b/fix/rest_n_in_tests
Browse files Browse the repository at this point in the history
Fix/rest n in tests
  • Loading branch information
jedwards4b authored Aug 21, 2024
2 parents 5a24822 + b88c9e1 commit af3eab5
Show file tree
Hide file tree
Showing 8 changed files with 81 additions and 33 deletions.
8 changes: 4 additions & 4 deletions CIME/Servers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# pylint: disable=import-error
from distutils.spawn import find_executable
from shutil import which

has_gftp = find_executable("globus-url-copy")
has_svn = find_executable("svn")
has_wget = find_executable("wget")
has_gftp = which("globus-url-copy")
has_svn = which("svn")
has_wget = which("wget")
has_ftp = True
try:
from ftplib import FTP
Expand Down
17 changes: 3 additions & 14 deletions CIME/SystemTests/ers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,29 +16,18 @@ def __init__(self, case, **kwargs):
SystemTestsCommon.__init__(self, case, **kwargs)

def _ers_first_phase(self):
stop_n = self._case.get_value("STOP_N")
stop_option = self._case.get_value("STOP_OPTION")
rest_n = self._case.get_value("REST_N")
expect(stop_n > 0, "Bad STOP_N: {:d}".format(stop_n))

expect(stop_n > 2, "ERROR: stop_n value {:d} too short".format(stop_n))
logger.info(
"doing an {0} {1} initial test with restart file at {2} {1}".format(
str(stop_n), stop_option, str(rest_n)
)
)
self._rest_n = self._set_restart_interval()
self.run_indv()

def _ers_second_phase(self):
stop_n = self._case.get_value("STOP_N")
stop_option = self._case.get_value("STOP_OPTION")

rest_n = int(stop_n / 2 + 1)
stop_new = stop_n - rest_n
stop_new = stop_n - self._rest_n
expect(
stop_new > 0,
"ERROR: stop_n value {:d} too short {:d} {:d}".format(
stop_new, stop_n, rest_n
stop_new, stop_n, self._rest_n
),
)
rundir = self._case.get_value("RUNDIR")
Expand Down
2 changes: 1 addition & 1 deletion CIME/SystemTests/restart_tests.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""
Abstract class for restart tests
"""

from CIME.SystemTests.system_tests_compare_two import SystemTestsCompareTwo
Expand Down Expand Up @@ -34,6 +33,7 @@ def __init__(
def _case_one_setup(self):
    """
    Prepare the first case of a restart test.

    Reads STOP_N from case one, requires it to be at least 3, and then
    delegates to _set_restart_interval() to configure the restart interval.
    """
    run_length = self._case1.get_value("STOP_N")
    too_short_msg = "STOP_N must be at least 3, STOP_N = {}".format(run_length)
    expect(run_length >= 3, too_short_msg)
    self._set_restart_interval()

def _case_two_setup(self):
rest_n = self._case1.get_value("REST_N")
Expand Down
69 changes: 69 additions & 0 deletions CIME/SystemTests/system_tests_common.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Base class for CIME system tests
"""

from CIME.XML.standard_module_setup import *
from CIME.XML.env_run import EnvRun
from CIME.XML.env_test import EnvTest
Expand Down Expand Up @@ -117,6 +118,74 @@ def __init__(
self._dry_run = False
self._user_separate_builds = False
self._expected_num_cmp = None
self._rest_n = None

def _set_restart_interval(self):
    """
    Choose a mid-run restart interval and record it in the case.

    REST_OPTION is set to the case's STOP_OPTION, and REST_N is set to a
    value (in STOP_OPTION units) that lies just past the middle of the run
    and falls on a boundary of the longest coupling interval used by any
    active (non-stub, non-coupler) component.

    Returns:
        The REST_N value that was written to the case.

    Raises (via expect):
        - if no active component reports a non-zero <COMP>_NCPL,
        - if NCPL_BASE_PERIOD or STOP_OPTION is not one of the supported
          values listed in the tables below,
        - if the run spans fewer than three coupling intervals, or the
          computed REST_N would be zero.
    """
    stop_n = self._case.get_value("STOP_N")
    stop_option = self._case.get_value("STOP_OPTION")
    self._case.set_value("REST_OPTION", stop_option)

    # We need to make sure the run is long enough and to set REST_N to a
    # value that makes sense for all components, so collect the coupling
    # frequency of every component that actually takes part in the run.
    ncpls = []
    for comp in self._case.get_values("COMP_CLASSES"):
        if comp == "CPL":
            continue
        compname = self._case.get_value("COMP_{}".format(comp))

        # ignore stub components in this test.
        if compname == "s{}".format(comp.lower()):
            continue

        ncpl = self._case.get_value("{}_NCPL".format(comp))
        if ncpl:
            ncpls.append(ncpl)

    # Without at least one coupling frequency the divisions below would
    # fail with an unhelpful ZeroDivisionError.
    expect(
        len(ncpls) > 0,
        "No active component defines a non-zero NCPL, "
        "cannot determine a restart interval",
    )

    seconds_per_base_period = {
        "hour": 3600,
        "day": 86400,
        "year": 31536000,
        "decade": 315360000,
    }
    ncpl_base_period = self._case.get_value("NCPL_BASE_PERIOD")
    expect(
        ncpl_base_period in seconds_per_base_period,
        f"NCPL_BASE_PERIOD {ncpl_base_period} not available for this test",
    )
    base_period_secs = seconds_per_base_period[ncpl_base_period]

    # The smallest NCPL gives the longest coupling interval; the largest
    # NCPL gives the shortest one, which is what "nsteps" counts.
    coupling_secs = base_period_secs / min(ncpls)
    timestep = base_period_secs / max(ncpls)

    # Length in seconds of one STOP_OPTION unit.
    seconds_per_stop_unit = {
        "nsteps": timestep,
        "nminutes": 60,
        "nhours": 3600,
        "ndays": 86400,
        "nyears": 31536000,
    }
    expect(
        stop_option in seconds_per_stop_unit,
        f"stop_option {stop_option} not available for this test",
    )
    factor = seconds_per_stop_unit[stop_option]

    # Convert stop_n to units of coupling intervals, pick the interval just
    # past the midpoint, and convert that back to STOP_OPTION units.
    stop_intervals = int(stop_n * factor // coupling_secs)
    rest_n = int((stop_intervals // 2 + 1) * coupling_secs / factor)

    # The checks are made on the number of coupling intervals, but the
    # messages report STOP_N as the user configured it.
    expect(stop_intervals > 0, "Bad STOP_N: {:d}".format(stop_n))

    expect(
        stop_intervals > 2,
        "ERROR: stop_n value {:d} too short".format(stop_n),
    )

    # Truncation back to STOP_OPTION units can yield 0 when the midpoint
    # falls inside the first unit; REST_N = 0 would not produce a restart.
    expect(
        rest_n > 0,
        f"STOP_N {stop_n} {stop_option} is too short to place a restart "
        f"inside the run (computed REST_N = {rest_n})",
    )

    logger.info(
        "doing an {0} {1} initial test with restart file at {2} {1}".format(
            str(stop_n), stop_option, str(rest_n)
        )
    )
    self._case.set_value("REST_N", rest_n)
    return rest_n

def _init_environment(self, caseroot):
"""
Expand Down
2 changes: 1 addition & 1 deletion CIME/case/case.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,10 +270,10 @@ def initialize_derived_attributes(self):
self.tasks_per_node = env_mach_pes.get_tasks_per_node(
self.total_tasks, self.thread_count
)

self.num_nodes, self.spare_nodes = env_mach_pes.get_total_nodes(
self.total_tasks, self.thread_count
)

self.num_nodes += self.spare_nodes

logger.debug(
Expand Down
2 changes: 2 additions & 0 deletions CIME/case/case_submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,8 @@ def submit(
caseroot = self.get_value("CASEROOT")
if self.get_value("TEST"):
casebaseid = self.get_value("CASEBASEID")
if os.path.exists(os.path.join(caseroot, "env_test.xml")):
self.set_initial_test_values()
# This should take care of the race condition where the submitted job
# begins immediately and tries to set RUN phase. We proactively assume
# a passed SUBMIT phase. If this state is already PASS, don't set it again
Expand Down
11 changes: 1 addition & 10 deletions CIME/code_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

logger = logging.getLogger(__name__)


###############################################################################
def _run_pylint(all_files, interactive):
###############################################################################
Expand Down Expand Up @@ -80,16 +81,6 @@ def _run_pylint(all_files, interactive):

return result

# if stat != 0:
# if interactive:
# logger.info("File %s has pylint problems, please fix\n Use command: %s" % (on_file, cmd))
# logger.info(out + "\n" + err)
# return (on_file, out + "\n" + err)
# else:
# if interactive:
# logger.info("File %s has no pylint problems" % on_file)
# return (on_file, "")


###############################################################################
def _matches(file_path, file_ends):
Expand Down
3 changes: 0 additions & 3 deletions CIME/data/config/config_tests.xml
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,6 @@ NODEFAIL Tests restart upon detected node failure. Generates fake failu
<INFO_DBUG>1</INFO_DBUG>
<STOP_OPTION>ndays</STOP_OPTION>
<STOP_N>7</STOP_N>
<REST_N>$STOP_N / 2 + 1</REST_N>
<REST_OPTION>$STOP_OPTION</REST_OPTION>
<HIST_N>$STOP_N</HIST_N>
<HIST_OPTION>$STOP_OPTION</HIST_OPTION>
Expand All @@ -333,7 +332,6 @@ NODEFAIL Tests restart upon detected node failure. Generates fake failu
<INFO_DBUG>1</INFO_DBUG>
<STOP_OPTION>ndays</STOP_OPTION>
<STOP_N>7</STOP_N>
<REST_N>$STOP_N / 2 + 1</REST_N>
<REST_OPTION>$STOP_OPTION</REST_OPTION>
<HIST_N>$STOP_N</HIST_N>
<HIST_OPTION>$STOP_OPTION</HIST_OPTION>
Expand Down Expand Up @@ -540,7 +538,6 @@ NODEFAIL Tests restart upon detected node failure. Generates fake failu
<STOP_OPTION>nsteps</STOP_OPTION>
<OCN_NCPL>$ATM_NCPL</OCN_NCPL>
<STOP_N>11</STOP_N>
<REST_N>$STOP_N / 2 + 1</REST_N>
<REST_OPTION>$STOP_OPTION</REST_OPTION>
<HIST_N>$STOP_N</HIST_N>
<HIST_OPTION>$STOP_OPTION</HIST_OPTION>
Expand Down

0 comments on commit af3eab5

Please sign in to comment.