Skip to content

Commit

Permalink
Merge pull request #77 from NREL/bnb/dev
Browse files Browse the repository at this point in the history
Bnb/dev
  • Loading branch information
bnb32 authored Oct 28, 2024
2 parents 76a6d86 + e815c42 commit 8b8f859
Show file tree
Hide file tree
Showing 16 changed files with 493 additions and 100 deletions.
6 changes: 6 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ The NSRDB `Data Model
aggregation framework that sources, processes, and prepares data for input to
All-Sky.

The MLClouds Model
==================
The `MLClouds Model <https://github.com/NREL/mlclouds.git>`_ is used to predict
missing cloud properties (a.k.a. Gap Fill). The NSRDB interface with MLClouds
can be found `here <https://github.com/NREL/nsrdb/tree/master/nsrdb/gap_fill>`_.

Installation
============

Expand Down
83 changes: 61 additions & 22 deletions nsrdb/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,11 +125,16 @@ def main(ctx, config, verbose):
To do a standard CONUS / Full Disc run use the following commands::
$ config='{"year": <year>, "out_dir": <out_dir>}'
$ python -m nsrdb.cli create-configs -c config
$ CONFIG='{"year": <year>, "out_dir": <out_dir>}'
$ python -m nsrdb.cli create-configs -c ${CONFIG}
$ cd <out_dir>
$ bash run.sh (run this until all main steps are complete)
$ cd post_proc
$ bash run.sh (run this until all post-proc steps are complete)
See the help pages of the module CLIs for more details on the config files
Expand Down Expand Up @@ -248,16 +253,17 @@ def pipeline(ctx, config, cancel, monitor, background, verbose):
@click.option(
'--run_type',
'-r',
default='full',
default='surfrad',
type=str,
help="""Run type to create configs for. Can be "full" (generates all config
and pipline files for the given year, including all domain main runs,
blending, aggregation, and collection), or "main" (for standard run without
post-processing, with data-model, ml-cloud-fill, all-sky, and
collect-data-model), "aggregate" (for aggregating post-2018 data to
pre-2018 resolution), "blend" (for blending east and west domains into a
single domain), or "post" (for all blending / aggregation / collection for
a given year)""",
help="""Run type to create configs for. Can be "surfrad" (just writes a
single template config with any provided kwargs replaced, with a surfrad
meta file), "full" (generates all config and pipline files for the given
year, including all domain main runs, blending, aggregation, and
collection), or "main" (for standard run without post-processing, with
data-model, ml-cloud-fill, all-sky, and collect-data-model), "aggregate"
(for aggregating post-2018 data to pre-2018 resolution), "blend" (for
blending east and west domains into a single domain), or "post" (for all
blending / aggregation / collection for a given year)""",
)
@click.option(
'--all_domains',
Expand All @@ -277,27 +283,30 @@ def pipeline(ctx, config, cancel, monitor, background, verbose):
)
@click.pass_context
def create_configs(
ctx, config, run_type='full', all_domains=False, collect=False
ctx, config, run_type='surfrad', all_domains=False, collect=False
):
"""Create config files for standard NSRDB runs using config templates.
Examples
--------
$ python -m nsrdb.cli create-configs -c '{"year": 2020, "out_dir": "./"}'
To generate all full_disc / conus run directories for east /
west regions, each with main routine config files contained run the
following::
$ CONFIG='{"year": 2020, "out_dir": "./"}'
The above will generate all full_disc / conus run directories for east /
west regions, each with main routine config files contained. Additionally,
conus / full_disc blend configs, aggregation config, collection config, and
a post processing pipeline config with all these steps will be written to a
"post_proc" directory so that post-processing can be run simply with::
$ python -m nsrdb.cli create-configs --run_type full -c ${CONFIG}
Additionally, conus / full_disc blend configs, aggregation config,
collection config, and a post processing pipeline config with all these
steps will be written to a "post_proc" directory so that post-processing
can be run simply with::
$ python -m nsrdb.cli pipeline -c config_pipeline_post.json
"""

ctx.ensure_object(dict)
func_name = f'collect_{run_type}' if collect else run_type
func_name = 'main_all' if run_type == 'main' and all_domains else func_name
valid_types = ['full', 'main', 'aggregate', 'blend', 'post']
valid_types = ['full', 'main', 'aggregate', 'blend', 'post', 'surfrad']
msg = (
f'Received unknown "run_type" {run_type}. Accepted values are '
f'{valid_types}'
Expand Down Expand Up @@ -436,6 +445,7 @@ def ml_cloud_fill(ctx, config, verbose=False, pipeline_step=None):
"col_chunk": 10000,
"fill_all": false,
"max_workers": 4,
"model_path": ...
}
}
Expand Down Expand Up @@ -659,7 +669,7 @@ def collect_data_model(ctx, config, verbose=False, pipeline_step=None):
'-c',
type=CONFIG_TYPE,
required=True,
help='Path to config file or dict with kwargs for NSRDB.all_sky()',
help='Path to config file or dict with kwargs for NSRDB.collect_final()',
)
@click.option(
'-v',
Expand Down Expand Up @@ -692,6 +702,34 @@ def collect_final(ctx, config, verbose=False, pipeline_step=None):
)


@main.command()
@click.option(
    '--config',
    '-c',
    type=CONFIG_TYPE,
    required=True,
    help='Path to config file or dict with kwargs for NSRDB.collect_daily()',
)
@click.option(
    '-v',
    '--verbose',
    is_flag=True,
    help='Flag to turn on debug logging. Default is False.',
)
@click.pass_context
def collect_daily(ctx, config, verbose=False, pipeline_step=None):
    """Collect daily files into a final file."""
    # NOTE: the docstring above is rendered by click as the command help
    # text, so it is intentionally kept as a single short sentence.

    # Gather the arguments once so the hand-off to the shared CLI kickoff
    # routine is a single call. ``Collector.collect_daily`` is the function
    # registered for the COLLECT_DAILY module.
    kickoff_kwargs = {
        'ctx': ctx,
        'module_name': ModuleName.COLLECT_DAILY,
        'func': Collector.collect_daily,
        'config': config,
        'verbose': verbose,
        'pipeline_step': pipeline_step,
    }
    BaseCLI.kickoff_single(**kickoff_kwargs)


@main.command()
@click.option(
'--config',
Expand Down Expand Up @@ -1045,6 +1083,7 @@ def batch(
Pipeline.COMMANDS[ModuleName.AGGREGATE] = aggregate
Pipeline.COMMANDS[ModuleName.COLLECT_DATA_MODEL] = collect_data_model
Pipeline.COMMANDS[ModuleName.COLLECT_FINAL] = collect_final
Pipeline.COMMANDS[ModuleName.COLLECT_DAILY] = collect_daily
Pipeline.COMMANDS[ModuleName.TMY] = tmy
Pipeline.COMMANDS[ModuleName.COLLECT_BLEND] = collect_blend
Pipeline.COMMANDS[ModuleName.COLLECT_AGGREGATE] = collect_aggregate
Expand Down
86 changes: 48 additions & 38 deletions nsrdb/config/create_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,12 @@
'meta_dir': DEFAULT_META_DIR,
}

MAIN_KWARGS = {
**BASE_KWARGS,
'extent': 'full',
'satellite': 'east',
MAIN_KWARGS = {**BASE_KWARGS, 'extent': 'full', 'satellite': 'east'}

# Default kwargs for surfrad runs: everything in MAIN_KWARGS plus the
# 'freq' and 'spatial' entries set below.
SURFRAD_KWARGS = dict(MAIN_KWARGS, freq='15min', spatial='4km')

BLEND_KWARGS = {
Expand All @@ -47,10 +49,7 @@
'main_dir': '../',
}

COLLECT_BLEND_KWARGS = {
**BASE_KWARGS,
'extent': 'full',
}
COLLECT_BLEND_KWARGS = {**BASE_KWARGS, 'extent': 'full'}

AGG_KWARGS = {
**BASE_KWARGS,
Expand All @@ -62,7 +61,7 @@
'conus_freq': '5min',
'final_freq': '30min',
'n_chunks': 32,
'source_priority': ['conus', 'full_disk'],
'source_priority': ['conus', 'full_disc'],
}

COLLECT_AGG_KWARGS = {
Expand Down Expand Up @@ -102,14 +101,16 @@ class CreateConfigs:
standard CONUS / Full Disc runs."""

MAIN_RUN_NAME = '{basename}_{satellite}_{extent}_{year}_{spatial}_{freq}'
SURFRAD_RUN_NAME = '{basename}_{year}_surfrad'
BLEND_RUN_NAME = '{basename}_{extent}_{year}_blend'
AGG_RUN_NAME = '{basename}_{year}_aggregate'
COLLECT_AGG_RUN_NAME = '{basename}_{year}_collect_aggregate'
COLLECT_BLEND_RUN_NAME = '{basename}_{extent}_{year}_collect_blend'

@classmethod
def _init_kwargs(cls, kwargs, default_kwargs):
def init_kwargs(cls, kwargs=None, default_kwargs=None):
"""Initialize config with default kwargs."""
default_kwargs = default_kwargs or {}
msg = f'kwargs must have a "year" key. Received {kwargs}.'
assert 'year' in kwargs, msg
config = copy.deepcopy(default_kwargs)
Expand Down Expand Up @@ -211,6 +212,7 @@ def _get_run_name(cls, config, run_type='main'):
{k: v for k, v in BASE_KWARGS.items() if k not in config}
)
pattern_dict = {
'surfrad': cls.SURFRAD_RUN_NAME,
'main': cls.MAIN_RUN_NAME,
'blend': cls.BLEND_RUN_NAME,
'aggregate': cls.AGG_RUN_NAME,
Expand All @@ -227,7 +229,7 @@ def _get_run_name(cls, config, run_type='main'):
return pattern.format(**run_config)

@classmethod
def _update_run_templates(cls, config):
def _update_run_templates(cls, config, run_type='main'):
"""Replace format keys and dictionary keys in config templates with
user input values."""

Expand All @@ -236,6 +238,17 @@ def _update_run_templates(cls, config):
f'{pprint.pformat(config, indent=2)}'
)

config['doy_range'] = config.get(
'doy_range',
([1, 367] if calendar.isleap(config['year']) else [1, 366]),
)
config['start_doy'], config['end_doy'] = (
config['doy_range'][0],
config['doy_range'][1],
)
config['run_name'] = cls._get_run_name(config, run_type=run_type)
config['out_dir'] = os.path.join(config['out_dir'], config['run_name'])

template = (
PRE2018_CONFIG_TEMPLATE
if int(config['year']) < 2018
Expand Down Expand Up @@ -263,6 +276,22 @@ def _update_run_templates(cls, config):
config_dict, cls._get_config_file(config, 'pipeline')
)

run_file = os.path.join(config['out_dir'], 'run.sh')
with open(run_file, 'w') as f:
f.write('python -m nsrdb.cli pipeline -c config_pipeline.json')

logger.info(f'Saved run script: {run_file}.')

@classmethod
def surfrad(cls, kwargs):
    """Write the config templates for a surfrad run.

    The provided kwargs are combined with ``SURFRAD_KWARGS`` through
    ``init_kwargs``, the extent tag and the surfrad meta file path are
    added, and the resulting config is handed to
    ``_update_run_templates`` with ``run_type='surfrad'``.

    Parameters
    ----------
    kwargs : dict
        Dictionary of user-specified parameters used to fill in the config
        templates.
    """
    config = cls.init_kwargs(kwargs, SURFRAD_KWARGS)

    # Resolve the derived entries first, then add them to the config in a
    # single step.
    extent_tag = EXTENT_MAP['extent_tag'][config['extent']]
    meta_file = os.path.join(config['meta_dir'], 'surfrad_meta.csv')
    config.update(extent_tag=extent_tag, meta_file=meta_file)

    cls._update_run_templates(config, run_type='surfrad')

@classmethod
def main(cls, kwargs):
"""Modify config files with specified parameters
Expand All @@ -273,7 +302,7 @@ def main(cls, kwargs):
Dictionary of parameters including year, basename, satellite,
extent, freq, spatial, meta_file, doy_range
"""
config = cls._init_kwargs(kwargs, MAIN_KWARGS)
config = cls.init_kwargs(kwargs, MAIN_KWARGS)
msg = (
'"extent" key not provided. Provide "extent" so correct input '
'data can be selected'
Expand All @@ -284,27 +313,8 @@ def main(cls, kwargs):
config['meta_file'] = cls._get_meta(config)
config['spatial'], config['freq'] = cls._get_res(config)

config['doy_range'] = config.get(
'doy_range',
([1, 367] if calendar.isleap(config['year']) else [1, 366]),
)

config['start_doy'], config['end_doy'] = (
config['doy_range'][0],
config['doy_range'][1],
)

config['run_name'] = cls._get_run_name(config)
config['out_dir'] = os.path.join(config['out_dir'], config['run_name'])

cls._update_run_templates(config)

run_file = os.path.join(config['out_dir'], 'run.sh')
with open(run_file, 'w') as f:
f.write('python -m nsrdb.cli pipeline -c config_pipeline.json')

logger.info(f'Saved run script: {run_file}.')

@classmethod
def main_all(cls, kwargs):
"""Modify config files for all domains with specified parameters.
Expand Down Expand Up @@ -463,7 +473,7 @@ def _get_agg_entry(cls, config, extent):

@classmethod
def _aggregate(cls, kwargs):
"""Get config for conus and full disk high-resolution to low-resolution
"""Get config for conus and full disc high-resolution to low-resolution
aggregation. This is then used as the input to `nsrdb.cli.aggregate`
Parameters
Expand All @@ -472,14 +482,14 @@ def _aggregate(cls, kwargs):
Dictionary with keys specifying the case for which to aggregate
files
"""
config = cls._init_kwargs(kwargs, AGG_KWARGS)
config = cls.init_kwargs(kwargs, AGG_KWARGS)

if config['year'] == 2018:
data = NSRDB_2018

else:
data = {
'full_disk': cls._get_agg_entry(config, extent='full'),
'full_disc': cls._get_agg_entry(config, extent='full'),
'conus': cls._get_agg_entry(config, extent='conus'),
'final': cls._get_agg_entry(config, extent='final'),
}
Expand All @@ -490,7 +500,7 @@ def _aggregate(cls, kwargs):

@classmethod
def aggregate(cls, kwargs):
"""Get config for conus and full disk high-resolution to low-resolution
"""Get config for conus and full disc high-resolution to low-resolution
aggregation. This is then used as the input to `nsrdb.cli.aggregate`
Parameters
Expand Down Expand Up @@ -521,7 +531,7 @@ def _blend(cls, kwargs):
Dictionary with keys specifying the case for which to blend data
files
"""
config = cls._init_kwargs(kwargs, BLEND_KWARGS)
config = cls.init_kwargs(kwargs, BLEND_KWARGS)
config['map_col'] = EXTENT_MAP['map_col'][config['extent']]
config['lon_seam'] = EXTENT_MAP['lon_seam'][config['extent']]
config['meta_file'] = cls._get_meta(config, run_type='blend')
Expand Down Expand Up @@ -599,7 +609,7 @@ def _collect_blend(cls, kwargs):
Dictionary with keys specifying the case for blend collection
"""

config = cls._init_kwargs(kwargs, COLLECT_BLEND_KWARGS)
config = cls.init_kwargs(kwargs, COLLECT_BLEND_KWARGS)
config['meta_final'] = cls._get_meta(config, run_type='collect-blend')
config['collect_dir'] = cls._get_run_name(config, run_type='blend')
config['collect_tag'] = config['collect_dir'].replace('_blend', '')
Expand Down Expand Up @@ -650,7 +660,7 @@ def _collect_aggregate(cls, kwargs):
kwargs : dict
Dictionary with keys specifying the case for aggregation collection
"""
config = cls._init_kwargs(kwargs, COLLECT_AGG_KWARGS)
config = cls.init_kwargs(kwargs, COLLECT_AGG_KWARGS)

config['meta_final'] = cls._get_meta(
config, run_type='collect-aggregate'
Expand Down
4 changes: 2 additions & 2 deletions nsrdb/config/templates/config_nsrdb_post2017.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"max_workers": 10,
"n_chunks": 1,
"memory": 178,
"n_writes": 50,
"n_writes": 5,
"walltime": 48
},
"daily-all-sky": {},
Expand Down Expand Up @@ -80,4 +80,4 @@
"fill_all": false,
"max_workers": 4
}
}
}
Loading

0 comments on commit 8b8f859

Please sign in to comment.