diff --git a/catalogbuilder/intakebuilder/configparser.py b/catalogbuilder/intakebuilder/configparser.py index c8cbe59..a7db0f7 100644 --- a/catalogbuilder/intakebuilder/configparser.py +++ b/catalogbuilder/intakebuilder/configparser.py @@ -7,14 +7,17 @@ def __init__(self, config): configfile = yaml.safe_load(file) try: self.input_path = configfile['input_path'] - #print("input_path :",self.input_path) except: - raise KeyError("input_path does not exist in config") + self.input_path = None + print("input_path does not exist in config") + pass try: self.output_path = configfile['output_path'] #print("output_path :",self.output_path) except: - raise KeyError("output_path does not exist in config") + self.output_path = None + print("output_path does not exist in config") + pass try: self.headerlist = configfile['headerlist'] print("headerlist :",self.headerlist) diff --git a/catalogbuilder/scripts/configs/config-example2.yml b/catalogbuilder/scripts/configs/config-example2.yml new file mode 100644 index 0000000..11831ee --- /dev/null +++ b/catalogbuilder/scripts/configs/config-example2.yml @@ -0,0 +1,33 @@ +#what kind of directory structure to expect? +#For a directory structure like /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp +# the output_path_template is set as follows. +#We have NA in those values that do not match up with any of the expected headerlist (CSV columns), otherwise we +#simply specify the associated header name in the appropriate place. E.g. The third directory in the PP path example +#above is the model (source_id), so the third list value in output_path_template is set to 'source_id'. We make sure +#this is a valid value in headerlist as well. +#The fourth directory is am5f3b1r0 which does not map to an existing header value. So we simply NA in output_path_template +#for the fourth value. + +#catalog headers +#The headerlist is expected column names in your catalog/csv file. 
This is usually determined by the users in conjunction +with the ESM collection specification standards and the appropriate workflows. + +headerlist: ["activity_id", "institution_id", "source_id", "experiment_id", + "frequency", "realm", "table_id", + "member_id", "grid_label", "variable_id", + "time_range", "chunk_freq","platform","dimensions","cell_methods","standard_name","path"] + +#what kind of directory structure to expect? +#For a directory structure like /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp +# the output_path_template is set as follows. +#We have NA in those values that do not match up with any of the expected headerlist (CSV columns), otherwise we +#simply specify the associated header name in the appropriate place. E.g. The third directory in the PP path example +#above is the model (source_id), so the third list value in output_path_template is set to 'source_id'. We make sure +#this is a valid value in headerlist as well. +#The fourth directory is am5f3b1r0 which does not map to an existing header value. So we simply use NA in output_path_template +#for the fourth value. 
+ +output_path_template: ['NA','NA','source_id','NA','experiment_id','platform','custom_pp','realm','cell_methods','frequency','chunk_freq'] + +output_file_template: ['realm','time_range','variable_id'] + diff --git a/catalogbuilder/scripts/gen_intake_gfdl.py b/catalogbuilder/scripts/gen_intake_gfdl.py index 0ee71f6..5f4f239 100755 --- a/catalogbuilder/scripts/gen_intake_gfdl.py +++ b/catalogbuilder/scripts/gen_intake_gfdl.py @@ -35,8 +35,6 @@ def create_catalog(input_path=None, output_path=None, config=None, filter_realm= # TODO error catching if (config is not None): configyaml = configparser.Config(config) - if configyaml.input_path is None or not configyaml.input_path : - sys.exit("Can't find paths, is yaml configured?") if(input_path is None): input_path = configyaml.input_path if(output_path is None): diff --git a/catalogbuilder/scripts/gen_intake_gfdl_runner_config.py b/catalogbuilder/scripts/gen_intake_gfdl_runner_config.py index de0e9e8..81603f5 100755 --- a/catalogbuilder/scripts/gen_intake_gfdl_runner_config.py +++ b/catalogbuilder/scripts/gen_intake_gfdl_runner_config.py @@ -1,13 +1,18 @@ -#!/usr/bin/env python - +import catalogbuilder from catalogbuilder.scripts import gen_intake_gfdl import sys, os #This is an example call to run catalog builder using a yaml config file. 
package_dir = os.path.dirname(os.path.abspath(__file__)) configyaml = os.path.join(package_dir, 'configs/config-example.yml') +input_path = "archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp" +output_path = "sample-mdtf-catalog" -def create_catalog_from_config(config=configyaml): - csv, json = gen_intake_gfdl.create_catalog(config=configyaml) +def create_catalog_from_config(input_path=input_path,output_path=output_path,configyaml=configyaml): + csv, json = gen_intake_gfdl.create_catalog(input_path=input_path,output_path=output_path,config=configyaml) return(csv,json) +if __name__ == '__main__': + create_catalog_from_config(input_path,output_path,configyaml) + + diff --git a/gen_intake_gfdl_runner_config.py b/gen_intake_gfdl_runner_config.py new file mode 100755 index 0000000..7bda58d --- /dev/null +++ b/gen_intake_gfdl_runner_config.py @@ -0,0 +1,18 @@ +import catalogbuilder +from catalogbuilder.scripts import gen_intake_gfdl +import sys, os + +#This is an example call to run catalog builder using a yaml config file. + +def create_catalog_from_config(input_path,output_path,configyaml): + csv, json = gen_intake_gfdl.create_catalog(input_path=input_path,output_path=output_path,config=configyaml) + return(csv,json) + +if __name__ == '__main__': + package_dir = os.path.dirname(os.path.abspath(__file__)) + configyaml = os.path.join(package_dir, 'catalogbuilder/scripts/configs/config-example2.yml') + input_path = "/archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp" + output_path = "sample-test" + create_catalog_from_config(input_path,output_path,configyaml) + +