From 7f07c128a6dec3dd2eb996de9476951b7587cbe1 Mon Sep 17 00:00:00 2001 From: aradhakrishnanGFDL Date: Wed, 25 Sep 2024 15:00:03 -0400 Subject: [PATCH 01/11] test --- gen_intake_gfdl_runner_config.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100755 gen_intake_gfdl_runner_config.py diff --git a/gen_intake_gfdl_runner_config.py b/gen_intake_gfdl_runner_config.py new file mode 100755 index 0000000..7bda58d --- /dev/null +++ b/gen_intake_gfdl_runner_config.py @@ -0,0 +1,18 @@ +import catalogbuilder +from catalogbuilder.scripts import gen_intake_gfdl +import sys, os + +#This is an example call to run catalog builder using a yaml config file. + +def create_catalog_from_config(input_path,output_path,configyaml): + csv, json = gen_intake_gfdl.create_catalog(input_path=input_path,output_path=output_path,config=configyaml) + return(csv,json) + +if __name__ == '__main__': + package_dir = os.path.dirname(os.path.abspath(__file__)) + configyaml = os.path.join(package_dir, 'catalogbuilder/scripts/configs/config-example2.yml') + input_path = "/archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp" + output_path = "sample-test" + create_catalog_from_config(input_path,output_path,configyaml) + + From 46d15171d201fab9c8a3ac7be6f86bda1b46eecf Mon Sep 17 00:00:00 2001 From: aradhakrishnanGFDL Date: Wed, 25 Sep 2024 15:00:21 -0400 Subject: [PATCH 02/11] eg --- .../scripts/configs/config-example2.yml | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 catalogbuilder/scripts/configs/config-example2.yml diff --git a/catalogbuilder/scripts/configs/config-example2.yml b/catalogbuilder/scripts/configs/config-example2.yml new file mode 100644 index 0000000..11831ee --- /dev/null +++ b/catalogbuilder/scripts/configs/config-example2.yml @@ -0,0 +1,33 @@ +#what kind of directory structure to expect? 
+#For a directory structure like /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp +# the output_path_template is set as follows. +#We have NA in those values that do not match up with any of the expected headerlist (CSV columns), otherwise we +#simply specify the associated header name in the appropriate place. E.g. The third directory in the PP path example +#above is the model (source_id), so the third list value in output_path_template is set to 'source_id'. We make sure +#this is a valid value in headerlist as well. +#The fourth directory is am5f3b1r0 which does not map to an existing header value. So we simply put NA in output_path_template +#for the fourth value. + +#catalog headers +#The headerlist is the list of expected column names in your catalog/csv file. This is usually determined by the users in conjunction +#with the ESM collection specification standards and the appropriate workflows. + +headerlist: ["activity_id", "institution_id", "source_id", "experiment_id", + "frequency", "realm", "table_id", + "member_id", "grid_label", "variable_id", + "time_range", "chunk_freq","platform","dimensions","cell_methods","standard_name","path"] + +#what kind of directory structure to expect? +#For a directory structure like /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp +# the output_path_template is set as follows. +#We have NA in those values that do not match up with any of the expected headerlist (CSV columns), otherwise we +#simply specify the associated header name in the appropriate place. E.g. The third directory in the PP path example +#above is the model (source_id), so the third list value in output_path_template is set to 'source_id'. We make sure +#this is a valid value in headerlist as well. +#The fourth directory is am5f3b1r0 which does not map to an existing header value. So we simply put NA in output_path_template +#for the fourth value. 
+ +output_path_template: ['NA','NA','source_id','NA','experiment_id','platform','custom_pp','realm','cell_methods','frequency','chunk_freq'] + +output_file_template: ['realm','time_range','variable_id'] + From 985e12e0fd1750b514794a980a628a84e2c4082b Mon Sep 17 00:00:00 2001 From: aradhakrishnanGFDL Date: Wed, 25 Sep 2024 15:47:54 -0400 Subject: [PATCH 03/11] to support cli and config --- catalogbuilder/intakebuilder/configparser.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/catalogbuilder/intakebuilder/configparser.py b/catalogbuilder/intakebuilder/configparser.py index c8cbe59..a7db0f7 100644 --- a/catalogbuilder/intakebuilder/configparser.py +++ b/catalogbuilder/intakebuilder/configparser.py @@ -7,14 +7,17 @@ def __init__(self, config): configfile = yaml.safe_load(file) try: self.input_path = configfile['input_path'] - #print("input_path :",self.input_path) except: - raise KeyError("input_path does not exist in config") + self.input_path = None + print("input_path does not exist in config") + pass try: self.output_path = configfile['output_path'] #print("output_path :",self.output_path) except: - raise KeyError("output_path does not exist in config") + self.output_path = None + print("output_path does not exist in config") + pass try: self.headerlist = configfile['headerlist'] print("headerlist :",self.headerlist) From 68e0556daca243518c99bdc43106176689b5b295 Mon Sep 17 00:00:00 2001 From: aradhakrishnanGFDL Date: Wed, 25 Sep 2024 15:48:12 -0400 Subject: [PATCH 04/11] bugfix, when there is no input and output in config, but cli options were passed, the builder threw exceptions. 
fixed now --- catalogbuilder/scripts/gen_intake_gfdl.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/catalogbuilder/scripts/gen_intake_gfdl.py b/catalogbuilder/scripts/gen_intake_gfdl.py index 0ee71f6..5f4f239 100755 --- a/catalogbuilder/scripts/gen_intake_gfdl.py +++ b/catalogbuilder/scripts/gen_intake_gfdl.py @@ -35,8 +35,6 @@ def create_catalog(input_path=None, output_path=None, config=None, filter_realm= # TODO error catching if (config is not None): configyaml = configparser.Config(config) - if configyaml.input_path is None or not configyaml.input_path : - sys.exit("Can't find paths, is yaml configured?") if(input_path is None): input_path = configyaml.input_path if(output_path is None): From d1b45c401ae74523943084f687910f79da23a459 Mon Sep 17 00:00:00 2001 From: aradhakrishnanGFDL Date: Wed, 25 Sep 2024 15:51:12 -0400 Subject: [PATCH 05/11] python example updated so that a config yaml can be passed, along with input and output passed in as command line --- .../scripts/gen_intake_gfdl_runner_config.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/catalogbuilder/scripts/gen_intake_gfdl_runner_config.py b/catalogbuilder/scripts/gen_intake_gfdl_runner_config.py index de0e9e8..7bda58d 100755 --- a/catalogbuilder/scripts/gen_intake_gfdl_runner_config.py +++ b/catalogbuilder/scripts/gen_intake_gfdl_runner_config.py @@ -1,13 +1,18 @@ -#!/usr/bin/env python - +import catalogbuilder from catalogbuilder.scripts import gen_intake_gfdl import sys, os #This is an example call to run catalog builder using a yaml config file. 
-package_dir = os.path.dirname(os.path.abspath(__file__)) -configyaml = os.path.join(package_dir, 'configs/config-example.yml') -def create_catalog_from_config(config=configyaml): - csv, json = gen_intake_gfdl.create_catalog(config=configyaml) +def create_catalog_from_config(input_path,output_path,configyaml): + csv, json = gen_intake_gfdl.create_catalog(input_path=input_path,output_path=output_path,config=configyaml) return(csv,json) +if __name__ == '__main__': + package_dir = os.path.dirname(os.path.abspath(__file__)) + configyaml = os.path.join(package_dir, 'catalogbuilder/scripts/configs/config-example2.yml') + input_path = "/archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp" + output_path = "sample-test" + create_catalog_from_config(input_path,output_path,configyaml) + + From d4663e59496739b5af2430527a33146adff76af0 Mon Sep 17 00:00:00 2001 From: aradhakrishnanGFDL Date: Wed, 25 Sep 2024 16:17:20 -0400 Subject: [PATCH 06/11] CI test fix --- .../scripts/gen_intake_gfdl_runner_config.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/catalogbuilder/scripts/gen_intake_gfdl_runner_config.py b/catalogbuilder/scripts/gen_intake_gfdl_runner_config.py index 7bda58d..dd462ae 100755 --- a/catalogbuilder/scripts/gen_intake_gfdl_runner_config.py +++ b/catalogbuilder/scripts/gen_intake_gfdl_runner_config.py @@ -3,16 +3,16 @@ import sys, os #This is an example call to run catalog builder using a yaml config file. 
+package_dir = os.path.dirname(os.path.abspath(__file__)) +configyaml = os.path.join(package_dir, 'catalogbuilder/scripts/configs/config-example.yml') +input_path = "/archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp" +output_path = "sample-test" -def create_catalog_from_config(input_path,output_path,configyaml): +def create_catalog_from_config(input_path,output_path,configyaml=configyaml): csv, json = gen_intake_gfdl.create_catalog(input_path=input_path,output_path=output_path,config=configyaml) return(csv,json) if __name__ == '__main__': - package_dir = os.path.dirname(os.path.abspath(__file__)) - configyaml = os.path.join(package_dir, 'catalogbuilder/scripts/configs/config-example2.yml') - input_path = "/archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp" - output_path = "sample-test" create_catalog_from_config(input_path,output_path,configyaml) From 775df9edca986e481f8576cc9ec67c00dcbf943f Mon Sep 17 00:00:00 2001 From: aradhakrishnanGFDL Date: Wed, 25 Sep 2024 16:23:13 -0400 Subject: [PATCH 07/11] ci fix --- catalogbuilder/scripts/gen_intake_gfdl_runner_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catalogbuilder/scripts/gen_intake_gfdl_runner_config.py b/catalogbuilder/scripts/gen_intake_gfdl_runner_config.py index dd462ae..762bef2 100755 --- a/catalogbuilder/scripts/gen_intake_gfdl_runner_config.py +++ b/catalogbuilder/scripts/gen_intake_gfdl_runner_config.py @@ -8,7 +8,7 @@ input_path = "/archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp" output_path = "sample-test" -def create_catalog_from_config(input_path,output_path,configyaml=configyaml): +def create_catalog_from_config(input_path=input_path,output_path=output_path,configyaml=configyaml): csv, json = gen_intake_gfdl.create_catalog(input_path=input_path,output_path=output_path,config=configyaml) return(csv,json) From 8f5f37242ec52d60787251ce872250e3f627a632 Mon 
Sep 17 00:00:00 2001 From: aradhakrishnanGFDL Date: Wed, 25 Sep 2024 16:26:33 -0400 Subject: [PATCH 08/11] yaml path fix --- catalogbuilder/scripts/gen_intake_gfdl_runner_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catalogbuilder/scripts/gen_intake_gfdl_runner_config.py b/catalogbuilder/scripts/gen_intake_gfdl_runner_config.py index 762bef2..10a9a0e 100755 --- a/catalogbuilder/scripts/gen_intake_gfdl_runner_config.py +++ b/catalogbuilder/scripts/gen_intake_gfdl_runner_config.py @@ -4,7 +4,7 @@ #This is an example call to run catalog builder using a yaml config file. package_dir = os.path.dirname(os.path.abspath(__file__)) -configyaml = os.path.join(package_dir, 'catalogbuilder/scripts/configs/config-example.yml') +configyaml = os.path.join(package_dir, 'configs/config-example.yml') input_path = "/archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp" output_path = "sample-test" From aad949853ce6416d81a1c7d6e927f56018bfc6e5 Mon Sep 17 00:00:00 2001 From: aradhakrishnanGFDL Date: Wed, 25 Sep 2024 16:32:58 -0400 Subject: [PATCH 09/11] path adjusted for ci --- catalogbuilder/scripts/gen_intake_gfdl_runner_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catalogbuilder/scripts/gen_intake_gfdl_runner_config.py b/catalogbuilder/scripts/gen_intake_gfdl_runner_config.py index 10a9a0e..9df3072 100755 --- a/catalogbuilder/scripts/gen_intake_gfdl_runner_config.py +++ b/catalogbuilder/scripts/gen_intake_gfdl_runner_config.py @@ -5,7 +5,7 @@ #This is an example call to run catalog builder using a yaml config file. 
package_dir = os.path.dirname(os.path.abspath(__file__)) configyaml = os.path.join(package_dir, 'configs/config-example.yml') -input_path = "/archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp" +input_path = "archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp" output_path = "sample-test" def create_catalog_from_config(input_path=input_path,output_path=output_path,configyaml=configyaml): From 54a8c9890cbaf685024f4a9ccdc3bdf87a47a23f Mon Sep 17 00:00:00 2001 From: aradhakrishnanGFDL Date: Wed, 25 Sep 2024 16:34:12 -0400 Subject: [PATCH 10/11] ame output name as b4 --- catalogbuilder/scripts/gen_intake_gfdl_runner_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catalogbuilder/scripts/gen_intake_gfdl_runner_config.py b/catalogbuilder/scripts/gen_intake_gfdl_runner_config.py index 9df3072..2324646 100755 --- a/catalogbuilder/scripts/gen_intake_gfdl_runner_config.py +++ b/catalogbuilder/scripts/gen_intake_gfdl_runner_config.py @@ -6,7 +6,7 @@ package_dir = os.path.dirname(os.path.abspath(__file__)) configyaml = os.path.join(package_dir, 'configs/config-example.yml') input_path = "archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp" -output_path = "sample-test" +output_path = "sample-mdtf-catalog"" def create_catalog_from_config(input_path=input_path,output_path=output_path,configyaml=configyaml): csv, json = gen_intake_gfdl.create_catalog(input_path=input_path,output_path=output_path,config=configyaml) From b57db732baa468287334d21b944766827cd5379f Mon Sep 17 00:00:00 2001 From: aradhakrishnanGFDL Date: Wed, 25 Sep 2024 16:40:34 -0400 Subject: [PATCH 11/11] typo --- catalogbuilder/scripts/gen_intake_gfdl_runner_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catalogbuilder/scripts/gen_intake_gfdl_runner_config.py b/catalogbuilder/scripts/gen_intake_gfdl_runner_config.py index 2324646..81603f5 100755 --- 
a/catalogbuilder/scripts/gen_intake_gfdl_runner_config.py +++ b/catalogbuilder/scripts/gen_intake_gfdl_runner_config.py @@ -6,7 +6,7 @@ package_dir = os.path.dirname(os.path.abspath(__file__)) configyaml = os.path.join(package_dir, 'configs/config-example.yml') input_path = "archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp" -output_path = "sample-mdtf-catalog"" +output_path = "sample-mdtf-catalog" def create_catalog_from_config(input_path=input_path,output_path=output_path,configyaml=configyaml): csv, json = gen_intake_gfdl.create_catalog(input_path=input_path,output_path=output_path,config=configyaml)