From add750261223255d006656f8a3add1a45116c35b Mon Sep 17 00:00:00 2001 From: Aparna Radhakrishnan Date: Mon, 29 Jul 2024 16:21:38 -0400 Subject: [PATCH] Update config-example.yml --- .../scripts/configs/config-example.yml | 43 ++++++++++++++++++- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/catalogbuilder/scripts/configs/config-example.yml b/catalogbuilder/scripts/configs/config-example.yml index 2013e59..079afc7 100644 --- a/catalogbuilder/scripts/configs/config-example.yml +++ b/catalogbuilder/scripts/configs/config-example.yml @@ -1,2 +1,41 @@ -input_path: "/archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp/" #"ENTER INPUT PATH HERE" #Example: /Users/ar46/archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp/" -output_path: "catalog" # ENTER NAME OF THE CSV AND JSON, THE SUFFIX ALONE. e.g catalog (the builder then generates catalog.csv and catalog.json. This can also be an absolute path) +#what kind of directory structure to expect? +#For a directory structure like /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp +# the output_path_template is set as follows. +#We have NA in those values that do not match up with any of the expected headerlist (CSV columns), otherwise we +#simply specify the associated header name in the appropriate place. E.g. The third directory in the PP path example +#above is the model (source_id), so the third list value in output_path_template is set to 'source_id'. We make sure +#this is a valid value in headerlist as well. +#The fourth directory is am5f3b1r0 which does not map to an existing header value. So we simply NA in output_path_template +#for the fourth value. + +#catalog headers +#The headerlist is expected column names in your catalog/csv file. This is usually determined by the users in conjuction +#with the ESM collection specification standards and the appropriate workflows. + +headerlist: ["activity_id", "institution_id", "source_id", "experiment_id", + "frequency", "realm", "table_id", + "member_id", "grid_label", "variable_id", + "time_range", "chunk_freq","platform","dimensions","cell_methods","standard_name","path"] + +#what kind of directory structure to expect? +#For a directory structure like /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp +# the output_path_template is set as follows. +#We have NA in those values that do not match up with any of the expected headerlist (CSV columns), otherwise we +#simply specify the associated header name in the appropriate place. E.g. The third directory in the PP path example +#above is the model (source_id), so the third list value in output_path_template is set to 'source_id'. We make sure +#this is a valid value in headerlist as well. +#The fourth directory is am5f3b1r0 which does not map to an existing header value. So we simply NA in output_path_template +#for the fourth value. + +output_path_template: ['NA','NA','source_id','NA','experiment_id','platform','custom_pp','realm','cell_methods','frequency','chunk_freq'] + +output_file_template: ['realm','time_range','variable_id'] + +#OUTPUT FILE INFO is currently passed as command-line argument. +#We will revisit adding a csvfile, jsonfile and logfile configuration to the builder configuration file in the future. +#csvfile = #jsonfile = #logfile = + +####################################################### + +input_path: "archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp" +output_path: "sample-mdtf-catalog" # ENTER NAME OF THE CSV AND JSON, THE SUFFIX ALONE. e.g catalog (the builder then generates catalog.csv and catalog.json. This can also be an absolute path)