Skip to content

Commit

Permalink
Merge pull request NOAA-GFDL#75 from aradhakrishnanGFDL/53-static
Browse files Browse the repository at this point in the history
53 static
  • Loading branch information
ceblanton authored Oct 30, 2024
2 parents 0f18e03 + a40e18a commit 419252d
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 13 deletions.
2 changes: 1 addition & 1 deletion catalogbuilder/intakebuilder/builderconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
headerlist = ["activity_id", "institution_id", "source_id", "experiment_id",
"frequency", "realm", "table_id",
"member_id", "grid_label", "variable_id",
"time_range", "chunk_freq","grid_label","platform","dimensions","cell_methods","standard_name","path"]
"time_range", "chunk_freq","platform","dimensions","cell_methods","standard_name","path"]

#what kind of directory structure to expect?
#For a directory structure like /archive/am5/am5/am5f3b1r0/c96L65_am5f3b1r0_pdclim1850F/gfdl.ncrc5-deploy-prod-openmp/pp
Expand Down
33 changes: 27 additions & 6 deletions catalogbuilder/intakebuilder/getinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,27 +108,38 @@ def getInfoFromGFDLFilename(filename,dictInfo,logger,configyaml):
output_file_template = builderconfig.output_file_template
except:
sys.exit("No output_path_template found. Check configuration.")
if( ".static" in filename ):
## Static files are handled differently. The expected GFDL PP filename pattern is <realm>.static.nc, e.g. atmos.static.nc
#TODO error checking as needed
output_file_template = ['realm','NA']
dictInfo["variable_id"] = "fixed"
dictInfo["frequency"] = "fx"
nlen = len(output_file_template)
for i in range(nlen-1,-1,-1): #nlen = 3
try:
if(output_file_template[i] != "NA"):
try:
#print(output_file_template[i], "=" , stemdir[(j)])
dictInfo[output_file_template[i]] = stemdir[(j)]
except IndexError:
#print("Check configuration. Is output file template set correctly?")
dictInfo[output_file_template[i]] = ""
except IndexError:
sys.exit("oops in getInfoFromGFDLFilename"+str(i)+str(j)+output_file_template[i]+stemdir[j])
j = j - 1
cnt = cnt + 1
print(dictInfo["realm"], filename)
if (".static" in filename):
if ("ocean" in dictInfo["realm"]):
dictInfo["table_id"] = "Ofx"
else:
dictInfo["table_id"] = "fx"
return dictInfo

def getInfoFromGFDLDRS(dirpath,projectdir,dictInfo,configyaml):
def getInfoFromGFDLDRS(dirpath,projectdir,dictInfo,configyaml,variable_id):
'''
Returns info from project directory and the DRS path to the file
:param dirpath:
:param drsstructure:
:param variable_id: variable id from the filename; the value "fixed" marks a GFDL PP static dataset
:return:
'''
# we need these dict keys "project", "institute", "model", "experiment_id",
Expand All @@ -151,7 +162,9 @@ def getInfoFromGFDLDRS(dirpath,projectdir,dictInfo,configyaml):
output_path_template = builderconfig.output_path_template
except:
sys.exit("No output_path_template found in builderconfig.py. Check configuration.")

#If variable_id is "fixed", this is a GFDL PP static dataset. Its path has no ts (cell_methods), frequency or time-chunk directories, so the output path template from the config only lines up with the leading part of the directory structure.
if(variable_id == "fixed"):
output_path_template = output_path_template[:-3 or None]
nlen = len(output_path_template)
for i in range(nlen-1,0,-1):
try:
Expand All @@ -168,10 +181,18 @@ def getInfoFromGFDLDRS(dirpath,projectdir,dictInfo,configyaml):
# We do not want to work with anything
# that's not time series
#TODO have verbose option to print message
#TODO Make this elegant and intuitive
#TODO logger messages, not print
if "cell_methods" in dictInfo.keys():
if (dictInfo["cell_methods"] != "ts"):
#print("Skipping non-timeseries data")
if (dictInfo["cell_methods"] == "av"):
print("Skipping time-average data")
return {}
elif (dictInfo["cell_methods"] == "ts"):
print("time-series data")
else:
print("This is likely static")
dictInfo["cell_methods"] = ""
dictInfo["member_id"] = ""
return dictInfo

def getInfoFromDRS(dirpath,projectdir,dictInfo):
Expand Down
9 changes: 7 additions & 2 deletions catalogbuilder/intakebuilder/gfdlcrawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,12 @@ def crawlLocal(projectdir, dictFilter,dictFilterIgnore,logger,configyaml,slow):
dictInfo = getinfo.getInfoFromFilename(filename,dictInfo, logger)
else:
dictInfo = getinfo.getInfoFromGFDLFilename(filename,dictInfo, logger,configyaml)
dictInfo = getinfo.getInfoFromGFDLDRS(dirpath, projectdir, dictInfo,configyaml)
if "variable_id" in dictInfo.keys():
if dictInfo["variable_id"] is not None:
variable_id = dictInfo["variable_id"]
else:
variable_id = ""
dictInfo = getinfo.getInfoFromGFDLDRS(dirpath, projectdir, dictInfo,configyaml,variable_id)
list_bad_modellabel = ["","piControl","land-hist","piClim-SO2","abrupt-4xCO2","hist-piAer","hist-piNTCF","piClim-ghg","piClim-OC","hist-GHG","piClim-BC","1pctCO2"]
list_bad_chunklabel = ['DO_NOT_USE']
if "source_id" in dictInfo:
Expand Down Expand Up @@ -129,6 +134,6 @@ def crawlLocal(projectdir, dictFilter,dictFilterIgnore,logger,configyaml,slow):
cmipfreq = getinfo.getFreqFromYAML(yamlfile,gfdlfreq=dictInfo['frequency'])
if(cmipfreq is not None):
dictInfo['frequency'] = cmipfreq
#print("Adjusting frequency from ", gfdlfreq ," to ",cmipfreq)
#print("Adjusting frequency from ", gfdlfreq ," to ",cmipfreq)
listfiles.append(dictInfo)
return listfiles
4 changes: 2 additions & 2 deletions configs/config-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ headerlist: ["activity_id", "institution_id", "source_id", "experiment_id",
#this is a valid value in headerlist as well.
#The fourth directory is am5f3b1r0 which does not map to an existing header value. So we simply use NA in output_path_template
#for the fourth value.

output_path_template: ['NA','NA','source_id','NA','experiment_id','platform','custom_pp','realm','cell_methods','frequency','chunk_freq']
#/archive/a1r/fre/FMS2024.02_OM5_20240724/CM4.5v01_om5b06_piC_noBLING/gfdl.ncrc5-intel23-prod-openmp/pp/ocean_monthly/ocean_monthly.static.nc
output_path_template: ['NA','NA','NA','source_id','experiment_id','platform','custom_pp','realm','cell_methods','frequency','chunk_freq']

output_file_template: ['realm','time_range','variable_id']

Expand Down
3 changes: 2 additions & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@ channels:
dependencies:
- python
- pytest
- setuptools
- click
- xarray
- pandas
- jsondiff
- intake-esm
- intake-esm=2023.7.7
- boto3
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
'xarray',
'pandas',
'jsondiff',
'intake-esm',
'intake-esm==2023.7.7',
'boto3'
]
)
Expand Down

0 comments on commit 419252d

Please sign in to comment.