Skip to content

Commit

Permalink
Feature/update error logging (#138)
Browse files Browse the repository at this point in the history
* update python libraries

* fix pylint

* poetry update

* update changelog

* poetry  update

* add tests for concise exception class

* more test coverage

* update license
  • Loading branch information
sliu008 authored Dec 6, 2024
1 parent f667850 commit dcbfbb6
Show file tree
Hide file tree
Showing 9 changed files with 1,629 additions and 1,244 deletions.
3 changes: 2 additions & 1 deletion .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,8 @@ disable=raw-checker-failed,
useless-suppression,
deprecated-pragma,
use-symbolic-message-instead,
too-many-arguments
too-many-arguments,
too-many-positional-arguments

# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifier separated by comma (,) or put this option
Expand Down
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Added harmony deployment into github actions.
### Changed
- [issue #117](https://github.com/podaac/concise/issues/117): Add part of URL to output file name
- Update python libraries
- Update harmony service lib that changed project structure
- Add Concise exception to propagate up to harmony api calls
### Deprecated
### Removed
### Fixed
Expand Down
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@
same "printed page" as the copyright notice for easier
identification within third-party archives.

Copyright [yyyy] [name of copyright owner]
Copyright 2024 California Institute of Technology

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand Down
2 changes: 1 addition & 1 deletion podaac/merger/harmony/cli.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""A Harmony CLI wrapper around Concise"""

from argparse import ArgumentParser
import harmony
import harmony_service_lib as harmony
from podaac.merger.harmony.service import ConciseService


Expand Down
4 changes: 2 additions & 2 deletions podaac/merger/harmony/download_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
import re
from urllib.parse import urlparse

from harmony.logging import build_logger
from harmony.util import download
from harmony_service_lib.logging import build_logger
from harmony_service_lib.util import download


def multi_core_download(urls, destination_dir, access_token, cfg, process_count=None):
Expand Down
149 changes: 94 additions & 55 deletions podaac/merger/harmony/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,12 @@
from shutil import copyfile
from urllib.parse import urlsplit
from uuid import uuid4
import traceback
import sys

from harmony.adapter import BaseHarmonyAdapter
from harmony.util import bbox_to_geometry, stage
from harmony_service_lib.adapter import BaseHarmonyAdapter
from harmony_service_lib.util import bbox_to_geometry, stage
from harmony_service_lib.exceptions import HarmonyException
from pystac import Catalog, Item
from pystac.item import Asset

Expand All @@ -20,6 +23,37 @@
NETCDF4_MIME = 'application/x-netcdf4' # pylint: disable=invalid-name


class ConciseException(HarmonyException):
    """Harmony exception that reports where a wrapped Concise error occurred.

    The message handed to ``HarmonyException`` names the file, line number and
    function of the last traceback entry of the wrapped exception, followed by
    the wrapped exception's own text. The category is ``'podaac/concise'``.

    Parameters
    ----------
    original_exception : Exception
        The exception being wrapped; kept on the instance as
        ``original_exception``.
    """

    def __init__(self, original_exception):
        if original_exception.__traceback__ is None:
            # No traceback is attached yet: raise and immediately catch the
            # exception so that there is at least one entry to report.
            try:
                raise original_exception
            except type(original_exception):
                _, _, active_tb = sys.exc_info()
                original_exception.__traceback__ = active_tb

        # The final extracted entry is the most recent call, i.e. the place
        # the error is attributed to.
        entries = traceback.extract_tb(original_exception.__traceback__)
        origin = entries[-1]

        location = (f"Error in file '{origin.filename}', line {origin.lineno}, "
                    f"in function '{origin.name}': ")
        details = str(original_exception)

        # Parent receives the readable message plus the service category.
        super().__init__(location + details, 'podaac/concise')

        # Keep the wrapped exception available for further investigation.
        self.original_exception = original_exception


class ConciseService(BaseHarmonyAdapter):
"""
A harmony-service-lib wrapper around the Concise module. This wrapper does
Expand Down Expand Up @@ -55,63 +89,68 @@ def process_catalog(self, catalog: Catalog):
pystac.Catalog
A new catalog containing the results from the merge
"""
result = catalog.clone()
result.id = str(uuid4())
result.clear_children()

# Get all the items from the catalog, including from child or linked catalogs
items = list(self.get_all_catalog_items(catalog))
try:
result = catalog.clone()
result.id = str(uuid4())
result.clear_children()

# Get all the items from the catalog, including from child or linked catalogs
items = list(self.get_all_catalog_items(catalog))

# Quick return if catalog contains no items
if len(items) == 0:
return result

# -- Process metadata --
bbox = []
granule_urls = []
datetimes = [
datetime.max.replace(tzinfo=timezone.utc), # start
datetime.min.replace(tzinfo=timezone.utc) # end
]

for item in items:
get_bbox(item, bbox)
get_granule_url(item, granule_urls)
get_datetime(item, datetimes)

# Items did not have a bbox; valid under spec
if len(bbox) == 0:
bbox = None

# -- Perform merging --
collection = self._get_item_source(items[0]).collection
first_granule_url = []
get_granule_url(items[0], first_granule_url)
first_url_name = Path(first_granule_url[0]).stem
filename = f'{first_url_name}_{datetimes[1].strftime("%Y%m%dT%H%M%SZ")}_{collection}_merged.nc4'

with TemporaryDirectory() as temp_dir:
self.logger.info('Starting granule downloads')
input_files = multi_core_download(granule_urls, temp_dir, self.message.accessToken, self.config)
self.logger.info('Finished granule downloads')

output_path = Path(temp_dir).joinpath(filename).resolve()
merge_netcdf_files(input_files, output_path, granule_urls, logger=self.logger)
staged_url = self._stage(str(output_path), filename, NETCDF4_MIME)

# -- Output to STAC catalog --
result.clear_items()
properties = {
"start_datetime": datetimes[0].isoformat(),
"end_datetime": datetimes[1].isoformat()
}

item = Item(str(uuid4()), bbox_to_geometry(bbox), bbox, None, properties)
asset = Asset(staged_url, title=filename, media_type=NETCDF4_MIME, roles=['data'])
item.add_asset('data', asset)
result.add_item(item)

# Quick return if catalog contains no items
if len(items) == 0:
return result

# -- Process metadata --
bbox = []
granule_urls = []
datetimes = [
datetime.max.replace(tzinfo=timezone.utc), # start
datetime.min.replace(tzinfo=timezone.utc) # end
]

for item in items:
get_bbox(item, bbox)
get_granule_url(item, granule_urls)
get_datetime(item, datetimes)

# Items did not have a bbox; valid under spec
if len(bbox) == 0:
bbox = None

# -- Perform merging --
collection = self._get_item_source(items[0]).collection
first_granule_url = []
get_granule_url(items[0], first_granule_url)
first_url_name = Path(first_granule_url[0]).stem
filename = f'{first_url_name}_{datetimes[1].strftime("%Y%m%dT%H%M%SZ")}_{collection}_merged.nc4'

with TemporaryDirectory() as temp_dir:
self.logger.info('Starting granule downloads')
input_files = multi_core_download(granule_urls, temp_dir, self.message.accessToken, self.config)
self.logger.info('Finished granule downloads')

output_path = Path(temp_dir).joinpath(filename).resolve()
merge_netcdf_files(input_files, output_path, granule_urls, logger=self.logger)
staged_url = self._stage(str(output_path), filename, NETCDF4_MIME)

# -- Output to STAC catalog --
result.clear_items()
properties = {
"start_datetime": datetimes[0].isoformat(),
"end_datetime": datetimes[1].isoformat()
}

item = Item(str(uuid4()), bbox_to_geometry(bbox), bbox, None, properties)
asset = Asset(staged_url, title=filename, media_type=NETCDF4_MIME, roles=['data'])
item.add_asset('data', asset)
result.add_item(item)

return result
except Exception as ex:
raise ConciseException(ex) from ex

def _stage(self, local_filename, remote_filename, mime):
"""
Expand Down
Loading

0 comments on commit dcbfbb6

Please sign in to comment.