Skip to content

Commit

Permalink
Merge pull request #155 from astronomy-commons/delucchi/storage_type
Browse files Browse the repository at this point in the history
Use fancier typing on the storage_options
  • Loading branch information
delucchi-cmu authored Oct 25, 2023
2 parents e6ac8e2 + 7201d25 commit 3bd039f
Show file tree
Hide file tree
Showing 14 changed files with 132 additions and 62 deletions.
10 changes: 6 additions & 4 deletions src/hipscat/catalog/association_catalog/association_catalog.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Tuple, Union
from typing import Any, Dict, Tuple, Union

import pandas as pd
from typing_extensions import TypeAlias
Expand Down Expand Up @@ -31,7 +31,7 @@ def __init__(
pixels: PixelInputTypes,
join_pixels: JoinPixelInputTypes,
catalog_path=None,
storage_options: dict = None,
storage_options: Union[Dict[Any, Any], None] = None,
) -> None:
if not catalog_info.catalog_type == CatalogType.ASSOCIATION:
raise ValueError("Catalog info `catalog_type` must be 'association'")
Expand Down Expand Up @@ -59,7 +59,7 @@ def _get_partition_join_info_from_pixels(

@classmethod
def _read_args(
cls, catalog_base_dir: FilePointer, storage_options: dict = None
cls, catalog_base_dir: FilePointer, storage_options: Union[Dict[Any, Any], None] = None
) -> Tuple[CatalogInfoClass, PixelInputTypes, JoinPixelInputTypes]: # type: ignore[override]
args = super()._read_args(catalog_base_dir, storage_options=storage_options)
partition_join_info_file = paths.get_partition_join_info_pointer(catalog_base_dir)
Expand All @@ -69,7 +69,9 @@ def _read_args(
return args + (partition_join_info,)

@classmethod
def _check_files_exist(cls, catalog_base_dir: FilePointer, storage_options: dict = None):
def _check_files_exist(
cls, catalog_base_dir: FilePointer, storage_options: Union[Dict[Any, Any], None] = None
):
super()._check_files_exist(catalog_base_dir, storage_options=storage_options)
partition_join_info_file = paths.get_partition_join_info_pointer(catalog_base_dir)
if not file_io.does_file_or_directory_exist(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Any, Dict, Union

import pandas as pd
from typing_extensions import Self

Expand Down Expand Up @@ -29,7 +31,9 @@ def _check_column_names(self):
raise ValueError(f"join_info_df does not contain column {column}")

@classmethod
def read_from_file(cls, partition_join_info_file: FilePointer, storage_options: dict = None) -> Self:
def read_from_file(
cls, partition_join_info_file: FilePointer, storage_options: Union[Dict[Any, Any], None] = None
) -> Self:
"""Read partition join info from a `partition_join_info.csv` file to create an object
Args:
Expand Down
4 changes: 2 additions & 2 deletions src/hipscat/catalog/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from __future__ import annotations

import dataclasses
from typing import List
from typing import Any, Dict, List, Union

import healpy as hp
import numpy as np
Expand Down Expand Up @@ -36,7 +36,7 @@ def __init__(
catalog_info: CatalogInfoClass,
pixels: PixelInputTypes,
catalog_path: str = None,
storage_options: dict = None,
storage_options: Union[Dict[Any, Any], None] = None,
) -> None:
"""Initializes a Catalog
Expand Down
5 changes: 4 additions & 1 deletion src/hipscat/catalog/dataset/base_catalog_info.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import dataclasses
from dataclasses import dataclass
from typing import Any, Dict, Union

from typing_extensions import Self

Expand Down Expand Up @@ -42,7 +43,9 @@ def __str__(self):
return formatted_string

@classmethod
def read_from_metadata_file(cls, catalog_info_file: FilePointer, storage_options: dict = None) -> Self:
def read_from_metadata_file(
cls, catalog_info_file: FilePointer, storage_options: Union[Dict[Any, Any], None] = None
) -> Self:
"""Read catalog info from the `catalog_info.json` metadata file
Args:
Expand Down
4 changes: 2 additions & 2 deletions src/hipscat/catalog/dataset/catalog_info_factory.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import dataclasses
from typing import Optional
from typing import Any, Dict, Optional, Union

from hipscat.catalog.association_catalog.association_catalog_info import AssociationCatalogInfo
from hipscat.catalog.catalog_info import CatalogInfo
Expand Down Expand Up @@ -51,7 +51,7 @@ def create_catalog_info(keywords: dict, catalog_type: Optional[CatalogType] = No
return ci_class(**catalog_info_keywords)


def from_catalog_dir(catalog_base_dir: FilePointer, storage_options: dict = None):
def from_catalog_dir(catalog_base_dir: FilePointer, storage_options: Union[Dict[Any, Any], None] = None):
"""Generate a typed catalog info object from the type specified in the
catalog info file.
Expand Down
21 changes: 15 additions & 6 deletions src/hipscat/catalog/dataset/dataset.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Tuple
from typing import Any, Dict, Tuple, Union

from typing_extensions import Self, TypeAlias

Expand All @@ -18,7 +18,10 @@ class Dataset:
CatalogInfoClass: TypeAlias = BaseCatalogInfo

def __init__(
self, catalog_info: CatalogInfoClass, catalog_path=None, storage_options: dict = None
self,
catalog_info: CatalogInfoClass,
catalog_path=None,
storage_options: Union[Dict[Any, Any], None] = None,
) -> None:
"""Initializes a Dataset
Expand All @@ -40,7 +43,9 @@ def __init__(
self.catalog_base_dir = file_io.get_file_pointer_from_path(self.catalog_path)

@classmethod
def read_from_hipscat(cls, catalog_path: str, storage_options: dict = None) -> Self:
def read_from_hipscat(
cls, catalog_path: str, storage_options: Union[Dict[Any, Any], None] = None
) -> Self:
"""Reads a HiPSCat Catalog from a HiPSCat directory
Args:
Expand All @@ -58,7 +63,7 @@ def read_from_hipscat(cls, catalog_path: str, storage_options: dict = None) -> S

@classmethod
def _read_args(
cls, catalog_base_dir: FilePointer, storage_options: dict = None
cls, catalog_base_dir: FilePointer, storage_options: Union[Dict[Any, Any], None] = None
) -> Tuple[CatalogInfoClass]:
catalog_info_file = paths.get_catalog_info_pointer(catalog_base_dir)
catalog_info = cls.CatalogInfoClass.read_from_metadata_file(
Expand All @@ -67,11 +72,15 @@ def _read_args(
return (catalog_info,)

@classmethod
def _read_kwargs(cls, catalog_base_dir: FilePointer, storage_options: dict = None) -> dict:
def _read_kwargs(
cls, catalog_base_dir: FilePointer, storage_options: Union[Dict[Any, Any], None] = None
) -> dict:
return {"catalog_path": str(catalog_base_dir), "storage_options": storage_options}

@classmethod
def _check_files_exist(cls, catalog_base_dir: FilePointer, storage_options: dict = None):
def _check_files_exist(
cls, catalog_base_dir: FilePointer, storage_options: Union[Dict[Any, Any], None] = None
):
if not file_io.does_file_or_directory_exist(catalog_base_dir, storage_options=storage_options):
raise FileNotFoundError(f"No directory exists at {str(catalog_base_dir)}")
catalog_info_file = paths.get_catalog_info_pointer(catalog_base_dir)
Expand Down
10 changes: 6 additions & 4 deletions src/hipscat/catalog/healpix_dataset/healpix_dataset.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List, Tuple, Union
from typing import Any, Dict, List, Tuple, Union

import pandas as pd
from typing_extensions import TypeAlias
Expand Down Expand Up @@ -28,7 +28,7 @@ def __init__(
catalog_info: CatalogInfoClass,
pixels: PixelInputTypes,
catalog_path: str = None,
storage_options: dict = None,
storage_options: Union[Dict[Any, Any], None] = None,
) -> None:
"""Initializes a Catalog
Expand Down Expand Up @@ -79,15 +79,17 @@ def _get_pixel_tree_from_pixels(pixels: PixelInputTypes) -> PixelTree:

@classmethod
def _read_args(
cls, catalog_base_dir: FilePointer, storage_options: dict = None
cls, catalog_base_dir: FilePointer, storage_options: Union[Dict[Any, Any], None] = None
) -> Tuple[CatalogInfoClass, PartitionInfo]:
args = super()._read_args(catalog_base_dir, storage_options=storage_options)
partition_info_file = paths.get_partition_info_pointer(catalog_base_dir)
partition_info = PartitionInfo.read_from_file(partition_info_file, storage_options=storage_options)
return args + (partition_info,)

@classmethod
def _check_files_exist(cls, catalog_base_dir: FilePointer, storage_options: dict = None):
def _check_files_exist(
cls, catalog_base_dir: FilePointer, storage_options: Union[Dict[Any, Any], None] = None
):
super()._check_files_exist(catalog_base_dir, storage_options=storage_options)
partition_info_file = paths.get_partition_info_pointer(catalog_base_dir)
if not file_io.does_file_or_directory_exist(partition_info_file, storage_options=storage_options):
Expand Down
6 changes: 4 additions & 2 deletions src/hipscat/catalog/partition_info.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""Container class to hold per-partition metadata"""
from typing import List
from typing import Any, Dict, List, Union

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -45,7 +45,9 @@ def write_to_file(self, partition_info_file: FilePointer):
file_io.write_dataframe_to_csv(self.as_dataframe(), partition_info_file, index=False)

@classmethod
def read_from_file(cls, partition_info_file: FilePointer, storage_options: dict = None):
def read_from_file(
cls, partition_info_file: FilePointer, storage_options: Union[Dict[Any, Any], None] = None
):
"""Read partition info from a `partition_info.csv` file to create an object
Args:
Expand Down
6 changes: 4 additions & 2 deletions src/hipscat/inspection/almanac.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import os
import warnings
from typing import List
from typing import Any, Dict, List, Union

import pandas as pd

Expand Down Expand Up @@ -32,7 +32,9 @@ class Almanac:
catalogs.
"""

def __init__(self, include_default_dir=True, dirs=None, storage_options: dict = None):
def __init__(
self, include_default_dir=True, dirs=None, storage_options: Union[Dict[Any, Any], None] = None
):
"""Create new almanac."""
self.files = {}
self.entries = {}
Expand Down
18 changes: 13 additions & 5 deletions src/hipscat/inspection/almanac_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import dataclasses
import os
from dataclasses import dataclass, field
from typing import List
from typing import Any, Dict, List, Union

import yaml
from typing_extensions import Self
Expand All @@ -18,7 +18,7 @@ class AlmanacInfo:
"""Container for parsed almanac information."""

file_path: str = ""
storage_options: dict = field(default_factory=dict)
storage_options: Union[Dict[Any, Any], None] = field(default_factory=dict)
namespace: str = ""
catalog_path: str = ""
catalog_name: str = ""
Expand Down Expand Up @@ -75,7 +75,9 @@ def get_default_dir() -> str:
return default_dir

@classmethod
def from_catalog_dir(cls, catalog_base_dir: str, storage_options: dict = None) -> Self:
def from_catalog_dir(
cls, catalog_base_dir: str, storage_options: Union[Dict[Any, Any], None] = None
) -> Self:
"""Create almanac information from the catalog information found at the target directory"""
catalog_info = catalog_info_factory.from_catalog_dir(
catalog_base_dir=file_io.get_file_pointer_from_path(catalog_base_dir),
Expand All @@ -91,15 +93,21 @@ def from_catalog_dir(cls, catalog_base_dir: str, storage_options: dict = None) -
return cls(**args)

@classmethod
def from_file(cls, file: str, storage_options: dict = None) -> Self:
def from_file(cls, file: str, storage_options: Union[Dict[Any, Any], None] = None) -> Self:
"""Create almanac information from an almanac file."""
_, fmt = os.path.splitext(file)
if fmt != ".yml":
raise ValueError(f"Unsupported file format {fmt}")
metadata = file_io.file_io.read_yaml(file, storage_options=storage_options)
return cls(**metadata)

def write_to_file(self, directory=None, default_dir=True, fmt="yml", storage_options: dict = None):
def write_to_file(
self,
directory=None,
default_dir=True,
fmt="yml",
storage_options: Union[Dict[Any, Any], None] = None,
):
"""Write the almanac to an almanac file"""
if default_dir and directory:
raise ValueError("Use only one of dir and default_dir")
Expand Down
4 changes: 3 additions & 1 deletion src/hipscat/inspection/visualize_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
NB: The validity of generated plots is not currently covered by our unit test suite.
"""

from typing import Any, Dict, Union

import healpy as hp
import numpy as np
from matplotlib import pyplot as plt
Expand All @@ -11,7 +13,7 @@
from hipscat.io import file_io, paths


def _read_point_map(catalog_base_dir, storage_options: dict = None):
def _read_point_map(catalog_base_dir, storage_options: Union[Dict[Any, Any], None] = None):
"""Read the object spatial distribution information from a healpix FITS file.
Args:
Expand Down
Loading

0 comments on commit 3bd039f

Please sign in to comment.