From 5e8cc98700742ab18c5f62e332b729444ecc22cd Mon Sep 17 00:00:00 2001
From: schwarzam
Date: Fri, 1 Nov 2024 11:59:51 -0300
Subject: [PATCH] added http header possibility

---
Review notes (commentary only; not part of the commit message or the diff):

* Layout: this patch was restored from a whitespace-collapsed copy. Line
  breaks and indentation were re-derived from the hunk headers and Python
  syntax; the diffstat column padding is cosmetic.
* get_upath(): a `path` that is already a UPath is returned unchanged, so any
  **storage_options passed alongside it are ignored. read_hats() converts
  catalog_path with the options and then calls
  TableProperties.read_from_dir(catalog_path) without them, which presumably
  relies on the UPath instance carrying the options -- TODO confirm.
* get_upath(): still returns None for a falsy `path` although it is
  annotated `-> UPath`.
* read_parquet_file_to_pandas(): `kwargs.pop("headers", None)` removes a
  "headers" keyword before pd.read_parquet is called; within this hunk the
  popped value is not used when opening the file.

 src/hats/catalog/dataset/table_properties.py | 4 ++--
 src/hats/io/file_io/file_io.py               | 2 ++
 src/hats/io/file_io/file_pointer.py          | 5 +++--
 src/hats/loaders/read_hats.py                | 4 ++--
 4 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/hats/catalog/dataset/table_properties.py b/src/hats/catalog/dataset/table_properties.py
index 94542eed..55218cb6 100644
--- a/src/hats/catalog/dataset/table_properties.py
+++ b/src/hats/catalog/dataset/table_properties.py
@@ -191,9 +191,9 @@ def __str__(self):
         return formatted_string
 
     @classmethod
-    def read_from_dir(cls, catalog_dir: Union[str, Path, UPath]) -> Self:
+    def read_from_dir(cls, catalog_dir: Union[str, Path, UPath], **storage_options) -> Self:
         """Read field values from a java-style properties file."""
-        file_path = file_io.get_upath(catalog_dir) / "properties"
+        file_path = file_io.get_upath(catalog_dir, **storage_options) / "properties"
         if not file_io.does_file_or_directory_exist(file_path):
             raise FileNotFoundError(f"No properties file found where expected: {str(file_path)}")
         p = Properties()
diff --git a/src/hats/io/file_io/file_io.py b/src/hats/io/file_io/file_io.py
index 67d3aa82..6cc87b7d 100644
--- a/src/hats/io/file_io/file_io.py
+++ b/src/hats/io/file_io/file_io.py
@@ -282,5 +282,7 @@ def read_parquet_file_to_pandas(
     file_pointer = get_upath(file_pointer)
     if file_open_kwargs is None:
         file_open_kwargs = {}
+
     with file_pointer.open("rb", **file_open_kwargs) as parquet_file:
+        kwargs.pop("headers", None)
         return pd.read_parquet(parquet_file, **kwargs)
diff --git a/src/hats/io/file_io/file_pointer.py b/src/hats/io/file_io/file_pointer.py
index c13582a9..18eea745 100644
--- a/src/hats/io/file_io/file_pointer.py
+++ b/src/hats/io/file_io/file_pointer.py
@@ -6,13 +6,14 @@ from upath import UPath
 
 
 
-def get_upath(path: str | Path | UPath) -> UPath:
+def get_upath(path: str | Path | UPath, **storage_options) -> UPath:
     """Returns a file pointer from a path string"""
     if not path:
         return None
     if isinstance(path, UPath):
         return path
-    return UPath(path)
+
+    return UPath(path, **storage_options)
 
 
 def append_paths_to_pointer(pointer: str | Path | UPath, *paths: str) -> UPath:
diff --git a/src/hats/loaders/read_hats.py b/src/hats/loaders/read_hats.py
index f071e2d2..bffc1fa2 100644
--- a/src/hats/loaders/read_hats.py
+++ b/src/hats/loaders/read_hats.py
@@ -26,7 +26,7 @@
 }
 
 
-def read_hats(catalog_path: str | Path | UPath) -> Dataset:
+def read_hats(catalog_path: str | Path | UPath, **kwargs) -> Dataset:
     """Reads a HATS Catalog from a HATS directory
 
     Args:
@@ -34,7 +34,7 @@ def read_hats(catalog_path: str | Path | UPath) -> Dataset:
     Returns:
         The initialized catalog object
     """
-    catalog_path = file_io.get_upath(catalog_path)
+    catalog_path = file_io.get_upath(catalog_path, **kwargs)
     try:
         properties = TableProperties.read_from_dir(catalog_path)
         dataset_type = properties.catalog_type