Skip to content

Commit

Permalink
Merge branch 'main' into issue/147/metadata
Browse files (browse the repository at this point in the history)
  • Loading branch information
delucchi-cmu authored Nov 14, 2023
2 parents 93320fc + 9b53aeb commit 7947a18
Showing 1 changed file with 4 additions and 10 deletions.
14 changes: 4 additions & 10 deletions src/hipscat/io/file_io/file_io.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -195,20 +195,15 @@ def read_parquet_metadata(
return parquet_file


def read_parquet_dataset(dir_pointer: FilePointer, storage_options: Union[Dict[Any, Any], None] = None):
def read_parquet_dataset(
dir_pointer: FilePointer, storage_options: Union[Dict[Any, Any], None] = None, **kwargs
):
"""Read parquet dataset from directory pointer.
Args:
dir_pointer: location of file to read metadata from
storage_options: dictionary that contains abstract filesystem credentials
"""

ignore_prefixes = [
"intermediate",
"_common_metadata",
"_metadata",
]

file_system, dir_pointer = get_fs(file_pointer=dir_pointer, storage_options=storage_options)

# pyarrow.dataset requires the pointer not lead with a slash
Expand All @@ -217,9 +212,8 @@ def read_parquet_dataset(dir_pointer: FilePointer, storage_options: Union[Dict[A
dataset = pds.dataset(
dir_pointer,
filesystem=file_system,
exclude_invalid_files=True,
format="parquet",
ignore_prefixes=ignore_prefixes,
**kwargs,
)
return dataset

Expand Down

0 comments on commit 7947a18

Please sign in to comment.