diff --git a/docs/notebooks/catalog_size_inspection.ipynb b/docs/notebooks/catalog_size_inspection.ipynb index b1b14eea..421b9a55 100644 --- a/docs/notebooks/catalog_size_inspection.ipynb +++ b/docs/notebooks/catalog_size_inspection.ipynb @@ -33,8 +33,7 @@ "metadata": {}, "outputs": [], "source": [ - "from hipscat.catalog.catalog import Catalog\n", - "from hipscat.io import paths\n", + "import hipscat\n", "import os\n", "\n", "### Change this path!!!\n", @@ -43,12 +42,14 @@ "### ----------------\n", "### You probably won't have to change anything from here.\n", "\n", - "catalog = Catalog.read_from_hipscat(catalog_dir)\n", + "catalog = hipscat.read_from_hipscat(catalog_dir)\n", "\n", "info_frame = catalog.partition_info.as_dataframe()\n", "\n", "for index, partition in info_frame.iterrows():\n", - " file_name = result = paths.pixel_catalog_file(catalog_dir, partition[\"Norder\"], partition[\"Npix\"])\n", + " file_name = result = hipscat.io.paths.pixel_catalog_file(\n", + " catalog_dir, partition[\"Norder\"], partition[\"Npix\"]\n", + " )\n", " info_frame.loc[index, \"size_on_disk\"] = os.path.getsize(file_name)\n", "\n", "info_frame = info_frame.astype(int)\n", diff --git a/docs/notebooks/cone_search.ipynb b/docs/notebooks/cone_search.ipynb index 132cbad1..0f301151 100644 --- a/docs/notebooks/cone_search.ipynb +++ b/docs/notebooks/cone_search.ipynb @@ -16,10 +16,9 @@ "metadata": {}, "outputs": [], "source": [ - "import numpy as np\n", - "from hipscat.catalog import Catalog\n", - "from hipscat import inspection\n", + "import hipscat\n", "import healpy as hp\n", + "import numpy as np\n", "\n", "## Fill in these variables with what's relevant in your use case:\n", "\n", @@ -39,7 +38,7 @@ "source": [ "## Load catalog\n", "\n", - "catalog = Catalog.read_from_hipscat(catalog_path)" + "catalog = hipscat.read_from_hipscat(catalog_path)" ] }, { @@ -50,7 +49,7 @@ "source": [ "## Plot catalog pixels\n", "\n", - "inspection.plot_pixels(catalog)" + 
"hipscat.inspection.plot_pixels(catalog)" ] }, { @@ -81,27 +80,13 @@ "\n", "filtered_catalog = catalog.filter_by_cone(ra, dec, radius)\n", "\n", - "inspection.plot_pixels(filtered_catalog)" + "hipscat.inspection.plot_pixels(filtered_catalog)" ] } ], "metadata": { - "kernelspec": { - "display_name": "hipscatenv", - "language": "python", - "name": "python3" - }, "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" + "name": "python" } }, "nbformat": 4, diff --git a/src/hipscat/__init__.py b/src/hipscat/__init__.py index a4b54e4d..a1e23d3a 100644 --- a/src/hipscat/__init__.py +++ b/src/hipscat/__init__.py @@ -1,3 +1,4 @@ """High-level namespace, hipscat""" -from . import catalog, io, pixel_math +from . import catalog, inspection, io, pixel_math +from .loaders import read_from_hipscat diff --git a/src/hipscat/catalog/dataset/dataset.py b/src/hipscat/catalog/dataset/dataset.py index e5369c2c..b095771b 100644 --- a/src/hipscat/catalog/dataset/dataset.py +++ b/src/hipscat/catalog/dataset/dataset.py @@ -1,21 +1,16 @@ from typing import Any, Dict, Tuple, Union -from typing_extensions import Self, TypeAlias +from typing_extensions import Self from hipscat.catalog.dataset.base_catalog_info import BaseCatalogInfo from hipscat.io import FilePointer, file_io, paths class Dataset: - """A base HiPSCat dataset + """A base HiPSCat dataset that contains a catalog_info metadata file + and the data contained in parquet files""" - A base dataset contains a catalog_info metadata file and the data contained in parquet files - - TODO - create factory methods to get appropriately-typed datasets for - some catalog info or catalog directory - """ - - CatalogInfoClass: TypeAlias = BaseCatalogInfo + CatalogInfoClass = BaseCatalogInfo def __init__( self, diff --git a/src/hipscat/loaders/__init__.py 
b/src/hipscat/loaders/__init__.py new file mode 100644 index 00000000..7798a463 --- /dev/null +++ b/src/hipscat/loaders/__init__.py @@ -0,0 +1 @@ +from .read_from_hipscat import read_from_hipscat
diff --git a/src/hipscat/loaders/read_from_hipscat.py b/src/hipscat/loaders/read_from_hipscat.py
new file mode 100644
index 00000000..e44a50fe
--- /dev/null
+++ b/src/hipscat/loaders/read_from_hipscat.py
@@ -0,0 +1,65 @@
+from __future__ import annotations
+
+from typing import Type
+
+from hipscat import io
+from hipscat.catalog import AssociationCatalog, Catalog, CatalogType, Dataset, MarginCatalog
+from hipscat.catalog.dataset import BaseCatalogInfo
+from hipscat.catalog.index.index_catalog import IndexCatalog
+
+CATALOG_TYPE_TO_CLASS = {
+    CatalogType.OBJECT: Catalog,
+    CatalogType.SOURCE: Catalog,
+    CatalogType.ASSOCIATION: AssociationCatalog,
+    CatalogType.INDEX: IndexCatalog,
+    CatalogType.MARGIN: MarginCatalog,
+}
+
+
+def read_from_hipscat(
+    catalog_path: str,
+    catalog_type: CatalogType | None = None,
+    storage_options: dict | None = None,
+) -> Dataset:
+    """Reads a HiPSCat Catalog from a HiPSCat directory
+
+    Args:
+        catalog_path (str): path to the root directory of the catalog
+        catalog_type (CatalogType): Default `None`. By default, the type of the catalog is loaded
+            from the catalog info and the corresponding object type is returned. Python's type hints
+            cannot allow a return type specified by a loaded value, so to use the correct return
+            type for type checking, the type of the catalog can be specified here. Use by specifying
+            the `CatalogType` value for that catalog.
+        storage_options (dict): dictionary that contains abstract filesystem credentials
+
+    Returns:
+        The initialized catalog object
+
+    Raises:
+        NotImplementedError: if no loader class is registered for the catalog type
+    """
+    catalog_type_to_use = (
+        _read_dataset_class_from_metadata(catalog_path, storage_options=storage_options)
+        if catalog_type is None
+        else catalog_type
+    )
+    loader = _get_loader_from_catalog_type(catalog_type_to_use)
+    # Forward storage_options so the catalog load uses the credentials the caller supplied.
+    return loader.read_from_hipscat(catalog_path, storage_options=storage_options)
+
+
+def _read_dataset_class_from_metadata(
+    catalog_base_path: str, storage_options: dict | None = None
+) -> CatalogType:
+    """Reads the catalog type recorded in the catalog info file under `catalog_base_path`"""
+    catalog_base_dir = io.file_io.get_file_pointer_from_path(catalog_base_path)
+    catalog_info_path = io.paths.get_catalog_info_pointer(catalog_base_dir)
+    catalog_info = BaseCatalogInfo.read_from_metadata_file(catalog_info_path, storage_options=storage_options)
+    return catalog_info.catalog_type
+
+
+def _get_loader_from_catalog_type(catalog_type: CatalogType) -> Type[Dataset]:
+    """Returns the class registered in CATALOG_TYPE_TO_CLASS for `catalog_type`"""
+    if catalog_type not in CATALOG_TYPE_TO_CLASS:
+        raise NotImplementedError(f"Cannot load catalog of type {catalog_type}")
+    return CATALOG_TYPE_TO_CLASS[catalog_type]
diff --git a/tests/conftest.py b/tests/conftest.py index 38cfedfd..8db5ab9a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -18,6 +18,8 @@ SMALL_SKY_DIR_NAME = "small_sky" SMALL_SKY_ORDER1_DIR_NAME = "small_sky_order1" SMALL_SKY_TO_SMALL_SKY_ORDER1_DIR_NAME = "small_sky_to_small_sky_order1" +SMALL_SKY_SOURCE_OBJECT_INDEX_DIR_NAME = "small_sky_source_object_index" + TEST_DIR = os.path.dirname(__file__) # pylint: disable=missing-function-docstring, redefined-outer-name @@ -48,6 +50,11 @@ def small_sky_to_small_sky_order1_dir(test_data_dir): return os.path.join(test_data_dir, SMALL_SKY_TO_SMALL_SKY_ORDER1_DIR_NAME) +@pytest.fixture +def small_sky_source_object_index_dir(test_data_dir): + return os.path.join(test_data_dir, SMALL_SKY_SOURCE_OBJECT_INDEX_DIR_NAME) + + @pytest.fixture def assert_catalog_info_matches_dict(): def assert_match(catalog_info: BaseCatalogInfo, dictionary: dict): diff --git
a/tests/hipscat/catalog/association_catalog/test_association_catalog.py b/tests/hipscat/catalog/association_catalog/test_association_catalog.py index 2668727f..7ea5764b 100644 --- a/tests/hipscat/catalog/association_catalog/test_association_catalog.py +++ b/tests/hipscat/catalog/association_catalog/test_association_catalog.py @@ -7,6 +7,7 @@ from hipscat.catalog import CatalogType from hipscat.catalog.association_catalog.association_catalog import AssociationCatalog from hipscat.catalog.association_catalog.partition_join_info import PartitionJoinInfo +from hipscat.loaders import read_from_hipscat from hipscat.pixel_math import HealpixPixel from hipscat.pixel_tree.pixel_node_type import PixelNodeType @@ -51,7 +52,9 @@ def test_different_join_pixels_type(association_catalog_info, association_catalo def test_read_from_file(association_catalog_path, association_catalog_join_pixels): - catalog = AssociationCatalog.read_from_hipscat(association_catalog_path) + catalog = read_from_hipscat(association_catalog_path) + + assert isinstance(catalog, AssociationCatalog) assert catalog.on_disk assert catalog.catalog_path == association_catalog_path assert len(catalog.get_join_pixels()) == 4 @@ -70,7 +73,7 @@ def test_empty_directory(tmp_path, association_catalog_info_data, association_ca """Test loading empty or incomplete data""" ## Path doesn't exist with pytest.raises(FileNotFoundError): - AssociationCatalog.read_from_hipscat(os.path.join("path", "empty")) + read_from_hipscat(os.path.join("path", "empty")) catalog_path = os.path.join(tmp_path, "empty") os.makedirs(catalog_path, exist_ok=True) @@ -85,20 +88,20 @@ def test_empty_directory(tmp_path, association_catalog_info_data, association_ca metadata_file.write(json.dumps(association_catalog_info_data)) with pytest.raises(FileNotFoundError, match="metadata"): - AssociationCatalog.read_from_hipscat(catalog_path) + read_from_hipscat(catalog_path) ## Now we create the needed _metadata and everything is right. 
part_info = PartitionJoinInfo(association_catalog_join_pixels) part_info.write_to_metadata_files(catalog_path=catalog_path) with pytest.warns(UserWarning, match="slow"): - catalog = AssociationCatalog.read_from_hipscat(catalog_path) + catalog = read_from_hipscat(catalog_path) assert catalog.catalog_name == association_catalog_info_data["catalog_name"] def test_csv_round_trip(tmp_path, association_catalog_info_data, association_catalog_join_pixels): ## Path doesn't exist with pytest.raises(FileNotFoundError): - AssociationCatalog.read_from_hipscat(os.path.join("path", "empty")) + read_from_hipscat(os.path.join("path", "empty")) catalog_path = os.path.join(tmp_path, "empty") os.makedirs(catalog_path, exist_ok=True) @@ -108,17 +111,17 @@ def test_csv_round_trip(tmp_path, association_catalog_info_data, association_cat metadata_file.write(json.dumps(association_catalog_info_data)) with pytest.raises(FileNotFoundError, match="partition"): - AssociationCatalog.read_from_hipscat(catalog_path) + read_from_hipscat(catalog_path) file_name = os.path.join(catalog_path, "partition_info.csv") with open(file_name, "w", encoding="utf-8") as metadata_file: # dump some garbage in there - just needs to exist. 
metadata_file.write(json.dumps(association_catalog_info_data)) with pytest.raises(FileNotFoundError, match="partition"): - AssociationCatalog.read_from_hipscat(catalog_path) + read_from_hipscat(catalog_path) part_info = PartitionJoinInfo(association_catalog_join_pixels) part_info.write_to_csv(catalog_path=catalog_path) - catalog = AssociationCatalog.read_from_hipscat(catalog_path) + catalog = read_from_hipscat(catalog_path) pd.testing.assert_frame_equal(catalog.get_join_pixels(), association_catalog_join_pixels) diff --git a/tests/hipscat/catalog/index/test_index_catalog.py b/tests/hipscat/catalog/index/test_index_catalog.py index 645e6caf..6a773ae5 100644 --- a/tests/hipscat/catalog/index/test_index_catalog.py +++ b/tests/hipscat/catalog/index/test_index_catalog.py @@ -1,17 +1,16 @@ -import os - import numpy.testing as npt from hipscat.catalog.index.index_catalog import IndexCatalog +from hipscat.loaders import read_from_hipscat from hipscat.pixel_math import HealpixPixel -def test_loc_partition(test_data_dir): - index_catalog_dir = os.path.join(test_data_dir, "small_sky_source_object_index") - catalog = IndexCatalog.read_from_hipscat(index_catalog_dir) +def test_loc_partition(small_sky_source_object_index_dir): + catalog = read_from_hipscat(small_sky_source_object_index_dir) + assert isinstance(catalog, IndexCatalog) assert catalog.on_disk - assert catalog.catalog_path == index_catalog_dir + assert catalog.catalog_path == small_sky_source_object_index_dir npt.assert_array_equal(catalog.loc_partitions([700]), [HealpixPixel(2, 184)]) npt.assert_array_equal(catalog.loc_partitions([707]), [HealpixPixel(2, 176), HealpixPixel(2, 178)]) diff --git a/tests/hipscat/catalog/loaders/test_read_from_hipscat.py b/tests/hipscat/catalog/loaders/test_read_from_hipscat.py new file mode 100644 index 00000000..f19da5cb --- /dev/null +++ b/tests/hipscat/catalog/loaders/test_read_from_hipscat.py @@ -0,0 +1,11 @@ +import pytest + +from hipscat.catalog import CatalogType +from 
hipscat.loaders import read_from_hipscat + + +def test_read_from_hipscat_wrong_catalog_type(small_sky_dir): + with pytest.raises(ValueError, match="must have type"): + read_from_hipscat(small_sky_dir, catalog_type=CatalogType.ASSOCIATION) + with pytest.raises(NotImplementedError, match="load catalog of type"): + read_from_hipscat(small_sky_dir, catalog_type="unknown") diff --git a/tests/hipscat/catalog/margin_cache/test_margin_catalog.py b/tests/hipscat/catalog/margin_cache/test_margin_catalog.py index 6812bdeb..db62a522 100644 --- a/tests/hipscat/catalog/margin_cache/test_margin_catalog.py +++ b/tests/hipscat/catalog/margin_cache/test_margin_catalog.py @@ -4,6 +4,7 @@ import pytest from hipscat.catalog import CatalogType, MarginCatalog, PartitionInfo +from hipscat.loaders import read_from_hipscat from hipscat.pixel_tree.pixel_node_type import PixelNodeType @@ -33,7 +34,9 @@ def test_wrong_catalog_info_type(catalog_info, margin_catalog_pixels): def test_read_from_file(margin_catalog_path, margin_catalog_pixels): - catalog = MarginCatalog.read_from_hipscat(margin_catalog_path) + catalog = read_from_hipscat(margin_catalog_path) + + assert isinstance(catalog, MarginCatalog) assert catalog.on_disk assert catalog.catalog_path == margin_catalog_path assert len(catalog.get_healpix_pixels()) == len(margin_catalog_pixels) @@ -46,18 +49,19 @@ def test_read_from_file(margin_catalog_path, margin_catalog_pixels): assert info.margin_threshold == 7200 +# pylint: disable=duplicate-code def test_empty_directory(tmp_path, margin_cache_catalog_info_data, margin_catalog_pixels): """Test loading empty or incomplete data""" ## Path doesn't exist with pytest.raises(FileNotFoundError): - MarginCatalog.read_from_hipscat(os.path.join("path", "empty")) + read_from_hipscat(os.path.join("path", "empty")) catalog_path = os.path.join(tmp_path, "empty") os.makedirs(catalog_path, exist_ok=True) ## Path exists but there's nothing there - with pytest.raises(FileNotFoundError, match="catalog info"): 
- MarginCatalog.read_from_hipscat(catalog_path) + with pytest.raises(FileNotFoundError, match="catalog_info"): + read_from_hipscat(catalog_path) ## catalog_info file exists - getting closer file_name = os.path.join(catalog_path, "catalog_info.json") @@ -65,12 +69,12 @@ def test_empty_directory(tmp_path, margin_cache_catalog_info_data, margin_catalo metadata_file.write(json.dumps(margin_cache_catalog_info_data)) with pytest.raises(FileNotFoundError, match="metadata"): - MarginCatalog.read_from_hipscat(catalog_path) + read_from_hipscat(catalog_path) ## Now we create the needed _metadata and everything is right. part_info = PartitionInfo.from_healpix(margin_catalog_pixels) part_info.write_to_metadata_files(catalog_path=catalog_path) with pytest.warns(UserWarning, match="slow"): - catalog = MarginCatalog.read_from_hipscat(catalog_path) + catalog = read_from_hipscat(catalog_path) assert catalog.catalog_name == margin_cache_catalog_info_data["catalog_name"] diff --git a/tests/hipscat/catalog/test_catalog.py b/tests/hipscat/catalog/test_catalog.py index 5be3a86e..8fadddaf 100644 --- a/tests/hipscat/catalog/test_catalog.py +++ b/tests/hipscat/catalog/test_catalog.py @@ -7,6 +7,7 @@ import pytest from hipscat.catalog import Catalog, CatalogType, PartitionInfo +from hipscat.loaders import read_from_hipscat from hipscat.pixel_math import HealpixPixel from hipscat.pixel_math.box_filter import _generate_ra_strip_pixel_tree from hipscat.pixel_math.validators import ValidatorsErrors @@ -81,20 +82,31 @@ def test_get_pixels_list(catalog_info, catalog_pixels): def test_load_catalog_small_sky(small_sky_dir): """Instantiate a catalog with 1 pixel""" - cat = Catalog.read_from_hipscat(small_sky_dir) + cat = read_from_hipscat(small_sky_dir) + assert isinstance(cat, Catalog) assert cat.catalog_name == "small_sky" assert len(cat.get_healpix_pixels()) == 1 def test_load_catalog_small_sky_order1(small_sky_order1_dir): """Instantiate a catalog with 4 pixels""" - cat = 
Catalog.read_from_hipscat(small_sky_order1_dir) + cat = read_from_hipscat(small_sky_order1_dir) + assert isinstance(cat, Catalog) assert cat.catalog_name == "small_sky_order1" assert len(cat.get_healpix_pixels()) == 4 +def test_load_catalog_small_sky_source(small_sky_source_dir): + """Instantiate a source catalog with 14 pixels""" + cat = read_from_hipscat(small_sky_source_dir) + + assert isinstance(cat, Catalog) + assert cat.catalog_name == "small_sky_source_catalog" + assert len(cat.get_healpix_pixels()) == 14 + + def test_cone_filter(small_sky_order1_catalog): filtered_catalog = small_sky_order1_catalog.filter_by_cone(315, -66.443, 0.1) filtered_pixels = filtered_catalog.get_healpix_pixels() @@ -412,14 +424,14 @@ def test_empty_directory(tmp_path): """Test loading empty or incomplete data""" ## Path doesn't exist with pytest.raises(FileNotFoundError): - Catalog.read_from_hipscat(os.path.join("path", "empty")) + read_from_hipscat(os.path.join("path", "empty")) catalog_path = os.path.join(tmp_path, "empty") os.makedirs(catalog_path, exist_ok=True) ## Path exists but there's nothing there - with pytest.raises(FileNotFoundError, match="catalog info"): - Catalog.read_from_hipscat(catalog_path) + with pytest.raises(FileNotFoundError, match="catalog_info"): + read_from_hipscat(catalog_path) ## catalog_info file exists - getting closer file_name = os.path.join(catalog_path, "catalog_info.json") @@ -427,14 +439,14 @@ def test_empty_directory(tmp_path): metadata_file.write('{"catalog_name":"empty", "catalog_type":"source"}') with pytest.raises(FileNotFoundError, match="metadata"): - Catalog.read_from_hipscat(catalog_path) + read_from_hipscat(catalog_path) ## Now we create the needed _metadata and everything is right. 
part_info = PartitionInfo.from_healpix([HealpixPixel(0, 11)]) part_info.write_to_metadata_files(catalog_path=catalog_path) with pytest.warns(UserWarning, match="slow"): - catalog = Catalog.read_from_hipscat(catalog_path) + catalog = read_from_hipscat(catalog_path) assert catalog.catalog_name == "empty" diff --git a/tests/hipscat/conftest.py b/tests/hipscat/conftest.py index e1efea5c..d29f448c 100644 --- a/tests/hipscat/conftest.py +++ b/tests/hipscat/conftest.py @@ -1,6 +1,6 @@ import pytest -from hipscat.catalog import Catalog +from hipscat.loaders import read_from_hipscat from hipscat.pixel_math import HealpixPixel from hipscat.pixel_tree.pixel_node import PixelNode from hipscat.pixel_tree.pixel_node_type import PixelNodeType @@ -52,7 +52,7 @@ def leaf_pixel_node(leaf_pixel_node_data): @pytest.fixture def small_sky_catalog(small_sky_dir): - return Catalog.read_from_hipscat(small_sky_dir) + return read_from_hipscat(small_sky_dir) @pytest.fixture() @@ -64,7 +64,7 @@ def small_sky_pixels(): @pytest.fixture def small_sky_order1_catalog(small_sky_order1_dir): - return Catalog.read_from_hipscat(small_sky_order1_dir) + return read_from_hipscat(small_sky_order1_dir) @pytest.fixture() diff --git a/tests/hipscat/inspection/test_visualize_catalog.py b/tests/hipscat/inspection/test_visualize_catalog.py index cf308756..236e5357 100644 --- a/tests/hipscat/inspection/test_visualize_catalog.py +++ b/tests/hipscat/inspection/test_visualize_catalog.py @@ -2,13 +2,14 @@ from hipscat.catalog import Catalog from hipscat.inspection import plot_pixel_list, plot_pixels, plot_points +from hipscat.loaders import read_from_hipscat @pytest.mark.parametrize("projection", ["moll", "gnom", "cart", "orth"]) def test_generate_map(small_sky_dir, projection): """Basic test that map data can be generated (does not test that a plot is rendered)""" - cat = Catalog.read_from_hipscat(small_sky_dir) + cat = read_from_hipscat(small_sky_dir) plot_pixels(cat, projection=projection, draw_map=False) 
plot_points(cat, projection=projection, draw_map=False) @@ -16,7 +17,7 @@ def test_generate_map(small_sky_dir, projection): def test_generate_map_unknown_projection(small_sky_dir): """Test for error with unknown projection type""" - cat = Catalog.read_from_hipscat(small_sky_dir) + cat = read_from_hipscat(small_sky_dir) with pytest.raises(NotImplementedError): plot_pixels(cat, projection=None, draw_map=False) @@ -30,7 +31,7 @@ def test_generate_map_unknown_projection(small_sky_dir): def test_generate_map_order1(small_sky_order1_dir): """Basic test that map data can be generated (does not test that a plot is rendered)""" - cat = Catalog.read_from_hipscat(small_sky_order1_dir) + cat = read_from_hipscat(small_sky_order1_dir) plot_pixels(cat, draw_map=False) plot_points(cat, draw_map=False)