From 6be660d57c921aec1d581a3f20c0a2d28b6c10fd Mon Sep 17 00:00:00 2001 From: delucchi-cmu Date: Tue, 14 Nov 2023 10:49:30 -0500 Subject: [PATCH 1/8] Load partition info from metadata file. --- .../healpix_dataset/healpix_dataset.py | 27 +++-- src/hipscat/catalog/partition_info.py | 56 ++++++++- src/hipscat/io/__init__.py | 5 + src/hipscat/io/parquet_metadata.py | 98 +++++++++++++++ src/hipscat/io/validation.py | 18 ++- src/hipscat/io/write_metadata.py | 27 +---- src/hipscat/pixel_math/healpix_pixel.py | 19 ++- tests/conftest.py | 26 ++++ tests/data/pixel_trees/aligned_2_3_inner.csv | 11 -- tests/data/pixel_trees/aligned_2_3_left.csv | 13 -- tests/data/pixel_trees/aligned_2_3_outer.csv | 18 --- tests/data/pixel_trees/aligned_2_3_right.csv | 16 --- tests/data/small_sky/_common_metadata | Bin 0 -> 3997 bytes tests/data/small_sky/_metadata | Bin 0 -> 5130 bytes tests/data/small_sky_order1/_common_metadata | Bin 0 -> 445 bytes tests/data/small_sky_order1/_metadata | Bin 0 -> 2202 bytes tests/data/small_sky_source/_common_metadata | Bin 0 -> 5506 bytes tests/data/small_sky_source/_metadata | Bin 0 -> 29243 bytes tests/data/small_sky_source/point_map.fits | Bin 0 -> 8640 bytes .../_common_metadata | Bin 0 -> 556 bytes .../small_sky_to_small_sky_order1/_metadata | Bin 0 -> 2929 bytes tests/hipscat/catalog/test_catalog.py | 11 +- tests/hipscat/catalog/test_partition_info.py | 25 +++- tests/hipscat/io/file_io/test_file_io.py | 8 -- .../hipscat/io/file_io/test_file_pointers.py | 13 +- tests/hipscat/io/test_parquet_metadata.py | 113 ++++++++++++++++++ tests/hipscat/io/test_validation.py | 6 +- tests/hipscat/pixel_tree/conftest.py | 106 +++++++++++----- 28 files changed, 459 insertions(+), 157 deletions(-) create mode 100644 src/hipscat/io/parquet_metadata.py delete mode 100644 tests/data/pixel_trees/aligned_2_3_inner.csv delete mode 100644 tests/data/pixel_trees/aligned_2_3_left.csv delete mode 100644 tests/data/pixel_trees/aligned_2_3_outer.csv delete mode 100644 tests/data/pixel_trees/aligned_2_3_right.csv create mode 100644 tests/data/small_sky/_common_metadata create mode 100644 tests/data/small_sky/_metadata create mode 100644 tests/data/small_sky_order1/_common_metadata create mode 100644 tests/data/small_sky_order1/_metadata create mode 100644 tests/data/small_sky_source/_common_metadata create mode 100644 tests/data/small_sky_source/_metadata create mode 100644 tests/data/small_sky_source/point_map.fits create mode 100644 tests/data/small_sky_to_small_sky_order1/_common_metadata create mode 100644 tests/data/small_sky_to_small_sky_order1/_metadata create mode 100644 tests/hipscat/io/test_parquet_metadata.py diff --git a/src/hipscat/catalog/healpix_dataset/healpix_dataset.py b/src/hipscat/catalog/healpix_dataset/healpix_dataset.py index d9b98025..18374e37 100644 --- a/src/hipscat/catalog/healpix_dataset/healpix_dataset.py +++ b/src/hipscat/catalog/healpix_dataset/healpix_dataset.py @@ -79,18 +79,29 @@ def _get_pixel_tree_from_pixels(pixels: PixelInputTypes) -> PixelTree: @classmethod def _read_args( - cls, catalog_base_dir: FilePointer, storage_options: Union[Dict[Any, Any], None] = None + cls, + catalog_base_dir: FilePointer, + storage_options: Union[Dict[Any, Any], None] = None, ) -> Tuple[CatalogInfoClass, PartitionInfo]: args = super()._read_args(catalog_base_dir, storage_options=storage_options) - partition_info_file = paths.get_partition_info_pointer(catalog_base_dir) - partition_info = PartitionInfo.read_from_file(partition_info_file, storage_options=storage_options) + metadata_file = 
paths.get_parquet_metadata_pointer(catalog_base_dir) + if file_io.does_file_or_directory_exist(metadata_file, storage_options=storage_options): + partition_info = PartitionInfo.read_from_file(metadata_file, storage_options=storage_options) + else: + partition_info_file = paths.get_partition_info_pointer(catalog_base_dir) + partition_info = PartitionInfo.read_from_csv(partition_info_file, storage_options=storage_options) return args + (partition_info,) @classmethod - def _check_files_exist( - cls, catalog_base_dir: FilePointer, storage_options: Union[Dict[Any, Any], None] = None - ): + def _check_files_exist(cls, catalog_base_dir: FilePointer, storage_options: dict = None): super()._check_files_exist(catalog_base_dir, storage_options=storage_options) + partition_info_file = paths.get_partition_info_pointer(catalog_base_dir) - if not file_io.does_file_or_directory_exist(partition_info_file, storage_options=storage_options): - raise FileNotFoundError(f"No partition info found where expected: {str(partition_info_file)}") + metadata_file = paths.get_parquet_metadata_pointer(catalog_base_dir) + if not ( + file_io.does_file_or_directory_exist(partition_info_file, storage_options=storage_options) + or file_io.does_file_or_directory_exist(metadata_file, storage_options=storage_options) + ): + raise FileNotFoundError( + f"_metadata or partition info file is required in catalog directory {catalog_base_dir}" + ) diff --git a/src/hipscat/catalog/partition_info.py b/src/hipscat/catalog/partition_info.py index b87a3006..3230cdbf 100644 --- a/src/hipscat/catalog/partition_info.py +++ b/src/hipscat/catalog/partition_info.py @@ -3,8 +3,14 @@ import numpy as np import pandas as pd +import pyarrow as pa from hipscat.io import FilePointer, file_io +from hipscat.io.parquet_metadata import ( + read_row_group_fragments, + row_group_stat_single_value, + write_parquet_metadata_for_batches, +) from hipscat.pixel_math import HealpixPixel @@ -44,10 +50,54 @@ def write_to_file(self, partition_info_file: FilePointer): """ file_io.write_dataframe_to_csv(self.as_dataframe(), partition_info_file, index=False) + def write_to_metadata_files(self, catalog_path: FilePointer, storage_options: dict = None): + """Generate parquet metadata, using the known partitions. + + Args: + catalog_path (str): base path for the catalog + storage_options: dictionary that contains abstract filesystem credentials + """ + batches = [ + pa.RecordBatch.from_arrays( + [[pixel.order], [pixel.dir], [pixel.pixel]], + names=[ + self.METADATA_ORDER_COLUMN_NAME, + self.METADATA_DIR_COLUMN_NAME, + self.METADATA_PIXEL_COLUMN_NAME, + ], + ) + for pixel in self.get_healpix_pixels() + ] + + write_parquet_metadata_for_batches(batches, catalog_path, storage_options) + + @classmethod + def read_from_file(cls, metadata_file: FilePointer, storage_options: dict = None): + """Read partition info from a `_metadata` file to create an object + + Args: + metadata_file: FilePointer to the `_metadata` file + storage_options: dictionary that contains abstract filesystem credentials + + Returns: + A `PartitionInfo` object with the data from the file + """ + pixel_list = [ + HealpixPixel( + row_group_stat_single_value(row_group,cls.METADATA_ORDER_COLUMN_NAME), + row_group_stat_single_value(row_group,cls.METADATA_PIXEL_COLUMN_NAME), + ) + for row_group in read_row_group_fragments(metadata_file, storage_options) + ] + ## Remove duplicates, preserving order. + ## In the case of association partition join info, we may have multiple entries + ## for the primary order/pixels. 
+ pixel_list = list(dict.fromkeys(pixel_list)) + + return cls(pixel_list) + @classmethod - def read_from_file( - cls, partition_info_file: FilePointer, storage_options: Union[Dict[Any, Any], None] = None - ): + def read_from_csv(cls, partition_info_file: FilePointer, storage_options: dict = None): """Read partition info from a `partition_info.csv` file to create an object Args: diff --git a/src/hipscat/io/__init__.py b/src/hipscat/io/__init__.py index 3f046d75..78098bf5 100644 --- a/src/hipscat/io/__init__.py +++ b/src/hipscat/io/__init__.py @@ -1,6 +1,11 @@ """Utilities for reading and writing catalog files""" from .file_io import FilePointer, get_file_pointer_from_path +from .parquet_metadata import ( + read_row_group_fragments, + row_group_stat_single_value, + write_parquet_metadata_for_batches, +) from .paths import ( create_hive_directory_name, create_hive_parquet_file_name, diff --git a/src/hipscat/io/parquet_metadata.py b/src/hipscat/io/parquet_metadata.py new file mode 100644 index 00000000..97037665 --- /dev/null +++ b/src/hipscat/io/parquet_metadata.py @@ -0,0 +1,98 @@ +"""Utility functions for handling parquet metadata files""" +import tempfile +from typing import List + +import pyarrow as pa +import pyarrow.dataset as pds +import pyarrow.parquet as pq + +from hipscat.io import file_io, paths +from hipscat.io.file_io.file_pointer import get_fs, strip_leading_slash_for_pyarrow + + +def row_group_stat_single_value(row_group, stat_key): + """Convenience method to find the min and max inside a statistics dictionary, + and raise an error if they're unequal.""" + if stat_key not in row_group.statistics: + raise ValueError(f"row group doesn't have expected key {stat_key}") + stat_dict = row_group.statistics[stat_key] + min_val = stat_dict["min"] + max_val = stat_dict["max"] + if min_val != max_val: + raise ValueError(f"stat min != max ({min_val} != {max_val})") + return min_val + + +def write_parquet_metadata(catalog_path, storage_options: dict = None, output_path: str = None): + """Generate parquet metadata, using the already-partitioned parquet files + for this catalog + + Args: + catalog_path (str): base path for the catalog + storage_options: dictionary that contains abstract filesystem credentials + output_path (str): base path for writing out metadata files + defaults to `catalog_path` if unspecified + """ + dataset = file_io.read_parquet_dataset(catalog_path, storage_options=storage_options) + metadata_collector = [] + + for hips_file in dataset.files: + hips_file_pointer = file_io.get_file_pointer_from_path(hips_file, include_protocol=catalog_path) + single_metadata = file_io.read_parquet_metadata(hips_file_pointer, storage_options=storage_options) + relative_path = hips_file[len(catalog_path) :] + single_metadata.set_file_path(relative_path) + metadata_collector.append(single_metadata) + + ## Write out the two metadata files + if output_path is None: + output_path = catalog_path + catalog_base_dir = file_io.get_file_pointer_from_path(output_path) + metadata_file_pointer = paths.get_parquet_metadata_pointer(catalog_base_dir) + common_metadata_file_pointer = paths.get_common_metadata_pointer(catalog_base_dir) + + file_io.write_parquet_metadata( + dataset.schema, + metadata_file_pointer, + metadata_collector=metadata_collector, + write_statistics=True, + storage_options=storage_options, + ) + file_io.write_parquet_metadata( + dataset.schema, common_metadata_file_pointer, storage_options=storage_options + ) + + +def write_parquet_metadata_for_batches( + batches: 
List[pa.RecordBatch], output_path: str = None, storage_options: dict = None +): + """Write parquet metadata files for some pyarrow table batches. + This writes the batches to a temporary parquet dataset using local storage, and + + Args: + batches (List[pa.RecordBatch]): create one batch per group of data (partition or row group) + output_path (str): base path for writing out metadata files + defaults to `catalog_path` if unspecified + storage_options: dictionary that contains abstract filesystem credentials + """ + + temp_info_table = pa.Table.from_batches(batches) + + with tempfile.TemporaryDirectory() as temp_pq_file: + pq.write_to_dataset(temp_info_table, temp_pq_file) + write_parquet_metadata(temp_pq_file, storage_options=storage_options, output_path=output_path) + + +def read_row_group_fragments(metadata_file, storage_options: dict = None): + """Generator for metadata fragment row groups in a parquet metadata file. + + Args: + metadata_file (str): path to `_metadata` file. + storage_options: dictionary that contains abstract filesystem credentials + """ + file_system, dir_pointer = get_fs(file_pointer=metadata_file, storage_options=storage_options) + dir_pointer = strip_leading_slash_for_pyarrow(dir_pointer, file_system.protocol) + dataset = pds.parquet_dataset(dir_pointer, filesystem=file_system) + + for frag in dataset.get_fragments(): + for row_group in frag.row_groups: + yield row_group diff --git a/src/hipscat/io/validation.py b/src/hipscat/io/validation.py index 51339bdb..b62c8f37 100644 --- a/src/hipscat/io/validation.py +++ b/src/hipscat/io/validation.py @@ -1,5 +1,5 @@ from hipscat.catalog.dataset.catalog_info_factory import from_catalog_dir -from hipscat.io import get_partition_info_pointer +from hipscat.io import get_partition_info_pointer, get_parquet_metadata_pointer from hipscat.io.file_io.file_pointer import FilePointer, is_regular_file @@ -13,7 +13,7 @@ def is_valid_catalog(pointer: FilePointer) -> bool: True if both the catalog_info and partition_info files are valid, False otherwise """ - return is_catalog_info_valid(pointer) and is_partition_info_valid(pointer) + return is_catalog_info_valid(pointer) and (is_partition_info_valid(pointer) or is_metadata_valid(pointer)) def is_catalog_info_valid(pointer): @@ -46,3 +46,17 @@ def is_partition_info_valid(pointer): partition_info_pointer = get_partition_info_pointer(pointer) partition_info_exists = is_regular_file(partition_info_pointer) return partition_info_exists + + +def is_metadata_valid(pointer): + """Checks if _metadata is valid for a given base catalog pointer + + Args: + pointer: pointer to base catalog directory + + Returns: + True if the _metadata file exists, False otherwise + """ + metadata_file = get_parquet_metadata_pointer(pointer) + metadata_file_exists = is_regular_file(metadata_file) + return metadata_file_exists diff --git a/src/hipscat/io/write_metadata.py b/src/hipscat/io/write_metadata.py index ed4c44cd..37fc61ce 100644 --- a/src/hipscat/io/write_metadata.py +++ b/src/hipscat/io/write_metadata.py @@ -10,6 +10,7 @@ import pandas as pd from hipscat.io import file_io, paths +from hipscat.io.parquet_metadata import write_parquet_metadata as wpm def write_json_file( @@ -116,30 +117,10 @@ def write_parquet_metadata(catalog_path, storage_options: Union[Dict[Any, Any], catalog_path (str): base path for the catalog storage_options: dictionary that contains abstract filesystem credentials """ - - dataset = file_io.read_parquet_dataset(catalog_path, storage_options=storage_options) - metadata_collector = [] - 
- for hips_file in dataset.files: - hips_file_pointer = file_io.get_file_pointer_from_path(hips_file, include_protocol=catalog_path) - single_metadata = file_io.read_parquet_metadata(hips_file_pointer, storage_options=storage_options) - relative_path = hips_file[len(catalog_path) :] - single_metadata.set_file_path(relative_path) - metadata_collector.append(single_metadata) - - ## Write out the two metadata files - catalog_base_dir = file_io.get_file_pointer_from_path(catalog_path) - metadata_file_pointer = paths.get_parquet_metadata_pointer(catalog_base_dir) - common_metadata_file_pointer = paths.get_common_metadata_pointer(catalog_base_dir) - - file_io.write_parquet_metadata( - dataset.schema, - metadata_file_pointer, - metadata_collector=metadata_collector, + wpm( + catalog_path=catalog_path, storage_options=storage_options, - ) - file_io.write_parquet_metadata( - dataset.schema, common_metadata_file_pointer, storage_options=storage_options + output_path=catalog_path, ) diff --git a/src/hipscat/pixel_math/healpix_pixel.py b/src/hipscat/pixel_math/healpix_pixel.py index 1b91e84e..fe317f82 100644 --- a/src/hipscat/pixel_math/healpix_pixel.py +++ b/src/hipscat/pixel_math/healpix_pixel.py @@ -53,7 +53,7 @@ def convert_to_lower_order(self, delta_order: int) -> HealpixPixel: if delta_order < 0: raise ValueError("delta order cannot be below zero") new_order = self.order - delta_order - new_pixel = math.floor(self.pixel / 4**delta_order) + new_pixel = math.floor(self.pixel / 4 ** delta_order) return HealpixPixel(new_order, new_pixel) def convert_to_higher_order(self, delta_order: int) -> List[HealpixPixel]: @@ -77,6 +77,21 @@ def convert_to_higher_order(self, delta_order: int) -> List[HealpixPixel]: raise ValueError("delta order cannot be below zero") pixels = [] new_order = self.order + delta_order - for new_pixel in range(self.pixel * 4**delta_order, (self.pixel + 1) * 4**delta_order): + for new_pixel in range(self.pixel * 4 ** delta_order, (self.pixel + 1) * 4 ** delta_order): pixels.append(HealpixPixel(new_order, new_pixel)) return pixels + + @property + def dir(self): + """Directory number for the pixel. + + This is necessary for file systems that limit to 10,000 subdirectories. 
+ The directory name will take the HiPS standard form of:: + + /Norder=/Dir= + + Where the directory number is calculated using integer division as:: + + (pixel_number/10000)*10000 + """ + return int(self.pixel / 10_000) * 10_000 diff --git a/tests/conftest.py b/tests/conftest.py index e07e6c76..58595a72 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -216,6 +216,32 @@ def source_catalog_info_file(test_data_dir) -> str: return os.path.join(test_data_dir, "small_sky_source", "catalog_info.json") +@pytest.fixture +def small_sky_source_dir(test_data_dir) -> str: + return os.path.join(test_data_dir, "small_sky_source") + + +@pytest.fixture +def small_sky_source_pixels(): + """Source catalog pixels""" + return [ + HealpixPixel(0, 4), + HealpixPixel(1, 47), + HealpixPixel(2, 176), + HealpixPixel(2, 177), + HealpixPixel(2, 178), + HealpixPixel(2, 179), + HealpixPixel(2, 180), + HealpixPixel(2, 181), + HealpixPixel(2, 182), + HealpixPixel(2, 183), + HealpixPixel(2, 184), + HealpixPixel(2, 185), + HealpixPixel(2, 186), + HealpixPixel(2, 187), + ] + + @pytest.fixture def association_catalog_info(association_catalog_info_data) -> AssociationCatalogInfo: return AssociationCatalogInfo(**association_catalog_info_data) diff --git a/tests/data/pixel_trees/aligned_2_3_inner.csv b/tests/data/pixel_trees/aligned_2_3_inner.csv deleted file mode 100644 index 696dc40e..00000000 --- a/tests/data/pixel_trees/aligned_2_3_inner.csv +++ /dev/null @@ -1,11 +0,0 @@ -Norder,Dir,Npix -1,0,33 -1,0,35 -1,0,40 -1,0,42 -1,0,43 -1,0,44 -1,0,46 -2,0,128 -2,0,130 -2,0,131 \ No newline at end of file diff --git a/tests/data/pixel_trees/aligned_2_3_left.csv b/tests/data/pixel_trees/aligned_2_3_left.csv deleted file mode 100644 index a7347b36..00000000 --- a/tests/data/pixel_trees/aligned_2_3_left.csv +++ /dev/null @@ -1,13 +0,0 @@ -Norder,Dir,Npix -1,0,33 -1,0,35 -1,0,40 -1,0,41 -1,0,42 -1,0,43 -1,0,44 -1,0,45 -1,0,46 -2,0,128 -2,0,130 -2,0,131 \ No newline at end of file diff --git a/tests/data/pixel_trees/aligned_2_3_outer.csv b/tests/data/pixel_trees/aligned_2_3_outer.csv deleted file mode 100644 index 6902c1b5..00000000 --- a/tests/data/pixel_trees/aligned_2_3_outer.csv +++ /dev/null @@ -1,18 +0,0 @@ -Norder,Dir,Npix -1,0,33 -1,0,34 -1,0,35 -1,0,36 -1,0,37 -1,0,40 -1,0,41 -1,0,42 -1,0,43 -1,0,44 -1,0,45 -1,0,46 -1,0,47 -2,0,128 -2,0,129 -2,0,130 -2,0,131 \ No newline at end of file diff --git a/tests/data/pixel_trees/aligned_2_3_right.csv b/tests/data/pixel_trees/aligned_2_3_right.csv deleted file mode 100644 index df302ddc..00000000 --- a/tests/data/pixel_trees/aligned_2_3_right.csv +++ /dev/null @@ -1,16 +0,0 @@ -Norder,Dir,Npix -1,0,33 -1,0,34 -1,0,35 -1,0,36 -1,0,37 -1,0,40 -1,0,42 -1,0,43 -1,0,44 -1,0,46 -1,0,47 -2,0,128 -2,0,129 -2,0,130 -2,0,131 \ No newline at end of file diff --git a/tests/data/small_sky/_common_metadata b/tests/data/small_sky/_common_metadata new file mode 100644 index 0000000000000000000000000000000000000000..a1505a289449f1419095c4f6c9dbc9a7f9262a58 GIT binary patch literal 3997 zcmcgv>u%ae6t>!`+ODcysZ~})swWBXo-RJ`xe?BcV@eJiozT z5MpD+7lNqy0zLRkH-W#YkPct`LRYIJU27q9J_cR>mF(l0A$3o+N_%cK zyO9;NymODYWLz7Y8h@=<#FAEFGBPerWkDHQm1Wsdt34C;GPD7)LA(=!2ihL5o&b$XUBM#ejkld5G)T9ADbEbX4t#0$z3l(OYx*uR5iZ_$VS%9h4g zwUbWOmdL)yvnt9bLv5EBzlz1E$M1!;7E;CjC$`D1ozrjpbVIAMrf$Zkyc4#iagtI(0y!4gq(@j*|-?~ za;|Hal{WJM;$L34-7HRHYC$gl6BfUN^>|-QR=iz#^ew42xG64Mcq0RH2tJkcRgElb zX9B6utp8%q(3IFZk!c)qwIwLA-mPIfQ0BHraqq$wTER_m!kK(-)|VA@r&bf{sJT%$ zBdEu3;mXPE%Hc+>1l`DP;_TVT;{L99z4vP|ob7mDJ#Fy!)^;n=-(B0SYnF@es_juH 
zO!AUHV{6{NoL#qAPIuHc$yti?yK;7JOZaqeTmn9H**W?@u=Gp`FYHQt;E1&l`U_5M z=VP?1CzC$8D|bORTQB*TZSr^4lLoy>_S>;KhKzejs!X)>3pQ5 zXlp--QrG(dFwm$jOpOidPpLd=P(n3bCwR!9&f_85D$H^g~|{EYQJA!`@f_@ z+xV$zn1hrF_a(UB=;YqSo<1ZU?0^u&$wqZT1?N{r_xMAvN)jG{G`&B8KDeB2n)rj> szU2zPhUVnc%#EJgVfdWh2al2q-x0kD9{%-&@~ literal 0 HcmV?d00001 diff --git a/tests/data/small_sky/_metadata b/tests/data/small_sky/_metadata new file mode 100644 index 0000000000000000000000000000000000000000..63b03188f42a9fb1b4446d058e8ea678f6837297 GIT binary patch literal 5130 zcmcgwOKclO7~Z5IN=redsVzsi5V>?|MO!}-w{=mfjGfrANu0!O{X!`0%l6v4YsYV= ztyFP9RZpA{LKO$j72?1lLa3?`>WL#l964}6y&$2ghaMvSnc211UMDS4VAXDR{>S{^ z|Cs+Dc?~7Jwy~)Pe#S9g6AF^b*^V=|vD2fDQCVbcNAO3LqaTMvNua!mDz_n3tK}+P z9tMW79tDl@c)2P{z{HC~3$p$-7O%){s`2_pL9WyUuD&5_qSQX^Tz)V)f1F_$`=_H* z&Z+y3x7~D)IgcCk;f;0Yyxt2HuDaQf>h^m_oq*eaj2WN8P;H~2X@5Ft|Kuq9`vg1T zbWB`51pC3`nD}9oe%YBbL#WKuIop^Gz%uYU27f^1h326u`p?`_3!uXH)Dmn*rcX;E@jC|c$rJEz!RPdOc9IQR4oAuz4><@)x!J)8K9nGOEPvR|?X zZSdLIoR- zTvw()boF;C*GK23U6f5*mqes)x-?FeKwfJo3P|K+Nf9^tY1GO_y;H%0u7)NHWl?g0 zwAN57oi3xy7bT&N>8e!cM6S+(_)V9jfw-*YvF?rQ;q#YWw{Q2r^rLXeBG&Iiq0UDdM0Uw9@Qsd6tHthA#jFD~Nhhn-UXN}yHsizc7XK)DFaMx3SuN?r12luL2Ua@< z^#BbGv;)g2v+@UwH@aofAI%2e^a?_&WzY>?imFN@ z8%fTleeov0l&tgGio7ZZ`KS^obLrq##uxUaeN-l@cxus5tjPP?UOwf|aj;i5T~LHh z^kOtzhrRp-K6L}@H^fxCwj4roR#g-}lF0FDxSqWRauL6mkF@YUI+l1&TvA$u7m9g# zRe}8zN>py;VLvaQ4*JtG3BM4*`y!e5#Wu64xQFt+iWmulKZ!!LSk6nGn8K^^W;OzN zF{V%~nOw}&S3|KtF4>2D3(KcwYH}A>C|1daq8OLTrINL5IMB$Xyb6|s7?p@UEm+{y zL_zQdJW@Jn@EYPmM#p2t3o-J%KVk0pjm%Nl7!#Y>kIp{wpkMLdkS?i%FL^ zERQN52^98<%K+Yb7IFwac{1G^X$O`=xIWGPhjZCKu;ENZ;aG(PHMbT|rV>GNM<32eocUsb1FV>j#^M}?`8hTSy zjd!xCum_(x=<9GE+m#ITC_yWcehf7V-?64h`-Bh9=N6X=G)UhpmKrOe*;Z5|y|))j zHQ5n-N|TpykB)YhiwNXm0T@s*Jg=H~tS#kXj0IQlYzyOc9M88<8S^FlS|mdc7_eTu z=z-_-hKcKH2f_tO$YU+A&(Xb z!EdZ}Ja|x>@xX`%eK-sGg&Xds+fCTl$a@`M4d5$?3S^Bz;dyR61d4HPfw-v4BSiFr zXh2`UiY9#$U+4+Nv=4Fs{Q>h=*FcM0A1G4MSnK%lMa7IC;syDM70i2}$QEpJ8FPX( z2}OOqxImIbspJz2Aq0026okIGg1is|Vn4so^S@1m2JsWqFb4?}?n`jL$>4hvee#fW zumeI6Cjl*BDmdRYx{E*LsuWR5AWiO1pbst^o9h0cxjFc;O8-slyilo}C-=c={lYg* WZh}+yoeZmqjXk^tMWg+Yw3>P&l~AHBNLA!#dGqHvv#?1g zWOm+T-eZQHA{&P;NWgNC_bDR~gaZ!VPKW!>woToqHKsaruW4958 zljqA?X#cg2IgY^bs4{R5l#mH-fCOHmSSf)KR~~T3Ct-aO;#cyN`3Waw<-M^r0g06& z7o16_ArrE|F Vob>v=zH`+PqJG5ho)A;IxPNU`aUcKy literal 0 HcmV?d00001 diff --git a/tests/data/small_sky_order1/_metadata b/tests/data/small_sky_order1/_metadata new file mode 100644 index 0000000000000000000000000000000000000000..dddd67547d7253086e8c653b0b37c81cbc0c4314 GIT binary patch literal 2202 zcmchZZ)@5>7{JdMG98Xew}b?xvx6Oj)<3DW)zQAV=;p@yZ_;jb?=%ZmwpN#JbAAC4 zDg7M#1Vf~ZzA7SRpK15RoF`geNWnwyo_j9$; zS}R2pv0Bmo_QU$F^gLmDrxcQc8Y?afilB5n%C!flu`jIpJZR8(q@WjfWRbQii!zc0 zM7tmz5SkH2Jw*&Lk)(di$ac!Li+*+V8 z(V;dLfi@QN^{LvLe()yD?5+2h!%ceYEaKMUJiXm29B0him5eXsd3x(>oMqfwSH*3W z!o-H&SOnf!%-5%SYb}B|ab|D*Fo&D;)(zoyXrA8wJ>@uK-u^D(%cXgGyM4x4#=TuF z(!y$h6bkzvUk=`iYDb7eTOrWiJ3S%q2&ogJOkNXG zCfTx+H4H*_A<7c+7DhT}Bb~vLu@3ot=$um7FHo9J407TmkD;ggSbGJ*;h9mhoJ{t} zy`GRGNaWa@?RT;x`*UzrA^){Oc5`%V8m5MG2zn8Pr+ml6s1La5&Di-^J?CU2_3}{Q z@HEr-l>gA6A>Chx^S*)S;I=`1vb~Mei2Df;HEXqK(YCAg$WO~YuT;+>dNP)XCA6(D PLW3W=q_5Zm`c?8Dg-w1p literal 0 HcmV?d00001 diff --git a/tests/data/small_sky_source/_common_metadata b/tests/data/small_sky_source/_common_metadata new file mode 100644 index 0000000000000000000000000000000000000000..aa1aa280c7a8c36565679e0d272c45d586ad831e GIT binary patch literal 5506 zcmcf_Yj4_C6tz*MRoy;pVoIuhh>-eeZ38B>rBbC`8!)E9Noazxrz!FaFtLpb=2_(* 
z?e|Rku%ECWv2(7C!Pum!l-2^{d)}{m?lEViRmkog{8!5CKN{J_c0PxTmgrpB0wU^000j51ob&cnllV_PsD0Qd}Ox<5g z4cC#>a80E?011!8;m9yYnJg|IR;=)8PxP#LAo8cHzU7T|6@L-Q28=iCdA2QiEb*#>7!uHd(I!`Z#G(1l}+R9ld!0c7;x^ZHf^I4Mx)M%QOtjOJ3S+1( ztx+|`w@n1Tty)64X+;`CZK>R-#kkT9#2wV87HO#W+Z1Wg89x{kNX*c zZ}Sp*Z&FhTyhkhG>P9*K8b2F9?dm;JDjwkk4)7cj%d@jEp4QZ z7TEK>g*qAyXXyFV9F47^i_pUHtNg1%4mxozRK9^^09VZ2WcHtsR_j!H7o6Yw|2#dS zSFb?`IfpM^9|$=nY>(6k=@G6+VJsWN&k$H$<7f3?4R`azQ*K(&xh$!>0ej_;cMB&@V9Y&mL^|r6MP3zQRHNh?q)egIqnOeTX1T=!39}8S# zpfU4;CY8D>$Wl6eTVDvTg_;ktN_|bbME;2(&BrGku@uL)HNMr=oSLtE1UkM{(D)h3 z32hO(M%|uK9M>pljt%l#wqVVAAg`cx*ixt1D(O7RC6>lCJ}Huz5AX%s;A`Mdt1k?O zJ#*2pHAkE(JlKmiagBDnD*LBgqug!BY+oY1R2*B8U0h?S726A$X@g(qR;ukOG&)~{ z_|=x1HbPwT;vkTrxouys+nYXmntRTOoPaU#6|f!n^KulMdHICHysTnLES)JAbVsy1 z#YR8yTk#ZWe%)bip>f&1=Ct~Hp`156td0H(jxyFDpSRfNXneVEROHn7f-hsO4soof z#|`Re@J0I|xAKDeo|gYD{mJ|*&U`9&zAk>6zQZ0X z%*^&J`IXvvS!(qSz8dYx?#CJKlZh-9!kTD6o$Rc43;P9)U0QtH_C%F%zg<#)QfhrF zJDx2U!@Y1Pd&-vTv?y$>aa5^2R%+!9n-er?wY0gF^b8xA~nBgU$j@#^7?K1#Hm(nzur&zug^_w>3r zfb%z;1p*(^a%V`J`z;x>I6i)^2j@<~z%yAcHf+#0)h#&R@f_l9OLuAu6VFeWDNL!h z(4BHIh?ipPuIOpZF`XTn7w3~EcRUwdIt#3OhSOf?j6Ky*CMPUT24vD)G|DhAiSUI< z3dX2PkAuf5j#UhfVIO8NX0dN^_G;mp#j&fRBmF15)p7n}0f8vEniu7xqqp!-L)B63M+}G(pjy~1m-tcYntvhV`fSbrCgKn5Pw0>HdkPvQ zE<^oJRLjy_FH|E2DEFX;>0VfaFJ7`&<-{0o0|-sO{-W;2;H`1kdH Ddy&Qe literal 0 HcmV?d00001 diff --git a/tests/data/small_sky_source/_metadata b/tests/data/small_sky_source/_metadata new file mode 100644 index 0000000000000000000000000000000000000000..f5083342cf4bd6d3cf168ab55542ef87a282a33f GIT binary patch literal 29243 zcmchg3tUxIzQ@l?IH2aE*kR*s9OkxqSqi90W^q74Oz@HMn))$9ovTwyVp_5}`l+}mjf*q0oE*U~ zI}7WhqRo6jAHP=ER39G^qj8x4I~gJ)5~A+VESs*;2uDqfTpMjpm5(YcOGg!!=}~oX z1c#u+tZ~*wkofpozKPe)4eOh!XH#BZK)WKYh!{lj9v0Zdr6jx&8`p1!C z12Il_HHNR&!eKQUS7YnV{5MW>mz}6)+DsRN3s}~`$7J{icDa^^zDd(voJV*_ju1T@ zFp~V6NSz~?hz?tIBIScLZ&zc}Uv_?YzB&kOmt0?O)%$|agpUps;7X9*xlO7 z3*SFvbv5Q4dHJn|;DuInp^U0=(eYc^1vV;1e&{!(;#VN=`RF&d-0E@>Dk12|%GEz- z_0URCM%BpI-^a>F#4z$*_mW+Afc$O8Z(DnF=NypFeC?sQvfJ;c^2(?h`R3WvT_y+( zfu;}Duh()7TGBC#WY3yDPji=cldCpH&C&BcnWiq~B9*Apa#k%Fb|;PQP{i?i+SE1j zH$1`0s2USR^W++Kk~#p!M9y5&ItNTp3suFrn(m?#&bH9iQ3pG;T#=R(E+D-RPIon6iTO`ZW|q__);=sN zVt>lWH7_Ll7v5mGE8emk?X(%a9B*>^5zxaD@t;^cc}Y$22A0STd;}S&@_-}P;Xsc1 zlOi8XBy3Ex&t>X-@Z@?oxb4^Y*iq#UJjg{6BiHi4EgB5~<<}xm+--pJfJBt;&$O}) znWW&L@(ymtIURTUGcDN*XBe?!k+8CHii{Oy)Q+)hJ2_KjklVTFgn~tIvhSRZlw8%5 z>d&>{#d*)Y(_WpD3K_TO^+Ce~*A&{bDWj^qbiCmxd$HwFGOEf&%P_bY2_3Le zIf=7QBE3IMAca3n7?V5K%!o@{Co&nqk_UA1#&JakQoea2X}dZB3cFB(PYn*9;0JqL zWmHvRhjYM*G}L1815PgKxoSN*l|7NvZJP*AsJIdHfqoP6K^ax$WZ(tHi7kV`O0%9T z)06h+Cz7UI$x5adWCNA==Cgr*w~4B<(!B$$C_w}^3bkB`mR#CCk(}5LHVoK9@t@dP z%S)bK@G_$*B1sa>e0d^iEkMy(R1`n4pz@Nc zf?jza#`oeJL&+tO@9xxbZDl&ro;8V-yfRUQ(}Y>xTn(ObYfJB8m!o#Xxy<3ofEB<~ z*ISc#KFtvMGR4qWAYuJ<>jdm4pPngpjl22RST9b>N7 zwvqb;lZwKGlER6%abi9hdW=|q3yFBkJsd1v1 zIwu%wKtD{lEKOr48X#AN8za+kCuw@^oFUPdd}BWh$WE#cEa8C7ScqnNQ` z3nr-lQ(CTFOKM+*R`9CiWyVSoOuy&zfqJots`JwHI(Si%34HKmYTKnF6}!NPfnH~! 
zpV(o`OX_rscQd*Ink3P^Z|KNS35sGde<_---5fhg8BviIzH}iw$$dqM}mBr&)Hhd*eN*Pt>rt^;ua@7cwVr*o?Ngt@gIh@ApnbhT!Q}?xh zQ=cx(o|I8_Hg;thSsRp7jE$Nzj^yB*4qzkiVoa!>lIjyfMZmi{o>a=HIvY7Vz{W^Y zfsZ0*uGE>-WE)A>Hlr$1$&InJ05rrT74bkx1wtKqu24_PcN$6Fi+D-lL5Wmt45^e+ zb;a!}VrpHqBIdyIYISVQrSe+dCA@lUX-K)A3UjcykR8WCE$Z$lX_CgEu*QF(G^G~iMH)A zl8QYjibcL&QputPZ!)4HsU+2sy+$(dM^t^As^TXWRbEn8#YJB^$w{Cj73fz$x7=Q# zC1;K}k)vfa>EL8%Ho4|pej87__xh6$KlrvCb*|x4XRhH^U{%gXPGhrbzBG**${USU zbZw zkN>uQWeTt=dhqxy`l|JmRm!M3GqsmSWfkLQOQV5QY}SLDA&&!dg8%1H$jkjdd^Pj3 zz$#@_otypVl&oTGT(CNj;(lb6muGpTD`nO8r%v4+C$LHxRc9lsmFcR4ITd50?j7hc zyO34KpFB|aH+rd0=XcNEe&JG|GOEr-b0^ps$tv(M=*(q}CuMC$()g7+R>_UA=salH zWfce&8Ms>;u07{e-MR9$g*_A<3@!z!@SrRVnPNmh@MT)2Q%u#42d zTuS&RWQ8)S&Pw$q#)>VgupQ{Q79HvCGm_39BrjIX394_yDmJRl%YlD@7bUB}2hXa? zez@b@j~%!?k5bPnxn&GoW^@HsNursTjihw|MX|`&%PLv4dXNznStY5~{bVG&enQm> zs*0ajRC!5V6`wu|HyFeuP_hd2PeHdl4EJ2?i=4@spCwjp{^yfdD=4eHt_FHl*iq;D zYsPbRaDk3%+V4CztGpyu)fJ7#s>CF*!jpZSd?~9|Ei>M45m=>+sF*{snK2738iT?@C#9M|$nX zB!N}Rs5%=(wP0f;tH8(U@!W;+BpZf!2P>Ua&8y_bSo{$)q!a-`=T#uoX5fbPq^-`0 z?E26NiuVX-Qu{gh zxFM{PTgLt`8C`)@lIWopCz98KqFBu9WtA-2i!&Utup+A@)hn$|q^T8E-=V7biL8>B z)KzicpU02ID$pOyg4lMjh}+ z>IOw0G$DB(rN1XNFu^N@XLO8{ix;eU_D8(V@nq8*t-8DT=kk%rlB|^X zYDvK^B<4+PBDTMk;s^P-ukcIHM;-~Hj8aC`c{$L*Of>~YNhZpgbR>5eO+>Gl92-Ch z^}F^D((?pDDWmF46j+%tgg_|CM1F;yT>2KTg6=){*s?GGiFc?57Cim)w*sG(QFSH; zj)RGjd;%8*$;Sy9w)O1VN{e*l_M%7uV{fx0<%O|LR@B()S z3!3z#@>9u+74w1Wd-M4~zuQFBc{y|nyog=4{16}dpwCX!e6A;jpM#GZ!6&9=47V`4 z0-q$&oEAMf{RN6*F|U(PjA-pwjHt*bNwuL>PxiK=>LIF%pU5Y9NnI5;?t=?TViG9l zQlQ`U6xSI@`rt|J3vD#z@Kpbr(+wTMlb(6&U$&D&cC@*=PYqm&GtjA}&@eWg+$A~{ z1j=SaD$tC~zlj^%s^#(m0SOdODI&JPAhfv*7;JF)cYA z1PS@bqg!u(={rowW7{@wpE#2TlrpN$OH(TY1zSKdW;*_=BekcH4j#32>+bCLhLpVP z_|MP2cg&9>N*Pt>rm})_#pYIonfxe2#``n)!E4J zWUhk>kYa4?`piHwPh$sl=5>eH&(hmdALrd2G)d3jo>E5D*(mP;8zV6VJ{ldkRtHkv zWgxBR45~0Cx5b_xm<$!!ASOktj;qpHSIQCN!;fF=Z1~XXWT+#)>Vbz{_sxC2PPy%KjmF@ivK=0yN>J z1vIF!iK_E*a1gvGF$F$&OjZ44AUl2nA2$S3a?7|f#OMl`l0>%*8A#jDD2k=LUQEfN zN3SxXBBmtO=a5d^ zqu*UTjGXdz4t{#pVLRen*FJa(<0jx#YumW7Ipra7s%PCOoH}|lw*`LPqjMeUTuwQ) zLE_Z245yS)JI+SslIs4FbkAXbzY8s%aFmAQ;eBIAHq21{}2#!gBHHNu-_MQ(s^Ra>TM4%r8%jLs&jMT zRp#QOz$wPY-hUcM!FLD>69|6&c>l{z{W^UfsbY**KQ=`UpSD~Qx2*)CAY<$-+_iW zk`&=UIR!$wdaexyDBr>_AavlJxVaLitPH1=QFX;FVDC$cR>ZWZ*Ky@KQh(Ngbe=&g z*gb0YNtgTB$0%%~>a3KVXBvPlr=VKk$z*uQsr`Eg(nW7jQZL?%L{540IiZZI^HS6U zUX+{yA3Uc@|K>mjFF>8#5KhT0<4iB3D{x8@?YZbc>Mo)v7V~;JC5x8*$cT!Zl2pt4 z97s+ds#a1}{6tR4OX{k);yJh>CMJQBQ=s2=1n^L(CCwKdN#iA&bcm+|Q(fPwq!X%T ziO*;&?Wl8=oks5H%>b%>mmS9jm8S%%BC~8q1y)TRgDljqtMsok8C0YGi(isIQxqnjh*0WZ6#qXuFXPk<7P${G8yfjpe z3M$6TnG$$F8J?4NHM-xjb;gD7@t39YGOFKx@R3CnRLZD2HwVu$16cu7jE%aF4Pmyw&XMYlrW)UY!t!oYF~jlm#fhncJ_-a^txZ1`*Q6| z0;rTxbvD{w0UIMh1wOKjT)vU)-QhqQwyOh`+!m`|0}U}pMLLK{!Go&%RR_{hgmidF z0@Vi$sFYE4#ch0psddo`1r?OJOUKp0li(!|B=>c+g8idL{+(SRE0j@nR{GfIPHjO2 zUix($Tz)Pqb%1-dk{9NeqxhVlUTmW3yqqb6YEgm;eDI*^c@vm{PoLfpP{}Rhz@Hdh z0aTJ`&lDaD1R|U7j#UxOI z3iKNg56^2!SBWDj`m+eC%wwf9&mEyNDy_S(_J|#IuJ$=2*EJe0xy)&2fKH~?VdhX+h zXJTvki}A{+IyVEI${7`7jk>*#B5Z!J~O%G8}`DILcC{qBHGOEr-=Q}pfanKnR zW233SkyyKMM#U}o?v;<|j4F3?)`RdF+c_S-3?lNwuE7g^HG6WaoySE@7`ky&tO<#}C;C|6+ zoZ=G8KkcWCsxwpmBb+Dk!8Ti132v&uP3~Jpa-|x(XODX?PB@r~x2Xnni3{!?2%xhn zWmKIT>z{1i{y<-K#Mmf4JD%kH9UlhnSr}3DI|{3-x9-^*FThF}RcE92JLct6!mNt1 zvFo)-P$T$WhRiC%x7id{C*Pj<+X!J+rHrbx(e)3oF%nkbqiHJFHkEXqoka4^PErLc zxiRhWbU5b+X(PMJr-n9H$Q+ z?$|t;^j<|P&{M$Ee$tcaLRKiF>Z}}kja&;s zY@+JCl;wdJC9J>)53BO$CzG7#!N(1OmE1D=@)=zLR+8x83zJFn3n+?3zFt_#qK95& zL`7Ihsx><%lfoUS`Yu()PlT1cq^^oj{4`aDgK|~{`h$n{TyGcrvd}wIN&C*p!hI@l z-}zI!-^J5z-H@Uk_pTjvZr8Mx 
z?Q~Lg=U^^qV`s8Yw3!ZQ@_>&S_tI8Ku1OrJ1tQbyGkx4xFCa~n8;m3;cB-_8%B@1LTHfU;b=1QVCctKL6b6S$JnT7 zl5G_4Vftaw2OTT&J}RO}-A*m(eh9Mh;S!IGJ6}gQ{Wf?*?G)jc7?n|VPIf)ZW~2NL zV0b~0Waao3ExAyNee#LyBVU)$*ThapIh^qQF<+XE%BVUor57150-Yoi4RC>_F9*AX zyTbRh-%Te^FE)+aCq6r&jH)wH3{Un-GfII@l8HL_HJR>SG!glc&z!gE)M@6ycZ>QA zJe`zLbtVeonO6m!z{P=aT;(`Y^#wc&3=b*K8{@^Xqr97jNrj`|Fo`K5fezC^sDjdI zOS_&Fe~q^RIce%t%g{*~Rae}D@Jx(U=b{xc9l*+NxVHAKp5()`J_3{E>wH2!U{}dp z{e!4FE2ZZcE4Fl!s-^p^o@AbtyvQ$&wetBuz1T$6c`4`yFG@Or51vjX-|NXh7x=g# zbdr0gjth*gKqpDG`@Ei5&!Z?7^LptdiJ=yU@80W;UXlI*B3kQ2S)gXo!Cw`5klhfUF%w12JfJv-Xmj`IfSL4i)k57GzG zXSzBh!OPPlQe0am{=tpD+WaYVq9rXpA;s;%8NYM0ee1c~lQU{u*I6VXiL;nwlQAisnm=l+|Vs8A_^oV*4sfZXm zQhe|ROX1%UP-=MQwwzLUtWv^D&Xr0+@CM4@AC*w*h|wD; zG@|EM^rXu5BgE6BHfdX%a(rZU(hfa2IwCbOnRfb?xX9#)=F(yBgDaPa*Lt{QZHtW+dxL97FzJyG9fT-d7MvVfW<2@BGhM7 zn1|o&P!E2a0L$!@0NbJdXRPMTKlk zS>|hs3y-&0BK=pbiH!G44Sx)_^PlS;>7S0r5xfMgiCSVwr|W!|x<|%a;P|U70deUu za6I?OP@lP>o~!1X{qeY_xJZws>%&8WX7lUaEm8h{P@Yw>0c#UuqBE9SBIAQLg!_YE z^yIrVICybb>T=(uUTcDFyl=woLp)RBBwxNuH${2|XM}kKL%CMQ+4XK`8?(P3)Surb z)+5x*?4Y&$F(MO!Q_V{(Y#Eax6IOZz`37Jeq=tic*nf7IR5u&Fmigj&5$n~a9TtN=G)`6Ja*dphOqb~OPHsS3%|&ZS6oDV@Y*OpucUB3 zN8%$wf>Xl%yco}lZS1urHqY$Tl_l+8Z0AB*R;@9|FCOjuDYp5~okyuo)`f=zUelkH ztM<<8zo(yA9_hIhco8s~H=FhL>J9tVz)k%AcG?=A?@fVmzr0^v*l6MgvN|oFJ^zsoKbTS_EYv*;-RtV8&7} z7%&CF|1gq*Vbr1+U--HhhgBXp40|*khFL!IaqP7Umwj;9waARyug8xiIR5g16}Sq1 z0#|#@nl&H3EcT^Aevn7M;%XfK!IoW3?DO4*V+)3K!jL zb37P4!Vhyi92EL2$F_qjOv^V3`X(PbC|ophe>#X{_J=Vd_>Tw;F!`@|VAd>t_(?@V z#pRU&bSG*`8p^_6xYgph8_eL}4BsCUw>K@O`>z$3R|eAkaUd&}3ct~+@we&Ep#GK# zoBF;NZCTV<$V>8g6E$zQ3aK!%f{4x9E z97f(B#-}*XfJE`<=Nq^-01N#%ZND)lsA^hIv@&q5oDU<*vkunzuw!!E7@BT69v_ZF ztze8lAEt#1HLf#+zjb(@IVtHrI$@h3Ovq;NQ?+UB@HZNN$$y$z$r{ZH__LA!5A7%i9`=syMCcrUN3<^!SjH9bq%o0dr zun6@bNm&#y5mflgr4FGk8P@QgOCA$~tsr12gLF0L)s*e3V+@HeFDv6al)Pv$8*u^g zRZb3PJW9eiO16-~%p6kF>DBOP!EwiF2HVjW-B1Bk02M$5Pyti`6+i|4e+5)I>63wT z42=$PsL`>_MqyV?7=;lND@fUB%7HIaU7QwN%G1AEKeumRKWFuLtEPTd_5GAb<8-3x z@gT3nLHgH?jMuK@wT@JLRXpimH{9;v+>v$s4*kMd#EUu1nP4-Xa#7tcoZQ4~mGsMa z)s$_t)FP(c^Q+@4^K{Bwg?t*+Qz^Z0&FnlHdG}JxzwHn2IHT6U>ucWG`j+3~$!1VR zDEHeqH4R(!hDBZz^3{2`BHP65o>+im`qY$dAjp2uG5T42`LkK(MKU{7Hm8#?+2LW9e+E6-PFJ7rGnp?Q zPvYq!+vO(p6hGHN?Wiy;;`yqhuFYpF6iEu1dhiGyAHs+wJa|$-l%}?k;i}?m9jW}Y z*==w@oZisyp7!N=s5c3uOK4d?5(RuIPvQmS-^;_1vTNT`Z0cgrd1-dD=eJ><+qQpp z@zeSGuU7J%#A`0tczdk5t0cRIM~PwQVC>IxCD5V*r~oSPZz*sG2P$w~f%f$!TtNj; zfj^_b-aE}b_@?ds(b|63cK?I6ci(G!@9WyW(e@zH_TgJ?_r7Sm|4G{ipS9h5e%#D( Pf(oDlr~oSPOA7n|RcdkO literal 0 HcmV?d00001 diff --git a/tests/data/small_sky_to_small_sky_order1/_common_metadata b/tests/data/small_sky_to_small_sky_order1/_common_metadata new file mode 100644 index 0000000000000000000000000000000000000000..943816477faeedd41ff200a2b58466cacf115919 GIT binary patch literal 556 zcmY*X-%o=;48BaVXxu9Xl0ER^<0ex{jEN6D*yj8(foP_Qi9v>JW&;Ke)W5+$+V&VY z8?M){?N{37Y;b$Bq&^!=X=eT`SbFjG)PMzjg$FjY3{$r^XO$exL0~8 zTGZOVL$yxsjS>zoixi>N1?jxQA6drsd>5MEcNLLl^o1!THt4~P) z86LiRJzlIIbtCte?!pgEhei|O2hvuBIn0Fji8Kwq5mCDC0WA2>$EL9xoPP-5V-y9+ zf<*RaKCzUo`EZYzRP`kGhkFbGM<`Q?EBb>Arf7+muGYav$4`=um!@%{}D=x%mHB_+a$LjRzv#KgmF`4L5gMuR3MW+_X-04+s8J#pkf zJQ`y-aNy+869*2Qj3?uP7!DkW;bc4z&&K%PwzIQBduT{@(w%)XZ)WzJ>35h>mPtxJ z^_fN#|6+F9oHL{Vk$ml5#XpiSPMXD1!#7fxc~uGBZT3x z$e$hNynt~YyOYUJ*Zjcbv6*;;Ogt9(Gc$d^Aeh7-?;!eIPv6x8QI}1`BP8On$X6he z56{V78jx50mkFs=r^AZ^ugbYysUp-~6WwZZ zQ>fpvV5%t84~c$~fGBDU#Um7o$0A=&z*I5nxP|VvxM|eetAeSbQGZ+M*H$2k*+%gQjpDJ$S2b!!L8E*%8}*(F z!qY~rwbT7JH;vl3E^tm7wcbGwuDNN{Mwfu8V$^ykJ-FefQ6J-isiILIV)SPeh~l*2+HXUn{6q~aL6*rp9KT(907EVp4uzwP(z7?$du`)(seQb^mS?k}$PWw|Z3ws9(%f`4i^=Mu% 
zn3x`T1Cx3&(ydANc)LGsEvk$4aS&sR@aPK@-Lp$i654WhE;F0WrN?k Date: Tue, 14 Nov 2023 11:26:23 -0500 Subject: [PATCH 2/8] Merge recent changes --- src/hipscat/io/parquet_metadata.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/hipscat/io/parquet_metadata.py b/src/hipscat/io/parquet_metadata.py index 97037665..047d603b 100644 --- a/src/hipscat/io/parquet_metadata.py +++ b/src/hipscat/io/parquet_metadata.py @@ -33,7 +33,18 @@ def write_parquet_metadata(catalog_path, storage_options: dict = None, output_pa output_path (str): base path for writing out metadata files defaults to `catalog_path` if unspecified """ - dataset = file_io.read_parquet_dataset(catalog_path, storage_options=storage_options) + ignore_prefixes = [ + "intermediate", + "_common_metadata", + "_metadata", + ] + + dataset = file_io.read_parquet_dataset( + catalog_path, + storage_options=storage_options, + ignore_prefixes=ignore_prefixes, + exclude_invalid_files=True, + ) metadata_collector = [] for hips_file in dataset.files: From 59a088554e27c3b1fc3b69ad259da874a831c23c Mon Sep 17 00:00:00 2001 From: delucchi-cmu Date: Tue, 14 Nov 2023 12:09:49 -0500 Subject: [PATCH 3/8] Get rid of partition info files. Address pylint. --- src/hipscat/catalog/partition_info.py | 2 +- tests/data/small_sky/partition_info.csv | 2 -- tests/data/small_sky_order1/partition_info.csv | 5 ----- tests/data/small_sky_source/partition_info.csv | 15 --------------- 4 files changed, 1 insertion(+), 23 deletions(-) delete mode 100644 tests/data/small_sky/partition_info.csv delete mode 100644 tests/data/small_sky_order1/partition_info.csv delete mode 100644 tests/data/small_sky_source/partition_info.csv diff --git a/src/hipscat/catalog/partition_info.py b/src/hipscat/catalog/partition_info.py index 3230cdbf..b7a07979 100644 --- a/src/hipscat/catalog/partition_info.py +++ b/src/hipscat/catalog/partition_info.py @@ -1,5 +1,5 @@ """Container class to hold per-partition metadata""" -from typing import Any, Dict, List, Union +from typing import List import numpy as np import pandas as pd diff --git a/tests/data/small_sky/partition_info.csv b/tests/data/small_sky/partition_info.csv deleted file mode 100644 index ed015721..00000000 --- a/tests/data/small_sky/partition_info.csv +++ /dev/null @@ -1,2 +0,0 @@ -Norder,Dir,Npix,num_rows -0,0,11,131 diff --git a/tests/data/small_sky_order1/partition_info.csv b/tests/data/small_sky_order1/partition_info.csv deleted file mode 100644 index d15927f2..00000000 --- a/tests/data/small_sky_order1/partition_info.csv +++ /dev/null @@ -1,5 +0,0 @@ -Norder,Dir,Npix,num_rows -1,0,44,42 -1,0,45,29 -1,0,46,42 -1,0,47,18 diff --git a/tests/data/small_sky_source/partition_info.csv b/tests/data/small_sky_source/partition_info.csv deleted file mode 100644 index 7a5f4e9f..00000000 --- a/tests/data/small_sky_source/partition_info.csv +++ /dev/null @@ -1,15 +0,0 @@ -Norder,Dir,Npix,num_rows -0,0,4,50 -1,0,47,2395 -2,0,176,385 -2,0,177,1510 -2,0,178,1634 -2,0,179,1773 -2,0,180,655 -2,0,181,903 -2,0,182,1246 -2,0,183,1143 -2,0,184,1390 -2,0,185,2942 -2,0,186,452 -2,0,187,683 From 816041ae4f68c3c4f75424d39828a7fd9a1e886a Mon Sep 17 00:00:00 2001 From: delucchi-cmu Date: Tue, 14 Nov 2023 12:39:43 -0500 Subject: [PATCH 4/8] ./tests/ pylint --- tests/hipscat/io/file_io/test_file_io.py | 1 - tests/hipscat/io/test_parquet_metadata.py | 1 - tests/hipscat/io/test_validation.py | 1 - tests/hipscat/io/test_write_metadata.py | 104 ---------------------- tests/hipscat/pixel_tree/conftest.py | 1 - 5 files changed, 108 
deletions(-) diff --git a/tests/hipscat/io/file_io/test_file_io.py b/tests/hipscat/io/file_io/test_file_io.py index 5e805ea0..005f1935 100644 --- a/tests/hipscat/io/file_io/test_file_io.py +++ b/tests/hipscat/io/file_io/test_file_io.py @@ -8,7 +8,6 @@ from hipscat.io.file_io import ( delete_file, get_file_pointer_from_path, - load_csv_to_pandas, load_json_file, load_parquet_to_pandas, make_directory, diff --git a/tests/hipscat/io/test_parquet_metadata.py b/tests/hipscat/io/test_parquet_metadata.py index 4e2ca22b..77a7da64 100644 --- a/tests/hipscat/io/test_parquet_metadata.py +++ b/tests/hipscat/io/test_parquet_metadata.py @@ -8,7 +8,6 @@ import pyarrow as pa import hipscat.io.parquet_metadata as io -import hipscat.pixel_math as hist from hipscat.io import file_io diff --git a/tests/hipscat/io/test_validation.py b/tests/hipscat/io/test_validation.py index 77c5d768..03a4d959 100644 --- a/tests/hipscat/io/test_validation.py +++ b/tests/hipscat/io/test_validation.py @@ -18,7 +18,6 @@ def test_is_valid_catalog(tmp_path, small_sky_catalog, small_sky_pixels): # The catalog is valid if both the catalog_info and _metadata files exist, # and the catalog_info is in a valid format - partition_info_pointer = paths.get_parquet_metadata_pointer(catalog_dir_pointer) PartitionInfo.from_healpix(small_sky_pixels).write_to_metadata_files(catalog_dir_pointer) assert is_valid_catalog(catalog_dir_pointer) diff --git a/tests/hipscat/io/test_write_metadata.py b/tests/hipscat/io/test_write_metadata.py index 1ee6b910..72b3aaba 100644 --- a/tests/hipscat/io/test_write_metadata.py +++ b/tests/hipscat/io/test_write_metadata.py @@ -1,11 +1,8 @@ """Tests of file IO (reads and writes)""" import os -import shutil import numpy.testing as npt -import pandas as pd -import pyarrow as pa import hipscat.io.write_metadata as io import hipscat.pixel_math as hist @@ -145,107 +142,6 @@ def test_write_partition_info_float(assert_text_file_matches, tmp_path): assert_text_file_matches(expected_lines, metadata_filename) -def test_write_parquet_metadata(tmp_path, small_sky_dir, basic_catalog_parquet_metadata): - """Copy existing catalog and create new metadata files for it""" - catalog_base_dir = os.path.join(tmp_path, "catalog") - shutil.copytree( - small_sky_dir, - catalog_base_dir, - ) - io.write_parquet_metadata(catalog_base_dir) - check_parquet_schema(os.path.join(catalog_base_dir, "_metadata"), basic_catalog_parquet_metadata) - ## _common_metadata has 0 row groups - check_parquet_schema( - os.path.join(catalog_base_dir, "_common_metadata"), - basic_catalog_parquet_metadata, - 0, - ) - ## Re-write - should still have the same properties. 
- io.write_parquet_metadata(catalog_base_dir) - check_parquet_schema(os.path.join(catalog_base_dir, "_metadata"), basic_catalog_parquet_metadata) - ## _common_metadata has 0 row groups - check_parquet_schema( - os.path.join(catalog_base_dir, "_common_metadata"), - basic_catalog_parquet_metadata, - 0, - ) - - -def test_write_parquet_metadata_order1(tmp_path, small_sky_order1_dir, basic_catalog_parquet_metadata): - """Copy existing catalog and create new metadata files for it, - using a catalog with multiple files.""" - temp_path = os.path.join(tmp_path, "catalog") - shutil.copytree( - small_sky_order1_dir, - temp_path, - ) - - io.write_parquet_metadata(temp_path) - ## 4 row groups for 4 partitioned parquet files - check_parquet_schema( - os.path.join(temp_path, "_metadata"), - basic_catalog_parquet_metadata, - 4, - ) - ## _common_metadata has 0 row groups - check_parquet_schema( - os.path.join(temp_path, "_common_metadata"), - basic_catalog_parquet_metadata, - 0, - ) - - -def test_write_index_parquet_metadata(tmp_path): - """Create an index-like catalog, and test metadata creation.""" - temp_path = os.path.join(tmp_path, "index") - - index_parquet_path = os.path.join(temp_path, "Parts=0", "part_000_of_001.parquet") - file_io.make_directory(os.path.join(temp_path, "Parts=0")) - basic_index = pd.DataFrame({"_hipscat_id": [4000, 4001], "ps1_objid": [700, 800]}) - file_io.write_dataframe_to_parquet(basic_index, index_parquet_path) - - index_catalog_parquet_metadata = pa.schema( - [ - pa.field("_hipscat_id", pa.int64()), - pa.field("ps1_objid", pa.int64()), - ] - ) - - io.write_parquet_metadata(temp_path) - check_parquet_schema(os.path.join(tmp_path, "index", "_metadata"), index_catalog_parquet_metadata) - ## _common_metadata has 0 row groups - check_parquet_schema( - os.path.join(tmp_path, "index", "_common_metadata"), - index_catalog_parquet_metadata, - 0, - ) - - -def check_parquet_schema(file_name, expected_schema, expected_num_row_groups=1): - """Check parquet schema against expectations""" - assert file_io.does_file_or_directory_exist(file_name), f"file not found [{file_name}]" - - single_metadata = file_io.read_parquet_metadata(file_name) - schema = single_metadata.schema.to_arrow_schema() - - assert len(schema) == len( - expected_schema - ), f"object list not the same size ({len(schema)} vs {len(expected_schema)})" - - npt.assert_array_equal(schema.names, expected_schema.names) - - assert schema.equals(expected_schema, check_metadata=False) - - parquet_file = file_io.read_parquet_file(file_name) - assert parquet_file.metadata.num_row_groups == expected_num_row_groups - - for row_index in range(0, parquet_file.metadata.num_row_groups): - row_md = parquet_file.metadata.row_group(row_index) - for column_index in range(0, row_md.num_columns): - column_metadata = row_md.column(column_index) - assert column_metadata.file_path.endswith(".parquet") - - def test_read_write_fits_point_map(tmp_path): """Check that we write and can read a FITS file for spatial distribution.""" initial_histogram = hist.empty_histogram(1) diff --git a/tests/hipscat/pixel_tree/conftest.py b/tests/hipscat/pixel_tree/conftest.py index 58499324..a2b91384 100644 --- a/tests/hipscat/pixel_tree/conftest.py +++ b/tests/hipscat/pixel_tree/conftest.py @@ -2,7 +2,6 @@ import pytest -from hipscat.catalog import PartitionInfo from hipscat.pixel_math import HealpixPixel from hipscat.pixel_tree.pixel_tree_builder import PixelTreeBuilder From d8dec2e27785850b15b2b2afd5c79965db16897a Mon Sep 17 00:00:00 2001 From: delucchi-cmu Date: 
Tue, 14 Nov 2023 12:50:07 -0500 Subject: [PATCH 5/8] =[ --- src/hipscat/catalog/partition_info.py | 4 ++-- src/hipscat/io/validation.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/hipscat/catalog/partition_info.py b/src/hipscat/catalog/partition_info.py index b7a07979..2983e526 100644 --- a/src/hipscat/catalog/partition_info.py +++ b/src/hipscat/catalog/partition_info.py @@ -84,8 +84,8 @@ def read_from_file(cls, metadata_file: FilePointer, storage_options: dict = None """ pixel_list = [ HealpixPixel( - row_group_stat_single_value(row_group,cls.METADATA_ORDER_COLUMN_NAME), - row_group_stat_single_value(row_group,cls.METADATA_PIXEL_COLUMN_NAME), + row_group_stat_single_value(row_group, cls.METADATA_ORDER_COLUMN_NAME), + row_group_stat_single_value(row_group, cls.METADATA_PIXEL_COLUMN_NAME), ) for row_group in read_row_group_fragments(metadata_file, storage_options) ] diff --git a/src/hipscat/io/validation.py b/src/hipscat/io/validation.py index b62c8f37..997a4d15 100644 --- a/src/hipscat/io/validation.py +++ b/src/hipscat/io/validation.py @@ -1,5 +1,5 @@ from hipscat.catalog.dataset.catalog_info_factory import from_catalog_dir -from hipscat.io import get_partition_info_pointer, get_parquet_metadata_pointer +from hipscat.io import get_parquet_metadata_pointer, get_partition_info_pointer from hipscat.io.file_io.file_pointer import FilePointer, is_regular_file From 6a56faf0bcf7b56cfc6212bf320fc52d4c4859d0 Mon Sep 17 00:00:00 2001 From: delucchi-cmu Date: Tue, 14 Nov 2023 15:43:52 -0500 Subject: [PATCH 6/8] Improve test coverage. --- tests/hipscat/catalog/test_partition_info.py | 5 ++ tests/hipscat/io/conftest.py | 31 ++++++++++ tests/hipscat/io/test_parquet_metadata.py | 61 +++++++++++--------- tests/hipscat/io/test_write_metadata.py | 29 ++++++++++ 4 files changed, 99 insertions(+), 27 deletions(-) diff --git a/tests/hipscat/catalog/test_partition_info.py b/tests/hipscat/catalog/test_partition_info.py index b82ec6c3..3a5d5121 100644 --- a/tests/hipscat/catalog/test_partition_info.py +++ b/tests/hipscat/catalog/test_partition_info.py @@ -54,6 +54,11 @@ def test_load_partition_no_file(tmp_path): with pytest.raises(FileNotFoundError): PartitionInfo.read_from_file(wrong_pointer) + wrong_path = os.path.join(tmp_path, "partition_info.csv") + wrong_pointer = file_io.get_file_pointer_from_path(wrong_path) + with pytest.raises(FileNotFoundError): + PartitionInfo.read_from_csv(wrong_pointer) + def test_get_highest_order(small_sky_order1_dir): """test the `get_highest_order` method""" diff --git a/tests/hipscat/io/conftest.py b/tests/hipscat/io/conftest.py index 1e787eda..2f154357 100644 --- a/tests/hipscat/io/conftest.py +++ b/tests/hipscat/io/conftest.py @@ -2,9 +2,11 @@ import re +import numpy.testing as npt import pyarrow as pa import pytest +from hipscat.io import file_io from hipscat.io.file_io.file_io import load_text_file from hipscat.io.file_io.file_pointer import does_file_or_directory_exist @@ -58,3 +60,32 @@ def basic_catalog_parquet_metadata(): pa.field("__index_level_0__", pa.int64()), ] ) + + +@pytest.fixture +def check_parquet_schema(): + def check_parquet_schema(file_name, expected_schema, expected_num_row_groups=1): + """Check parquet schema against expectations""" + assert file_io.does_file_or_directory_exist(file_name), f"file not found [{file_name}]" + + single_metadata = file_io.read_parquet_metadata(file_name) + schema = single_metadata.schema.to_arrow_schema() + + assert len(schema) == len( + expected_schema + ), f"object list not the same size 
({len(schema)} vs {len(expected_schema)})" + + npt.assert_array_equal(schema.names, expected_schema.names) + + assert schema.equals(expected_schema, check_metadata=False) + + parquet_file = file_io.read_parquet_file(file_name) + assert parquet_file.metadata.num_row_groups == expected_num_row_groups + + for row_index in range(0, parquet_file.metadata.num_row_groups): + row_md = parquet_file.metadata.row_group(row_index) + for column_index in range(0, row_md.num_columns): + column_metadata = row_md.column(column_index) + assert column_metadata.file_path.endswith(".parquet") + + return check_parquet_schema diff --git a/tests/hipscat/io/test_parquet_metadata.py b/tests/hipscat/io/test_parquet_metadata.py index 77a7da64..b7e2bd18 100644 --- a/tests/hipscat/io/test_parquet_metadata.py +++ b/tests/hipscat/io/test_parquet_metadata.py @@ -3,22 +3,28 @@ import os import shutil -import numpy.testing as npt import pandas as pd import pyarrow as pa +import pytest -import hipscat.io.parquet_metadata as io -from hipscat.io import file_io +from hipscat.io import file_io, paths +from hipscat.io.parquet_metadata import ( + read_row_group_fragments, + row_group_stat_single_value, + write_parquet_metadata, +) -def test_write_parquet_metadata(tmp_path, small_sky_dir, basic_catalog_parquet_metadata): +def test_write_parquet_metadata( + tmp_path, small_sky_dir, basic_catalog_parquet_metadata, check_parquet_schema +): """Copy existing catalog and create new metadata files for it""" catalog_base_dir = os.path.join(tmp_path, "catalog") shutil.copytree( small_sky_dir, catalog_base_dir, ) - io.write_parquet_metadata(catalog_base_dir) + write_parquet_metadata(catalog_base_dir) check_parquet_schema(os.path.join(catalog_base_dir, "_metadata"), basic_catalog_parquet_metadata) ## _common_metadata has 0 row groups check_parquet_schema( @@ -27,7 +33,7 @@ def test_write_parquet_metadata(tmp_path, small_sky_dir, basic_catalog_parquet_m 0, ) ## Re-write - should still have the same properties. 
- io.write_parquet_metadata(catalog_base_dir) + write_parquet_metadata(catalog_base_dir) check_parquet_schema(os.path.join(catalog_base_dir, "_metadata"), basic_catalog_parquet_metadata) ## _common_metadata has 0 row groups check_parquet_schema( @@ -37,7 +43,9 @@ def test_write_parquet_metadata(tmp_path, small_sky_dir, basic_catalog_parquet_m ) -def test_write_parquet_metadata_order1(tmp_path, small_sky_order1_dir, basic_catalog_parquet_metadata): +def test_write_parquet_metadata_order1( + tmp_path, small_sky_order1_dir, basic_catalog_parquet_metadata, check_parquet_schema +): """Copy existing catalog and create new metadata files for it, using a catalog with multiple files.""" temp_path = os.path.join(tmp_path, "catalog") @@ -46,7 +54,7 @@ def test_write_parquet_metadata_order1(tmp_path, small_sky_order1_dir, basic_cat temp_path, ) - io.write_parquet_metadata(temp_path) + write_parquet_metadata(temp_path) ## 4 row groups for 4 partitioned parquet files check_parquet_schema( os.path.join(temp_path, "_metadata"), @@ -61,7 +69,7 @@ def test_write_parquet_metadata_order1(tmp_path, small_sky_order1_dir, basic_cat ) -def test_write_index_parquet_metadata(tmp_path): +def test_write_index_parquet_metadata(tmp_path, check_parquet_schema): """Create an index-like catalog, and test metadata creation.""" temp_path = os.path.join(tmp_path, "index") @@ -77,7 +85,7 @@ def test_write_index_parquet_metadata(tmp_path): ] ) - io.write_parquet_metadata(temp_path) + write_parquet_metadata(temp_path) check_parquet_schema(os.path.join(tmp_path, "index", "_metadata"), index_catalog_parquet_metadata) ## _common_metadata has 0 row groups check_parquet_schema( @@ -87,26 +95,25 @@ def test_write_index_parquet_metadata(tmp_path): ) -def check_parquet_schema(file_name, expected_schema, expected_num_row_groups=1): - """Check parquet schema against expectations""" - assert file_io.does_file_or_directory_exist(file_name), f"file not found [{file_name}]" +def test_row_group_fragments(small_sky_order1_dir): + partition_info_file = paths.get_parquet_metadata_pointer(small_sky_order1_dir) - single_metadata = file_io.read_parquet_metadata(file_name) - schema = single_metadata.schema.to_arrow_schema() + num_row_groups = 0 + for _ in read_row_group_fragments(partition_info_file): + num_row_groups += 1 - assert len(schema) == len( - expected_schema - ), f"object list not the same size ({len(schema)} vs {len(expected_schema)})" + assert num_row_groups == 4 - npt.assert_array_equal(schema.names, expected_schema.names) - assert schema.equals(expected_schema, check_metadata=False) +def test_row_group_stats(small_sky_dir): + partition_info_file = paths.get_parquet_metadata_pointer(small_sky_dir) + first_row_group = next(read_row_group_fragments(partition_info_file)) - parquet_file = file_io.read_parquet_file(file_name) - assert parquet_file.metadata.num_row_groups == expected_num_row_groups + assert row_group_stat_single_value(first_row_group, "Norder") == 0 + assert row_group_stat_single_value(first_row_group, "Npix") == 11 - for row_index in range(0, parquet_file.metadata.num_row_groups): - row_md = parquet_file.metadata.row_group(row_index) - for column_index in range(0, row_md.num_columns): - column_metadata = row_md.column(column_index) - assert column_metadata.file_path.endswith(".parquet") + with pytest.raises(ValueError, match="doesn't have expected key"): + row_group_stat_single_value(first_row_group, "NOT HERE") + + with pytest.raises(ValueError, match="stat min != max"): + row_group_stat_single_value(first_row_group, "ra") 
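# Illustrative usage sketch (not part of the diff): the two helpers exercised
# by the tests above can be combined to list the HEALPix partitions recorded in
# a catalog's `_metadata` file, which mirrors what `PartitionInfo.read_from_file`
# does. The catalog path is an assumption, pointing at test data added in this series.
from hipscat.io import paths
from hipscat.io.parquet_metadata import read_row_group_fragments, row_group_stat_single_value
from hipscat.pixel_math import HealpixPixel

metadata_file = paths.get_parquet_metadata_pointer("tests/data/small_sky_order1")
pixels = [
    HealpixPixel(
        row_group_stat_single_value(row_group, "Norder"),
        row_group_stat_single_value(row_group, "Npix"),
    )
    for row_group in read_row_group_fragments(metadata_file)
]
# Expect the four order-1 partitions of small_sky_order1: pixels 44, 45, 46, 47.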
diff --git a/tests/hipscat/io/test_write_metadata.py b/tests/hipscat/io/test_write_metadata.py index 72b3aaba..5f563edc 100644 --- a/tests/hipscat/io/test_write_metadata.py +++ b/tests/hipscat/io/test_write_metadata.py @@ -1,6 +1,7 @@ """Tests of file IO (reads and writes)""" import os +import shutil import numpy.testing as npt @@ -142,6 +143,34 @@ def test_write_partition_info_float(assert_text_file_matches, tmp_path): assert_text_file_matches(expected_lines, metadata_filename) +def test_write_parquet_metadata( + tmp_path, small_sky_dir, basic_catalog_parquet_metadata, check_parquet_schema +): + """Copy existing catalog and create new metadata files for it""" + catalog_base_dir = os.path.join(tmp_path, "catalog") + shutil.copytree( + small_sky_dir, + catalog_base_dir, + ) + io.write_parquet_metadata(catalog_base_dir) + check_parquet_schema(os.path.join(catalog_base_dir, "_metadata"), basic_catalog_parquet_metadata) + ## _common_metadata has 0 row groups + check_parquet_schema( + os.path.join(catalog_base_dir, "_common_metadata"), + basic_catalog_parquet_metadata, + 0, + ) + ## Re-write - should still have the same properties. + io.write_parquet_metadata(catalog_base_dir) + check_parquet_schema(os.path.join(catalog_base_dir, "_metadata"), basic_catalog_parquet_metadata) + ## _common_metadata has 0 row groups + check_parquet_schema( + os.path.join(catalog_base_dir, "_common_metadata"), + basic_catalog_parquet_metadata, + 0, + ) + + def test_read_write_fits_point_map(tmp_path): """Check that we write and can read a FITS file for spatial distribution.""" initial_histogram = hist.empty_histogram(1) From 4fdd21ec274c0ab8431687b3d7ff87979b83ecce Mon Sep 17 00:00:00 2001 From: delucchi-cmu Date: Fri, 17 Nov 2023 15:36:47 -0500 Subject: [PATCH 7/8] PR responses. 
--- src/hipscat/catalog/partition_info.py | 16 +++++++++------- src/hipscat/io/parquet_metadata.py | 15 +++++++++++++-- src/hipscat/io/validation.py | 14 +++++++------- src/hipscat/pixel_math/healpix_pixel.py | 2 +- 4 files changed, 30 insertions(+), 17 deletions(-) diff --git a/src/hipscat/catalog/partition_info.py b/src/hipscat/catalog/partition_info.py index 2983e526..8d75eeeb 100644 --- a/src/hipscat/catalog/partition_info.py +++ b/src/hipscat/catalog/partition_info.py @@ -1,4 +1,6 @@ """Container class to hold per-partition metadata""" +from __future__ import annotations + from typing import List import numpy as np @@ -55,7 +57,7 @@ def write_to_metadata_files(self, catalog_path: FilePointer, storage_options: di Args: catalog_path (str): base path for the catalog - storage_options: dictionary that contains abstract filesystem credentials + storage_options (dict): dictionary that contains abstract filesystem credentials """ batches = [ pa.RecordBatch.from_arrays( @@ -72,12 +74,12 @@ def write_to_metadata_files(self, catalog_path: FilePointer, storage_options: di write_parquet_metadata_for_batches(batches, catalog_path, storage_options) @classmethod - def read_from_file(cls, metadata_file: FilePointer, storage_options: dict = None): + def read_from_file(cls, metadata_file: FilePointer, storage_options: dict = None) -> PartitionInfo: """Read partition info from a `_metadata` file to create an object Args: - metadata_file: FilePointer to the `_metadata` file - storage_options: dictionary that contains abstract filesystem credentials + metadata_file (FilePointer): FilePointer to the `_metadata` file + storage_options (dict): dictionary that contains abstract filesystem credentials Returns: A `PartitionInfo` object with the data from the file @@ -97,12 +99,12 @@ def read_from_file(cls, metadata_file: FilePointer, storage_options: dict = None return cls(pixel_list) @classmethod - def read_from_csv(cls, partition_info_file: FilePointer, storage_options: dict = None): + def read_from_csv(cls, partition_info_file: FilePointer, storage_options: dict = None) -> PartitionInfo: """Read partition info from a `partition_info.csv` file to create an object Args: partition_info_file: FilePointer to the `partition_info.csv` file - storage_options: dictionary that contains abstract filesystem credentials + storage_options (dict): dictionary that contains abstract filesystem credentials Returns: A `PartitionInfo` object with the data from the file @@ -142,7 +144,7 @@ def as_dataframe(self): return pd.DataFrame.from_dict(partition_info_dict) @classmethod - def from_healpix(cls, healpix_pixels: List[HealpixPixel]): + def from_healpix(cls, healpix_pixels: List[HealpixPixel]) -> PartitionInfo: """Create a partition info object from a list of constituent healpix pixels. Args: diff --git a/src/hipscat/io/parquet_metadata.py b/src/hipscat/io/parquet_metadata.py index 047d603b..0d26fb21 100644 --- a/src/hipscat/io/parquet_metadata.py +++ b/src/hipscat/io/parquet_metadata.py @@ -12,7 +12,12 @@ def row_group_stat_single_value(row_group, stat_key): """Convenience method to find the min and max inside a statistics dictionary, - and raise an error if they're unequal.""" + and raise an error if they're unequal. + + Args: + row_group: dataset fragment row group + stat_key (str): column name of interest. 
+ """ if stat_key not in row_group.statistics: raise ValueError(f"row group doesn't have expected key {stat_key}") stat_dict = row_group.statistics[stat_key] @@ -25,7 +30,10 @@ def row_group_stat_single_value(row_group, stat_key): def write_parquet_metadata(catalog_path, storage_options: dict = None, output_path: str = None): """Generate parquet metadata, using the already-partitioned parquet files - for this catalog + for this catalog. + + For more information on the general parquet metadata files, and why we write them, see + https://arrow.apache.org/docs/python/parquet.html#writing-metadata-and-common-metadata-files Args: catalog_path (str): base path for the catalog @@ -50,6 +58,8 @@ def write_parquet_metadata(catalog_path, storage_options: dict = None, output_pa for hips_file in dataset.files: hips_file_pointer = file_io.get_file_pointer_from_path(hips_file, include_protocol=catalog_path) single_metadata = file_io.read_parquet_metadata(hips_file_pointer, storage_options=storage_options) + + # Users must set the file path of each chunk before combining the metadata. relative_path = hips_file[len(catalog_path) :] single_metadata.set_file_path(relative_path) metadata_collector.append(single_metadata) @@ -78,6 +88,7 @@ def write_parquet_metadata_for_batches( ): """Write parquet metadata files for some pyarrow table batches. This writes the batches to a temporary parquet dataset using local storage, and + generates the metadata for the partitioned catalog parquet files. Args: batches (List[pa.RecordBatch]): create one batch per group of data (partition or row group) diff --git a/src/hipscat/io/validation.py b/src/hipscat/io/validation.py index 997a4d15..4851bc21 100644 --- a/src/hipscat/io/validation.py +++ b/src/hipscat/io/validation.py @@ -7,7 +7,7 @@ def is_valid_catalog(pointer: FilePointer) -> bool: """Checks if a catalog is valid for a given base catalog pointer Args: - pointer: pointer to base catalog directory + pointer (FilePointer): pointer to base catalog directory Returns: True if both the catalog_info and partition_info files are @@ -16,11 +16,11 @@ def is_valid_catalog(pointer: FilePointer) -> bool: return is_catalog_info_valid(pointer) and (is_partition_info_valid(pointer) or is_metadata_valid(pointer)) -def is_catalog_info_valid(pointer): +def is_catalog_info_valid(pointer: FilePointer): """Checks if catalog_info is valid for a given base catalog pointer Args: - pointer: pointer to base catalog directory + pointer (FilePointer): pointer to base catalog directory Returns: True if the catalog_info file exists, and it is correctly formatted, @@ -34,11 +34,11 @@ def is_catalog_info_valid(pointer): return is_valid -def is_partition_info_valid(pointer): +def is_partition_info_valid(pointer: FilePointer): """Checks if partition_info is valid for a given base catalog pointer Args: - pointer: pointer to base catalog directory + pointer (FilePointer): pointer to base catalog directory Returns: True if the partition_info file exists, False otherwise @@ -48,11 +48,11 @@ def is_partition_info_valid(pointer): return partition_info_exists -def is_metadata_valid(pointer): +def is_metadata_valid(pointer: FilePointer): """Checks if _metadata is valid for a given base catalog pointer Args: - pointer: pointer to base catalog directory + pointer (FilePointer): pointer to base catalog directory Returns: True if the _metadata file exists, False otherwise diff --git a/src/hipscat/pixel_math/healpix_pixel.py b/src/hipscat/pixel_math/healpix_pixel.py index fe317f82..08e578fa 100644 --- 
diff --git a/src/hipscat/pixel_math/healpix_pixel.py b/src/hipscat/pixel_math/healpix_pixel.py
index fe317f82..08e578fa 100644
--- a/src/hipscat/pixel_math/healpix_pixel.py
+++ b/src/hipscat/pixel_math/healpix_pixel.py
@@ -82,7 +82,7 @@ def convert_to_higher_order(self, delta_order: int) -> List[HealpixPixel]:
         return pixels
 
     @property
-    def dir(self):
+    def dir(self) -> int:
         """Directory number for the pixel.
 
         This is necessary for file systems that limit to 10,000 subdirectories.

From 22d61c99b30bf602f852aec34b660c0cd2b05b8d Mon Sep 17 00:00:00 2001
From: Melissa DeLucchi <113376043+delucchi-cmu@users.noreply.github.com>
Date: Mon, 20 Nov 2023 15:02:32 -0500
Subject: [PATCH 8/8] Apply suggestions from code review

Co-authored-by: Sandro Campos
---
 src/hipscat/catalog/partition_info.py |  4 ++--
 src/hipscat/io/parquet_metadata.py    |  8 +++++---
 src/hipscat/io/validation.py          |  6 +++---
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/hipscat/catalog/partition_info.py b/src/hipscat/catalog/partition_info.py
index 8d75eeeb..49a3ce9b 100644
--- a/src/hipscat/catalog/partition_info.py
+++ b/src/hipscat/catalog/partition_info.py
@@ -56,7 +56,7 @@ def write_to_metadata_files(self, catalog_path: FilePointer, storage_options: di
         """Generate parquet metadata, using the known partitions.
 
         Args:
-            catalog_path (str): base path for the catalog
+            catalog_path (FilePointer): base path for the catalog
             storage_options (dict): dictionary that contains abstract filesystem credentials
         """
         batches = [
             pa.RecordBatch.from_arrays(
@@ -103,7 +103,7 @@ def read_from_csv(cls, partition_info_file: FilePointer, storage_options: dict =
         """Read partition info from a `partition_info.csv` file to create an object
 
         Args:
-            partition_info_file: FilePointer to the `partition_info.csv` file
+            partition_info_file (FilePointer): FilePointer to the `partition_info.csv` file
             storage_options (dict): dictionary that contains abstract filesystem credentials
 
         Returns:
diff --git a/src/hipscat/io/parquet_metadata.py b/src/hipscat/io/parquet_metadata.py
index 0d26fb21..9aa84b3c 100644
--- a/src/hipscat/io/parquet_metadata.py
+++ b/src/hipscat/io/parquet_metadata.py
@@ -10,13 +10,15 @@ from hipscat.io.file_io.file_pointer import get_fs, strip_leading_slash_for_pyarrow
 
 
-def row_group_stat_single_value(row_group, stat_key):
+def row_group_stat_single_value(row_group, stat_key: str):
     """Convenience method to find the min and max inside a statistics dictionary,
     and raise an error if they're unequal.
 
     Args:
         row_group: dataset fragment row group
         stat_key (str): column name of interest.
+    Returns:
+        The value of the specified row group statistic
     """
     if stat_key not in row_group.statistics:
         raise ValueError(f"row group doesn't have expected key {stat_key}")
     stat_dict = row_group.statistics[stat_key]
@@ -28,7 +30,7 @@ def row_group_stat_single_value(row_group, stat_key):
     return min_val
 
 
-def write_parquet_metadata(catalog_path, storage_options: dict = None, output_path: str = None):
+def write_parquet_metadata(catalog_path: str, storage_options: dict = None, output_path: str = None):
     """Generate parquet metadata, using the already-partitioned parquet files
     for this catalog.
 
@@ -104,7 +106,7 @@ def write_parquet_metadata_for_batches(
     write_parquet_metadata(temp_pq_file, storage_options=storage_options, output_path=output_path)
 
 
-def read_row_group_fragments(metadata_file, storage_options: dict = None):
+def read_row_group_fragments(metadata_file: str, storage_options: dict = None):
     """Generator for metadata fragment row groups in a parquet metadata file.
 
     Args:
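Reading in the other direction, the two helpers annotated above make it possible to walk the row groups recorded in a catalog's `_metadata` file. The column names, catalog path, and use of `HealpixPixel.dir` for directory grouping below are assumptions based on the partitioning columns used elsewhere in this PR:

    from hipscat.io.parquet_metadata import read_row_group_fragments, row_group_stat_single_value
    from hipscat.pixel_math import HealpixPixel

    for row_group in read_row_group_fragments("/tmp/my_catalog/_metadata"):
        pixel = HealpixPixel(
            row_group_stat_single_value(row_group, "Norder"),
            row_group_stat_single_value(row_group, "Npix"),
        )
        # pixel.dir groups pixels into directories of at most 10,000 entries.
        print(f"Norder={pixel.order}/Dir={pixel.dir}/Npix={pixel.pixel}.parquet")
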
diff --git a/src/hipscat/io/validation.py b/src/hipscat/io/validation.py
index 4851bc21..877e7a98 100644
--- a/src/hipscat/io/validation.py
+++ b/src/hipscat/io/validation.py
@@ -16,7 +16,7 @@ def is_valid_catalog(pointer: FilePointer) -> bool:
     return is_catalog_info_valid(pointer) and (is_partition_info_valid(pointer) or is_metadata_valid(pointer))
 
 
-def is_catalog_info_valid(pointer: FilePointer):
+def is_catalog_info_valid(pointer: FilePointer) -> bool:
     """Checks if catalog_info is valid for a given base catalog pointer
 
     Args:
@@ -34,7 +34,7 @@ def is_catalog_info_valid(pointer: FilePointer):
     return is_valid
 
 
-def is_partition_info_valid(pointer: FilePointer):
+def is_partition_info_valid(pointer: FilePointer) -> bool:
     """Checks if partition_info is valid for a given base catalog pointer
 
     Args:
@@ -48,7 +48,7 @@ def is_partition_info_valid(pointer: FilePointer):
     return partition_info_exists
 
 
-def is_metadata_valid(pointer: FilePointer):
+def is_metadata_valid(pointer: FilePointer) -> bool:
     """Checks if _metadata is valid for a given base catalog pointer
 
     Args:
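Taken together, the validation helpers touched above implement the check used throughout this PR: a catalog is usable when catalog_info is present and well formed, and at least one of partition_info.csv or _metadata exists. A small usage sketch; the catalog path is illustrative, and the bare-path call to get_file_pointer_from_path is an assumption based on its use elsewhere in hipscat:

    from hipscat.io import file_io
    from hipscat.io.validation import is_metadata_valid, is_partition_info_valid, is_valid_catalog

    catalog_pointer = file_io.get_file_pointer_from_path("/tmp/my_catalog")

    if not is_valid_catalog(catalog_pointer):
        raise FileNotFoundError(
            "catalog_info missing or malformed, or neither partition_info.csv nor _metadata found"
        )

    # Either source of partition information is acceptable.
    print("partition_info.csv present:", is_partition_info_valid(catalog_pointer))
    print("_metadata present:", is_metadata_valid(catalog_pointer))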