From 5419ac5a002a3c5c81e2179e9182ab7e2f8462b2 Mon Sep 17 00:00:00 2001 From: snowman2 Date: Tue, 16 Jan 2024 13:24:43 -0600 Subject: [PATCH] BUG: Support pandas IntegerArray --- geocube/rasterize.py | 12 ++++++--- test/integration/api/test_core_integration.py | 27 +++++++++++++++++++ 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/geocube/rasterize.py b/geocube/rasterize.py index 62680f0d..a6dc1959 100644 --- a/geocube/rasterize.py +++ b/geocube/rasterize.py @@ -1,7 +1,7 @@ """ This module contains tools for rasterizing vector data. """ -from typing import Optional +from typing import Optional, Union import geopandas import numpy @@ -62,7 +62,7 @@ def _minimize_dtype(dtype: numpy.dtype, fill: float) -> numpy.dtype: def rasterize_image( geometry_array: geopandas.GeoSeries, - data_values: NDArray, + data_values: Union[NDArray, pandas.arrays.IntegerArray], geobox: odc.geo.geobox.GeoBox, fill: float, merge_alg: MergeAlg = MergeAlg.replace, @@ -77,7 +77,7 @@ def rasterize_image( ----------- geometry_array: geopandas.GeoSeries A geometry array of points. - data_values: list + data_values: Union[NDArray, pandas.arrays.IntegerArray] Data values associated with the list of geojson shapes geobox: :obj:`odc.geo.geobox.GeoBox` Transform of the resulting image. @@ -107,6 +107,12 @@ def rasterize_image( # only numbers can be rasterized return None + if isinstance(data_values, pandas.arrays.IntegerArray): + data_values = data_values.to_numpy( + dtype=_minimize_dtype(data_values.dtype.numpy_dtype, fill), + na_value=fill, + ) + if filter_nan: data_values, geometry_array = _remove_missing_data(data_values, geometry_array) diff --git a/test/integration/api/test_core_integration.py b/test/integration/api/test_core_integration.py index 23399f2a..607ebd24 100644 --- a/test/integration/api/test_core_integration.py +++ b/test/integration/api/test_core_integration.py @@ -3,6 +3,7 @@ from functools import partial import geopandas +import numpy import pandas import pytest import xarray @@ -856,3 +857,29 @@ def test_rasterize__like_1d(): ) assert geom_array.rio.transform() == like.rio.transform() assert geom_array.in_geom.shape == (2, 1) + + +@pytest.mark.parametrize( + "dtype, expected_dtype", + [ + ("Int32", "int32"), + ("Int64", "float64"), + ], +) +def test_make_geocube__pandas_integer_array(dtype, expected_dtype, tmpdir): + soil_data = geopandas.read_file(TEST_INPUT_DATA_DIR / "soil_data_flat.geojson")[ + ["geometry", "sandtotal_r", "om_r"] + ] + soil_data["sandtotal_r"] = numpy.round(soil_data["sandtotal_r"] * 100).astype(dtype) + soil_data["sandtotal_r"].values[0] = pandas.NA + + out_grid = make_geocube( + vector_data=soil_data, + output_crs=TEST_GARS_PROJ, + geom=json.dumps(mapping(TEST_GARS_POLY)), + resolution=[-10, 10], + fill=-1, + ) + # test writing to netCDF + out_grid.to_netcdf(tmpdir.mkdir("make_geocube_soil") / "soil_grid_flat.nc") + assert out_grid.sandtotal_r.dtype.name == expected_dtype