Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mhs/das 2108/handle no data #6

Merged
merged 17 commits into from
Apr 17, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## v1.0.2
### 2024-04-05

This version of HyBIG correctly handles missing/bad input data marked by _FillValue or NoData.
Any time one of the bad values occurs in the raster the output png image will be transparent.

## v1.0.1
### 2024-04-05

Expand Down
2 changes: 1 addition & 1 deletion docker/service_version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.0.1
1.0.2
57 changes: 34 additions & 23 deletions harmony_browse_image_generator/browse.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,27 @@
from harmony.message import Source as HarmonySource
from matplotlib.cm import ScalarMappable
from matplotlib.colors import Normalize
from numpy import around, concatenate, ndarray
from osgeo_utils.auxiliary.color_palette import ColorPalette
from PIL import Image
from rasterio.io import DatasetReader
from rasterio.plot import reshape_as_image, reshape_as_raster
from rasterio.warp import Resampling, reproject
from rioxarray import open_rasterio
from xarray import DataArray
from numpy import ndarray
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Huh - weird that this ordering happened. Feels a bit off, but I guess I trust it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I noticed this too*, I think we don't have that set in the pre-commit.

  • and then I forgot to check it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

3dbba3c

I had a bad pyproject.toml, not sure why our precommit didn't catch it though. I will look

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should propagate this to the other repos as we touch them.
da4a015

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch - I might just put up a couple of PRs now. (Some of those repos don't get much love and we might forget)



from harmony_browse_image_generator.color_utility import (
NODATA_IDX,
NODATA_RGBA,
OPAQUE,
TRANSPARENT,
TRANSPARENT_IDX,
TRANSPARENT_RGBA,
get_color_palette,
remove_alpha,
)

from harmony_browse_image_generator.exceptions import HyBIGError
from harmony_browse_image_generator.sizes import (
GridParams,
Expand Down Expand Up @@ -113,27 +118,39 @@ def create_browse_imagery(
def convert_mulitband_to_raster(data_array: DataArray) -> ndarray:
"""Convert multiband to a raster image.

Reads the three/four bands from the file, then normalizes them to the range
Reads the three or four bands from the file, then normalizes them to the range
0 to 255. This assumes the input image is already in RGB or RGBA format and
just ensures that the output is 8bit.

"""
if data_array.rio.count not in [3, 4]:
raise HyBIGError(
f'Cannot create image from {data_array.rio.count} band image. '
'Expecting 3 or 4 bands.'
)

bands = data_array.to_numpy()
norm = Normalize()

if data_array.rio.count == 3:
norm.autoscale(bands)
raster = around(norm(bands) * 255.0).astype('uint8')
# Create an alpha layer where input NaN values are transparent.
nan_mask = np.isnan(bands).any(axis=0)
nan_alpha = np.where(nan_mask, TRANSPARENT, OPAQUE)
flamingbear marked this conversation as resolved.
Show resolved Hide resolved

# grab any existing alpha layer
bands, image_alpha = remove_alpha(bands)

elif data_array.rio.count == 4:
norm.autoscale(bands[0:3, :, :])
partial_raster = around(norm(bands[0:3, :, :]) * 255.0).astype('uint8')
raster = concatenate([partial_raster.data, bands[3:4, :, :]])
norm = Normalize(vmin=np.nanmin(bands), vmax=np.nanmax(bands))
flamingbear marked this conversation as resolved.
Show resolved Hide resolved
raster = np.nan_to_num(np.around(norm(bands) * 255.0), copy=False, nan=0.0).astype(
flamingbear marked this conversation as resolved.
Show resolved Hide resolved
'uint8'
)

if image_alpha is not None:
# merge nan alpha with the image alpha prefering transparency to
# opaqueness.
alpha = np.minimum(nan_alpha, image_alpha)
else:
raise HyBIGError(f'Cannot create image from {data_array.rio.count} band image')
alpha = nan_alpha

return raster
return np.concatenate((raster, alpha[None, ...]), axis=0)


def convert_singleband_to_raster(
Expand All @@ -153,15 +170,16 @@ def convert_gray_1band_to_raster(data_array: DataArray) -> ndarray:
"""Convert a 1-band raster without a color association."""
band = data_array[0, :, :]
cmap = matplotlib.colormaps['Greys_r']
norm = Normalize()
norm.autoscale(band)
cmap.set_bad(TRANSPARENT_RGBA)
norm = Normalize(vmin=np.nanmin(band), vmax=np.nanmax(band))
scalar_map = ScalarMappable(cmap=cmap, norm=norm)

rgba_image = np.zeros((*band.shape, 4), dtype='uint8')
for row_no in range(band.shape[0]):
rgba_image_slice = scalar_map.to_rgba(band[row_no, :], bytes=True)
rgba_image[row_no, :, :] = rgba_image_slice
return reshape_as_raster(rgba_image[..., 0:3])

return reshape_as_raster(rgba_image)


def convert_paletted_1band_to_raster(
Expand All @@ -182,7 +200,7 @@ def convert_paletted_1band_to_raster(
levels, scaled_colors, extend='max'
)

# handle no data values
# handle palette no data value
if palette.ndv is not None:
nodata_colors = palette.color_to_color_entry(palette.ndv, with_alpha=True)
cmap.set_bad(
Expand Down Expand Up @@ -222,13 +240,6 @@ def prepare_raster_for_writing(
return palettize_raster(raster)


def remove_alpha(raster: ndarray) -> tuple[ndarray, ndarray | None]:
"""Pull raster array off of input if it exists."""
if raster.shape[0] == 4:
return raster[0:3, :, :], raster[3, :, :]
return raster, None


def palettize_raster(raster: ndarray) -> tuple[ndarray, dict]:
"""convert an RGB or RGBA image into a 1band image and palette.

Expand Down
11 changes: 11 additions & 0 deletions harmony_browse_image_generator/color_utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

import requests
from harmony.message import Source as HarmonySource
import numpy as np
from numpy import ndarray
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: It feels a bit weird importing all of numpy and then also separately np.ndarray. Not a big deal, but just feels a bit odd.

Copy link
Member Author

@flamingbear flamingbear Apr 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup. I'm surprised I can't have ndarray in a TYPE_CHECKING guard, because that's all it's used for. I can change them all to np.ndarray, but I remember the format got black wonky.

Ok, I tried again. I can update it by putting my type in single quotes. Is that better?

from typing import TYPE_CHECKING
if TYPE_CHECKING:
    from numpy import ndarray

And then

def remove_alpha(raster: 'ndarray') -> tuple['ndarray', 'ndarray', None]:
    """remove alpha layer when it exists."""
    if raster.shape[0] == 4:
        return raster[0:3, :, :], raster[3, :, :]
    return raster, None

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is that even right?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So, I'm worried this next comment is me missing something, but couldn't you just stick with the single import (import numpy as np) and then:

def remove_alpha(raster: np.ndarray) -> tuple[np.ndarray, np.ndarray, None]:

I tried a toy model locally, and that seemed okay.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could. This was why I hadn't elsewhere: "but I remember the format got black wonky"

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

but I don't have strong opinions I suppose 9a7902f

from osgeo_utils.auxiliary.color_palette import ColorPalette
from pystac import Item
from rasterio.io import DatasetReader
Expand All @@ -18,6 +20,8 @@

# Constants for output PNG images
# Applied to transparent pixels where alpha < 255
TRANSPARENT = np.uint8(0)
OPAQUE = np.uint8(255)
TRANSPARENT_RGBA = (0, 0, 0, 0)
TRANSPARENT_IDX = 254

Expand All @@ -26,6 +30,13 @@
NODATA_IDX = 255


def remove_alpha(raster: ndarray) -> tuple[ndarray, ndarray, None]:
"""remove alpha layer when it exists."""
if raster.shape[0] == 4:
return raster[0:3, :, :], raster[3, :, :]
return raster, None
flamingbear marked this conversation as resolved.
Show resolved Hide resolved


def palette_from_remote_colortable(url: str) -> ColorPalette:
"""Return a gdal ColorPalette from a remote colortable."""
response = requests.get(url, timeout=10)
Expand Down
69 changes: 50 additions & 19 deletions tests/unit/test_browse.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from rasterio.warp import Resampling
from xarray import DataArray


from harmony_browse_image_generator.browse import (
convert_mulitband_to_raster,
convert_singleband_to_raster,
Expand All @@ -36,6 +37,8 @@
convert_colormap_to_palette,
get_color_palette,
palette_from_remote_colortable,
OPAQUE,
TRANSPARENT,
)
from harmony_browse_image_generator.exceptions import HyBIGError
from tests.unit.utility import rasterio_test_file
Expand Down Expand Up @@ -298,39 +301,46 @@ def test_create_browse_imagery_with_mocks(
)

def test_convert_singleband_to_raster_without_colortable(self):
ds = MagicMock(DataArray)
ds.__getitem__.return_value = self.data
"""Tests convert_gray_1band_to_raster."""

return_data = np.copy(self.data).astype('float64')
return_data[0][1] = np.nan
ds = DataArray(return_data).expand_dims('band')

expected_raster = np.array(
[
[
[0, 104, 198, 255],
[0, 0, 198, 255],
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you change this from 104 to 0 to demonstrate transparent pixel behavior?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, yes, this is the expected array, I added a missing value at the [0,1] location, and these changes are the ones reflected in the output from the missing data.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh right. You changed the code so the expected array changed.

[0, 104, 198, 255],
[0, 104, 198, 255],
[0, 104, 198, 255],
],
[
[0, 104, 198, 255],
[0, 0, 198, 255],
[0, 104, 198, 255],
[0, 104, 198, 255],
[0, 104, 198, 255],
],
[
[0, 104, 198, 255],
[0, 0, 198, 255],
[0, 104, 198, 255],
[0, 104, 198, 255],
[0, 104, 198, 255],
],
[
[255, 0, 255, 255],
[255, 255, 255, 255],
[255, 255, 255, 255],
[255, 255, 255, 255],
],
],
dtype='uint8',
)

actual_raster = convert_singleband_to_raster(ds, None)
assert_array_equal(expected_raster, actual_raster)

def test_convert_singleband_to_raster_with_colormap(self):
ds = MagicMock(DataArray)
flamingbear marked this conversation as resolved.
Show resolved Hide resolved
ds.__getitem__.return_value = self.data
ds = DataArray(self.data).expand_dims('band')

expected_raster = np.array(
[
Expand Down Expand Up @@ -367,13 +377,11 @@ def test_convert_singleband_to_raster_with_colormap(self):
assert_array_equal(expected_raster, actual_raster)

def test_convert_singleband_to_raster_with_colormap_and_bad_data(self):
ds = MagicMock(DataArray)
data_array = np.array(self.data, dtype='float')
data_array[0, 0] = np.nan
ds = DataArray(data_array).expand_dims('band')
nv_color = (10, 20, 30, 40)

ds.__getitem__.return_value = data_array

# Read the image down: red, yellow, green, blue
expected_raster = np.array(
[
Expand Down Expand Up @@ -412,9 +420,14 @@ def test_convert_singleband_to_raster_with_colormap_and_bad_data(self):
assert_array_equal(expected_raster, actual_raster)

def test_convert_3_multiband_to_raster(self):
ds = Mock(DataArray)
ds.rio.count = 3
ds.to_numpy.return_value = np.stack([self.data, self.data, self.data])

bad_data = np.copy(self.data).astype('float64')
bad_data[1][1] = np.nan
bad_data[1][2] = np.nan
ds = DataArray(
np.stack([self.data, bad_data, self.data]),
dims=('band', 'y', 'x'),
)

expected_raster = np.array(
[
Expand All @@ -426,7 +439,7 @@ def test_convert_3_multiband_to_raster(self):
],
[
[0, 85, 170, 255],
[0, 85, 170, 255],
[0, 0, 0, 255],
[0, 85, 170, 255],
[0, 85, 170, 255],
],
Expand All @@ -436,6 +449,12 @@ def test_convert_3_multiband_to_raster(self):
[0, 85, 170, 255],
[0, 85, 170, 255],
],
[
[OPAQUE, OPAQUE, OPAQUE, OPAQUE],
[OPAQUE, TRANSPARENT, TRANSPARENT, OPAQUE],
[OPAQUE, OPAQUE, OPAQUE, OPAQUE],
[OPAQUE, OPAQUE, OPAQUE, OPAQUE],
],
],
dtype='uint8',
)
Expand All @@ -444,16 +463,27 @@ def test_convert_3_multiband_to_raster(self):
assert_array_equal(expected_raster, actual_raster.data)

def test_convert_4_multiband_to_raster(self):
"""Input data has NaN _fillValue match in the red layer at [1,1]
and alpha chanel also exists with a single transparent value at [0,0]
flamingbear marked this conversation as resolved.
Show resolved Hide resolved

See that the expected output has transformed the missing data [nan]
into fully transparent at [1,1] and retained the transparent value of 1
at [0,0]

"""
ds = Mock(DataArray)
bad_data = np.copy(self.data).astype('float64')
bad_data[1, 1] = np.nan

alpha = np.ones_like(self.data) * 255
alpha[0, 0] = 1
ds.rio.count = 4
ds.to_numpy.return_value = np.stack([self.data, self.data, self.data, alpha])
ds.to_numpy.return_value = np.stack([bad_data, self.data, self.data, alpha])
expected_raster = np.array(
[
[
[0, 85, 170, 255],
[0, 85, 170, 255],
[0, 0, 170, 255],
[0, 85, 170, 255],
[0, 85, 170, 255],
],
Expand All @@ -471,7 +501,7 @@ def test_convert_4_multiband_to_raster(self):
],
[
[1, 255, 255, 255],
[255, 255, 255, 255],
[255, 0, 255, 255],
[255, 255, 255, 255],
[255, 255, 255, 255],
],
Expand All @@ -493,7 +523,8 @@ def test_convert_5_multiband_to_raster(self):
convert_mulitband_to_raster(ds)

self.assertEqual(
excepted.exception.message, 'Cannot create image from 5 band image'
excepted.exception.message,
'Cannot create image from 5 band image. Expecting 3 or 4 bands.',
)

def test_prepare_raster_for_writing_jpeg_3band(self):
Expand Down
Loading