From bf0d3f55a1960c4b961c44ec2e71512dbb5502e8 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Tue, 12 Nov 2024 11:13:55 -0500 Subject: [PATCH 1/9] Implement a `bylayer` keyword for `zonal` if false, `f` gets an iterable of named tuples. if true, `f` gets an iterable of values, and we use `maplayers` outside that. Still an open question what the default should be. I would like to have it be `true` to underline the nature of RasterStacks as being arrays of namedtuples as well, but `false` would be less breaking. --- src/methods/zonal.jl | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/src/methods/zonal.jl b/src/methods/zonal.jl index 476386688..df58dc55f 100644 --- a/src/methods/zonal.jl +++ b/src/methods/zonal.jl @@ -12,6 +12,12 @@ covered by the `of` object/s. # Keywords $GEOMETRYCOLUMN_KEYWORD +- `bylayer`: **Only relevant if `x` is a `RasterStack`.** `false` (default) to apply `f` to the stack as a whole + (so `f` sees an iterator over NamedTuples), or `true` to apply `f` to each layer of a `RasterStack` individually. + In this case, `f` gets each layer separately, and the results are recombined into a NamedTuple. + Generally, it's more powerful to set `bylayer = false`, since you can perform statistics on one layer weighted by another. + Functions like `Statistics.mean` cannot handle NamedTuple input, so you will want to set `bylayer = true` for those. + These can be used when `of` is or contains (a) GeoInterface.jl compatible object(s): - `shape`: Force `data` to be treated as `:polygon`, `:line` or `:point`, where possible. @@ -81,11 +87,17 @@ _zonal(f, x::RasterStackOrArray, of::DimTuple; kw...) = # We don't need to `mask` with an extent, it's square so `crop` will do enough. _zonal(f, x::Raster, of::Extents.Extent; skipmissing=true) = _maybe_skipmissing_call(f, crop(x; to=of, touches=true), skipmissing) -function _zonal(f, x::RasterStack, ext::Extents.Extent; skipmissing=true) +function _zonal(f, x::RasterStack, ext::Extents.Extent; skipmissing=true, bylayer = false) cropped = crop(x; to=ext, touches=true) prod(size(cropped)) > 0 || return missing - return map(cropped) do A - _maybe_skipmissing_call(f, A, skipmissing) + if bylayer # apply f to each layer + return maplayers(cropped) do A + prod(size(A)) > 0 || return missing + _maybe_skipmissing_call(f, A, skipmissing) + end + else # apply f to the whole stack + prod(size(cropped)) > 0 || return missing + return _maybe_skipmissing_call(f, cropped, skipmissing) end end # Otherwise of is a geom, table or vector @@ -104,14 +116,19 @@ function _zonal(f, x::AbstractRaster, ::GI.AbstractGeometryTrait, geom; return _maybe_skipmissing_call(f, masked, skipmissing) end function _zonal(f, st::AbstractRasterStack, ::GI.AbstractGeometryTrait, geom; - skipmissing=true, kw... + skipmissing=true, bylayer = false, kw... ) cropped = crop(st; to=geom, touches=true) prod(size(cropped)) > 0 || return map(_ -> missing, st) masked = mask(cropped; with=geom, kw...) - return map(masked) do A - prod(size(A)) > 0 || return missing - _maybe_skipmissing_call(f, A, skipmissing) + if bylayer # apply f to each layer + return maplayers(masked) do A + prod(size(A)) > 0 || return missing + _maybe_skipmissing_call(f, A, skipmissing) + end + else # apply f to the whole stack + prod(size(masked)) > 0 || return missing + return _maybe_skipmissing_call(f, masked, skipmissing) end end function _zonal(f, x::RasterStackOrArray, ::Nothing, data; @@ -131,6 +148,13 @@ end function _alloc_zonal(f, x, geoms, n; kw...) # Find first non-missing entry and count number of missing entries n_missing::Int = 0 + # TODO: wrap this in try-catch, so that we can throw an intelligent error. + # There are a couple of scenarios where this can fail: + # 1. The function cannot accept an empty iterator + # 2. If passed a RasterStack, the function may not be able to handle layers. + # In this case you'll get a method error, where one of the arguments is a NamedTuple + # that is the same as eltype(rs). We should check for that and throw an informative error, + # that tells you to set `bylayer = true`. z1 = _zonal(f, x, first(geoms); kw...) for geom in geoms z1 = _zonal(f, x, geom; kw...) From 5c4e0aa406f12e0923a08f07da32ae516ef4f586 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Tue, 12 Nov 2024 11:14:10 -0500 Subject: [PATCH 2/9] Bump compat with DD to 0.29 (this will need a breaking version) --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index a7ba6b713..f24535992 100644 --- a/Project.toml +++ b/Project.toml @@ -58,7 +58,7 @@ CommonDataModel = "0.2.3, 0.3" ConstructionBase = "1" CoordinateTransformations = "0.6.2" DataFrames = "1" -DimensionalData = "0.28.2" +DimensionalData = "0.29" DiskArrays = "0.3, 0.4" Extents = "0.1" FillArrays = "0.12, 0.13, 1" From 943c5c284263dfed1a2cecb41f13fe22905b420d Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Tue, 12 Nov 2024 13:18:21 -0500 Subject: [PATCH 3/9] set default `bylayer=true` --- src/methods/zonal.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/methods/zonal.jl b/src/methods/zonal.jl index df58dc55f..e61ff28a8 100644 --- a/src/methods/zonal.jl +++ b/src/methods/zonal.jl @@ -12,10 +12,10 @@ covered by the `of` object/s. # Keywords $GEOMETRYCOLUMN_KEYWORD -- `bylayer`: **Only relevant if `x` is a `RasterStack`.** `false` (default) to apply `f` to the stack as a whole - (so `f` sees an iterator over NamedTuples), or `true` to apply `f` to each layer of a `RasterStack` individually. +- `bylayer`: **Only relevant if `x` is a `RasterStack`.** `true` (default) to apply `f` to each layer of a `RasterStack` individually. In this case, `f` gets each layer separately, and the results are recombined into a NamedTuple. - Generally, it's more powerful to set `bylayer = false`, since you can perform statistics on one layer weighted by another. + If `false`, `f` gets a cropped and masked `RasterStack` as a whole, and must be able to handle that. This is useful for performing e.g + weighted statistics or multi-layer operations. Functions like `Statistics.mean` cannot handle NamedTuple input, so you will want to set `bylayer = true` for those. These can be used when `of` is or contains (a) GeoInterface.jl compatible object(s): @@ -87,7 +87,7 @@ _zonal(f, x::RasterStackOrArray, of::DimTuple; kw...) = # We don't need to `mask` with an extent, it's square so `crop` will do enough. _zonal(f, x::Raster, of::Extents.Extent; skipmissing=true) = _maybe_skipmissing_call(f, crop(x; to=of, touches=true), skipmissing) -function _zonal(f, x::RasterStack, ext::Extents.Extent; skipmissing=true, bylayer = false) +function _zonal(f, x::RasterStack, ext::Extents.Extent; skipmissing=true, bylayer=true) cropped = crop(x; to=ext, touches=true) prod(size(cropped)) > 0 || return missing if bylayer # apply f to each layer @@ -116,7 +116,7 @@ function _zonal(f, x::AbstractRaster, ::GI.AbstractGeometryTrait, geom; return _maybe_skipmissing_call(f, masked, skipmissing) end function _zonal(f, st::AbstractRasterStack, ::GI.AbstractGeometryTrait, geom; - skipmissing=true, bylayer = false, kw... + skipmissing=true, bylayer=true, kw... ) cropped = crop(st; to=geom, touches=true) prod(size(cropped)) > 0 || return map(_ -> missing, st) From e84f5f84555db36369d3fa450c3fb6f0b781e117 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Wed, 13 Nov 2024 11:01:14 -0500 Subject: [PATCH 4/9] implement zonal error check and hint for alloc_zonal --- src/methods/zonal.jl | 48 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 39 insertions(+), 9 deletions(-) diff --git a/src/methods/zonal.jl b/src/methods/zonal.jl index e61ff28a8..95be247f1 100644 --- a/src/methods/zonal.jl +++ b/src/methods/zonal.jl @@ -145,19 +145,49 @@ function _zonal(f, x::RasterStackOrArray, ::Nothing, data; return zs end +function _zonal_error_check(f, x, geom; kw...) + try + return _zonal(f, x, geom; kw...) + catch e + # Check if the error is because the function doesn't accept empty iterators + if e isa ArgumentError && occursin("reducing over an empty collection is not allowed", e.msg) + @warn(""" + The function cannot handle empty iterators. You need to supply an `init` value. + If your original call was `zonal(f, x; of, kw...)`, try: + `zonal(x -> f(x; init=...), x; of, kw...)` + """ + ) + # Check if the error is because the function doesn't accept NamedTuples + elseif e isa MethodError && + x isa AbstractRasterStack && + get(kw, :bylayer, true) == false + namedtuple_arg_idx = findfirst( + a -> a isa NamedTuple && + ((eltype(a) <: Missing) || (a isa eltype(x))), + e.args + ) + if !isnothing(namedtuple_arg_idx) + @warn(""" + The function you passed to `zonal` cannot handle a RasterStack's values directly, + as they are NamedTuples of the form `(; varname = value, ...)`. + + Set `bylayer=true` to apply the function to each layer of the stack separately, + or change the function to accept NamedTuple input. + """) + end + end + + rethrow(e) + end +end + function _alloc_zonal(f, x, geoms, n; kw...) # Find first non-missing entry and count number of missing entries n_missing::Int = 0 - # TODO: wrap this in try-catch, so that we can throw an intelligent error. - # There are a couple of scenarios where this can fail: - # 1. The function cannot accept an empty iterator - # 2. If passed a RasterStack, the function may not be able to handle layers. - # In this case you'll get a method error, where one of the arguments is a NamedTuple - # that is the same as eltype(rs). We should check for that and throw an informative error, - # that tells you to set `bylayer = true`. - z1 = _zonal(f, x, first(geoms); kw...) + + z1 = _zonal_error_check(f, x, first(geoms); kw...) for geom in geoms - z1 = _zonal(f, x, geom; kw...) + z1 = _zonal_error_check(f, x, geom; kw...) if !ismissing(z1) break end From 978ddd10c2ebbab6b4a1ec2403bc4e72059bb3b3 Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Wed, 13 Nov 2024 11:16:23 -0500 Subject: [PATCH 5/9] Add tests for zonal error hinting --- test/methods.jl | 50 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/test/methods.jl b/test/methods.jl index 324edaa8f..e120a1c6f 100644 --- a/test/methods.jl +++ b/test/methods.jl @@ -319,6 +319,56 @@ end @test !zonal(x -> x isa Raster, rast; of=polygon, skipmissing=true) @test zonal(x -> x isa Raster, rast; of=polygon, skipmissing=false) end + + @testset "bylayer=true" begin + # Test bylayer=true for RasterStack + st = RasterStack((; a, b, c)) + # Test that bylayer=true gives same results as individual layers + @test zonal(sum, st; of=polygon, bylayer=true) == + map(layer -> zonal(sum, layer; of=polygon), st) + # Test that bylayer=true works with multiple polygons + @test zonal(sum, st; of=[polygon, polygon], bylayer=true) == + map(layer -> zonal(sum, layer; of=[polygon, polygon]), st) + # Test that bylayer=true works with extent + @test zonal(sum, st; of=st, bylayer=true) == + map(sum, st) + end + + @testset "zonal error warnings" begin + mr = Raster(fill(missing, dims(a))) + # Test warning for empty iterator + @test_warn r"The function cannot handle empty iterators.*" begin + try + zonal(minimum, mr; of = [polygon], bylayer = true, skipmissing = true) + catch e + @test e isa ArgumentError + end + end + + # Test warning for NamedTuple input with bylayer=false + @test_warn r"The function you passed to `zonal` cannot handle a RasterStack's values directly.*" begin + try + zonal(mean, st; of = [polygon], bylayer=false) + catch e + @test e isa MethodError + end + end + end + + # TODO: this will not work, until we fix skipmissing for RasterStacks. + # @testset "bylayer=false" begin + # # Test bylayer=false for RasterStack + # st = RasterStack((; a, b, c)) + # # Test that bylayer=false gives results for whole stack at once + # @test zonal(sum, st; of=polygon, bylayer=false) == + # sum(skipmissing(mask(st; with=polygon))) + # # Test that bylayer=false works with multiple polygons + # @test zonal(sum, st; of=[polygon, polygon], bylayer=false) == + # [sum(skipmissing(mask(st; with=p))) for p in [polygon, polygon]] + # # Test that bylayer=false works with extent + # @test zonal(sum, st; of=st, bylayer=false) == + # sum(st) + # end end @testset "zonal return missing" begin From d6a1d99e2fa86775457132562157986b1aa838ae Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Wed, 13 Nov 2024 11:18:18 -0500 Subject: [PATCH 6/9] drop compat back down to DD 0.28 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index f24535992..a7ba6b713 100644 --- a/Project.toml +++ b/Project.toml @@ -58,7 +58,7 @@ CommonDataModel = "0.2.3, 0.3" ConstructionBase = "1" CoordinateTransformations = "0.6.2" DataFrames = "1" -DimensionalData = "0.29" +DimensionalData = "0.28.2" DiskArrays = "0.3, 0.4" Extents = "0.1" FillArrays = "0.12, 0.13, 1" From d81b59526190c9c6ef73220317257593ecdf988f Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Wed, 13 Nov 2024 12:20:36 -0500 Subject: [PATCH 7/9] reset to DD v0.28 functions --- src/methods/zonal.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/methods/zonal.jl b/src/methods/zonal.jl index 95be247f1..cbae6e46c 100644 --- a/src/methods/zonal.jl +++ b/src/methods/zonal.jl @@ -91,10 +91,10 @@ function _zonal(f, x::RasterStack, ext::Extents.Extent; skipmissing=true, bylaye cropped = crop(x; to=ext, touches=true) prod(size(cropped)) > 0 || return missing if bylayer # apply f to each layer - return maplayers(cropped) do A + return DD._maybestack(cropped, map(values(cropped)) do A prod(size(A)) > 0 || return missing _maybe_skipmissing_call(f, A, skipmissing) - end + end) else # apply f to the whole stack prod(size(cropped)) > 0 || return missing return _maybe_skipmissing_call(f, cropped, skipmissing) @@ -122,10 +122,10 @@ function _zonal(f, st::AbstractRasterStack, ::GI.AbstractGeometryTrait, geom; prod(size(cropped)) > 0 || return map(_ -> missing, st) masked = mask(cropped; with=geom, kw...) if bylayer # apply f to each layer - return maplayers(masked) do A + return DD._maybestack(masked, map(values(masked)) do A # TODO: replace this with `maplayers` once DD v0.29 is available prod(size(A)) > 0 || return missing _maybe_skipmissing_call(f, A, skipmissing) - end + end) else # apply f to the whole stack prod(size(masked)) > 0 || return missing return _maybe_skipmissing_call(f, masked, skipmissing) From 7ff7eb0a6255f032e1886e5d69e21749990ccdfd Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Wed, 13 Nov 2024 17:03:24 -0500 Subject: [PATCH 8/9] Bump Shapefile compat up to the latest version --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index a7ba6b713..68253e989 100644 --- a/Project.toml +++ b/Project.toml @@ -81,7 +81,7 @@ RecipesBase = "0.7, 0.8, 1.0" Reexport = "0.2, 1.0" SafeTestsets = "0.1" Setfield = "0.6, 0.7, 0.8, 1" -Shapefile = "0.10, 0.11" +Shapefile = "0.10, 0.11, 0.12, 0.13" Statistics = "1" StatsBase = "0.34" Test = "1" From b39877d082c49dad19557ea1865f8b71024e9aed Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Wed, 13 Nov 2024 20:57:28 -0500 Subject: [PATCH 9/9] Don't overwrite preexisting `a` with an array --- test/methods.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/methods.jl b/test/methods.jl index e120a1c6f..35346e6b4 100644 --- a/test/methods.jl +++ b/test/methods.jl @@ -310,10 +310,10 @@ end sum(st) @testset "skipmissing" begin - a = Array{Union{Missing,Int}}(undef, 26, 31) - a .= (1:26) * (1:31)' - a[1:10, 3:10] .= missing - rast = Raster(a, (X(-20:5), Y(0:30))) + _arr = Array{Union{Missing,Int}}(undef, 26, 31) + _arr .= (1:26) * (1:31)' + _arr[1:10, 3:10] .= missing + rast = Raster(_arr, (X(-20:5), Y(0:30))) @test zonal(sum, rast; of=polygon, skipmissing=false) === missing @test zonal(sum, rast; of=polygon, skipmissing=true) isa Int @test !zonal(x -> x isa Raster, rast; of=polygon, skipmissing=true)