From ec02fd1f719966ed3bca70e4b17fbe6013602f7d Mon Sep 17 00:00:00 2001 From: rafaqz Date: Sun, 26 May 2024 07:57:04 +0200 Subject: [PATCH 01/38] start adding cf keyword --- docs/src/gbif_wflow.md | 2 +- ext/RastersArchGDALExt/RastersArchGDALExt.jl | 5 +- ext/RastersArchGDALExt/gdal_source.jl | 156 +++++++++++------- ext/RastersArchGDALExt/warp.jl | 4 +- ext/RastersNCDatasetsExt/ncdatasets_source.jl | 49 +++--- src/Rasters.jl | 1 + src/array.jl | 58 ++++--- src/create.jl | 85 +++++++--- src/filearray.jl | 43 +++-- src/filestack.jl | 13 +- src/methods/crop_extend.jl | 9 +- src/nokw.jl | 3 + src/sources/commondatamodel.jl | 104 ++---------- src/sources/grd.jl | 12 +- src/utils.jl | 45 +++-- test/methods.jl | 11 +- test/rasterize.jl | 3 +- test/resample.jl | 50 +++--- test/runtests.jl | 2 +- test/sources/gdal.jl | 45 ++++- test/sources/ncdatasets.jl | 42 ++++- 21 files changed, 431 insertions(+), 311 deletions(-) diff --git a/docs/src/gbif_wflow.md b/docs/src/gbif_wflow.md index eb6ad82cf..de8474fac 100644 --- a/docs/src/gbif_wflow.md +++ b/docs/src/gbif_wflow.md @@ -51,4 +51,4 @@ Or convert them to a `DataFrame`: using DataFrames df = DataFrame(predictors) df[1:5,:] -```` \ No newline at end of file +```` diff --git a/ext/RastersArchGDALExt/RastersArchGDALExt.jl b/ext/RastersArchGDALExt/RastersArchGDALExt.jl index 68e721838..bee5e7576 100644 --- a/ext/RastersArchGDALExt/RastersArchGDALExt.jl +++ b/ext/RastersArchGDALExt/RastersArchGDALExt.jl @@ -1,9 +1,9 @@ module RastersArchGDALExt @static if isdefined(Base, :get_extension) # julia < 1.9 - using Rasters, ArchGDAL + using Rasters, ArchGDAL, CommonDataModel else - using ..Rasters, ..ArchGDAL + using ..Rasters, ..ArchGDAL, ..CommonDataModel end import DiskArrays, @@ -28,6 +28,7 @@ const DD = DimensionalData const DA = DiskArrays const GI = GeoInterface const LA = Lookups +const CDM = CommonDataModel include("cellsize.jl") include("gdal_source.jl") diff --git a/ext/RastersArchGDALExt/gdal_source.jl 
b/ext/RastersArchGDALExt/gdal_source.jl index 3858cb352..4e9979a39 100644 --- a/ext/RastersArchGDALExt/gdal_source.jl +++ b/ext/RastersArchGDALExt/gdal_source.jl @@ -38,11 +38,6 @@ const GDAL_VIRTUAL_FILESYSTEMS = "/vsi" .* ( # Array ######################################################################## -function RA.FileArray{GDALsource}(ds::AG.RasterDataset{T}, filename; kw...) where {T} - eachchunk, haschunks = DA.eachchunk(ds), DA.haschunks(ds) - RA.FileArray{GDALsource,T,3}(filename, size(ds); eachchunk, haschunks, kw...) -end - RA.cleanreturn(A::AG.RasterDataset) = Array(A) RA.haslayers(::GDALsource) = false RA._sourcetrait(A::AG.RasterDataset) = GDALsource() @@ -52,44 +47,33 @@ function Base.write( force=false, verbose=true, missingval=nokw, + maskingval=RA.missingval(A), + scale=nokw, + offset=nokw, + coerce=nokw, kw... ) where T RA.check_can_write(filename, force) A1 = _maybe_correct_to_write(A, missingval) - _create_with_driver(filename, dims(A1), eltype(A1), Rasters.missingval(A1); _block_template=A1, kw...) do dataset + missingval = missingval isa NoKW ? RA._writeable_missing(T; verbose) : missingval + mod = RA._mod(missingval, maskingval, scale, offset, coerce) + _create_with_driver(filename, dims(A1), eltype(A1); + missingval, _block_template=A1, scale, offset, kw... + ) do dataset verbose && _maybe_warn_south_up(A, verbose, "Writing South-up. Use `reverse(myrast; dims=Y)` first to write conventional North-up") open(A1; write=true) do O - AG.RasterDataset(dataset) .= parent(O) + RA._maybe_modify(AG.RasterDataset(dataset), mod) .= parent(O) end end return filename end -function RA.create(filename, ::GDALsource, T::Type, dims::DD.DimTuple; - missingval=nokw, - metadata=nokw, - name=nokw, - lazy=true, - verbose=true, - kw... -) - T = Missings.nonmissingtype(T) - missingval = ismissing(missingval) ? RA._writeable_missing(T) : missingval - _create_with_driver(filename, dims, T, missingval; kw...) 
do _ - verbose && _maybe_warn_south_up(dims, verbose, "Creating a South-up raster. Use `reverse(myrast; dims=Y)` first to write conventional North-up") - nothing - end - - return Raster(filename; source=GDALsource(), name, lazy, metadata, dropband=!hasdim(dims, Band)) -end - function _maybe_warn_south_up(A, verbose, msg) verbose && lookup(A, Y) isa AbstractSampled && order(A, Y) isa ForwardOrdered && @warn msg end function RA._open(f, ::GDALsource, filename::AbstractString; - write=false, - kw... + write=false, mod=NoMod(), kw... ) # Check the file actually exists because the GDAL error is unhelpful if !isfile(filename) @@ -106,21 +90,23 @@ function RA._open(f, ::GDALsource, filename::AbstractString; end end end - if write - # Pass the OF_UPDATE flag to GDAL - AG.readraster(RA.cleanreturn ∘ f, filename; flags=AG.OF_UPDATE) - else - # Otherwise just read - AG.readraster(RA.cleanreturn ∘ f, filename) + flags = write ? AG.OF_UPDATE : AG.OF_READONLY + return AG.readraster(filename; flags) do A + C = RA._maybe_modify(A, mod) + RA.cleanreturn(f(C)) end end -RA._open(f, ::GDALsource, ds::AG.RasterDataset; kw...) = RA.cleanreturn(f(ds)) +RA._open(f, ::GDALsource, A::AG.RasterDataset; mod=NoMod(), kw...) = + RA.cleanreturn(f(RA._maybe_modify(A, mod))) # DimensionalData methods for ArchGDAL types ############################### # These methods are type piracy on DimensionalData/ArchGDAL and may have to move some day + +RA._dims(var::CDM.CFVariable{<:Any,<:Any,<:AG.RasterDataset}, crs=nokw, mappedcrs=nokw) = + RA._dims(var.var, crs, mappedcrs) # We allow passing in crs and mappedcrs manually function RA._dims(raster::AG.RasterDataset, crs=nokw, mappedcrs=nokw) gt_dims = try @@ -210,14 +196,21 @@ end # TODO make metadata optional, its slow to get function RA._metadata(raster::AG.RasterDataset, args...) 
band = AG.getband(raster.ds, 1) + metadata = RA._metadatadict(GDALsource()) # color = AG.getname(AG.getcolorinterp(band)) scale = AG.getscale(band) offset = AG.getoffset(band) # norvw = AG.noverview(band) units = AG.getunittype(band) filelist = AG.filelist(raster) - metadata = RA._metadatadict(GDALsource(), "scale"=>scale, "offset"=>offset) - if units == "" + # Set metadata if they are not default values + if scale != oneunit(scale) + metadata["scale"] = scale + end + if offset != zero(offset) + metadata["offset"] = offset + end + if units != "" metadata["units"] = units end if length(filelist) > 0 @@ -238,16 +231,19 @@ function RA.Raster(ds::AG.RasterDataset; name=nokw, metadata=RA._metadata(ds), missingval=RA.missingval(ds), + maskingval=missing, lazy=false, - dropband=false + dropband=false, + cf=true, ) kw = (; refdims, name, metadata, missingval) filelist = AG.filelist(ds) + mod = RA._mod(cf, metadata; missingval, maskingval) raster = if lazy && length(filelist) > 0 filename = first(filelist) - Raster(FileArray{GDALsource}(ds, filename), dims; kw...) + Raster(FileArray{GDALsource}(ds, filename; mod), dims, kw...) else - Raster(Array(ds), dims; kw...) + Raster(Array(RA._maybe_modify(ds, mod)), dims; kw...) end return dropband ? RA._drop_single_band(raster, lazy) : raster end @@ -288,12 +284,38 @@ function AG.Dataset(f::Function, A::AbstractRaster; kw...) f(rds.ds) end end -function AG.RasterDataset(f::Function, A::AbstractRaster; filename="", kw...) +function AG.RasterDataset(f::Function, A::AbstractRaster; + filename="", + scale=nokw, + offset=nokw, + coerce=nokw, + verbose=false, + eltype=Missings.nonmissingtype(eltype(A)), + maskingval=nokw, + missingval=nokw, + kw... +) A1 = _maybe_correct_to_write(A) - return _create_with_driver(filename, dims(A1), eltype(A1), missingval(A1); _block_template=A1, kw...) do dataset + mv = RA.missingval(A1) + if RA.isnokw(missingval) + missingval = (ismissing(mv) || typeof(mv) <: eltype) ? 
RA._type_missingval(eltype) : mv + end + if RA.isnokw(maskingval) + if ismissing(mv) + maskingval = missing + elseif maskingval === missingval + maskingval = nothing + else + maskingval = mv + end + end + mod = RA._mod(missingval, maskingval, scale, offset, coerce) + return _create_with_driver(filename, dims(A1), eltype; + _block_template=A1, missingval, scale, offset, verbose, kw... + ) do dataset rds = AG.RasterDataset(dataset) - open(A1) do a - rds .= parent(a) + open(A1) do O + RA._maybe_modify(rds, mod) .= parent(O) end f(rds) end @@ -303,19 +325,19 @@ end # Sometimes GDAL stores the `missingval` in the wrong type, so fix it. _missingval_from_gdal(T::Type{<:AbstractFloat}, x::Real) = convert(T, x) -function _missingval_from_gdal(T::Type{<:Integer}, x::AbstractFloat) +function _missingval_from_gdal(T::Type{<:Integer}, x::AbstractFloat; verbose=true) if trunc(x) === x && x >= typemin(T) && x <= typemax(T) convert(T, x) else - @warn "Missing value $x can't be converted to array eltype $T. `missingval` set to `nothing`" + verbose && @warn "Missing value $x can't be converted to array eltype $T. `missingval` set to `nothing`" nothing end end -function _missingval_from_gdal(T::Type{<:Integer}, x::Integer) +function _missingval_from_gdal(T::Type{<:Integer}, x::Integer; verbose=true) if x >= typemin(T) && x <= typemax(T) convert(T, x) else - @warn "Missing value $x can't be converted to array eltype $T. `missingval` set to `nothing`" + verbose && @warn "Missing value $x can't be converted to array eltype $T. `missingval` set to `nothing`" nothing end end @@ -328,7 +350,7 @@ _maybe_correct_to_write(::Lookup, A::AbstractDimArray, args...) = A function _maybe_correct_to_write( lookup::Union{AbstractSampled,NoLookup}, A::AbstractDimArray, args... ) - RA._maybe_use_type_missingval(A, GDALsource(), args...) 
|> _maybe_permute_to_gdal + _maybe_permute_to_gdal(A) end _check_driver(filename::Nothing, driver) = "MEM" @@ -348,13 +370,20 @@ end # Handle creating a dataset with any driver, # applying the function `f` to the created dataset -function _create_with_driver(f, filename, dims::Tuple, T, missingval; +function _create_with_driver(f, filename, dims::Tuple, T; + verbose=true, + missingval=nokw, options=Dict{String,String}(), driver="", _block_template=nothing, chunks=nokw, + scale=nokw, + offset=nokw, kw... ) + verbose && _maybe_warn_south_up(dims, verbose, "Creating a South-up raster. Use `reverse(myrast; dims=Y)` first to write conventional North-up") + + missingval = RA.isnokw(missingval) || ismissing(missingval) ? RA._writeable_missing(T; verbose) : missingval _gdal_validate(dims) x, y = map(DD.dims(dims, (XDim, YDim))) do d @@ -365,7 +394,7 @@ function _create_with_driver(f, filename, dims::Tuple, T, missingval; nbands = hasdim(dims, Band) ? length(DD.dims(dims, Band())) : 1 driver = _check_driver(filename, driver) - options_vec = _process_options(driver, options; _block_template, chunks) + options_vec = _process_options(driver, options; _block_template, chunks, verbose) gdaldriver = driver isa String ? AG.getdriver(driver) : driver create_kw = (; width=length(x), height=length(y), nbands, dtype=T,) @@ -373,17 +402,19 @@ function _create_with_driver(f, filename, dims::Tuple, T, missingval; if AG.shortname(gdaldriver) in GDAL_DRIVERS_SUPPORTING_CREATE AG.create(filename; driver=gdaldriver, options=options_vec, create_kw...) do dataset - _set_dataset_properties!(dataset, newdims, missingval) + _set_dataset_properties!(dataset, newdims, missingval, scale, offset) f(dataset) end else # Create a tif and copy it to `filename`, as ArchGDAL.create # does not support direct creation of ASCII etc. 
rasters - tif_options_vec = _process_options("GTiff", Dict{String,String}(); chunks, _block_template) + tif_options_vec = _process_options("GTiff", Dict{String,String}(); + chunks, _block_template, verbose + ) tif_driver = AG.getdriver("GTiff") tif_name = tempname() * ".tif" AG.create(tif_name; driver=tif_driver, options=tif_options_vec, create_kw...) do dataset - _set_dataset_properties!(dataset, newdims, missingval) + _set_dataset_properties!(dataset, newdims, missingval, scale, offset) f(dataset) target_ds = AG.copy(dataset; filename=filename, driver=gdaldriver, options=options_vec) AG.destroy(target_ds) @@ -404,7 +435,8 @@ end # Convert a Dict of options to a Vector{String} for GDAL function _process_options(driver::String, options::Dict; chunks=nokw, - _block_template=nothing + _block_template=nothing, + verbose=true, ) options_str = Dict(string(k)=>string(v) for (k,v) in options) # Get the GDAL driver object @@ -431,7 +463,7 @@ function _process_options(driver::String, options::Dict; if (xchunksize % 16 == 0) && (ychunksize % 16 == 0) options_str["TILED"] = "YES" else - xchunksize == 1 || @warn "X and Y chunk size do not match. Columns are used and X size $xchunksize is ignored" + xchunksize == 1 || (verbose && @warn "X and Y chunk size do not match. Columns are used and X size $xchunksize is ignored") end # don't overwrite user specified values if !("BLOCKXSIZE" in keys(options_str)) @@ -445,7 +477,7 @@ function _process_options(driver::String, options::Dict; if xchunksize == ychunksize options_str["BLOCKSIZE"] = block_x else - @warn "Writing COG X and Y chunks do not match: $block_x, $block_y. Default of 512, 512 used." + verbose && @warn "Writing COG X and Y chunks do not match: $block_x, $block_y. Default of 512, 512 used." 
end end end @@ -491,9 +523,9 @@ end # Set the properties of an ArchGDAL Dataset to match # the dimensions and missingval of a Raster -_set_dataset_properties!(ds::AG.Dataset, A) = - _set_dataset_properties!(ds, dims(A), missingval(A)) -function _set_dataset_properties!(dataset::AG.Dataset, dims::Tuple, missingval) +_set_dataset_properties!(ds::AG.Dataset, A, scale, offset) = + _set_dataset_properties!(ds, dims(A), missingval(A), scale, offset) +function _set_dataset_properties!(dataset::AG.Dataset, dims::Tuple, missingval, scale, offset) # We cant write mixed Points/Intervals, so default to Intervals if mixed xy = DD.dims(dims, (X, Y)) if any(x -> x isa Intervals, map(sampling, xy)) && any(x -> x isa Points, map(sampling, xy)) @@ -522,12 +554,14 @@ function _set_dataset_properties!(dataset::AG.Dataset, dims::Tuple, missingval) gt = RA.dims2geotransform(x, y) AG.setgeotransform!(dataset, gt) - # Set the missing value/nodataval. This is a little complicated - # because gdal has separate method for 64 bit integers if !isnothing(missingval) bands = hasdim(dims, Band) ? axes(DD.dims(dims, Band), 1) : 1 for i in bands rasterband = AG.getband(dataset, i) + (RA.isnokw(offset) || isnothing(offset)) || AG.setoffset!(rasterband, offset) + (RA.isnokw(scale) || isnothing(scale)) || AG.setscale!(rasterband, scale) + # Set the missing value/nodataval. This is a little complicated + # because gdal has separate method for 64 bit integers if missingval isa Int64 AG.GDAL.gdalsetrasternodatavalueasint64(rasterband, missingval) elseif missingval isa UInt64 diff --git a/ext/RastersArchGDALExt/warp.jl b/ext/RastersArchGDALExt/warp.jl index f9bb48253..aedc6a71e 100644 --- a/ext/RastersArchGDALExt/warp.jl +++ b/ext/RastersArchGDALExt/warp.jl @@ -23,14 +23,14 @@ function _warp(A::AbstractRaster, flags::Dict; filename=nothing, suffix="", kw.. tempfile = isnothing(filename) ? nothing : tempname() * ".tif" warp_kw = isnothing(filename) || filename == "/vsimem/tmp" ? 
() : (; dest=filename) out = AG.Dataset(A1; filename=tempfile, kw...) do dataset - rds = Raster(dataset) AG.gdalwarp([dataset], flagvect; warp_kw...) do warped # Read the raster lazily, dropping Band if there is none in `A` - raster = Raster(warped; lazy=true, dropband=!hasdim(A, Band()), name = name(A)) + raster = Raster(warped; lazy=true, dropband=!hasdim(A, Band()), name=name(A)) # Either read the MEM dataset to an Array, or keep a filename base raster lazy return isnothing(filename) ? read(raster) : raster end end + @show missingval(out) # And permute the dimensions back to what they were in A out1 = _maybe_restore_from_gdal(out, dims(A)) out2 = _reset_gdalwarp_sampling(out1, A) diff --git a/ext/RastersNCDatasetsExt/ncdatasets_source.jl b/ext/RastersNCDatasetsExt/ncdatasets_source.jl index 8131d3049..8164b2dea 100644 --- a/ext/RastersNCDatasetsExt/ncdatasets_source.jl +++ b/ext/RastersNCDatasetsExt/ncdatasets_source.jl @@ -68,31 +68,43 @@ function _writevar!(ds::AbstractDataset, A::AbstractRaster{T,N}; missingval=nokw, chunks=nokw, chunksizes=RA._chunks_to_tuple(A, dims(A), chunks), + scale=nokw, + offset=nokw, + eltype=Missings.nonmissingtype(T), kw... ) where {T,N} - missingval = missingval isa NoKW ? Rasters.missingval(A) : missingval _def_dim_var!(ds, A) attrib = RA._attribdict(metadata(A)) - # Set _FillValue - eltyp = Missings.nonmissingtype(T) - eltyp <: NCDAllowedType || throw(ArgumentError(""" + # Scale and offset + scale = if isnokw(scale) || isnothing(scale) + delete!(attrib, "scale_factor") + nothing + else + attrib["scale_factor"] = scale + end + offset = if isnokw(offset) || isnothing(offset) + delete!(attrib, "add_offset") + nothing + else + attrib["add_offset"] = offset + end + maskingval1 = begin + mv = maskingval isa NoKW ? Rasters.missingval(A) : maskingval + mv === missingval ? 
nothing : mv + end + mod = _mod(missingval1, maskingval1, scale, offset) + + eltype <: NCDAllowedType || throw(ArgumentError(""" Element type $eltyp cannot be written to NetCDF. Convert it to one of $(Base.uniontypes(NCDAllowedType)), usually by broadcasting the desired type constructor over the `Raster`, e.g. `newrast = Float32.(rast)`")) """ )) - if ismissing(missingval) - fillval = if haskey(attrib, "_FillValue") && attrib["_FillValue"] isa eltyp - attrib["_FillValue"] - else - NCD.fillvalue(eltyp) - end - attrib["_FillValue"] = fillval - A = replace_missing(A, fillval) - elseif Rasters.missingval(A) isa T + + # Set _FillValue + if !isnothing(maskingval1) && Rasters.missingval(A) isa T attrib["_FillValue"] = missingval - else - verbose && !(missingval isa Nothing) && @warn "`missingval` $(missingval) is not the same type as your data $T." end + verbose && !(maskingval isa Nothing) && @warn "`maskingval` $(maskingval) is not the same type as your data $T." key = if string(DD.name(A)) == "" UNNAMED_NCD_FILE_KEY @@ -101,8 +113,7 @@ function _writevar!(ds::AbstractDataset, A::AbstractRaster{T,N}; end dimnames = lowercase.(string.(map(RA.name, dims(A)))) - var = NCD.defVar(ds, key, eltyp, dimnames; attrib=attrib, chunksizes, kw...) 
|> RA.CFDiskArray - + var = _maybe_modify(NCD.defVar(ds, key, eltyp, dimnames; attrib=attrib, chunksizes, kw...), mod) # Write with a DiskArays.jl broadcast var .= A @@ -135,10 +146,6 @@ function _def_dim_var!(ds::AbstractDataset, dim::Dimension) return nothing end -# Hack to get the inner DiskArrays chunks as they are not exposed at the top level -RA._get_eachchunk(var::NCD.Variable) = DiskArrays.eachchunk(var) -RA._get_haschunks(var::NCD.Variable) = DiskArrays.haschunks(var) - RA._sourcetrait(::NCD.Dataset) = NCDsource() RA._sourcetrait(::NCD.Variable) = NCDsource() diff --git a/src/Rasters.jl b/src/Rasters.jl index a297096dd..2c7f67383 100644 --- a/src/Rasters.jl +++ b/src/Rasters.jl @@ -94,6 +94,7 @@ include("methods/shared_docstrings.jl") include("lookup.jl") include("dimensions.jl") include("sources/sources.jl") +include("modifieddiskarray.jl") include("filearray.jl") include("filestack.jl") include("openstack.jl") diff --git a/src/array.jl b/src/array.jl index c8c9179db..8e06e6b8c 100644 --- a/src/array.jl +++ b/src/array.jl @@ -64,7 +64,7 @@ function DD.rebuild( A::AbstractRaster, data, dims::Tuple, refdims, name, metadata, missingval=missingval(A) ) - missingval1 = _fix_missingval(eltype(data), missingval) + missingval1 = _fix_missingval(eltype(data), missingval, NoMetadata()) Raster(data, dims, refdims, name, metadata, missingval1) end function DD.rebuild(A::AbstractRaster; @@ -75,8 +75,7 @@ function DD.rebuild(A::AbstractRaster; end function DD.modify(f, A::AbstractRaster) - # Have to avoid calling `open` on CFDiskArray - newdata = if isdisk(A) && !(parent(A) isa CFDiskArray) + newdata = if isdisk(A) # TODO may have to avoid calling `open` on DiskArray open(A) do O f(parent(O)) end @@ -201,6 +200,11 @@ $GROUP_KEYWORD when you know the value is not specified or is incorrect. This will *not* change any values in the raster, it simply assigns which value is treated as missing. 
To replace all of the missing values in the raster, use [`replace_missing`](@ref). +- `maskingval`: A value to convert `missingval` to, by default `missing`. If this is set it + will be the return value of `missingval(raster)` - `maskingval` becomes the new `missingval`. + Setting `maskingval` to `nothing` means no masking will occur, and the original `missingval` + will be the final `missingval`. This can give better performance than using `missing`. + Another efficient option is to use e.g. `zero(eltype(raster))` to replace missing values with zero. - `metadata`: `Dict` or `Metadata` object for the array, or `NoMetadata()`. $CONSTRUCTOR_CRS_KEYWORD $CONSTRUCTOR_MAPPEDCRS_KEYWORD @@ -229,7 +233,7 @@ struct Raster{T,N,D<:Tuple,R<:Tuple,A<:AbstractArray{T,N},Na,Me,Mi<:Union{T,Noth data::A, dims::D, refdims::R, name::Na, metadata::Me, missingval::Mi ) where {D<:Tuple,R<:Tuple,A<:AbstractArray{T,N},Na,Me,Mi} where {T,N} DD.checkdims(data, dims) - missingval1 = _fix_missingval(T, missingval) + missingval1 = _fix_missingval(T, missingval, metadata) new{T,N,D,R,A,Na,Me,typeof(missingval1)}(data, dims, refdims, name, metadata, missingval1) end end @@ -283,7 +287,7 @@ function Raster(filename::AbstractString; kw... ) source = _sourcetrait(filename, source) - _open(filename; source) do ds + _open(filename; source, mod=NoMod()) do ds Raster(ds, filename; source, kw...) end::Raster end @@ -294,42 +298,47 @@ function Raster(ds, filename::AbstractString; group=nokw, metadata=nokw, missingval=nokw, + maskingval=nokw, crs=nokw, mappedcrs=nokw, + coerce=nokw, source=nokw, - replace_missing=false, write=false, lazy=false, dropband=true, + cf=true, )::Raster name1 = filekey(ds, name) source = _sourcetrait(filename, source) - data1, dims1, metadata1, missingval1 = _open(source, ds; name=name1, group) do var + data1, dims1, metadata1, maskingval1 = _open(source, ds; name=name1, group, mod=NoMod()) do var metadata1 = isnokw(metadata) ? 
_metadata(var) : metadata - missingval1 = _fix_missingval(var, missingval) - rm = replace_missing && !isnothing(missingval1) - missingval2 = rm ? missing : missingval1 + missingval1 = _fix_missingval(var, missingval, metadata1) + maskingval1 = isnokw(maskingval) ? missing : maskingval + mod = _mod(cf, metadata1; missingval=missingval1, maskingval=maskingval1, coerce) data = if lazy - A = FileArray{typeof(source)}(var, filename; name=name1, group, write) - rm ? _replace_missing(A, missingval1) : A + FileArray{typeof(source)}(var, filename; + name=name1, group, mod, write + ) else - _checkmem(var) - x = Array(rm ? _replace_missing(var, missingval1) : var) - x isa AbstractArray ? x : fill(x) # Catch an NCDatasets bug + modvar = _maybe_modify(var, mod) + _checkmem(modvar) + x = Array(modvar) + # Catch an NCDatasets zero dimensional bug + x isa AbstractArray ? x : fill(x) end dims1 = isnokw(dims) ? _dims(var, crs, mappedcrs) : format(dims, data) - data, dims1, metadata1, missingval2 + data, dims1, metadata1, maskingval1 end name2 = name1 isa Union{NoKW,Nothing} ? Symbol("") : Symbol(name1) - raster = Raster(data1, dims1, refdims, name2, metadata1, missingval1) + raster = Raster(data1, dims1, refdims, name2, metadata1, maskingval1) return dropband ? 
_drop_single_band(raster, lazy) : raster end -_fix_missingval(::Type, ::Union{NoKW,Nothing}) = nothing -_fix_missingval(::AbstractArray, ::Nothing) = nothing -_fix_missingval(A::AbstractArray, ::NoKW) = _fix_missingval(A, Rasters.missingval(A)) -_fix_missingval(::AbstractArray{T}, missingval) where T = _fix_missingval(T, missingval) -function _fix_missingval(::Type{T}, missingval::M) where {T,M} +_fix_missingval(::Type, ::Union{NoKW,Nothing}, metadata) = nothing +_fix_missingval(::AbstractArray, ::Nothing, metadata) = nothing +_fix_missingval(A::AbstractArray, ::NoKW, metadata) = _fix_missingval(A, Rasters.missingval(A), metadata) +_fix_missingval(::AbstractArray{T}, missingval, metadata) where T = _fix_missingval(T, missingval, metadata) +function _fix_missingval(::Type{T}, missingval::M, metadata) where {T,M} T1 = nonmissingtype(T) if missingval isa T missingval @@ -345,11 +354,6 @@ function _fix_missingval(::Type{T}, missingval::M) where {T,M} end end -function _replace_missing(A::AbstractArray{T}, missingval) where T - repmissing(x) = isequal(x, missingval) ? missing : x - return repmissing.(A) -end - filekey(ds, name) = name filekey(filename::String) = Symbol(splitext(basename(filename))[1]) diff --git a/src/create.jl b/src/create.jl index f0f66472c..f1aedd9ce 100644 --- a/src/create.jl +++ b/src/create.jl @@ -1,41 +1,80 @@ +""" + create(filename, A::Raster; kw...) + create(filename, T, dims::Tuple; kw...) + +Create a new Raster. If `filename` is a `String` it will be created on disk, +and opened lazily. If it is `nothing` a regular in-memory `Raster` +will be created. If written to disk, the values will be `missingval` when it +is defined, if in-memory values will be `undef`. + +Generally all indices should be written to after `create`. + +The return value is a `Raster`. The `eltype` will usually be `T`, except +where `scale` and/or `offset` keywords are used, in which case `T` will +depend on the tyepe promotion of `scale` and `offset` and `T`. 
+`maskingval` will also affect the `eltype`. + +# Keywords + + +""" create(filename, A::AbstractRaster{T}; kw...) where T = create(filename, T, A; kw...) -function create(filename, T, A::AbstractRaster; - name=name(A), metadata=metadata(A), missingval=missingval(A), kw... +function create(filename, x, A::AbstractRaster; + name=name(A), + metadata=metadata(A), + missingval=missingval(A), + kw... ) - create(filename, T, dims(A); parent=parent(A), name, metadata, missingval, kw...) + create(filename, x, dims(A); parent=parent(A), name, metadata, missingval, kw...) end -function create(filename::AbstractString, T::Type, dims::Tuple; - lazy=true, - parent=nothing, - suffix=nothing, - source::Source=_sourcetrait(filename), - missingval=nothing, kw... +function create(filename::AbstractString, x, dims::Tuple; + lazy=true, + parent=nokw, + suffix=nokw, + source::Source=_sourcetrait(filename), + missingval=nokw, + kw... ) filename = _maybe_add_suffix(filename, suffix) # This calls `create` in the /sources file for this `source` - create(filename, source, T, dims; lazy, missingval, kw...) + create(filename, source, x, dims; lazy, missingval, kw...) end function create(filename::Nothing, T::Type, dims::Tuple; - parent=nothing, - suffix=nothing, - missingval, + parent=nokw, + suffix=nokw, + force=false, + missingval, kw... ) T = isnothing(missingval) ? T : promote_type(T, typeof(missingval)) - data = isnothing(parent) ? Array{T}(undef, dims) : similar(parent, T, size(dims)) + data = isnokw(parent) || isnothing(parent) ? Array{T}(undef, dims) : similar(parent, T, size(dims)) Raster(data, dims; missingval, kw...) 
end - -_maybe_add_suffix(filename::Nothing, suffix) = nothing -_maybe_add_suffix(filename::Nothing, suffix::Nothing) = nothing -_maybe_add_suffix(filename, suffix::Nothing) = filename -function _maybe_add_suffix(filename, suffix) - base, ext = splitext(filename) - if string(suffix) == "" - filename +function create(filename::AbstractString, source::Source, T::Type, dims::DimTuple; + name=nokw, + missingval=nokw, + maskingval=missingval, + metadata=nokw, + chunks=nokw, + scale=nokw, + offset=nokw, + dropband=!hasdim(dims, Band), + lazy=true, + verbose=true, + force=false, + coerce=nokw, +) + T1 = Missings.nonmissingtype(T) + if isnothing(missingval) + A = FillArrays.Zeros{T1}(map(length, dims)) else - return string(base, "_", suffix, ext) + missingval = ismissing(missingval) || isnokw(missingval) ? _type_missingval(T1) : convert(T1, missingval) + A = FillArrays.Fill{T1}(missingval, map(length, dims)) end + # Create layers of zero arrays + rast = Raster(A, dims; name, missingval) + write(filename, source, rast; chunks, metadata, scale, offset, missingval, verbose, force, coerce) + return Raster(filename; source, lazy, metadata, missingval, maskingval, dropband) end diff --git a/src/filearray.jl b/src/filearray.jl index 500e2e151..64be5098e 100644 --- a/src/filearray.jl +++ b/src/filearray.jl @@ -1,4 +1,3 @@ - """ FileArray{S} <: DiskArrays.AbstractDiskArray @@ -6,38 +5,56 @@ Filearray is a DiskArrays.jl `AbstractDiskArray`. Instead of holding an open object, it just holds a filename string that is opened lazily when it needs to be read. 
""" -struct FileArray{S,T,N,Na,G,EC,HC} <: DiskArrays.AbstractDiskArray{T,N} +struct FileArray{S,T,N,Na,G,EC,HC,M<:AbstractModifications} <: DiskArrays.AbstractDiskArray{T,N} filename::String size::NTuple{N,Int} name::Na group::G eachchunk::EC haschunks::HC + mod::M write::Bool end function FileArray{S,T,N}( filename, - size, + size::NTuple{N,Int}, name::Na, - group::G=nothing, - eachchunk::EC=size, - haschunks::HC=DA.Unchunked(), - write=false -) where {S,T,N,Na,G,EC,HC} - FileArray{S,T,N,Na,G,EC,HC}(filename, size, name, group, eachchunk, haschunks, write) + group::G, + eachchunk::EC, + haschunks::HC, + mod::M, + write::Bool, +) where {S,T,N,Na,G,EC,HC,M} + FileArray{S,T,N,Na,G,EC,HC,M}( + String(filename), size, name, group, eachchunk, haschunks, mod, write + ) end -function FileArray{S,T,N}(filename::String, size::Tuple; - name=nokw, group=nokw, eachchunk=size, haschunks=DA.Unchunked(), write=false +function FileArray{S,T,N}(filename::AbstractString, size::Tuple; + name=nokw, + group=nokw, + eachchunk=size, + haschunks=DA.Unchunked(), + mod, + write=false ) where {S,T,N} name = isnokw(name) ? nothing : name group = isnokw(group) ? nothing : group - FileArray{S,T,N}(filename, size, name, group, eachchunk, haschunks, write) + FileArray{S,T,N}(filename, size, name, group, eachchunk, haschunks, mod, write) +end +function FileArray{S}( + var::AbstractArray{<:Any,N}, filename; mod, kw... +) where {S,N} + eachchunk = DA.eachchunk(var) + haschunks = DA.haschunks(var) + T = _mod_eltype(var, mod) + return FileArray{S,T,N}(filename, size(var); eachchunk, haschunks, mod, kw...) 
end # FileArray has S, T and N parameters not recoverable from fields ConstructionBase.constructorof(::Type{<:FileArray{S,T,N}}) where {S,T,N} = FileArray{S,T,N} filename(A::FileArray) = A.filename +mod(A::FileArray) = A.mod DD.name(A::FileArray) = A.name Base.size(A::FileArray) = A.size DA.eachchunk(A::FileArray) = A.eachchunk @@ -45,7 +62,7 @@ DA.haschunks(A::FileArray) = A.haschunks # Run function `f` on the result of _open for the file type function Base.open(f::Function, A::FileArray{S}; write=A.write, kw...) where S - _open(f, S(), filename(A); name=name(A), write, kw...) + _open(f, S(), filename(A); name=name(A), group=A.group, write, mod=mod(A), kw...) end function DA.readblock!(A::FileArray, dst, r::AbstractUnitRange...) diff --git a/src/filestack.jl b/src/filestack.jl index 865864973..a29868788 100644 --- a/src/filestack.jl +++ b/src/filestack.jl @@ -9,24 +9,26 @@ typically netcdf or hdf5. `S` is a backend type like `NCDsource`, and `Na` is a tuple of `Symbol` keys. """ -struct FileStack{S,Na,T,SZ,G<:Union{AbstractString,Symbol,Nothing},EC,HC} +struct FileStack{S,Na,T,SZ,G<:Union{AbstractString,Symbol,Nothing},EC,HC,M<:AbstractModifications} filename::String sizes::SZ group::G eachchunk::EC haschunks::HC + mods::M write::Bool end function FileStack{S,Na,T}( - filename::AbstractString, sizes::SZ, group::G, eachchunk::EC, haschunks::HC, write::Bool -) where {S,Na,T,SZ,G,EC,HC} - FileStack{S,Na,T,SZ,G,EC,HC}(String(filename), sizes, group, eachchunk, haschunks, write) + filename::AbstractString, sizes::SZ, group::G, eachchunk::EC, haschunks::HC, mods::M, write::Bool +) where {S,Na,T,SZ,G,EC,M,HC} + FileStack{S,Na,T,SZ,G,EC,HC,M}(String(filename), sizes, group, eachchunk, haschunks, mods, write) end # FileStack has `S,Na,T` parameters that are not recoverable from fields. 
ConstructionBase.constructorof(::Type{<:FileStack{S,Na,T}}) where {S,Na,T} = FileStack{S,Na,T} filename(fs::FileStack) = fs.filename +mods(fs::FileStack) = fs.mods DD.name(::FileStack{<:Any,Na}) where Na = Na DD.data_eltype(::FileStack{<:Any,<:Any,T}) where T = T @@ -43,8 +45,9 @@ function Base.getindex(fs::FileStack{S,Na,T}, name::Symbol) where {S,Na,T} size = fs.sizes[i] eachchunk = fs.eachchunk[i] haschunks = fs.haschunks[i] + mod = fs.mods[i] N = length(size) - return FileArray{S,_itype(T, i),N}(filename(fs), size, name, fs.group, eachchunk, haschunks, fs.write) + return FileArray{S,_itype(T, i),N}(filename(fs), size, name, fs.group, eachchunk, haschunks, mod, fs.write) end @inline _itype(::Type{<:NamedTuple{<:Any,T}}, i) where T = T.parameters[i] diff --git a/src/methods/crop_extend.jl b/src/methods/crop_extend.jl index 11ae83d90..fdfd514de 100644 --- a/src/methods/crop_extend.jl +++ b/src/methods/crop_extend.jl @@ -174,8 +174,11 @@ end _extend_to(x::RasterStackOrArray, to::Dimension; kw...) = _extend_to(x, (to,); kw...) function _extend_to(A::AbstractRaster, to::DimTuple; - filename=nothing, suffix=nothing, touches=false, - missingval=(isnothing(missingval(A)) ? missing : missingval(A)) + filename=nothing, + suffix=nothing, + missingval=(isnothing(missingval(A)) ? 
nokw : missingval(A)), + touches=false, + force=false ) others = otherdims(to, A) # Allow not specifying all dimensions @@ -194,7 +197,7 @@ function _extend_to(A::AbstractRaster, to::DimTuple; # Create a new extended array newA = create(filename, eltype(A), final_to; suffix, parent=parent(A), missingval, - name=name(A), metadata=metadata(A) + name=name(A), metadata=metadata(A), force ) # Input checks map(dims(A, to), dims(newA, to)) do d1, d2 diff --git a/src/nokw.jl b/src/nokw.jl index 7a6a4743f..dae5894ae 100644 --- a/src/nokw.jl +++ b/src/nokw.jl @@ -5,3 +5,6 @@ struct NoKW end const nokw = NoKW() @inline isnokw(::NoKW) = true @inline isnokw(_) = false + +_nokw2nothing(::NoKW) = nothing +_nokw2nothing(x) = x diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index ad40fd2df..30cbcb3d7 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -31,122 +31,42 @@ const CDM_STANDARD_NAME_MAP = Dict( "time" => Ti, ) - -# CFDiskArray ######################################################################## - -struct CFDiskArray{T,N,TV,TA,TSA} <: DiskArrays.AbstractDiskArray{T,N} - var::CDM.CFVariable{T,N,TV,TA,TSA} -end - -# Rasters methods -FileArray{source}(var::CFDiskArray, filename::AbstractString; kw...) where source = - FileArray{source}(parent(var), filename; kw...) - -cleanreturn(A::CFDiskArray) = Array(A) -missingval(A::CFDiskArray) = missingval(parent(A)) - -# DimensionalData methods -_dims(var::CFDiskArray, args...) = _dims(parent(var), args...) -_metadata(var::CFDiskArray, args...) = _metadata(parent(var), args...) - -# Base methods -Base.parent(A::CFDiskArray) = A.var - Base.getindex(os::OpenStack{<:CDMsource}, name::Symbol) = CFDiskArray(dataset(os)[name]) -# DiskArrays.jl methods -function DiskArrays.readblock!(A::CFDiskArray, aout, i::AbstractUnitRange...) - aout .= getindex(parent(A), i...) -end -function DiskArrays.writeblock!(A::CFDiskArray, data, i::AbstractUnitRange...) 
- setindex!(parent(A), data, i...) - return data -end - -# We have to dig down to find the chunks as they are not implemented -# in the CDM, but they are in their internal objects. -DiskArrays.eachchunk(var::CFDiskArray) = _get_eachchunk(var) -DiskArrays.haschunks(var::CFDiskArray) = _get_haschunks(var) - -_get_eachchunk(var::CFDiskArray) = _get_eachchunk(parent(var)) -_get_eachchunk(var::CDM.CFVariable) = _get_eachchunk(var.var) -_get_haschunks(var::CFDiskArray) = _get_haschunks(parent(var)) -_get_haschunks(var::CDM.CFVariable) = _get_haschunks(var.var) - -_sourcetrait(var::CFDiskArray) = _sourcetrait(parent(var)) _sourcetrait(var::CDM.CFVariable) = _sourcetrait(var.var) -# CommonDataModel.jl methods -for method in (:size, :name, :dimnames, :dataset, :attribnames) - @eval begin - CDM.$(method)(var::CFDiskArray) = CDM.$(method)(parent(var)) - end -end - -for method in (:attrib, :dim) - @eval begin - CDM.$(method)(var::CFDiskArray, name::CDM.SymbolOrString) = CDM.$(method)(parent(var), name) - end -end - # Rasters methods for CDM types ############################### -function FileArray{source}(var::AbstractVariable, filename::AbstractString; kw...) where source<:CDMsource - eachchunk = DA.eachchunk(var) - haschunks = DA.haschunks(var) - T = eltype(var) - N = ndims(var) - FileArray{source,T,N}(filename, size(var); eachchunk, haschunks, kw...) -end - -function FileStack{source}( - ds::AbstractDataset, filename::AbstractString; +function FileStack{source}(ds::AbstractDataset, filename::AbstractString; write::Bool=false, group=nokw, name::NTuple{N,Symbol}, + mods, vars ) where {source<:CDMsource,N} - T = NamedTuple{name,Tuple{map(var -> Union{Missing,eltype(var)}, vars)...}} + T = NamedTuple{name,Tuple{map(_mod_eltype, vars, mods)...}} layersizes = map(size, vars) eachchunk = map(_get_eachchunk, vars) haschunks = map(_get_haschunks, vars) group = isnokw(group) ? 
nothing : group - return FileStack{source,name,T}(filename, layersizes, group, eachchunk, haschunks, write) -end - -function Base.open(f::Function, A::FileArray{source}; write=A.write, kw...) where source<:CDMsource - _open(source(), filename(A); name=name(A), group=A.group, write, kw...) do var - f(var) - end + return FileStack{source,name,T}(filename, layersizes, group, eachchunk, haschunks, cdf, write) end -function _open(f, ::CDMsource, ds::AbstractDataset; name=nokw, group=nothing, kw...) +function _open(f, ::CDMsource, ds::AbstractDataset; + name=nokw, group=nothing, mod=NoMod(), kw... +) g = _getgroup(ds, group) - x = isnokw(name) ? g : CFDiskArray(g[_firstname(g, name)]) - cleanreturn(f(x)) + x = isnokw(name) ? g : _maybe_modify(CDM.variable(g, _firstname(g, name)), mod) + return cleanreturn(f(x)) end -_open(f, ::CDMsource, var::CFDiskArray; kw...) = cleanreturn(f(var)) +_open(f, ::CDMsource, var::AbstractArray; mod=NoMod(), kw...) = + cleanreturn(f(_maybe_modify(var, mod))) # This allows arbitrary group nesting _getgroup(ds, ::Union{Nothing,NoKW}) = ds _getgroup(ds, group::Union{Symbol,AbstractString}) = ds.group[String(group)] _getgroup(ds, group::Pair) = _getgroup(ds.group[String(group[1])], group[2]) -function create(filename, source::CDMsource, T::Type, dims::DimTuple; - name=nokw, - missingval=nokw, - metadata=nokw, - lazy=true, - verbose=true, - chunks=nokw, -) - # Create layers of zero arrays - A = FillArrays.Zeros{T}(map(length, dims)) - rast = Raster(A, dims; name, missingval, metadata) - write(filename, source, rast; chunks) - return Raster(filename; metadata, source, lazy) -end - filekey(ds::AbstractDataset, name) = _firstname(ds, name) missingval(var::AbstractDataset) = missing missingval(var::AbstractVariable{T}) where T = missing isa T ? 
missing : nothing @@ -241,6 +161,8 @@ function _layermetadata(ds::AbstractDataset; layers) end end +_fix_missingval(::CDM.AbstractVariable, ::Nothing, metadata) = get(metadata, "_FillValue", nothing) + # Utils ######################################################################## diff --git a/src/sources/grd.jl b/src/sources/grd.jl index 04954b2af..d9fd4be3d 100644 --- a/src/sources/grd.jl +++ b/src/sources/grd.jl @@ -133,16 +133,6 @@ end _sizeof(A::GRDdataset{T}) where T = sizeof(T) * prod(size(A)) _sizeof(A::RasterDiskArray{GRDsource}) = _sizeof(A.attrib) - -# Array ######################################################################## - -function FileArray{GRDsource}(A::RasterDiskArray{<:Any,T}, filename=filename(A.attrib); kw...) where T - filename = first(splitext(filename)) - eachchunk = DiskArrays.eachchunk(A) - haschunks = DiskArrays.haschunks(A) - FileArray{GRDsource,T,3}(filename, size(A); eachchunk, haschunks, kw...) -end - # Base methods """ @@ -263,7 +253,7 @@ function Base.open(f::Function, A::FileArray{GRDsource}, args...; write=A.write) _mmapgrd(mm -> f(RasterDiskArray{GRDsource}(mm, A.eachchunk, A.haschunks)), A; write) end -function _open(f, ::GRDsource, filename::AbstractString; write=false, name=nokw, group=nokw) +function _open(f, ::GRDsource, filename::AbstractString; write=false, kw...) isfile(filename) || _filenotfound_error(filename) attr = GRDdataset(filename) _mmapgrd(attr; write) do mm diff --git a/src/utils.jl b/src/utils.jl index 7f5082207..ae19d4dca 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -23,16 +23,16 @@ nolookup_to_sampled(dims::DimTuple) = map(nolookup_to_sampled, dims) nolookup_to_sampled(d::Dimension) = lookup(d) isa NoLookup ? set(d, Sampled(; sampling=Points())) : d -function _maybe_use_type_missingval(A::AbstractRaster{T}, source::Source, missingval=nokw) where T - if ismissing(Rasters.missingval(A)) - newmissingval = missingval isa NoKW ? 
_type_missingval(Missings.nonmissingtype(T)) : missingval - A1 = replace_missing(A, newmissingval) - @warn "`missing` cant be written with $(SOURCE2SYMBOL[source]), missinval for `$(eltype(A1))` of `$newmissingval` used instead" - return A1 - else - return A - end -end +# function _maybe_use_type_missingval(A::AbstractRaster{T}, source::Source, missingval=nokw) where T +# if ismissing(Rasters.missingval(A)) +# newmissingval = missingval isa NoKW ? _type_missingval(Missings.nonmissingtype(T)) : missingval +# A1 = replace_missing(A, newmissingval) +# @warn "`missing` cant be written with $(SOURCE2SYMBOL[source]), missinval for `$(eltype(A1))` of `$newmissingval` used instead" +# return A1 +# else +# return A +# end +# end # Create a standardised Metadata object of source T, containing a `Dict{String,Any}` _metadatadict(s::Source, p1::Pair, pairs::Pair...) = @@ -87,11 +87,16 @@ maybe_eps(dim::Dimension) = maybe_eps(eltype(dim)) maybe_eps(::Type) = nothing maybe_eps(T::Type{<:AbstractFloat}) = _default_atol(T) -_writeable_missing(filename::Nothing, T) = missing -_writeable_missing(filename::AbstractString, T) = _writeable_missing(T) -function _writeable_missing(T) +_writeable_missing(filename::Nothing, T; kw...) = missing +_writeable_missing(filename::AbstractString, T; kw...) = _writeable_missing(T; kw...) 
+function _writeable_missing(::Type{Missing}; verbose=true) + missingval = _type_missingval(UInt8) + verbose && @info "`missingval` set to $missingval" + return missingval +end +function _writeable_missing(T; verbose=true) missingval = _type_missingval(Missings.nonmissingtype(T)) - @info "`missingval` set to $missingval" + verbose && @info "`missingval` set to $missingval" return missingval end @@ -276,3 +281,15 @@ function _checkregular(A::AbstractArray) end return true end + +_maybe_add_suffix(filename::Nothing, suffix) = nothing +_maybe_add_suffix(filename::Nothing, suffix::Union{Nothing,NoKW}) = nothing +_maybe_add_suffix(filename, suffix::Union{Nothing,NoKW}) = filename +function _maybe_add_suffix(filename, suffix) + base, ext = splitext(filename) + if string(suffix) == "" + filename + else + return string(base, "_", suffix, ext) + end +end diff --git a/test/methods.jl b/test/methods.jl index ffe8c22ee..5aa37cf1a 100644 --- a/test/methods.jl +++ b/test/methods.jl @@ -55,9 +55,9 @@ gaMi = replace_missing(ga) @test all(map(values(replace_missing(st, NaN32)), (a=[NaN32 7.0f0; 2.0f0 NaN32], b=[1.0 0.4; 2.0 NaN])) do x, y all(x .=== y) end) - dNaN = replace_missing(ga, NaN32; filename="test.tif") + testfile = tempname() * ".tif" + dNaN = replace_missing(ga, NaN32; filename=testfile) @test all(isequal.(dNaN, [NaN32 7.0f0; 2.0f0 NaN32])) - rm("test.tif") stNaN = replace_missing(st, NaN32; filename="teststack.tif") @test all(map(stNaN[Band(1)], (a=[NaN32 7.0f0; 2.0f0 NaN32], b=[1.0 0.4; 2.0 NaN])) do x, y all(x .=== y) @@ -172,9 +172,9 @@ end ga4 = replace_missing(ga1; missingval=-9999) mask!(ga4; with=ga, invert=true) @test all(ga4 .=== [-9999 -9999; -9999 3]) - dmask = mask(ga3; with=ga, filename="mask.tif") + maskfile = tempname() * ".tif" + dmask = mask(ga3; with=ga, filename=maskfile) @test Rasters.isdisk(dmask) - rm("mask.tif") stmask = mask(replace_missing(st, NaN); with=ga, filename="mask.tif") @test Rasters.isdisk(stmask) rm("mask_a.tif") @@ -541,7 +541,8 @@ 
end extended = extend(cropped, ga)[1] extended_r = extend(cropped_r; to=ga_r) extended1 = extend(extend(cropped; to=dims(ga, X)); to=dims(ga, Y)) - extended_d = extend(cropped; to=ga, filename="extended.tif") + filename = tempname() * ".tif" + extended_d = extend(cropped; to=ga, filename) @test all(extended .=== extended1 .=== replace_missing(extended_d) .=== ga) @test all(extended_r .=== ga_r) @test all(map(==, lookup(extended_d), lookup(extended))) diff --git a/test/rasterize.jl b/test/rasterize.jl index 63c34ef89..159dc19d2 100644 --- a/test/rasterize.jl +++ b/test/rasterize.jl @@ -396,7 +396,8 @@ end @test sum(skipmissing(r)) == (12 * 1 + 8 * 2 + 8 * 3 + 12 * 4) + (4 * 1.5 + 4 * 2.5 + 4 * 3.5) end - prod_r = rasterize(prod, polygons; res=5, fill=1:4, boundary=:center, filename="test.tif", threaded) + filename = tempname() * ".tif" + prod_r = rasterize(prod, polygons; res=5, fill=1:4, boundary=:center, filename, threaded) prod_r = rasterize(prod, polygons; res=5, fill=1:4, boundary=:center, threaded) @test sum(skipmissing(prod_r)) == (12 * 1 + 8 * 2 + 8 * 3 + 12 * 4) + (4 * 1 * 2 + 4 * 2 * 3 + 4 * 3 * 4) diff --git a/test/resample.jl b/test/resample.jl index 8dc55f2c6..5b9e03081 100644 --- a/test/resample.jl +++ b/test/resample.jl @@ -23,26 +23,36 @@ include(joinpath(dirname(pathof(Rasters)), "../test/test_utils.jl")) end end - # Resample cea.tif using resample - cea = Raster(raster_path; missingval=0x00, name = :cea) - raster_output = resample(cea; res=output_res, crs=output_crs, method) - disk_output = resample(cea; res=output_res, crs=output_crs, method, filename="resample.tif") - - cea_permuted = permutedims(Raster(raster_path), (Y, X)) - permuted_output = resample(cea_permuted, output_res; crs=output_crs, method) - - # Compare ArchGDAL, resample and permuted resample - @test AG_output == - raster_output[Band(1)] == - disk_output[Band(1)] == - permutedims(permuted_output, (X, Y)) - @test abs(step(dims(raster_output, Y))) ≈ - abs(step(dims(raster_output, X))) 
≈ - abs(step(dims(disk_output, X))) ≈ - abs(step(dims(permuted_output, X))) ≈ output_res - @test name(cea) == name(raster_output) - - rm("resample.tif") + maskingval = missing + for maskingval in (nothing, missing, Rasters.nokw) + # Resample cea.tif using resample + cea = Raster(raster_path; missingval=0x00, name=:cea, maskingval) + raster_output = resample(cea; res=output_res, crs=output_crs, method, maskingval) + disk_output = resample(cea; res=output_res, crs=output_crs, method, filename="resample.tif") + + cea_permuted = permutedims(Raster(raster_path), (Y, X); missingval=0x00, name=:cea_permuted, maskingval) + permuted_output = resample(cea_permuted, output_res; crs=output_crs, method) + + AG_output1 = if maskingval === missing || maskingval === Rasters.nokw + replace(AG_output, 0x00 => missing) + else + AG_output + end + # Compare ArchGDAL, resample and permuted resample + AG_output1 + .=== + raster_output + @test all(AG_output1 .=== + raster_output .=== + read(disk_output .=== permutedims(permuted_output, (X, Y))) + @test abs(step(dims(raster_output, Y))) ≈ + abs(step(dims(raster_output, X))) ≈ + abs(step(dims(disk_output, X))) ≈ + abs(step(dims(permuted_output, X))) ≈ output_res + @test name(cea) == name(raster_output) + + rm("resample.tif") + end @testset "missingval propagates" begin @test missingval(resample(cea; res=output_res, crs=output_crs, method)) == 0x00 diff --git a/test/runtests.jl b/test/runtests.jl index 61b634f26..745e9b825 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,6 @@ using Rasters, Test, Aqua, SafeTestsets -if VERSION >= v"1.9.0" +@testset "Aqua" begin # Aqua.test_ambiguities([Rasters, Base, Core]) Aqua.test_unbound_args(Rasters) Aqua.test_stale_deps(Rasters) diff --git a/test/sources/gdal.jl b/test/sources/gdal.jl index fcff53092..2bd613b79 100644 --- a/test/sources/gdal.jl +++ b/test/sources/gdal.jl @@ -11,7 +11,7 @@ gdalpath = maybedownload(url) @test_throws ArgumentError Raster("notafile.tif") @time gdalarray = 
Raster(gdalpath; name=:test) - @time lazyarray = Raster(gdalpath; lazy=true); + @time lazyarray = Raster(gdalpath; cf=false, lazy=true); @time eagerarray = Raster(gdalpath; lazy=false); @testset "lazyness" begin @@ -25,6 +25,31 @@ gdalpath = maybedownload(url) end end + @testset "cf" begin + # This file has no scale/offset so cf does nothing + @time cfarray = Raster(gdalpath; cf=true) + @time cf_nomask_array = Raster(gdalpath; cf=true, maskingval=nothing) + @time nocfarray = Raster(gdalpath; cf=false) + @time lazycfarray = Raster(gdalpath; cf=true, lazy=true) + @time lazynocfarray = Raster(gdalpath; cf=false, lazy=true) + @time lazynocfnomaskarray = Raster(gdalpath; cf=false, lazy=true, maskingval=nothing) + @test parent(cfarray) isa Array{UInt8,2} + @test parent(cf_nomask_array) isa Array{UInt8,2} + @test parent(nocfarray) isa Array{UInt8,2} + open(lazycfarray) do A + @test parent(A) isa DiskArrays.SubDiskArray{UInt8} + @test parent(parent(A)) isa Rasters.ModifiedDiskArray{UInt8} + end + open(lazynocfarray) do A + @test parent(A) isa DiskArrays.SubDiskArray{UInt8} + @test parent(parent(A)) isa Rasters.ModifiedDiskArray{UInt8} + end + open(lazynocfnomaskarray) do A + @test parent(A) isa DiskArrays.SubDiskArray{UInt8} + @test parent(parent(A)) isa ArchGDAL.RasterDataset{UInt8} + end + end + @testset "load from url" begin A = Raster("/vsicurl/" * url) B = Raster(url; source=:gdal) @@ -55,6 +80,22 @@ gdalpath = maybedownload(url) @test A == A2 == A3 end + @testset "create" begin + created = Rasters.create("created.tif", Int16, (X(1:10), Y(1:10)); + missingval=255, maskingval=missing, scale=0.1, offset=5.0, force=true, cooerce=trunc + ) + open(created; write=true) do O + O .= 2.0 + end + read(created) + Raster("created.tif"; cf=false) .* 1 + created = Rasters.create("created.tif", UInt8, (X(1:10), Y(1:10)); + missingval=255, maskingval=UInt8(0), force=true + ) + read(created) + rm("created.tif") + end + @testset "custom filename" begin gdal_custom = replace(gdalpath, 
"tif" => "foo") cp(gdalpath, gdal_custom, force=true) @@ -292,7 +333,7 @@ gdalpath = maybedownload(url) @testset "2d asc" begin filename = tempname() * ".asc" - @time write(filename, gdalarray; force = true) + @time write(filename, gdalarray; force=true) saved1 = Raster(filename); @test all(saved1 .== gdalarray) # @test typeof(saved1) == typeof(geoA) diff --git a/test/sources/ncdatasets.jl b/test/sources/ncdatasets.jl index b52e15ca4..985d1f377 100644 --- a/test/sources/ncdatasets.jl +++ b/test/sources/ncdatasets.jl @@ -33,14 +33,14 @@ stackkeys = ( @testset "grid mapping" begin stack = RasterStack(joinpath(testdir, "data/grid_mapping_test.nc")) @test metadata(stack.mask)["grid_mapping"] == Dict{String, Any}( - "straight_vertical_longitude_from_pole" => 0.0, - "false_easting" => 0.0, - "standard_parallel" => -71.0, - "inverse_flattening" => 298.27940504282, - "latitude_of_projection_origin" => -90.0, - "grid_mapping_name" => "polar_stereographic", - "semi_major_axis" => 6.378273e6, - "false_northing" => 0.0, + "straight_vertical_longitude_from_pole" => 0.0, + "false_easting" => 0.0, + "standard_parallel" => -71.0, + "inverse_flattening" => 298.27940504282, + "latitude_of_projection_origin" => -90.0, + "grid_mapping_name" => "polar_stereographic", + "semi_major_axis" => 6.378273e6, + "false_northing" => 0.0, ) end @@ -59,6 +59,32 @@ end @time read(lazyarray); end + @testset "cf" begin + @time cfarray = Raster(ncsingle; cf=true) + @time cf_nomask_array = Raster(ncsingle; cf=true, maskingval=nothing) + @time nocfarray = Raster(ncsingle; cf=false) + @time nocf_nomask_array = Raster(ncsingle; cf=false, maskingval=nothing) + @time lazycfarray = Raster(ncsingle; lazy=true, cf=false) + @time lazynocfarray = Raster(ncsingle; lazy=true, cf=false) + @time lazynocf_nomask_array = Raster(ncsingle; lazy=true, cf=false, maskingval=nothing) + @test missingval(cfarray) === missing + @test missingval(nocfarray) === missing + @test missingval(cf_nomask_array) === 1.0f20 + @test 
missingval(nocf_nomask_array) === 1.0f20 + @test all(skipmissing(cfarray) .=== skipmissing(nocfarray)) + @test parent(cfarray) isa Array{Union{Float32,Missing}} + @test parent(nocfarray) isa Array{Union{Float32,Missing}} + open(lazycfarray) do A + @test parent(A) isa Rasters.ModifiedDiskArray{Union{Missing,Float32}} + end + open(lazynocfarray) do A + @test parent(A) isa Rasters.ModifiedDiskArray{Union{Missing,Float32}} + end + open(lazynocf_nomask_array) do A + @test parent(parent(A)) isa NCDatasets.Variable{Float32} + end + end + # @testset "from url" begin # # TODO we need a permanent url here that doesn't end in .nc # url = "http://apdrc.soest.hawaii.edu:80/dods/public_data/Reanalysis_Data/NCEP/NCEP2/daily/surface/mslp" From 5f7bed4e4214b8b3b22d93ff7f7222cfd20e7044 Mon Sep 17 00:00:00 2001 From: rafaqz Date: Fri, 12 Jul 2024 14:29:52 +0200 Subject: [PATCH 02/38] add modifieddiskarray file --- src/modifieddiskarray.jl | 113 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 src/modifieddiskarray.jl diff --git a/src/modifieddiskarray.jl b/src/modifieddiskarray.jl new file mode 100644 index 000000000..c7797dcef --- /dev/null +++ b/src/modifieddiskarray.jl @@ -0,0 +1,113 @@ +abstract type AbstractModifications end +struct NoMod{Mi} <: AbstractModifications + missingval::Mi +end +NoMod() = NoMod(nothing) +NoMod(::NoKW) = NoMod(nothing) +struct Mod{Mi,Ma,S,O,F} <: AbstractModifications + missingval::Mi + maskingval::Ma + scale::S + offset::O + coerce::F + function Mod(missingval, maskingval, scale, offset, coerce) + if isnokw(coerce) || isnothing(coerce) + coerce = convert + end + vals = map(_nokw2nothing, (missingval, maskingval, scale, offset)) + new{map(typeof, vals)...,typeof(coerce)}(vals..., coerce) + end +end + +function _mod(cf::Bool, metadata; missingval, maskingval, coerce=convert) + scale = cf ? get(metadata, "scale", nothing) : nothing + offset = cf ? 
get(metadata, "offset", nothing) : nothing + _mod(missingval, maskingval, scale, offset, coerce) +end +function _mod(missingval, maskingval, scale, offset, coerce=convert) + if isnothing(maskingval) && isnothing(scale) && isnothing(offset) + return NoMod(missingval) + else + return Mod(missingval, maskingval, scale, offset, coerce) + end +end + +_mod_eltype(::AbstractArray{T}, ::NoMod) where T = T +_mod_eltype(::AbstractArray{T}, m::Mod) where T = + Base.promote_op(_applymod, T, typeof(m)) + +_mod_inverse_eltype(::AbstractArray{T}, ::NoMod) where T = T +_mod_inverse_eltype(::AbstractArray{T}, m::Mod) where T = + Base.promote_op(_invertmod, typeof(m.coerce), T, typeof(m)) + +_maybe_modify(var, m::Mod) = ModifiedDiskArray(var, m) +_maybe_modify(var, ::NoMod) = var + +struct ModifiedDiskArray{T,N,V,M} <: DiskArrays.AbstractDiskArray{T,N} + var::V + mod::M +end +function ModifiedDiskArray(v::V, m::M) where {V<:AbstractArray{<:Any,N},M} where N + T = _mod_eltype(v, m) + return ModifiedDiskArray{T,N,V,M}(v, m) +end + +Base.parent(A::ModifiedDiskArray) = A.var +Base.size(A::ModifiedDiskArray, args...) = size(A.var, args...) +DiskArrays.haschunks(A::ModifiedDiskArray) = DiskArrays.haschunks(A.var) +DiskArrays.eachchunk(A::ModifiedDiskArray) = DiskArrays.eachchunk(A.var) + +function DiskArrays.readblock!(A::ModifiedDiskArray, out_block, I::AbstractVector...) + broadcast!(_applymod, out_block, A.var[I...], (A.mod,)) + return nothing +end + +function DiskArrays.writeblock!( + A::ModifiedDiskArray{<:Any,<:Any,<:AbstractArray{T}}, in_block, I::AbstractVector... +) where T + A.var[I...] 
= _invertmod.((Val{T}(),), in_block, (A.mod,)) + return nothing +end + +Base.@assume_effects :foldable function _applymod(x, m::Mod) + tm = if isnothing(m.maskingval) + x + else + if _ismissing(x, m.missingval) + return m.maskingval + else + x + end + end + return _scaleoffset(tm, m) +end + +_ismissing(x, mv) = isequal(x, mv) +_ismissing(_, ::Nothing) = false + +_scaleoffset(x, m::Mod) = _scaleoffset(x, m.scale, m.offset) +_scaleoffset(x, scale, offset) = muladd(x, scale, offset) +_scaleoffset(x, ::Nothing, offset) = x + offset +_scaleoffset(x, scale, ::Nothing) = x * scale +_scaleoffset(x, ::Nothing, ::Nothing) = x + +Base.@assume_effects :foldable function _invertmod(::Val{T}, x, m::Mod) where T + tm = if isnothing(m.missingval) + x + else + if _ismissing(x, m.maskingval) + return m.missingval + else + x + end + end + return _scaleoffset_inv(T, tm, m) +end + +_scaleoffset_inv(::Type{T}, x, m::Mod) where T = _scaleoffset_inv(m.coerce, T, x, m) +_scaleoffset_inv(coerce::Base.Callable, ::Type{T}, x, m::Mod) where T = + coerce(T, _scaleoffset_inv(x, m.scale, m.offset)) +_scaleoffset_inv(x, scale, offset) = (x - offset) / scale +_scaleoffset_inv(x, scale, ::Nothing) = x / scale +_scaleoffset_inv(x, ::Nothing, offset) = x - offset +_scaleoffset_inv(x, ::Nothing, ::Nothing) = x From 8be3f25d5de969786be051a64a967c0320ba86d7 Mon Sep 17 00:00:00 2001 From: rafaqz Date: Sun, 14 Jul 2024 23:17:14 +0200 Subject: [PATCH 03/38] use scale and offset keywords --- src/array.jl | 5 +++-- src/modifieddiskarray.jl | 20 ++++++++++++++++---- test/sources/gdal.jl | 14 +++++++------- test/sources/ncdatasets.jl | 14 +++++++------- 4 files changed, 33 insertions(+), 20 deletions(-) diff --git a/src/array.jl b/src/array.jl index 8e06e6b8c..9a26d37fc 100644 --- a/src/array.jl +++ b/src/array.jl @@ -303,10 +303,11 @@ function Raster(ds, filename::AbstractString; mappedcrs=nokw, coerce=nokw, source=nokw, + scale=nokw, + offset=nokw, write=false, lazy=false, dropband=true, - cf=true, )::Raster 
name1 = filekey(ds, name) source = _sourcetrait(filename, source) @@ -314,7 +315,7 @@ function Raster(ds, filename::AbstractString; metadata1 = isnokw(metadata) ? _metadata(var) : metadata missingval1 = _fix_missingval(var, missingval, metadata1) maskingval1 = isnokw(maskingval) ? missing : maskingval - mod = _mod(cf, metadata1; missingval=missingval1, maskingval=maskingval1, coerce) + mod = _mod(metadata1; scale, offset, missingval=missingval1, maskingval=maskingval1, coerce) data = if lazy FileArray{typeof(source)}(var, filename; name=name1, group, mod, write diff --git a/src/modifieddiskarray.jl b/src/modifieddiskarray.jl index c7797dcef..11829092a 100644 --- a/src/modifieddiskarray.jl +++ b/src/modifieddiskarray.jl @@ -19,10 +19,22 @@ struct Mod{Mi,Ma,S,O,F} <: AbstractModifications end end -function _mod(cf::Bool, metadata; missingval, maskingval, coerce=convert) - scale = cf ? get(metadata, "scale", nothing) : nothing - offset = cf ? get(metadata, "offset", nothing) : nothing - _mod(missingval, maskingval, scale, offset, coerce) +function _mod(metadata; scale, offset, missingval, maskingval, coerce=convert) + scale1 = if isnokw(scale) + s = get(metadata, "scale", nothing) + # Dont convert types for scale of one (gdal default) + s == 1.0 ? nothing : s + else + scale + end + offset1 = if isnokw(offset) + o = get(metadata, "offset", nothing) + # Dont convert types for offset of zero (gdal default) + o == 0.0 ? 
nothing : o + else + offset + end + return _mod(missingval, maskingval, scale, offset, coerce) end function _mod(missingval, maskingval, scale, offset, coerce=convert) if isnothing(maskingval) && isnothing(scale) && isnothing(offset) diff --git a/test/sources/gdal.jl b/test/sources/gdal.jl index 2bd613b79..c7da88290 100644 --- a/test/sources/gdal.jl +++ b/test/sources/gdal.jl @@ -11,7 +11,7 @@ gdalpath = maybedownload(url) @test_throws ArgumentError Raster("notafile.tif") @time gdalarray = Raster(gdalpath; name=:test) - @time lazyarray = Raster(gdalpath; cf=false, lazy=true); + @time lazyarray = Raster(gdalpath; scale=nothing, offset=nothing, lazy=true); @time eagerarray = Raster(gdalpath; lazy=false); @testset "lazyness" begin @@ -27,12 +27,12 @@ gdalpath = maybedownload(url) @testset "cf" begin # This file has no scale/offset so cf does nothing - @time cfarray = Raster(gdalpath; cf=true) - @time cf_nomask_array = Raster(gdalpath; cf=true, maskingval=nothing) - @time nocfarray = Raster(gdalpath; cf=false) - @time lazycfarray = Raster(gdalpath; cf=true, lazy=true) - @time lazynocfarray = Raster(gdalpath; cf=false, lazy=true) - @time lazynocfnomaskarray = Raster(gdalpath; cf=false, lazy=true, maskingval=nothing) + @time cfarray = Raster(gdalpath) + @time cf_nomask_array = Raster(gdalpath; maskingval=nothing) + @time nocfarray = Raster(gdalpath; scale=nothing, offset=nothing) + @time lazycfarray = Raster(gdalpath; lazy=true) + @time lazynocfarray = Raster(gdalpath; lazy=true, scale=nothing, offset=nothing) + @time lazynocfnomaskarray = Raster(gdalpath; lazy=true, scale=nothing, offset=nothing, maskingval=nothing) @test parent(cfarray) isa Array{UInt8,2} @test parent(cf_nomask_array) isa Array{UInt8,2} @test parent(nocfarray) isa Array{UInt8,2} diff --git a/test/sources/ncdatasets.jl b/test/sources/ncdatasets.jl index 985d1f377..755bbb374 100644 --- a/test/sources/ncdatasets.jl +++ b/test/sources/ncdatasets.jl @@ -60,13 +60,13 @@ end end @testset "cf" begin - @time 
cfarray = Raster(ncsingle; cf=true) - @time cf_nomask_array = Raster(ncsingle; cf=true, maskingval=nothing) - @time nocfarray = Raster(ncsingle; cf=false) - @time nocf_nomask_array = Raster(ncsingle; cf=false, maskingval=nothing) - @time lazycfarray = Raster(ncsingle; lazy=true, cf=false) - @time lazynocfarray = Raster(ncsingle; lazy=true, cf=false) - @time lazynocf_nomask_array = Raster(ncsingle; lazy=true, cf=false, maskingval=nothing) + @time cfarray = Raster(ncsingle) + @time cf_nomask_array = Raster(ncsingle; maskingval=nothing) + @time nocfarray = Raster(ncsingle; scale=nothing, offset=nothing) + @time nocf_nomask_array = Raster(ncsingle; scale=nothing, offset=nothing, maskingval=nothing) + @time lazycfarray = Raster(ncsingle; lazy=true, scale=nothing, offset=nothing) + @time lazynocfarray = Raster(ncsingle; lazy=true, , scale=nothing, offset=nothing) + @time lazynocf_nomask_array = Raster(ncsingle; lazy=true, scale=nothing, offset=nothing, maskingval=nothing) @test missingval(cfarray) === missing @test missingval(nocfarray) === missing @test missingval(cf_nomask_array) === 1.0f20 From ded3eebc6a214d953716cc9451932d3a2f4299b1 Mon Sep 17 00:00:00 2001 From: rafaqz Date: Sun, 14 Jul 2024 23:19:03 +0200 Subject: [PATCH 04/38] bugfix --- test/sources/gdal.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/sources/gdal.jl b/test/sources/gdal.jl index c7da88290..160ab75f8 100644 --- a/test/sources/gdal.jl +++ b/test/sources/gdal.jl @@ -88,7 +88,7 @@ gdalpath = maybedownload(url) O .= 2.0 end read(created) - Raster("created.tif"; cf=false) .* 1 + Raster("created.tif"; scale=nothing, offset=nothing) .* 1 created = Rasters.create("created.tif", UInt8, (X(1:10), Y(1:10)); missingval=255, maskingval=UInt8(0), force=true ) From 9e6019cfa70c321c2e03ab037f8606db9d58d479 Mon Sep 17 00:00:00 2001 From: rafaqz Date: Mon, 15 Jul 2024 00:06:03 +0200 Subject: [PATCH 05/38] bugfix --- ext/RastersArchGDALExt/gdal_source.jl | 5 +++-- 1 file changed, 3 
insertions(+), 2 deletions(-) diff --git a/ext/RastersArchGDALExt/gdal_source.jl b/ext/RastersArchGDALExt/gdal_source.jl index 4e9979a39..7cd19d93c 100644 --- a/ext/RastersArchGDALExt/gdal_source.jl +++ b/ext/RastersArchGDALExt/gdal_source.jl @@ -234,11 +234,12 @@ function RA.Raster(ds::AG.RasterDataset; maskingval=missing, lazy=false, dropband=false, - cf=true, + scale=nokw, + offset=nokw, ) kw = (; refdims, name, metadata, missingval) filelist = AG.filelist(ds) - mod = RA._mod(cf, metadata; missingval, maskingval) + mod = RA._mod(metadata; scale, offset, missingval, maskingval) raster = if lazy && length(filelist) > 0 filename = first(filelist) Raster(FileArray{GDALsource}(ds, filename; mod), dims, kw...) From afe79202dd91c24af90f141aefaed7a0d7479bb4 Mon Sep 17 00:00:00 2001 From: rafaqz Date: Sat, 20 Jul 2024 19:12:47 +0200 Subject: [PATCH 06/38] updates --- ext/RastersArchGDALExt/gdal_source.jl | 6 +- ext/RastersNCDatasetsExt/ncdatasets_source.jl | 3 +- src/array.jl | 43 ++++---- src/create.jl | 75 +++++++++---- src/methods/shared_docstrings.jl | 53 +++++++++ src/modifieddiskarray.jl | 51 ++++++--- src/stack.jl | 101 +++++++++++++----- src/utils.jl | 46 +++++--- test/sources/gdal.jl | 8 +- test/sources/ncdatasets.jl | 18 ++-- 10 files changed, 287 insertions(+), 117 deletions(-) diff --git a/ext/RastersArchGDALExt/gdal_source.jl b/ext/RastersArchGDALExt/gdal_source.jl index 7cd19d93c..2eb7d48d0 100644 --- a/ext/RastersArchGDALExt/gdal_source.jl +++ b/ext/RastersArchGDALExt/gdal_source.jl @@ -234,12 +234,12 @@ function RA.Raster(ds::AG.RasterDataset; maskingval=missing, lazy=false, dropband=false, - scale=nokw, - offset=nokw, + scaled=true, + coerce=convert, ) kw = (; refdims, name, metadata, missingval) filelist = AG.filelist(ds) - mod = RA._mod(metadata; scale, offset, missingval, maskingval) + mod = RA._mod(metadata, missingval, maskingval; scaled, coerce) raster = if lazy && length(filelist) > 0 filename = first(filelist) Raster(FileArray{GDALsource}(ds, 
filename; mod), dims, kw...) diff --git a/ext/RastersNCDatasetsExt/ncdatasets_source.jl b/ext/RastersNCDatasetsExt/ncdatasets_source.jl index 8164b2dea..6ac1edd57 100644 --- a/ext/RastersNCDatasetsExt/ncdatasets_source.jl +++ b/ext/RastersNCDatasetsExt/ncdatasets_source.jl @@ -70,6 +70,7 @@ function _writevar!(ds::AbstractDataset, A::AbstractRaster{T,N}; chunksizes=RA._chunks_to_tuple(A, dims(A), chunks), scale=nokw, offset=nokw, + coerce=convert, eltype=Missings.nonmissingtype(T), kw... ) where {T,N} @@ -92,7 +93,7 @@ function _writevar!(ds::AbstractDataset, A::AbstractRaster{T,N}; mv = maskingval isa NoKW ? Rasters.missingval(A) : maskingval mv === missingval ? nothing : mv end - mod = _mod(missingval1, maskingval1, scale, offset) + mod = _mod(missingval1, maskingval1, scale, offset, coerce) eltype <: NCDAllowedType || throw(ArgumentError(""" Element type $eltyp cannot be written to NetCDF. Convert it to one of $(Base.uniontypes(NCDAllowedType)), diff --git a/src/array.jl b/src/array.jl index 4b9966633..10e8a5672 100644 --- a/src/array.jl +++ b/src/array.jl @@ -209,30 +209,19 @@ methods will _not_ load data from disk; they will be applied later, lazily. # Keywords -- `name`: a `Symbol` name for the array, which will also retrieve the, alphabetically first, - named layer if `Raster` is used on a multi-layered file like a NetCDF. - If instead `RasterStack` is used to read the multi-layered file, by default, all variables - will be added to the stack. +$NAME_KEYWORD $GROUP_KEYWORD -- `missingval`: value reprsenting missing data, normally detected from the file. Set manually - when you know the value is not specified or is incorrect. This will *not* change any - values in the raster, it simply assigns which value is treated as missing. To replace all of - the missing values in the raster, use [`replace_missing`](@ref). -- `maskingval`: A value to convert `missingval` to, by default `missing`. 
If this is set it - will be the return value of `missingval(raster)` - `maskingval` becomes the new `missingval`. - Setting `maskingval` to `nothing` means no masking will occur, and the original `missingval` - will be the final `missingval`. This can give better performance than using `missing`. - Another efficient option is to use e.g. `zero(eltype(raster))` to replace missing values with zero. -- `metadata`: `Dict` or `Metadata` object for the array, or `NoMetadata()`. +$MISSINGVAL_KEYWORD +$MASKINGVAL_KEYWORD +$METADATA_KEYWORD $CONSTRUCTOR_CRS_KEYWORD $CONSTRUCTOR_MAPPEDCRS_KEYWORD -- `refdims`: `Tuple of` position `Dimension`s the array was sliced from, defaulting to `()`. - Usually not needed. +$REFDIMS_KEYWORD +$SCALED_KEYWORD When a filepath `String` is used: $DROPBAND_KEYWORD $LAZY_KEYWORD -$REPLACE_MISSING_KEYWORD $SOURCE_KEYWORD - `write`: defines the default `write` keyword value when calling `open` on the Raster. `false` by default. Only makes sense to use when `lazy=true`. @@ -319,37 +308,41 @@ function Raster(ds, filename::AbstractString; maskingval=nokw, crs=nokw, mappedcrs=nokw, - coerce=nokw, source=nokw, - scale=nokw, - offset=nokw, + replace_missing=nokw, + coerce=convert, + scaled=true, write=false, lazy=false, dropband=true, checkmem=CHECKMEM[], + mod=nokw, )::Raster + _maybewarn_replace_missing(replace_missing) name1 = filekey(ds, name) source = _sourcetrait(filename, source) - data1, dims1, metadata1, maskingval1 = _open(source, ds; name=name1, group, mod=NoMod()) do var + data1, dims1, metadata1, missingval2 = _open(source, ds; name=name1, group, mod=NoMod()) do var metadata1 = isnokw(metadata) ? _metadata(var) : metadata missingval1 = _fix_missingval(var, missingval, metadata1) maskingval1 = isnokw(maskingval) ? missing : maskingval - mod = _mod(metadata1; scale, offset, missingval=missingval1, maskingval=maskingval1, coerce) + # If maskingval is `nothing` use missingval as missingval + missingval2 = isnothing(maskingval1) ? 
missingval1 : maskingval1 + mod = isnokw(mod) ? _mod(metadata1, missingval1, maskingval1; scaled, coerce) : mod data = if lazy FileArray{typeof(source)}(var, filename; name=name1, group, mod, write ) else modvar = _maybe_modify(var, mod) - checkmem && _checkobjmem(var) + checkmem && _checkobjmem(modvar) x = Array(modvar) x isa AbstractArray ? x : fill(x) # Catch an NCDatasets bug end dims1 = isnokw(dims) ? _dims(var, crs, mappedcrs) : format(dims, data) - data, dims1, metadata1, maskingval1 + data, dims1, metadata1, missingval2 end name2 = name1 isa Union{NoKW,Nothing} ? Symbol("") : Symbol(name1) - raster = Raster(data1, dims1, refdims, name2, metadata1, maskingval1) + raster = Raster(data1, dims1, refdims, name2, metadata1, missingval2) return dropband ? _drop_single_band(raster, lazy) : raster end diff --git a/src/create.jl b/src/create.jl index f1aedd9ce..35e9290a0 100644 --- a/src/create.jl +++ b/src/create.jl @@ -1,11 +1,12 @@ """ - create(filename, A::Raster; kw...) - create(filename, T, dims::Tuple; kw...) + create([filename], template::Raster; kw...) + create([filename], T, template::Raster; kw...) + create([filename], T, template::Tuple; kw...) -Create a new Raster. If `filename` is a `String` it will be created on disk, -and opened lazily. If it is `nothing` a regular in-memory `Raster` +Create a new Raster. If `filename` is a `String` it will be created on disk, +and opened lazily. If it is `nothing` a regular in-memory `Raster` will be created. If written to disk, the values will be `missingval` when it is defined, if in-memory values will be `undef`. @@ -13,33 +14,71 @@ Generally all indices should be written to after `create`. The return value is a `Raster`. The `eltype` will usually be `T`, except where `scale` and/or `offset` keywords are used, in which case `T` will -depend on the tyepe promotion of `scale` and `offset` and `T`. +depend on the tyepe promotion of `scale` and `offset` and `T`. `maskingval` will also affect the `eltype`. 
-# Keywords +## Arguments +- `filename`: a String file path, which will create a file on disk and return it as + a lazy `Raster`, or `nothing` to create an in-memory `Raster`. +- `T`: the element type to use in the created array. +- `template`: a `Raster`, `Tuple` of `Dimension` or `Extents.Extent` to use as a template. + If an `Extent` is used, a `size` or `res` keyword must be passed. +## Keywords + +$NAME_KEYWORD +$REFDIMS_KEYWORD +$METADATA_KEYWORD +$MISSINGVAL_KEYWORD +$MASKINGVAL_KEYWORD +$SOURCE_KEYWORD +- `lazy`: A `Bool` specifying if to load data lazily from disk. For `create` + `lazy=true` is the default, as creating a disk-based file is normally associated + with it being larger than memory. +$CHUNKS_KEYWORD +$SCALE_KEYWORD +$OFFSET_KEYWORD +$COERCE_KEYWORD +$VERBOSE_KEYWORD +$RES_KEYWORD +$SIZE_KEYWORD +$CRS_KEYWORD """ -create(filename, A::AbstractRaster{T}; kw...) where T = create(filename, T, A; kw...) -function create(filename, x, A::AbstractRaster; - name=name(A), - metadata=metadata(A), - missingval=missingval(A), +create(A::AbstractRaster; kw...) where T = create(nothing, A; kw...) +create(T::Type, dims::Tuple; kw...) where T = create(nothing, T, dims; kw...) +create(T::Type, extent::Extents.Extent; kw...) where T = create(nothing, T, dims; kw...) +create(filename::Union{AbstractString,Nothing}, A::AbstractRaster{T}; kw...) where T = + create(filename, T, A; kw...) +function create(filename::Union{AbstractString,Nothing}, T::Type, A::AbstractRaster; + name=name(A), + metadata=metadata(A), + missingval=missingval(A), kw... ) - create(filename, x, dims(A); parent=parent(A), name, metadata, missingval, kw...) + return create(filename, T, dims(A); parent=parent(A), name, metadata, missingval, kw...) end -function create(filename::AbstractString, x, dims::Tuple; +function create(filename::AbstractString, T::Type, dims::Tuple; lazy=true, parent=nokw, suffix=nokw, source::Source=_sourcetrait(filename), - missingval=nokw, + missingval=nokw, kw... 
) filename = _maybe_add_suffix(filename, suffix) # This calls `create` in the /sources file for this `source` - create(filename, source, x, dims; lazy, missingval, kw...) + return create(filename, source, T, dims; lazy, missingval, kw...) +end +function create(filename::Union{AbstractString,Extent}, T::Type, extent::Extents.Extent; + res=nokw, + size=nokw, + crs=nothing, + sampling=Points(), + kw... +) + ds = _extent2dims(extent; size, res, crs, sampling) + return create(filename, T, ds; kw...) end function create(filename::Nothing, T::Type, dims::Tuple; parent=nokw, @@ -50,12 +89,12 @@ function create(filename::Nothing, T::Type, dims::Tuple; ) T = isnothing(missingval) ? T : promote_type(T, typeof(missingval)) data = isnokw(parent) || isnothing(parent) ? Array{T}(undef, dims) : similar(parent, T, size(dims)) - Raster(data, dims; missingval, kw...) + return Raster(data, dims; missingval, kw...) end function create(filename::AbstractString, source::Source, T::Type, dims::DimTuple; name=nokw, missingval=nokw, - maskingval=missingval, + maskingval=missing, metadata=nokw, chunks=nokw, scale=nokw, @@ -76,5 +115,5 @@ function create(filename::AbstractString, source::Source, T::Type, dims::DimTupl # Create layers of zero arrays rast = Raster(A, dims; name, missingval) write(filename, source, rast; chunks, metadata, scale, offset, missingval, verbose, force, coerce) - return Raster(filename; source, lazy, metadata, missingval, maskingval, dropband) + return Raster(filename; source, lazy, metadata, missingval, maskingval, dropband, coerce) end diff --git a/src/methods/shared_docstrings.jl b/src/methods/shared_docstrings.jl index 443808750..163980c98 100644 --- a/src/methods/shared_docstrings.jl +++ b/src/methods/shared_docstrings.jl @@ -125,3 +125,56 @@ const CHECKMEMORY_KEYWORD = """ - `checkmemory`: If `true` (the default), check if there is enough memory for the operation. `false` will ignore memory needs. 
""" + +const SCALE_KEYWORD = """ +- `scale`: set `scale` for `x * scale + offset` transformations. +""" + +const OFFSET_KEYWORD = """ +- `offset`: set `offset` for `x * scale + offset` transformations. +""" + +const SCALED_KEYWORD = """ +- `scaled`: apply scale and offset as `x * scale + offset`. `true` by default. + This is common where data has been convert to e.g. UInt8 to save disk space. + To ignore `scale` and `offset` metadata, use `scaled=false`. If `scale` and + Note: `offset` are `1.0` and `0.0` they will be ignored and the original type will + be used even when `scaled=true`. This is because these values may be fallback + defaults and we do not want to convert every `Real` array to larger `Float64` values. +""" + +const COERCE_KEYWORD = """ +- `coerce`: where `scale` and/or `offset` are present during `setindex!` to disk, + coerce values to the disk type. `convert` is the default, but `round`, `trunc` or + or `ceil` may be needed where the values are not exact. +""" + +const MISSINGVAL_KEYWORD = """ +- `missingval`: value representing missing data, normally detected from the file. Set manually + when you know the value is not specified or is incorrect. This will *not* change any + values in the raster, it simply assigns which value is treated as missing. +""" + +const MASKINGVAL_KEYWORD = """ +- `maskingval`: A value to convert `missingval` to, by default `missing`. If this is set it + will be the return value of `missingval(raster)` - `maskingval` becomes the new `missingval`. + Setting `maskingval` to `nothing` means no masking will occur, and the original `missingval` + will be the final `missingval`. This can give better performance than using `missing`. + Another efficient option is to use e.g. `zero(eltype(raster))` to replace missing values with zero. +""" + +const NAME_KEYWORD = """ +- `name`: a `Symbol` name for the array, which will also retrieve the, alphabetically first, + named layer if `Raster` is used on a multi-layered file like a NetCDF. 
+ If instead `RasterStack` is used to read the multi-layered file, by default, all variables + will be added to the stack. +""" + +const METADATA_KEYWORD = """ +- `metadata`: `Dict` or `Metadata` object for the array, or `NoMetadata()`. +""" + +const REFDIMS_KEYWORD = """ +- `refdims`: `Tuple of` position `Dimension`s the array was sliced from, defaulting to `()`. + Usually not needed. +""" diff --git a/src/modifieddiskarray.jl b/src/modifieddiskarray.jl index 11829092a..94b438dbf 100644 --- a/src/modifieddiskarray.jl +++ b/src/modifieddiskarray.jl @@ -19,24 +19,36 @@ struct Mod{Mi,Ma,S,O,F} <: AbstractModifications end end -function _mod(metadata; scale, offset, missingval, maskingval, coerce=convert) - scale1 = if isnokw(scale) - s = get(metadata, "scale", nothing) - # Dont convert types for scale of one (gdal default) - s == 1.0 ? nothing : s - else - scale +function _stack_mods(metadata::Vector, missingval::Vector, maskingval; scaled, coerce) + map(metadata, missingval) do md, mv + scale, offset = _get_scale_offset(md, scaled) + _mod(mv, maskingval, scale, offset, coerce) end - offset1 = if isnokw(offset) - o = get(metadata, "offset", nothing) - # Dont convert types for offset of zero (gdal default) - o == 0.0 ? 
nothing : o - else - offset +end +function _stack_mods(metadata::Vector, missingval, maskingval::Vector; scaled::Bool, coerce) + map(metadata, maskingval) do md, mk + scale, offset = _get_scale_offset(md, scaled) + _mod(missingval, mk, scale, offset, coerce) end - return _mod(missingval, maskingval, scale, offset, coerce) end -function _mod(missingval, maskingval, scale, offset, coerce=convert) +function _stack_mods(metadata::Vector, missingval::Vector, maskingval::Vector; scaled::Bool, coerce) + map(metadata, missingval, maskingval) do md, mv, mk + scale, offset = _get_scale_offset(md, scaled) + _mod(mv, mk, scale, offset, coerce) + end +end +function _stack_mods(metadata::Vector, missingval, maskingval; scaled::Bool, coerce) + map(metadata) do md + scale, offset = _get_scale_offset(md, scaled) + _mod(missingval, maskingval, scale, offset, coerce) + end +end + +function _mod(metadata, missingval, maskingval; scaled::Bool, coerce) + scale, offset = _get_scale_offset(metadata, scaled) + _mod(missingval, maskingval, scale, offset, coerce) +end +function _mod(missingval, maskingval, scale, offset, coerce) if isnothing(maskingval) && isnothing(scale) && isnothing(offset) return NoMod(missingval) else @@ -44,6 +56,13 @@ function _mod(missingval, maskingval, scale, offset, coerce=convert) end end +@inline _get_scale_offset(metadata::NoKW, scaled) = (nothing, nothing) +@inline function _get_scale_offset(metadata, scaled) + scale = scaled ? get(metadata, "scale", nothing) : nothing + offset = scaled ? 
get(metadata, "offset", nothing) : nothing + return scale, offset +end + _mod_eltype(::AbstractArray{T}, ::NoMod) where T = T _mod_eltype(::AbstractArray{T}, m::Mod) where T = Base.promote_op(_applymod, T, typeof(m)) @@ -98,7 +117,7 @@ _ismissing(x, mv) = isequal(x, mv) _ismissing(_, ::Nothing) = false _scaleoffset(x, m::Mod) = _scaleoffset(x, m.scale, m.offset) -_scaleoffset(x, scale, offset) = muladd(x, scale, offset) +_scaleoffset(x, scale, offset) = x * scale + offset _scaleoffset(x, ::Nothing, offset) = x + offset _scaleoffset(x, scale, ::Nothing) = x * scale _scaleoffset(x, ::Nothing, ::Nothing) = x diff --git a/src/stack.jl b/src/stack.jl index 231579f48..1d5d7d304 100644 --- a/src/stack.jl +++ b/src/stack.jl @@ -156,6 +156,8 @@ $GROUP_KEYWORD - `metadata`: A `Dict` or `DimensionalData.Metadata` object. - `missingval`: a single value for all layers or a `NamedTuple` of missingval for each layer. `nothing` specifies no missing value. +$MASKINGVAL_KEYWORD +$SCALED_KEYWORD $CONSTRUCTOR_CRS_KEYWORD $CONSTRUCTOR_MAPPEDCRS_KEYWORD - `refdims`: `Tuple` of `Dimension` that the stack was sliced from. @@ -164,7 +166,6 @@ For when one or multiple filepaths are used: $DROPBAND_KEYWORD $LAZY_KEYWORD -$REPLACE_MISSING_KEYWORD $SOURCE_KEYWORD For when a single `Raster` is used: @@ -268,7 +269,7 @@ function RasterStack(layers::NamedTuple{K,<:Tuple{Vararg{<:AbstractDimArray}}}; refdims::Tuple=(), missingval=map(missingval, _layers), metadata=NoMetadata(), - layermetadata=map(DD.metadata, _layers), + layermetadata::NamedTuple{K}=map(DD.metadata, _layers), layerdims::NamedTuple{K}=map(DD.basedims, _layers), kw... ) where K @@ -326,8 +327,8 @@ function RasterStack(s::DD.AbstractDimStack; data=parent(s), dims::Union{Tuple,NoKW}=dims(s), refdims::Tuple=refdims(s), - layerdims=DD.layerdims(s), metadata=metadata(s), + layerdims=DD.layerdims(s), layermetadata=DD.layermetadata(s), missingval=missingval(s), kw... 
@@ -351,12 +352,29 @@ function RasterStack(filenames::NamedTuple{K,<:Tuple{<:AbstractString,Vararg}}; resize=nokw, layermetadata::Union{NoKW,NamedTuple{K}}=nokw, layerdims::Union{NoKW,NamedTuple{K}}=nokw, + maskingval=nokw, + scaled=true, + coerce=convert, kw... ) where K - missingval = missingval isa NamedTuple ? missingval : map(_ -> missingval, filenames) - layermetadata = layermetadata isa NamedTuple ? layermetadata : map(_ -> layermetadata, filenames) - layers = map(keys(filenames), values(filenames), values(missingval), values(layermetadata)) do name, fn, mv, md - Raster(fn; source=_sourcetrait(fn, source), name, missingval=mv, metadata=md, kw...) + missingval1 = if missingval isa NamedTuple + keys(missingval) == K || throw(ArgumentError("missingval keys $(keys(missingval)) do not match filename keywords $K")) + collect(missingval) + else + missingval + end + maskingval1 = if maskingval isa NamedTuple + keys(maskingval) == K || throw(ArgumentError("maskingval keys $(keys(maskingval)) do not match filename keywords $K")) + collect(maskingval) + else + maskingval + end + fn = collect(filenames) + layermetadata = layermetadata isa NamedTuple ? collect(layermetadata) : map(_ -> NoKW(), fn) + layerdims = layerdims isa NamedTuple ? collect(layerdims) : map(_ -> NoKW(), fn) + mods = _stack_mods(layermetadata, missingval1, maskingval1; scaled, coerce) + layers = map(K, fn, layermetadata, layerdims, mods) do name, fn, md, d, mod + Raster(fn; source=_sourcetrait(fn, source), name, metadata=md, dims=d, mod, kw...) end return RasterStack(NamedTuple{K}(layers); resize, metadata) end @@ -364,10 +382,13 @@ end function RasterStack(filename::AbstractString; lazy::Bool=false, dropband::Bool=true, - replace_missing::Bool=false, source::Union{Symbol,Source,NoKW}=nokw, + missingval=nokw, + maskingval=nokw, name=nokw, group=nokw, + scaled=true, + coerce=convert, kw... 
) source = _sourcetrait(filename, source) @@ -377,19 +398,19 @@ function RasterStack(filename::AbstractString; length(filenames) > 0 || throw(ArgumentError("No files in directory $filename")) # Detect keys from names name = if isnokw(name) - all_shared = true stripped = lstrip.(x -> x in (" ", "_"), (x -> x[1:end]).(filenames)) Symbol.(replace.(first.(splitext.(stripped)), Ref(" " => "_"))) else name end - RasterStack(joinpath.(Ref(filename), filenames); lazy, replace_missing, dropband, group, kw...) + RasterStack(joinpath.(Ref(filename), filenames); + missingval, maskingval, scaled, coerce, lazy, dropband, group, kw... + ) else # Load as a single file if haslayers(source) # With multiple named layers - l_st = _layer_stack(filename; source, name, lazy, group, replace_missing, kw...) - + l_st = _layer_stack(filename; source, name, lazy, group, kw...) # Maybe split the stack into separate arrays to remove extra dims. if !isnokw(name) map(identity, l_st) @@ -398,7 +419,8 @@ function RasterStack(filename::AbstractString; end else # With bands actings as layers - RasterStack(Raster(filename; source, lazy, replace_missing, dropband=false); kw...) + raster = Raster(filename; source, lazy, scaled, coerce, missingval, maskingval, dropband=false) + RasterStack(raster; kw...) end end @@ -470,15 +492,20 @@ function _layer_stack(filename; name=nokw, group=nokw, metadata=nokw, - layerdims=nokw, layermetadata=nokw, + layerdims=nokw, missingval=nokw, + maskingval=nokw, + replace_missing=nokw, crs=nokw, mappedcrs=nokw, - replace_missing=false, + coerce=convert, + scaled=true, + checkmem=true, lazy=false, kw... ) + _maybewarn_replace_missing(replace_missing) data, field_kw = _open(filename; source) do ds layers = _layers(ds, name, group) # Create a Dict of dimkey => Dimension to use in `dim` and `layerdims` @@ -487,27 +514,43 @@ function _layer_stack(filename; metadata = isnokw(metadata) ? _metadata(ds) : metadata layerdims = isnokw(layerdims) ? 
_layerdims(ds; layers, dimdict) : layerdims dims = _sort_by_layerdims(isnokw(dims) ? _dims(ds, dimdict) : dims, layerdims) - layermetadata = isnokw(layermetadata) ? _layermetadata(ds; layers) : layermetadata - missingval = isnokw(missingval) ? Rasters.missingval(ds) : missingval - name = Tuple(map(Symbol, layers.names)) - data = if lazy - # TODO replace_missing is currently always true for - # CommonDataModel FileStack. We should change this. - FileStack{typeof(source)}(ds, filename; name, group, vars=Tuple(layers.vars)) + layermetadata1 = if isnokw(layermetadata) + _layermetadata(ds; layers) else - map(layers.vars) do v - x = Array(replace_missing ? _replace_missing(v, missingval) : v) - x isa AbstractArray ? x : fill(x) # Catch an NCDatasets bug - end |> NamedTuple{name} + layermetadata isa NamedTuple ? collect(layermetadata) : map(_ -> NoKW(), fn) end - if replace_missing - missingval = missing + missingval1 = if missingval isa NamedTuple + collect(missingval) + elseif isnokw(missingval) + Rasters.missingval(ds) + else + missingval end - data, (; dims, refdims, layerdims=NamedTuple{name}(layerdims), metadata, layermetadata=NamedTuple{name}(layermetadata), missingval) + maskingval1 = maskingval isa NamedTuple ? collect(maskingval) : maskingval + mods = _stack_mods(layermetadata1, missingval1, maskingval1; scaled, coerce) + name = Tuple(map(Symbol, layers.names)) + _return_lifted(NamedTuple{name}, dims, refdims, layerdims, metadata, layermetadata1, missingval, lazy, layers, mods, checkmem) end return RasterStack(data; field_kw..., kw...) 
end +function _return_lifted( + ::Type{NT}, dims, refdims, layerdims, metadata, layermetadata, missingval, lazy, layers, mods, checkmem +) where NT<:NamedTuple{K} where K + data = if lazy + vars = ntuple(layers.vars[i], Val{K}()) + FileStack{typeof(source)}(ds, filename; name, group, mods, vars) + else + map(layers.vars, layermetadata, mods) do var, md, mod + modvar = _maybe_modify(var, mod) + checkmem && _checkobjmem(modvar) + x = Array(modvar) + x isa AbstractArray ? x : fill(x) # Catch an NCDatasets bug + end |> NT + end + return data, (; dims, refdims, layerdims=NT(layerdims), metadata, layermetadata=NT(layermetadata), missingval) +end + # Try to sort the dimensions by layer dimension into a sensible # order that applies without permutation, preferencing the layers # with most dimensions, and those that come first. diff --git a/src/utils.jl b/src/utils.jl index 45de419e8..664a09a6e 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -136,17 +136,17 @@ function _without_mapped_crs(f, st::AbstractRasterStack, mappedcrs::GeoFormat) end function _extent2dims(to; size=nothing, res=nothing, crs=nothing, kw...) - _extent2dims(to, size, res, crs) + _extent2dims(to, size, res, crs; kw...) end -function _extent2dims(to::Extents.Extent, size::Nothing, res::Nothing, crs) +function _extent2dims(to::Extents.Extent, size::Nothing, res::Nothing, crs; kw...) isnothing(res) && throw(ArgumentError("Pass either `size` or `res` keywords or a `Tuple` of `Dimension`s for `to`.")) end -function _extent2dims(to::Extents.Extent, size, res, crs) +function _extent2dims(to::Extents.Extent, size, res, crs; kw...) isnothing(res) || _size_and_res_error() end -function _extent2dims(to::Extents.Extent{K}, size::Nothing, res::Real, crs) where K +function _extent2dims(to::Extents.Extent{K}, size::Nothing, res::Real, crs; kw...) where K tuple_res = ntuple(_ -> res, length(K)) - _extent2dims(to, size, tuple_res, crs) + _extent2dims(to, size, tuple_res, crs; kw...) 
end function _extent2dims(to::Extents.Extent{K}, size::Nothing, res, crs) where K ranges = map(values(to), res) do bounds, r @@ -155,9 +155,9 @@ function _extent2dims(to::Extents.Extent{K}, size::Nothing, res, crs) where K step = (outer - start) / length range(; start, step, length) end - return _extent2dims(to, ranges, crs) + return _extent2dims(to, ranges, crs; kw...) end -function _extent2dims(to::Extents.Extent{K}, size, res::Nothing, crs) where K +function _extent2dims(to::Extents.Extent{K}, size, res::Nothing, crs; kw...) where K if size isa Int size = ntuple(_ -> size, length(K)) end @@ -168,15 +168,26 @@ function _extent2dims(to::Extents.Extent{K}, size, res::Nothing, crs) where K end return _extent2dims(to, ranges, crs) end -function _extent2dims(to::Extents.Extent{K}, ranges, crs) where K +function _extent2dims(to::Extents.Extent{K}, ranges, crs; + sampling=Intervals(Start()), + kw... +) where K emptydims = map(name2dim, K) - lookups = map(ranges) do range - Projected(range; - order=ForwardOrdered(), - sampling=Intervals(Start()), - span=Regular(step(range)), - crs, - ) + lookups = map(emptydims, ranges) do d, range + if d isa SpatialDim && !isnothing(crs) + Projected(range; + sampling, + order=ForwardOrdered(), + span=Regular(step(range)), + crs, + ) + else + Sampled(range; + sampling, + order=ForwardOrdered(), + span=Regular(step(range)), + ) + end end d = map(rebuild, emptydims, lookups) return d @@ -368,3 +379,8 @@ function _no_memory_error(f, bytes) """ return error(msg) end + +_maybewarn_replace_missing(replace_missing::NoKW) = nothing +function _maybewarn_replace_missing(replace_missing) + @warn "`replace_missing` keyword no longer used. 
Set `maskingval` to nothing for no replacement, to `missing` to mask `missingval` with `missing`, or any other value" +end diff --git a/test/sources/gdal.jl b/test/sources/gdal.jl index c9ad74530..8feb17db3 100644 --- a/test/sources/gdal.jl +++ b/test/sources/gdal.jl @@ -11,7 +11,7 @@ gdalpath = maybedownload(url) @test_throws ArgumentError Raster("notafile.tif") @time gdalarray = Raster(gdalpath; name=:test) - @time lazyarray = Raster(gdalpath; scale=nothing, offset=nothing, lazy=true); + @time lazyarray = Raster(gdalpath; lazy=true); @time eagerarray = Raster(gdalpath; lazy=false); @testset "lazyness" begin @@ -29,10 +29,10 @@ gdalpath = maybedownload(url) # This file has no scale/offset so cf does nothing @time cfarray = Raster(gdalpath) @time cf_nomask_array = Raster(gdalpath; maskingval=nothing) - @time nocfarray = Raster(gdalpath; scale=nothing, offset=nothing) + @time nocfarray = Raster(gdalpath; scaled=false) @time lazycfarray = Raster(gdalpath; lazy=true) - @time lazynocfarray = Raster(gdalpath; lazy=true, scale=nothing, offset=nothing) - @time lazynocfnomaskarray = Raster(gdalpath; lazy=true, scale=nothing, offset=nothing, maskingval=nothing) + @time lazynocfarray = Raster(gdalpath; lazy=true, scaled=false) + @time lazynocfnomaskarray = Raster(gdalpath; lazy=true, scaled=false, maskingval=nothing) @test parent(cfarray) isa Array{UInt8,2} @test parent(cf_nomask_array) isa Array{UInt8,2} @test parent(nocfarray) isa Array{UInt8,2} diff --git a/test/sources/ncdatasets.jl b/test/sources/ncdatasets.jl index 755bbb374..011af58ec 100644 --- a/test/sources/ncdatasets.jl +++ b/test/sources/ncdatasets.jl @@ -31,7 +31,13 @@ stackkeys = ( ) @testset "grid mapping" begin - stack = RasterStack(joinpath(testdir, "data/grid_mapping_test.nc")) + using ProfileView + using SnoopCompile + @profview 1 + 2 + stack = + tinf = @snoopi_deep RasterStack(joinpath(testdir, "data/grid_mapping_test.nc")) + fg = flamegraph(tinf) + ProfileView.view(fg) @test 
metadata(stack.mask)["grid_mapping"] == Dict{String, Any}( "straight_vertical_longitude_from_pole" => 0.0, "false_easting" => 0.0, @@ -62,11 +68,11 @@ end @testset "cf" begin @time cfarray = Raster(ncsingle) @time cf_nomask_array = Raster(ncsingle; maskingval=nothing) - @time nocfarray = Raster(ncsingle; scale=nothing, offset=nothing) - @time nocf_nomask_array = Raster(ncsingle; scale=nothing, offset=nothing, maskingval=nothing) - @time lazycfarray = Raster(ncsingle; lazy=true, scale=nothing, offset=nothing) - @time lazynocfarray = Raster(ncsingle; lazy=true, , scale=nothing, offset=nothing) - @time lazynocf_nomask_array = Raster(ncsingle; lazy=true, scale=nothing, offset=nothing, maskingval=nothing) + @time nocfarray = Raster(ncsingle; scaled=false) + @time nocf_nomask_array = Raster(ncsingle; scaled=false, maskingval=nothing) + @time lazycfarray = Raster(ncsingle; lazy=true, scaled=false) + @time lazynocfarray = Raster(ncsingle; lazy=true, scaled=false) + @time lazynocf_nomask_array = Raster(ncsingle; lazy=true, scaled=false, maskingval=nothing) @test missingval(cfarray) === missing @test missingval(nocfarray) === missing @test missingval(cf_nomask_array) === 1.0f20 From 967083c62ad8ad0b0fbcf73bd0bcd32761379a29 Mon Sep 17 00:00:00 2001 From: rafaqz Date: Sun, 21 Jul 2024 03:33:50 +0200 Subject: [PATCH 07/38] create --- ext/RastersArchGDALExt/gdal_source.jl | 48 ++-- ext/RastersArchGDALExt/warp.jl | 1 - .../RastersNCDatasetsExt.jl | 2 +- ext/RastersNCDatasetsExt/ncdatasets_source.jl | 41 ++-- src/create.jl | 212 +++++++++++++++--- src/filestack.jl | 2 +- src/methods/shared_docstrings.jl | 27 ++- src/modifieddiskarray.jl | 146 +++++++----- src/show.jl | 2 +- src/sources/commondatamodel.jl | 17 +- src/stack.jl | 110 +++++---- src/utils.jl | 123 +++++----- test/sources/gdal.jl | 5 +- 13 files changed, 494 insertions(+), 242 deletions(-) diff --git a/ext/RastersArchGDALExt/gdal_source.jl b/ext/RastersArchGDALExt/gdal_source.jl index 2eb7d48d0..1534ed0dd 100644 --- 
a/ext/RastersArchGDALExt/gdal_source.jl +++ b/ext/RastersArchGDALExt/gdal_source.jl @@ -42,6 +42,10 @@ RA.cleanreturn(A::AG.RasterDataset) = Array(A) RA.haslayers(::GDALsource) = false RA._sourcetrait(A::AG.RasterDataset) = GDALsource() +function Base.write(filename::AbstractString, ::GDALsource, A::AbstractRasterStack; kw...) + ext = splitext(filename)[2] + throw(ArgumentError("Cant write a RasterStack to $ext with gdal")) +end function Base.write( filename::AbstractString, ::GDALsource, A::AbstractRaster{T}; force=false, @@ -51,27 +55,26 @@ function Base.write( scale=nokw, offset=nokw, coerce=nokw, + eltype=Missings.nonmissingtype(T), + write=true, kw... ) where T RA.check_can_write(filename, force) A1 = _maybe_correct_to_write(A, missingval) - missingval = missingval isa NoKW ? RA._writeable_missing(T; verbose) : missingval - mod = RA._mod(missingval, maskingval, scale, offset, coerce) - _create_with_driver(filename, dims(A1), eltype(A1); - missingval, _block_template=A1, scale, offset, kw... + mod = RA._writer_mod(eltype; missingval, maskingval, scale, offset, coerce) + _create_with_driver(filename, dims(A1), T; + missingval, _block_template=A1, scale, offset, verbose, kw... ) do dataset verbose && _maybe_warn_south_up(A, verbose, "Writing South-up. Use `reverse(myrast; dims=Y)` first to write conventional North-up") - open(A1; write=true) do O - RA._maybe_modify(AG.RasterDataset(dataset), mod) .= parent(O) + if write + open(A1; write=true) do O + RA._maybe_modify(AG.RasterDataset(dataset), mod) .= parent(O) + end end end return filename end -function _maybe_warn_south_up(A, verbose, msg) - verbose && lookup(A, Y) isa AbstractSampled && order(A, Y) isa ForwardOrdered && @warn msg -end - function RA._open(f, ::GDALsource, filename::AbstractString; write=false, mod=NoMod(), kw... 
) @@ -292,25 +295,12 @@ function AG.RasterDataset(f::Function, A::AbstractRaster; coerce=nokw, verbose=false, eltype=Missings.nonmissingtype(eltype(A)), - maskingval=nokw, missingval=nokw, + maskingval=nokw, kw... ) A1 = _maybe_correct_to_write(A) - mv = RA.missingval(A1) - if RA.isnokw(missingval) - missingval = (ismissing(mv) || typeof(mv) <: eltype) ? RA._type_missingval(eltype) : mv - end - if RA.isnokw(maskingval) - if ismissing(mv) - maskingval = missing - elseif maskingval === missingval - maskingval = nothing - else - maskingval = mv - end - end - mod = RA._mod(missingval, maskingval, scale, offset, coerce) + mod = _writer_mod(A, missingval, maskingval) return _create_with_driver(filename, dims(A1), eltype; _block_template=A1, missingval, scale, offset, verbose, kw... ) do dataset @@ -620,6 +610,14 @@ function _maybe_reorder(A, dims) A end end + +function _maybe_warn_south_up(A, verbose, msg) + if hasdim(A, Y()) + verbose && lookup(A, Y()) isa AbstractSampled && order(A, Y()) isa ForwardOrdered && @warn msg + end + return nothing +end + #= Geotranforms ######################################################################## See https://lists.osgeo.org/pipermail/gdal-dev/2011-July/029449.html diff --git a/ext/RastersArchGDALExt/warp.jl b/ext/RastersArchGDALExt/warp.jl index aedc6a71e..db2b5beab 100644 --- a/ext/RastersArchGDALExt/warp.jl +++ b/ext/RastersArchGDALExt/warp.jl @@ -30,7 +30,6 @@ function _warp(A::AbstractRaster, flags::Dict; filename=nothing, suffix="", kw.. return isnothing(filename) ? 
read(raster) : raster end end - @show missingval(out) # And permute the dimensions back to what they were in A out1 = _maybe_restore_from_gdal(out, dims(A)) out2 = _reset_gdalwarp_sampling(out1, A) diff --git a/ext/RastersNCDatasetsExt/RastersNCDatasetsExt.jl b/ext/RastersNCDatasetsExt/RastersNCDatasetsExt.jl index c9d475ec6..b8e50adc5 100644 --- a/ext/RastersNCDatasetsExt/RastersNCDatasetsExt.jl +++ b/ext/RastersNCDatasetsExt/RastersNCDatasetsExt.jl @@ -18,7 +18,7 @@ using Dates, using Rasters.Lookups using Rasters.Dimensions -using Rasters: CDMsource, NCDsource, nokw, NoKW +using Rasters: CDMsource, NCDsource, NoKW, nokw, isnokw using CommonDataModel: AbstractDataset diff --git a/ext/RastersNCDatasetsExt/ncdatasets_source.jl b/ext/RastersNCDatasetsExt/ncdatasets_source.jl index 6ac1edd57..eb16a9299 100644 --- a/ext/RastersNCDatasetsExt/ncdatasets_source.jl +++ b/ext/RastersNCDatasetsExt/ncdatasets_source.jl @@ -66,16 +66,26 @@ end function _writevar!(ds::AbstractDataset, A::AbstractRaster{T,N}; verbose=true, missingval=nokw, + maskingval=nokw, + metadata=nokw, chunks=nokw, chunksizes=RA._chunks_to_tuple(A, dims(A), chunks), scale=nokw, offset=nokw, coerce=convert, eltype=Missings.nonmissingtype(T), + write=true, + name=DD.name(A), kw... ) where {T,N} + eltype <: NCDAllowedType || throw(ArgumentError(""" + Element type $eltype cannot be written to NetCDF. Convert it to one of $(Base.uniontypes(NCDAllowedType)), + usually by broadcasting the desired type constructor over the `Raster`, e.g. `newrast = Float32.(rast)`")) + """ + )) _def_dim_var!(ds, A) - attrib = RA._attribdict(metadata(A)) + metadata = isnokw(metadata) ? NoMetadata() : metadata + attrib = RA._attribdict(metadata) # Scale and offset scale = if isnokw(scale) || isnothing(scale) delete!(attrib, "scale_factor") @@ -89,34 +99,27 @@ function _writevar!(ds::AbstractDataset, A::AbstractRaster{T,N}; else attrib["add_offset"] = offset end - maskingval1 = begin - mv = maskingval isa NoKW ? 
Rasters.missingval(A) : maskingval - mv === missingval ? nothing : mv - end - mod = _mod(missingval1, maskingval1, scale, offset, coerce) - eltype <: NCDAllowedType || throw(ArgumentError(""" - Element type $eltyp cannot be written to NetCDF. Convert it to one of $(Base.uniontypes(NCDAllowedType)), - usually by broadcasting the desired type constructor over the `Raster`, e.g. `newrast = Float32.(rast)`")) - """ - )) + mod = RA._writer_mod(eltype; missingval, maskingval, scale, offset, coerce) - # Set _FillValue - if !isnothing(maskingval1) && Rasters.missingval(A) isa T + if !isnothing(mod.missingval) attrib["_FillValue"] = missingval end - verbose && !(maskingval isa Nothing) && @warn "`maskingval` $(maskingval) is not the same type as your data $T." - key = if string(DD.name(A)) == "" + key = if isnokw(name) || string(name) == "" UNNAMED_NCD_FILE_KEY else - string(DD.name(A)) + string(name) end dimnames = lowercase.(string.(map(RA.name, dims(A)))) - var = _maybe_modify(NCD.defVar(ds, key, eltyp, dimnames; attrib=attrib, chunksizes, kw...), mod) - # Write with a DiskArays.jl broadcast - var .= A + var = NCD.defVar(ds, key, eltype, dimnames; attrib=attrib, chunksizes, kw...) + + if write + modvar = RA._maybe_modify(var, mod) + # Write with a DiskArays.jl broadcast + modvar .= A + end return nothing end diff --git a/src/create.jl b/src/create.jl index 35e9290a0..ea47839ee 100644 --- a/src/create.jl +++ b/src/create.jl @@ -2,35 +2,36 @@ """ create([filename], template::Raster; kw...) - create([filename], T, template::Raster; kw...) - create([filename], T, template::Tuple; kw...) + create([filename], T, template; kw...) Create a new Raster. If `filename` is a `String` it will be created on disk, -and opened lazily. If it is `nothing` a regular in-memory `Raster` -will be created. If written to disk, the values will be `missingval` when it -is defined, if in-memory values will be `undef`. - -Generally all indices should be written to after `create`. +and opened lazily. 
If it is `nothing` or not passed, a regular in-memory `Raster` +will be created. When written to disk, the values will be `missingval`, +if in-memory, values will be `undef`. The return value is a `Raster`. The `eltype` will usually be `T`, except where `scale` and/or `offset` keywords are used, in which case `T` will -depend on the tyepe promotion of `scale` and `offset` and `T`. +depend on the type promotion of `scale` and `offset` with `T`. `maskingval` will also affect the `eltype`. ## Arguments - `filename`: a String file path, which will create a file on disk and return it as a lazy `Raster`, or `nothing` to create an in-memory `Raster`. -- `T`: the element type to use in the created array. -- `template`: a `Raster`, `Tuple` of `Dimension` or `Extents.Extent` to use as a template. +- `template`: a `Raster`, `Tuple` of `Dimension` or `Extents.Extent` to use as a template. If an `Extent` is used, a `size` or `res` keyword must be passed. + If a `T` argument is not used, it is taken from the `template` eltype. +- `T`: the element type to use in the created array. ## Keywords $NAME_KEYWORD $REFDIMS_KEYWORD $METADATA_KEYWORD -$MISSINGVAL_KEYWORD +$WRITE_MISSINGVAL_KEYWORD +- `fillval`: A value to fill the array with. By default this will be + `missingval`. If there is no `missingval` set or `fillval` is set to nothing + disk values will remain undefined. $MASKINGVAL_KEYWORD $SOURCE_KEYWORD - `lazy`: A `Bool` specifying if to load data lazily from disk. For `create` @@ -44,13 +45,81 @@ $VERBOSE_KEYWORD $RES_KEYWORD $SIZE_KEYWORD $CRS_KEYWORD +$CHUNKS_KEYWORD +- `reverse_y`: usually we want to write `Y` dimensions in reverse. + When building dimensions from an `Extents.Extent` we do this by + default, unless `reverse_y=false`. With template `Raster` or dimensions, + the existing order is used. + +## Example + +Here we create a UInt8 GeoTIFF and open it as a Raster, from -80 to 80 +latitude, and 0 to 120 longitude, with a resolution of 0.25 degrees.
+ +We scale values from 0-1 over `UInt8` 0-200, and using `255`. +Values that don't convert exactly will error (we could use `coerce=trunc` to fix that). + +We use `UInt8(255)` as the `missingval` on disk, but mask it with `missing` in +the loaded `Raster`. + +We use standard lat/lon (EPSG:4326) as the crs, and force writing if the file exists. + +```julia +using Rasters, NCDatasets, ArchGDAL, Extents, Dates +using Rasters.Lookups +rast = Rasters.create("created.tif", UInt8, Extents.Extent(X=(0, 120), Y=(-80, 80), Band=(0, 12)); + res=(X=1.0, Y=1.0, Band=1), + # size=(X=100, Y=100, Band=12), + maskingval=nothing, + name=:myraster, + crs=EPSG(4326), + force=true, + sampling=(X=Intervals(Start()), Y=Intervals(Start()), Band=Intervals(Start())), +) +using ProfileView +@profview open(rast; write=true) do A + A .= Rasters.Missings.nonmissingtype(eltype(A))(1) + nothing +end +Raster("created.tif"; maskingval=nothing) +rm("created.tif") + +extent = Extents.Extent(X=(0, 120), Y=(-80, 80))#, Band=(1, 3)) +types = (a=UInt8, b=Int32, c=Float64=>Y) +rast = Rasters.create("created.nc", types, extent; + # res=(X=1.0, Y=1.0, Band=1), + maskingval=nothing, + size=(X=100, Y=100), + crs=EPSG(4326), + force=true, + sampling=(X=Intervals(Start()), Y=Intervals(Start()), Band=Points()), +) +RasterStack("created.nc") + +╭───────────────────────────────────────────╮ +│ 480×640 Raster{Union{Missing, Float64},2} │ +├───────────────────────────────────────────┴───────────────────────────────────────── dims ┐ + ↓ X Projected{Float64} LinRange{Float64}(0.0, 119.75, 480) ForwardOrdered Regular Points, + → Y Projected{Float64} LinRange{Float64}(79.75, -80.0, 640) ReverseOrdered Regular Points +├───────────────────────────────────────────────────────────────────────────────── metadata ┤ + Metadata{GDALsource} of Dict{String, Any} with 2 entries: + "filepath" => "created.tif" + "scale" => 0.005 +├─────────────────────────────────────────────────────────────────────────────────── raster ┤ + extent: 
Extent(X = (0.0, 119.75), Y = (-80.0, 79.75)) + missingval: missing + crs: GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199 +433,AUTHORITY["EPSG","9122"]],AXIS["Latitude",NORTH],AXIS["Longitude",EAST],AUTHORITY["EPSG","4326"]] + filename: nothing +└───────────────────────────────────────────────────────────────────────────────────────────┘ +``` """ -create(A::AbstractRaster; kw...) where T = create(nothing, A; kw...) -create(T::Type, dims::Tuple; kw...) where T = create(nothing, T, dims; kw...) -create(T::Type, extent::Extents.Extent; kw...) where T = create(nothing, T, dims; kw...) +create(A::AbstractRaster; kw...) = create(nothing, A; kw...) +create(T::Union{Type,NamedTuple}, dims::Tuple; kw...) = create(nothing, T, dims; kw...) +create(T::Union{Type,NamedTuple}, extent::Extents.Extent; kw...) = create(nothing, T, dims; kw...) create(filename::Union{AbstractString,Nothing}, A::AbstractRaster{T}; kw...) where T = create(filename, T, A; kw...) -function create(filename::Union{AbstractString,Nothing}, T::Type, A::AbstractRaster; +function create(filename::Union{AbstractString,Nothing}, T::Union{Type,NamedTuple}, A::AbstractRaster; name=name(A), metadata=metadata(A), missingval=missingval(A), @@ -58,7 +127,7 @@ function create(filename::Union{AbstractString,Nothing}, T::Type, A::AbstractRas ) return create(filename, T, dims(A); parent=parent(A), name, metadata, missingval, kw...) end -function create(filename::AbstractString, T::Type, dims::Tuple; +function create(filename::AbstractString, T::Union{Type,NamedTuple}, dims::Tuple; lazy=true, parent=nokw, suffix=nokw, @@ -70,31 +139,58 @@ function create(filename::AbstractString, T::Type, dims::Tuple; # This calls `create` in the /sources file for this `source` return create(filename, source, T, dims; lazy, missingval, kw...) 
end -function create(filename::Union{AbstractString,Extent}, T::Type, extent::Extents.Extent; - res=nokw, - size=nokw, +function create(filename::AbstractString, T::Union{Type,NamedTuple}, extent::Extents.Extent; + res=nokw, + size=nokw, crs=nothing, sampling=Points(), + reverse_y=true, kw... ) ds = _extent2dims(extent; size, res, crs, sampling) + ds = if reverse_y && hasdim(ds, Y()) + DD.setdims(ds, reverse(dims(ds, Y()))) + else + ds + end return create(filename, T, ds; kw...) end -function create(filename::Nothing, T::Type, dims::Tuple; +function create(filename::Nothing, ::Type{T}, dims::Tuple; parent=nokw, suffix=nokw, force=false, missingval, kw... -) - T = isnothing(missingval) ? T : promote_type(T, typeof(missingval)) - data = isnokw(parent) || isnothing(parent) ? Array{T}(undef, dims) : similar(parent, T, size(dims)) +) where T + eltype = isnothing(missingval) ? T : promote_type(T, typeof(missingval)) + data = if isnokw(parent) || isnothing(parent) + Array{eltype}(undef, dims) + else + similar(parent, eltype, size(dims)) + end return Raster(data, dims; missingval, kw...) end -function create(filename::AbstractString, source::Source, T::Type, dims::DimTuple; +function create(filename::Nothing, types::NamedTuple, dims::Tuple; + suffix=nokw, + force=false, + missingval, + kw... +) + layers = map(types) do T + # eltype = isnothing(missingval) ? T : promote_type(T, typeof(missingval)) + data = if isnokw(parent) || isnothing(parent) + Array{eltype}(undef, dims) + else + similar(parent, eltype, size(dims)) + end + end + return RasterStack(layers, dims; missingval, kw...) 
+end +function create(filename::AbstractString, source::Source, ::Type{T}, dims::DimTuple; name=nokw, missingval=nokw, maskingval=missing, + fillval=nokw, metadata=nokw, chunks=nokw, scale=nokw, @@ -104,16 +200,70 @@ function create(filename::AbstractString, source::Source, T::Type, dims::DimTupl verbose=true, force=false, coerce=nokw, -) - T1 = Missings.nonmissingtype(T) - if isnothing(missingval) - A = FillArrays.Zeros{T1}(map(length, dims)) +) where T + eltype = Missings.nonmissingtype(T) + if isnokw(fillval) || isnothing(fillval) + write = false # Leave fill undefined + A = FillArrays.Zeros{eltype}(map(length, dims)) else - missingval = ismissing(missingval) || isnokw(missingval) ? _type_missingval(T1) : convert(T1, missingval) - A = FillArrays.Fill{T1}(missingval, map(length, dims)) + fillval isa T || throw(ArgumentError("fillval must be of type $T, got $fillval")) + write = true # Write fill to disk + A = FillArrays.Fill{eltype}(fillval, map(length, dims)) end # Create layers of zero arrays rast = Raster(A, dims; name, missingval) - write(filename, source, rast; chunks, metadata, scale, offset, missingval, verbose, force, coerce) + Rasters.write(filename, source, rast; + eltype, chunks, metadata, scale, offset, missingval, verbose, force, coerce, write + ) return Raster(filename; source, lazy, metadata, missingval, maskingval, dropband, coerce) end +function create(filename::AbstractString, source::Source, layertypes::NamedTuple, dims::DimTuple; + name=keys(layertypes), + missingval=nokw, + maskingval=missing, + fillval=nokw, + metadata=nokw, + chunks=nokw, + scale=nokw, + offset=nokw, + dropband=!hasdim(dims, Band), + lazy=true, + verbose=true, + force=false, + coerce=nokw, +) + layers = map(layertypes) do x + if x isa Type + eltype = Missings.nonmissingtype(x) + size = map(length, dims) + elseif x isa Pair{<:Type} + eltype = Missings.nonmissingtype(x[1]) + ds = x[2] + size = map(length, DD.dims(dims, DD._astuple(ds))) + else + throw(ArgumentError("Must be a 
Type or a Pair of Type and Dimension/Symbol")) + end + FillArrays.Zeros{eltype}(size) + end + layerdims = map(layertypes) do x + if x isa Type + DD.basedims(dims) + else + ds = DD._astuple(DD.basedims(x[2])) + end + end + # if isnokw(fillval) || isnothing(fillval) + # write = false # Leave fill undefined + # A = FillArrays.Zeros{eltype}(map(length, dims)) + # else + # fillval isa T || throw(ArgumentError("fillval must be of type $T, got $fillval")) + # write = true # Write fill to disk + # A = FillArrays.Fill{eltype}(fillval, map(length, dims)) + # end + # Create layers of zero arrays + stack = RasterStack(layers, dims; layerdims, missingval) + fn = Rasters.write(filename, stack; + chunks, metadata, scale, offset, missingval, maskingval, verbose, force, coerce, write=false + ) + return RasterStack(fn; source, lazy, metadata, maskingval, dropband, coerce) +end diff --git a/src/filestack.jl b/src/filestack.jl index a29868788..d943cc03e 100644 --- a/src/filestack.jl +++ b/src/filestack.jl @@ -9,7 +9,7 @@ typically netcdf or hdf5. `S` is a backend type like `NCDsource`, and `Na` is a tuple of `Symbol` keys. """ -struct FileStack{S,Na,T,SZ,G<:Union{AbstractString,Symbol,Nothing},EC,HC,M<:AbstractModifications} +struct FileStack{S,Na,T,SZ,G<:Union{AbstractString,Symbol,Nothing},EC,HC,M} filename::String sizes::SZ group::G diff --git a/src/methods/shared_docstrings.jl b/src/methods/shared_docstrings.jl index 163980c98..d6e880e64 100644 --- a/src/methods/shared_docstrings.jl +++ b/src/methods/shared_docstrings.jl @@ -164,10 +164,8 @@ const MASKINGVAL_KEYWORD = """ """ const NAME_KEYWORD = """ -- `name`: a `Symbol` name for the array, which will also retrieve the, alphabetically first, - named layer if `Raster` is used on a multi-layered file like a NetCDF. - If instead `RasterStack` is used to read the multi-layered file, by default, all variables - will be added to the stack. 
+- `name`: a `Symbol` name for a Raster, which will also retrieve + a named layer if `Raster` is used on a multi-layered file like a NetCDF. """ const METADATA_KEYWORD = """ @@ -178,3 +176,24 @@ const REFDIMS_KEYWORD = """ - `refdims`: `Tuple of` position `Dimension`s the array was sliced from, defaulting to `()`. Usually not needed. """ + +const GROUP_KEYWORD = """ +- `group`: the group in the dataset where `name` can be found. Only needed for nested datasets. + A `String` or `Symbol` will select a single group. Pairs can also be used to access groups + at any nested depth, i.e. `group=:group1 => :group2 => :group3`. +""" + +const CHUNKS_KEYWORD = """ +- `chunks`: a `NTuple{N,Int}` specifying the chunk size for each dimension. + To specify only specific dimensions, a Tuple of `Dimension` wrapping `Int` + or a `NamedTuple` of `Int` can be used. Other dimensions will have a chunk + size of `1`. `true` can be used to mean: use the original + chunk size of the lazy `Raster` being written or X and Y of 256 by 256. + `false` means don't use chunks at all. +""" + +const WRITE_MISSINGVAL_KEYWORD = """ +- `missingval`: set the missing value (i.e. FillValue / nodataval) of the written raster, + as Julia's `missing` cannot be stored. If not passed in, `missingval` will be detected + from metadata or a default will be chosen.
+""" diff --git a/src/modifieddiskarray.jl b/src/modifieddiskarray.jl index 94b438dbf..fa6e78ead 100644 --- a/src/modifieddiskarray.jl +++ b/src/modifieddiskarray.jl @@ -19,60 +19,11 @@ struct Mod{Mi,Ma,S,O,F} <: AbstractModifications end end -function _stack_mods(metadata::Vector, missingval::Vector, maskingval; scaled, coerce) - map(metadata, missingval) do md, mv - scale, offset = _get_scale_offset(md, scaled) - _mod(mv, maskingval, scale, offset, coerce) - end -end -function _stack_mods(metadata::Vector, missingval, maskingval::Vector; scaled::Bool, coerce) - map(metadata, maskingval) do md, mk - scale, offset = _get_scale_offset(md, scaled) - _mod(missingval, mk, scale, offset, coerce) - end -end -function _stack_mods(metadata::Vector, missingval::Vector, maskingval::Vector; scaled::Bool, coerce) - map(metadata, missingval, maskingval) do md, mv, mk - scale, offset = _get_scale_offset(md, scaled) - _mod(mv, mk, scale, offset, coerce) - end -end -function _stack_mods(metadata::Vector, missingval, maskingval; scaled::Bool, coerce) - map(metadata) do md - scale, offset = _get_scale_offset(md, scaled) - _mod(missingval, maskingval, scale, offset, coerce) - end -end - -function _mod(metadata, missingval, maskingval; scaled::Bool, coerce) - scale, offset = _get_scale_offset(metadata, scaled) - _mod(missingval, maskingval, scale, offset, coerce) -end -function _mod(missingval, maskingval, scale, offset, coerce) - if isnothing(maskingval) && isnothing(scale) && isnothing(offset) - return NoMod(missingval) - else - return Mod(missingval, maskingval, scale, offset, coerce) - end -end - -@inline _get_scale_offset(metadata::NoKW, scaled) = (nothing, nothing) -@inline function _get_scale_offset(metadata, scaled) - scale = scaled ? get(metadata, "scale", nothing) : nothing - offset = scaled ? 
get(metadata, "offset", nothing) : nothing - return scale, offset -end - -_mod_eltype(::AbstractArray{T}, ::NoMod) where T = T -_mod_eltype(::AbstractArray{T}, m::Mod) where T = - Base.promote_op(_applymod, T, typeof(m)) - -_mod_inverse_eltype(::AbstractArray{T}, ::NoMod) where T = T -_mod_inverse_eltype(::AbstractArray{T}, m::Mod) where T = - Base.promote_op(_invertmod, typeof(m.coerce), T, typeof(m)) +missingval(m::Mod) = m.missingval +maskingval(m::Mod) = m.maskingval +missingval(m::NoMod) = m.missingval +maskingval(m::NoMod) = nothing -_maybe_modify(var, m::Mod) = ModifiedDiskArray(var, m) -_maybe_modify(var, ::NoMod) = var struct ModifiedDiskArray{T,N,V,M} <: DiskArrays.AbstractDiskArray{T,N} var::V @@ -84,9 +35,12 @@ function ModifiedDiskArray(v::V, m::M) where {V<:AbstractArray{<:Any,N},M} where end Base.parent(A::ModifiedDiskArray) = A.var -Base.size(A::ModifiedDiskArray, args...) = size(A.var, args...) -DiskArrays.haschunks(A::ModifiedDiskArray) = DiskArrays.haschunks(A.var) -DiskArrays.eachchunk(A::ModifiedDiskArray) = DiskArrays.eachchunk(A.var) +Base.size(A::ModifiedDiskArray, args...) = size(parent(A), args...) +filename(A::ModifiedDiskArray) = filename(parent(A)) +missingval(A::ModifiedDiskArray) = A.missingval +maskingval(A::ModifiedDiskArray) = A.maskingval +DiskArrays.haschunks(A::ModifiedDiskArray) = DiskArrays.haschunks(parent(A)) +DiskArrays.eachchunk(A::ModifiedDiskArray) = DiskArrays.eachchunk(parent(A)) function DiskArrays.readblock!(A::ModifiedDiskArray, out_block, I::AbstractVector...) 
broadcast!(_applymod, out_block, A.var[I...], (A.mod,)) @@ -142,3 +96,83 @@ _scaleoffset_inv(x, scale, offset) = (x - offset) / scale _scaleoffset_inv(x, scale, ::Nothing) = x / scale _scaleoffset_inv(x, ::Nothing, offset) = x - offset _scaleoffset_inv(x, ::Nothing, ::Nothing) = x + + +function _stack_mods(metadata::Vector, missingval::Vector, maskingval; scaled, coerce) + map(metadata, missingval) do md, mv + scale, offset = _get_scale_offset(md, scaled) + _mod(mv, maskingval, scale, offset, coerce) + end +end +function _stack_mods(metadata::Vector, missingval, maskingval::Vector; scaled::Bool, coerce) + map(metadata, maskingval) do md, mk + scale, offset = _get_scale_offset(md, scaled) + _mod(missingval, mk, scale, offset, coerce) + end +end +function _stack_mods(metadata::Vector, missingval::Vector, maskingval::Vector; scaled::Bool, coerce) + map(metadata, missingval, maskingval) do md, mv, mk + scale, offset = _get_scale_offset(md, scaled) + _mod(mv, mk, scale, offset, coerce) + end +end +function _stack_mods(metadata::Vector, missingval, maskingval; scaled::Bool, coerce) + map(metadata) do md + scale, offset = _get_scale_offset(md, scaled) + _mod(missingval, maskingval, scale, offset, coerce) + end +end + +function _mod(metadata, missingval, maskingval; scaled::Bool, coerce) + scale, offset = _get_scale_offset(metadata, scaled) + _mod(missingval, maskingval, scale, offset, coerce) +end +function _mod(missingval, maskingval, scale, offset, coerce) + if isnothing(maskingval) && isnothing(scale) && isnothing(offset) + return NoMod(missingval) + else + return Mod(missingval, maskingval, scale, offset, coerce) + end +end + +@inline _get_scale_offset(metadata::NoKW, scaled) = (nothing, nothing) +@inline function _get_scale_offset(metadata, scaled) + scale = scaled ? get(metadata, "scale", nothing) : nothing + offset = scaled ? 
get(metadata, "offset", nothing) : nothing + return scale, offset +end + +function _writer_mod(::Type{T}; missingval, maskingval, scale, offset, coerce) where T + missingval1 = if isnokw(missingval) || isnothing(missingval) + if isnokw(maskingval) || isnothing(maskingval) + nothing + else + _type_missingval(T) + end + elseif ismissing(missingval) + _type_missingval(T) + else + missingval + end + maskingval1 = if isnokw(maskingval) + if Missing <: T + missing + else + nothing + end + else + maskingval + end + return _mod(missingval1, maskingval1, scale, offset, coerce) +end + +_mod_eltype(::AbstractArray{T}, ::NoMod) where T = T +_mod_eltype(::AbstractArray{T}, m::Mod) where T = + Base.promote_op(_applymod, T, typeof(m)) + +_mod_inverse_eltype(::AbstractArray{T}, ::NoMod) where T = T +_mod_inverse_eltype(::AbstractArray{T}, m::Mod) where T = + Base.promote_op(_invertmod, typeof(m.coerce), T, typeof(m)) + +_maybe_modify(var, m::Mod) = ModifiedDiskArray(var, m) +_maybe_modify(var, ::NoMod) = var diff --git a/src/show.jl b/src/show.jl index 8fbb8f83d..670f5566d 100644 --- a/src/show.jl +++ b/src/show.jl @@ -36,7 +36,7 @@ function print_geo(io, mime, A; blockwidth) fn = filename(A) if !(fn == "") printstyled(io, "\n filename: "; color=:light_black) - print(io, ) + print(io, fn) end end println(io) diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index 30cbcb3d7..2170a2f20 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -42,14 +42,14 @@ function FileStack{source}(ds::AbstractDataset, filename::AbstractString; group=nokw, name::NTuple{N,Symbol}, mods, - vars + vars, ) where {source<:CDMsource,N} T = NamedTuple{name,Tuple{map(_mod_eltype, vars, mods)...}} layersizes = map(size, vars) - eachchunk = map(_get_eachchunk, vars) - haschunks = map(_get_haschunks, vars) + eachchunk = map(DiskArrays.eachchunk, vars) + haschunks = map(DiskArrays.haschunks, vars) group = isnokw(group) ? 
nothing : group - return FileStack{source,name,T}(filename, layersizes, group, eachchunk, haschunks, cdf, write) + return FileStack{source,name,T}(filename, layersizes, group, eachchunk, haschunks, mods, write) end function _open(f, ::CDMsource, ds::AbstractDataset; @@ -67,6 +67,7 @@ _getgroup(ds, ::Union{Nothing,NoKW}) = ds _getgroup(ds, group::Union{Symbol,AbstractString}) = ds.group[String(group)] _getgroup(ds, group::Pair) = _getgroup(ds.group[String(group[1])], group[2]) +filekey(ds::AbstractDataset, name::Union{String,Symbol}) = Symbol(name) filekey(ds::AbstractDataset, name) = _firstname(ds, name) missingval(var::AbstractDataset) = missing missingval(var::AbstractVariable{T}) where T = missing isa T ? missing : nothing @@ -98,7 +99,7 @@ end function _layers(ds::AbstractDataset, ::NoKW=nokw, ::NoKW=nokw) nondim = _nondimnames(ds) grid_mapping = String[] - vars = map(k -> ds[k], nondim) + vars = map(k -> CDM.variable(ds, k), nondim) attrs = map(CDM.attribs, vars) for attr in attrs if haskey(attr, "grid_mapping") @@ -113,7 +114,7 @@ function _layers(ds::AbstractDataset, ::NoKW=nokw, ::NoKW=nokw) ) end function _layers(ds::AbstractDataset, names, ::NoKW) - vars = map(k -> ds[k], names) + vars = map(k -> CDM.variable(ds, k), names) attrs = map(CDM.attribs, vars) (; names, vars, attrs) end @@ -168,10 +169,10 @@ _fix_missingval(::CDM.AbstractVariable, ::Nothing, metadata) = get(metadata, "_F # TODO don't load all keys here with _layers _firstname(ds::AbstractDataset, name) = Symbol(name) -function _firstname(ds::AbstractDataset, name::NoKW=nokw) +function _firstname(ds::AbstractDataset, name::Union{Nothing,NoKW}=nokw) names = _nondimnames(ds) if length(names) > 0 - Symbol(first(names)) + return Symbol(first(names)) else throw(ArgumentError("No non-dimension layers found in dataset with keys: $(keys(ds))")) end diff --git a/src/stack.jl b/src/stack.jl index 1d5d7d304..8ba37437e 100644 --- a/src/stack.jl +++ b/src/stack.jl @@ -66,15 +66,24 @@ function 
DD.layers(s::AbstractRasterStack{<:Any,<:Any,<:Any,<:OpenStack{<:Any,Ke end function DD.rebuild( - s::AbstractRasterStack, data, dims=dims(s), refdims=refdims(s), - layerdims=DD.layerdims(s), metadata=metadata(s), layermetadata=DD.layermetadata(s), + s::AbstractRasterStack, + data, + dims=dims(s), + refdims=refdims(s), + layerdims=DD.layerdims(s), + metadata=metadata(s), + layermetadata=DD.layermetadata(s), missingval=missingval(s), ) DD.basetypeof(s)(data, dims, refdims, layerdims, metadata, layermetadata, missingval) end function DD.rebuild(s::AbstractRasterStack; - data=parent(s), dims=dims(s), refdims=refdims(s), layerdims=DD.layerdims(s), - metadata=metadata(s), layermetadata=DD.layermetadata(s), + data=parent(s), + dims=dims(s), + refdims=refdims(s), + layerdims=DD.layerdims(s), + metadata=metadata(s), + layermetadata=DD.layermetadata(s), missingval=missingval(s), ) DD.basetypeof(s)( @@ -152,14 +161,14 @@ Load a file path or a `NamedTuple` of paths as a `RasterStack`, or convert argum - `name`: Used as stack layer names when a `Tuple`, `Vector` or splat of `Raster` is passed in. Has no effect when `NameTuple` is used - the `NamedTuple` keys are the layer names. -$GROUP_KEYWORD +$GROUP_KEYWORD - `metadata`: A `Dict` or `DimensionalData.Metadata` object. - `missingval`: a single value for all layers or a `NamedTuple` of missingval for each layer. `nothing` specifies no missing value. $MASKINGVAL_KEYWORD $SCALED_KEYWORD -$CONSTRUCTOR_CRS_KEYWORD -$CONSTRUCTOR_MAPPEDCRS_KEYWORD +$CONSTRUCTOR_CRS_KEYWORD +$CONSTRUCTOR_MAPPEDCRS_KEYWORD - `refdims`: `Tuple` of `Dimension` that the stack was sliced from. For when one or multiple filepaths are used: @@ -211,16 +220,22 @@ function RasterStack( missingval=nokw, kw... 
) + K = keys(data) # Handle values that musbe be `NamedTuple` layermetadata = if layermetadata isa NamedTuple layermetadata - elseif layermetadata isa Union{Nothing,NoMetadata} - map(_ -> NoMetadata(), layers) + elseif layermetadata isa Union{Nothing,NoKW,NoMetadata} + NamedTuple{K}(map(_ -> NoMetadata(), K)) else throw(ArgumentError("$layermetadata is not a valid input for `layermetadata`. Try a `NamedTuple` of `Dict`, `MetaData` or `NoMetadata`")) end metadata = isnokw(metadata) ? NoMetadata() : metadata missingval = _maybe_collapse_missingval(missingval) + layerdims = if layerdims isa NamedTuple + layerdims + else + NamedTuple{K}(ntuple(i -> layerdims[i], Val{length(K)}())) + end st = RasterStack( data, dims, refdims, layerdims, metadata, layermetadata, missingval ) @@ -228,20 +243,20 @@ function RasterStack( end # Convert Tuple/Array of array to NamedTuples using name/key function RasterStack(data::Tuple{Vararg{<:AbstractArray}}, dims::Tuple; - name::Union{Tuple,AbstractArray,NamedTuple,Nothing}=nokw, + name::Union{Tuple,AbstractArray,NamedTuple,Nothing}=nokw, kw... ) isnokw(name) && throw(ArgumentError("Pass a Tuple, Array or NamedTuple of names to the `name` keyword")) return RasterStack(NamedTuple{cleankeys(name)}(data), dims; kw...) end # Multi Raster stack from NamedTuple of AbstractArray -function RasterStack(data::NamedTuple{<:Any,<:Tuple{Vararg{<:AbstractArray}}}, dims::Tuple; +function RasterStack(data::NamedTuple{<:Any,<:Tuple{Vararg{<:AbstractArray}}}, dims::Tuple; layerdims=nokw, kw... ) if isnokw(layerdims) # TODO: make this more sophisticated and match dimension length to axes? 
- # We don't worry about Raster keywords because these rasters will be deconstructed + # We don't worry about Raster keywords because these rasters will be deconstructed # again later, and `kw` will define the RasterStack keywords layers = map(data) do A Raster(A, dims[1:ndims(A)]) @@ -358,13 +373,13 @@ function RasterStack(filenames::NamedTuple{K,<:Tuple{<:AbstractString,Vararg}}; kw... ) where K missingval1 = if missingval isa NamedTuple - keys(missingval) == K || throw(ArgumentError("missingval keys $(keys(missingval)) do not match filename keywords $K")) + keys(missingval) == K || throw(ArgumentError("missingval keys $(keys(missingval)) do not match filename keywords $K")) collect(missingval) else missingval end maskingval1 = if maskingval isa NamedTuple - keys(maskingval) == K || throw(ArgumentError("maskingval keys $(keys(maskingval)) do not match filename keywords $K")) + keys(maskingval) == K || throw(ArgumentError("maskingval keys $(keys(maskingval)) do not match filename keywords $K")) collect(maskingval) else maskingval @@ -403,14 +418,16 @@ function RasterStack(filename::AbstractString; else name end - RasterStack(joinpath.(Ref(filename), filenames); + RasterStack(joinpath.(Ref(filename), filenames); missingval, maskingval, scaled, coerce, lazy, dropband, group, kw... ) else # Load as a single file if haslayers(source) # With multiple named layers - l_st = _layer_stack(filename; source, name, lazy, group, kw...) + l_st = _layer_stack(filename; + source, name, lazy, group, missingval, maskingval, scaled, coerce, kw... + ) # Maybe split the stack into separate arrays to remove extra dims. if !isnokw(name) map(identity, l_st) @@ -419,7 +436,9 @@ function RasterStack(filename::AbstractString; end else # With bands actings as layers - raster = Raster(filename; source, lazy, scaled, coerce, missingval, maskingval, dropband=false) + raster = Raster(filename; + source, lazy, scaled, coerce, missingval, maskingval, dropband=false + ) RasterStack(raster; kw...) 
end end @@ -505,6 +524,7 @@ function _layer_stack(filename; lazy=false, kw... ) + @show maskingval _maybewarn_replace_missing(replace_missing) data, field_kw = _open(filename; source) do ds layers = _layers(ds, name, group) @@ -514,42 +534,52 @@ function _layer_stack(filename; metadata = isnokw(metadata) ? _metadata(ds) : metadata layerdims = isnokw(layerdims) ? _layerdims(ds; layers, dimdict) : layerdims dims = _sort_by_layerdims(isnokw(dims) ? _dims(ds, dimdict) : dims, layerdims) - layermetadata1 = if isnokw(layermetadata) - _layermetadata(ds; layers) + layermetadata1 = if isnokw(layermetadata) + _layermetadata(ds; layers) else layermetadata isa NamedTuple ? collect(layermetadata) : map(_ -> NoKW(), fn) end missingval1 = if missingval isa NamedTuple collect(missingval) - elseif isnokw(missingval) - Rasters.missingval(ds) else missingval end - maskingval1 = maskingval isa NamedTuple ? collect(maskingval) : maskingval - mods = _stack_mods(layermetadata1, missingval1, maskingval1; scaled, coerce) + mods = _stack_mods(layermetadata1, missingval1, maskingval; scaled, coerce) name = Tuple(map(Symbol, layers.names)) - _return_lifted(NamedTuple{name}, dims, refdims, layerdims, metadata, layermetadata1, missingval, lazy, layers, mods, checkmem) + NT = NamedTuple{name} + data = if lazy + vars = ntuple(i -> layers.vars[i], length(name)) + mods = ntuple(i -> mods[i], length(name)) + FileStack{typeof(source)}(ds, filename; name, group, mods, vars) + else + map(layers.vars, layermetadata1, mods) do var, md, mod + modvar = _maybe_modify(var, mod) + checkmem && _checkobjmem(modvar) + x = Array(modvar) + x isa AbstractArray ? 
x : fill(x) # Catch an NCDatasets bug + end |> NT + end + missingval = map(mods) do mod + if isnothing(Rasters.missingval(mod)) + nothing + elseif isnothing(Rasters.maskingval(mod)) + Rasters.missingval(mod) + @show mod Rasters.missingval(mod) + else + Rasters.maskingval(mod) + end + end |> NT + @show missingval + return data, (; dims, refdims, layerdims, metadata, layermetadata=NT(layermetadata1), missingval) end return RasterStack(data; field_kw..., kw...) end -function _return_lifted( - ::Type{NT}, dims, refdims, layerdims, metadata, layermetadata, missingval, lazy, layers, mods, checkmem -) where NT<:NamedTuple{K} where K - data = if lazy - vars = ntuple(layers.vars[i], Val{K}()) - FileStack{typeof(source)}(ds, filename; name, group, mods, vars) - else - map(layers.vars, layermetadata, mods) do var, md, mod - modvar = _maybe_modify(var, mod) - checkmem && _checkobjmem(modvar) - x = Array(modvar) - x isa AbstractArray ? x : fill(x) # Catch an NCDatasets bug - end |> NT - end - return data, (; dims, refdims, layerdims=NT(layerdims), metadata, layermetadata=NT(layermetadata), missingval) -end + # _return_lifted(NamedTuple{name}, source, dims, refdims, layerdims, metadata, layermetadata1, missingval, lazy, layers, mods, checkmem, group) +# function _return_lifted( +# ::Type{NT}, source, dims, refdims, layerdims, metadata, layermetadata, missingval, lazy, layers, mods, checkmem, group +# ) where NT<:NamedTuple{K} where K +# end # Try to sort the dimensions by layer dimension into a sensible # order that applies without permutation, preferencing the layers diff --git a/src/utils.jl b/src/utils.jl index 664a09a6e..260bf6b49 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -35,7 +35,7 @@ nolookup_to_sampled(d::Dimension) = # end # Create a standardised Metadata object of source T, containing a `Dict{String,Any}` -_metadatadict(s::Source, p1::Pair, pairs::Pair...) = +_metadatadict(s::Source, p1::Pair, pairs::Pair...) 
= _metadatadict(s, (p1, pairs...)) _metadatadict(::S) where S<:Source = Metadata{S}(Dict{String,Any}()) function _metadatadict(::S, pairs) where S<:Source @@ -91,12 +91,12 @@ _writeable_missing(filename::Nothing, T; kw...) = missing _writeable_missing(filename::AbstractString, T; kw...) = _writeable_missing(T; kw...) function _writeable_missing(::Type{Missing}; verbose=true) missingval = _type_missingval(UInt8) - verbose && @info "`missingval` set to $missingval" + verbose && @info "`missingval` set to $missingval on disk" return missingval end function _writeable_missing(T; verbose=true) missingval = _type_missingval(Missings.nonmissingtype(T)) - verbose && @info "`missingval` set to $missingval" + verbose && @info "`missingval` set to $missingval on disk" return missingval end @@ -135,64 +135,80 @@ function _without_mapped_crs(f, st::AbstractRasterStack, mappedcrs::GeoFormat) return x end -function _extent2dims(to; size=nothing, res=nothing, crs=nothing, kw...) - _extent2dims(to, size, res, crs; kw...) +function _extent2dims(to::Extents.Extent; + size=nothing, res=nothing, crs=nothing, + sampling=Intervals(Start()), +) + _extent2dims(to, size, res; crs, sampling=_match_to_extent(to, sampling)) end -function _extent2dims(to::Extents.Extent, size::Nothing, res::Nothing, crs; kw...) +function _extent2dims(to::Extents.Extent, size::Union{Nothing,NoKW}, res::Union{Nothing,NoKW}; kw...) isnothing(res) && throw(ArgumentError("Pass either `size` or `res` keywords or a `Tuple` of `Dimension`s for `to`.")) end -function _extent2dims(to::Extents.Extent, size, res, crs; kw...) +function _extent2dims(to::Extents.Extent, size, res; kw...) isnothing(res) || _size_and_res_error() end -function _extent2dims(to::Extents.Extent{K}, size::Nothing, res::Real, crs; kw...) where K - tuple_res = ntuple(_ -> res, length(K)) - _extent2dims(to, size, tuple_res, crs; kw...) +function _extent2dims(to::Extents.Extent, size::Union{Nothing,NoKW}, res; kw...) 
+ _extent2dims(to, size, _match_to_extent(to, res); kw...) end -function _extent2dims(to::Extents.Extent{K}, size::Nothing, res, crs) where K - ranges = map(values(to), res) do bounds, r - start, outer = bounds - length = ceil(Int, (outer - start) / r) - step = (outer - start) / length - range(; start, step, length) +function _extent2dims(to::Extents.Extent, size::Union{Nothing,NoKW}, res::Tuple; sampling, kw...) + ranges = map(values(to), res, sampling) do (start, stop), step, s + if s isa Points + range(; start, step, stop) + else + r = range(; start, step, stop) + if locus(s) isa Start + r[1:end-1] + elseif locus(s) isa End + r[2:end] + else # Center + r .+ step / 2 + end + end end - return _extent2dims(to, ranges, crs; kw...) + return _extent2dims(to, ranges; sampling, kw...) end -function _extent2dims(to::Extents.Extent{K}, size, res::Nothing, crs; kw...) where K - if size isa Int - size = ntuple(_ -> size, length(K)) - end - ranges = map(values(to), size) do bounds, length - start, outer = bounds - step = (outer - start) / length - range(; start, step, length) +function _extent2dims(to::Extents.Extent, size, res::Union{Nothing,NoKW}; kw...) + _extent2dims(to, _match_to_extent(to, size), res; kw...) +end +function _extent2dims(to::Extents.Extent, size::Tuple, res::Union{Nothing,NoKW}; + sampling, kw... +) + ranges = map(values(to), size, sampling) do (start, stop), length, s + if s isa Points + range(; start, stop, length) + else + range(; start, stop, length=length+1)[1:end-1] + end end - return _extent2dims(to, ranges, crs) + return _extent2dims(to, ranges; sampling, kw...) end -function _extent2dims(to::Extents.Extent{K}, ranges, crs; - sampling=Intervals(Start()), - kw... 
-) where K +function _extent2dims(::Extents.Extent{K}, ranges; crs, sampling) where K emptydims = map(name2dim, K) - lookups = map(emptydims, ranges) do d, range + order = ForwardOrdered() + lookups = map(emptydims, ranges, sampling) do d, range, s + span = Regular(step(range)) if d isa SpatialDim && !isnothing(crs) - Projected(range; - sampling, - order=ForwardOrdered(), - span=Regular(step(range)), - crs, - ) + Projected(range; sampling=s, order, span, crs) else - Sampled(range; - sampling, - order=ForwardOrdered(), - span=Regular(step(range)), - ) + Sampled(range; sampling=s, order, span) end end d = map(rebuild, emptydims, lookups) return d end +function _match_to_extent(::Extents.Extent{K}, x) where K + if x isa DimTuple + map(val, dims(x, map(name2dim, K))) + elseif x isa NamedTuple + values(x[K]) + elseif x isa Tuple + x + else + map(_ -> x, K) + end +end + function _as_intervals(ds::Tuple) # Rasterization only makes sense on Sampled Intervals interval_dims = map(dims(ds, DEFAULT_POINT_ORDER)) do d @@ -241,12 +257,12 @@ function _get_geometries(data, geometrycolumn::NTuple{<:Any, <:Symbol}) ismissing(r) && return missing end return row - end + end return points end function _check_geometries(geoms) for g in geoms - ismissing(g) || GI.geomtrait(g) !== nothing || + ismissing(g) || GI.geomtrait(g) !== nothing || throw(ArgumentError("$g is not a valid GeoInterface.jl geometry")) end return @@ -267,7 +283,7 @@ _size_and_res_error() = throw(ArgumentError("Both `size` and `res` keywords are _no_crs_error() = throw(ArgumentError("The provided object does not have a CRS. 
Use `setcrs` to set one.")) _type_missingval(::Type{T}) where T = typemin(T) -_type_missingval(::Type{T}) where T<:Unsigned = typemax(T) +_type_missingval(::Type{T}) where T<:Unsigned = typemax(T) # Modified from IsURL.jl, many thanks to @zlatanvasovic const WINDOWSREGEX = r"^[a-zA-Z]:[\\]" @@ -276,7 +292,7 @@ const URLREGEX = r"^[a-zA-Z][a-zA-Z\d+\-.]*:" _isurl(str::AbstractString) = !occursin(WINDOWSREGEX, str) && occursin(URLREGEX, str) # Run `f` threaded or not, w -function _run(f, range::OrdinalRange, threaded::Bool, progress::Bool, desc::String) +function _run(f, range::OrdinalRange, threaded::Bool, progress::Bool, desc::String) p = progress ? _progress(length(range); desc) : nothing if threaded Threads.@threads :static for i in range @@ -306,8 +322,8 @@ end end end @inline function _chunks_to_tuple(template, dimorder, chunks::NTuple{N,Integer}) where N - n = length(dimorder) - if n < N + n = length(dimorder) + if n < N throw(ArgumentError("Length $n tuple needed for `chunks`, got $N")) elseif n > N (chunks..., ntuple(_ -> 1, Val{n-N}())...) @@ -338,7 +354,7 @@ function _checkregular(A::AbstractArray) step = stepof(A) for i in eachindex(A)[2:end] if !(A[i] - A[i-1] ≈ step) - return false + return false end end return true @@ -356,12 +372,12 @@ function _maybe_add_suffix(filename, suffix) end end -function _checkobjmem(obj) +function _checkobjmem(obj) f = bytes -> """ - required memory $(bytes) is greater than system memory $(Sys.free_memory()). + required memory $(bytes) is greater than system memory $(Sys.free_memory()). Use `lazy=true` if you are loading dataset, and only call `read` on a subset after `view`. 
""" - _checkobjmem(f, obj) + _checkobjmem(f, obj) end _checkobjmem(f, obj) = _checkmem(f, _sizeof(obj)) @@ -374,7 +390,7 @@ _sizeof(s::AbstractRasterSeries) = function _no_memory_error(f, bytes) msg = f(bytes) * """ - If you beleive this is not correct, pass the keyword `checkmem=false` or set `Rasters.checkmem!(false)` + If you beleive this is not correct, pass the keyword `checkmem=false` or set `Rasters.checkmem!(false)` and try again. These options may crash your system if the file is actually larger than memory. """ return error(msg) @@ -384,3 +400,4 @@ _maybewarn_replace_missing(replace_missing::NoKW) = nothing function _maybewarn_replace_missing(replace_missing) @warn "`replace_missing` keyword no longer used. Set `maskingval` to nothing for no replacement, to `missing` to mask `missingval` with `missing`, or any other value" end + diff --git a/test/sources/gdal.jl b/test/sources/gdal.jl index 8feb17db3..1cb86f436 100644 --- a/test/sources/gdal.jl +++ b/test/sources/gdal.jl @@ -82,13 +82,14 @@ gdalpath = maybedownload(url) @testset "create" begin created = Rasters.create("created.tif", Int16, (X(1:10), Y(1:10)); - missingval=255, maskingval=missing, scale=0.1, offset=5.0, force=true, cooerce=trunc + missingval=255, maskingval=missing, scale=0.1, offset=5.0, force=true, coerce=trunc ) open(created; write=true) do O O .= 2.0 end read(created) - Raster("created.tif"; scale=nothing, offset=nothing) .* 1 + @test all(Raster("created.tif") .== 2.0) + @test all(Raster("created.tif"; scaled=false) .=== -30) created = Rasters.create("created.tif", UInt8, (X(1:10), Y(1:10)); missingval=255, maskingval=UInt8(0), force=true ) From c8a38288d69895586d2af8243a067b8f2f676550 Mon Sep 17 00:00:00 2001 From: rafaqz Date: Fri, 2 Aug 2024 09:13:26 +0200 Subject: [PATCH 08/38] tweaks --- ext/RastersArchGDALExt/RastersArchGDALExt.jl | 3 +- ext/RastersArchGDALExt/gdal_source.jl | 12 +- ext/RastersArchGDALExt/warp.jl | 25 +- src/array.jl | 31 +- src/create.jl | 21 +- 
src/extensions.jl | 4 +- src/methods/burning/array_init.jl | 24 +- src/methods/rasterize.jl | 5 +- src/modifieddiskarray.jl | 44 +-- src/stack.jl | 1 - src/utils.jl | 304 ++++++++++--------- test/resample.jl | 61 ++-- test/warp.jl | 24 +- 13 files changed, 304 insertions(+), 255 deletions(-) diff --git a/ext/RastersArchGDALExt/RastersArchGDALExt.jl b/ext/RastersArchGDALExt/RastersArchGDALExt.jl index bee5e7576..b4e250e96 100644 --- a/ext/RastersArchGDALExt/RastersArchGDALExt.jl +++ b/ext/RastersArchGDALExt/RastersArchGDALExt.jl @@ -16,7 +16,8 @@ using DimensionalData, using Rasters.Lookups using Rasters.Dimensions -using Rasters: GDALsource, AbstractProjected, RasterStackOrArray, FileArray, NoKW, +using Rasters: GDALsource, AbstractProjected, AbstractRaster, AbstractRasterStack, + RasterStackOrArray, FileArray, NoKW, RES_KEYWORD, SIZE_KEYWORD, CRS_KEYWORD, FILENAME_KEYWORD, SUFFIX_KEYWORD, EXPERIMENTAL, GDAL_EMPTY_TRANSFORM, GDAL_TOPLEFT_X, GDAL_WE_RES, GDAL_ROT1, GDAL_TOPLEFT_Y, GDAL_ROT2, GDAL_NS_RES, _no_crs_error diff --git a/ext/RastersArchGDALExt/gdal_source.jl b/ext/RastersArchGDALExt/gdal_source.jl index 1534ed0dd..1ae397bf5 100644 --- a/ext/RastersArchGDALExt/gdal_source.jl +++ b/ext/RastersArchGDALExt/gdal_source.jl @@ -240,12 +240,12 @@ function RA.Raster(ds::AG.RasterDataset; scaled=true, coerce=convert, ) - kw = (; refdims, name, metadata, missingval) filelist = AG.filelist(ds) - mod = RA._mod(metadata, missingval, maskingval; scaled, coerce) + mod = RA._mod(eltype(ds), metadata, missingval, maskingval; scaled, coerce) + kw = (; refdims, name, metadata, missingval=Rasters.maskingval(mod)) raster = if lazy && length(filelist) > 0 filename = first(filelist) - Raster(FileArray{GDALsource}(ds, filename; mod), dims, kw...) + Raster(FileArray{GDALsource}(ds, filename; mod), dims; kw...) else Raster(Array(RA._maybe_modify(ds, mod)), dims; kw...) 
end @@ -295,16 +295,16 @@ function AG.RasterDataset(f::Function, A::AbstractRaster; coerce=nokw, verbose=false, eltype=Missings.nonmissingtype(eltype(A)), - missingval=nokw, - maskingval=nokw, + missingval=Rasters.missingval(A), + maskingval=Rasters.missingval(A), kw... ) A1 = _maybe_correct_to_write(A) - mod = _writer_mod(A, missingval, maskingval) return _create_with_driver(filename, dims(A1), eltype; _block_template=A1, missingval, scale, offset, verbose, kw... ) do dataset rds = AG.RasterDataset(dataset) + mod = RA._writer_mod(eltype; missingval=RA.missingval(rds), maskingval, scale, offset, coerce) open(A1) do O RA._maybe_modify(rds, mod) .= parent(O) end diff --git a/ext/RastersArchGDALExt/warp.jl b/ext/RastersArchGDALExt/warp.jl index db2b5beab..0e555cbab 100644 --- a/ext/RastersArchGDALExt/warp.jl +++ b/ext/RastersArchGDALExt/warp.jl @@ -14,18 +14,37 @@ function warp(st::AbstractRasterStack, flags::Dict; filename=nothing, suffix=key RA.mapargs((A, s) -> warp(A, flags; filename, suffix=s), st, suffix; kw...) end -function _warp(A::AbstractRaster, flags::Dict; filename=nothing, suffix="", kw...) +function _warp(A::AbstractRaster, flags::Dict; + filename=nothing, + suffix="", + missingval=nokw, + maskingval=Rasters.missingval(A), + name=Rasters.name(A), + kw... +) A1 = _set_gdalwarp_sampling(A) filename = RA._maybe_add_suffix(filename, suffix) flagvect = reduce([flags...]; init=String[]) do acc, (key, val) append!(acc, String[_asflag(key), _stringvect(val)...]) end + # TODO: detect if `A` already holds a lazy GDAL FileArray. + # If it does, we can just open it and use it directly. tempfile = isnothing(filename) ? nothing : tempname() * ".tif" warp_kw = isnothing(filename) || filename == "/vsimem/tmp" ? () : (; dest=filename) - out = AG.Dataset(A1; filename=tempfile, kw...) 
do dataset + # We really need a missingval for `warp`, as it may rotate and add missing value + missingval = if RA.isnokw(missingval) + if RA.missingval(A) isa Union{Missing,Nothing} + RA._type_missingval(Missings.nonmissingtype(eltype(A))) + else + RA.missingval(A) + end + else + missingval + end + out = AG.Dataset(A1; filename=tempfile, missingval, kw...) do dataset AG.gdalwarp([dataset], flagvect; warp_kw...) do warped # Read the raster lazily, dropping Band if there is none in `A` - raster = Raster(warped; lazy=true, dropband=!hasdim(A, Band()), name=name(A)) + raster = Raster(warped; lazy=true, dropband=!hasdim(A, Band()), name, maskingval) # Either read the MEM dataset to an Array, or keep a filename base raster lazy return isnothing(filename) ? read(raster) : raster end diff --git a/src/array.jl b/src/array.jl index 10e8a5672..04132a4a5 100644 --- a/src/array.jl +++ b/src/array.jl @@ -82,7 +82,7 @@ function DD.rebuild( A::AbstractRaster, data, dims::Tuple, refdims, name, metadata, missingval=missingval(A) ) - missingval1 = _fix_missingval(eltype(data), missingval, NoMetadata()) + missingval1 = _fix_missingval(eltype(data), missingval) Raster(data, dims, refdims, name, metadata, missingval1) end function DD.rebuild(A::AbstractRaster; @@ -240,7 +240,7 @@ struct Raster{T,N,D<:Tuple,R<:Tuple,A<:AbstractArray{T,N},Na,Me,Mi<:Union{T,Noth data::A, dims::D, refdims::R, name::Na, metadata::Me, missingval::Mi ) where {D<:Tuple,R<:Tuple,A<:AbstractArray{T,N},Na,Me,Mi} where {T,N} DD.checkdims(data, dims) - missingval1 = _fix_missingval(T, missingval, metadata) + missingval1 = _fix_missingval(T, missingval) new{T,N,D,R,A,Na,Me,typeof(missingval1)}(data, dims, refdims, name, metadata, missingval1) end end @@ -323,11 +323,11 @@ function Raster(ds, filename::AbstractString; source = _sourcetrait(filename, source) data1, dims1, metadata1, missingval2 = _open(source, ds; name=name1, group, mod=NoMod()) do var metadata1 = isnokw(metadata) ? 
_metadata(var) : metadata - missingval1 = _fix_missingval(var, missingval, metadata1) - maskingval1 = isnokw(maskingval) ? missing : maskingval + missingval1 = _fix_missingval(var, missingval) + maskingval1 = isnokw(maskingval) && !isnothing(missingval1) ? missing : maskingval # If maskingval is `nothing` use missingval as missingval missingval2 = isnothing(maskingval1) ? missingval1 : maskingval1 - mod = isnokw(mod) ? _mod(metadata1, missingval1, maskingval1; scaled, coerce) : mod + mod = isnokw(mod) ? _mod(eltype(var), metadata1, missingval1, maskingval1; scaled, coerce) : mod data = if lazy FileArray{typeof(source)}(var, filename; name=name1, group, mod, write @@ -346,32 +346,11 @@ function Raster(ds, filename::AbstractString; return dropband ? _drop_single_band(raster, lazy) : raster end -_fix_missingval(::Type, ::Union{NoKW,Nothing}, metadata) = nothing -_fix_missingval(::AbstractArray, ::Nothing, metadata) = nothing -_fix_missingval(A::AbstractArray, ::NoKW, metadata) = _fix_missingval(A, Rasters.missingval(A), metadata) -_fix_missingval(::AbstractArray{T}, missingval, metadata) where T = _fix_missingval(T, missingval, metadata) -function _fix_missingval(::Type{T}, missingval::M, metadata) where {T,M} - T1 = nonmissingtype(T) - if missingval isa T - missingval - elseif hasmethod(convert, Tuple{Type{T1},M}) && isreal(missingval) && - missingval <= typemax(T1) && missingval >= typemin(T1) - if T1 <: Integer && !isinteger(missingval) - nothing - else - convert(T, missingval) - end - else - nothing - end -end - filekey(ds, name) = name filekey(filename::String) = Symbol(splitext(basename(filename))[1]) DD.dimconstructor(::Tuple{<:Dimension{<:AbstractProjected},Vararg{<:Dimension}}) = Raster - function _drop_single_band(raster, lazy::Bool) if hasdim(raster, Band()) && size(raster, Band()) < 2 if lazy diff --git a/src/create.jl b/src/create.jl index ea47839ee..0449457d8 100644 --- a/src/create.jl +++ b/src/create.jl @@ -221,8 +221,9 @@ function 
create(filename::AbstractString, source::Source, layertypes::NamedTuple name=keys(layertypes), missingval=nokw, maskingval=missing, - fillval=nokw, metadata=nokw, + layerdims=nokw, + layermetadata=nokw, chunks=nokw, scale=nokw, offset=nokw, @@ -245,12 +246,16 @@ function create(filename::AbstractString, source::Source, layertypes::NamedTuple end FillArrays.Zeros{eltype}(size) end - layerdims = map(layertypes) do x - if x isa Type - DD.basedims(dims) - else - ds = DD._astuple(DD.basedims(x[2])) + layerdims = if isnokw(layerdims) + map(layertypes) do x + if x isa Type + DD.basedims(dims) + else + ds = DD._astuple(DD.basedims(x[2])) + end end + else + layerdims end # if isnokw(fillval) || isnothing(fillval) # write = false # Leave fill undefined @@ -261,9 +266,9 @@ function create(filename::AbstractString, source::Source, layertypes::NamedTuple # A = FillArrays.Fill{eltype}(fillval, map(length, dims)) # end # Create layers of zero arrays - stack = RasterStack(layers, dims; layerdims, missingval) + stack = RasterStack(layers, dims; layerdims, layermetadata, missingval) fn = Rasters.write(filename, stack; chunks, metadata, scale, offset, missingval, maskingval, verbose, force, coerce, write=false ) - return RasterStack(fn; source, lazy, metadata, maskingval, dropband, coerce) + return RasterStack(fn; source, lazy, metadata, layerdims, maskingval, dropband, coerce) end diff --git a/src/extensions.jl b/src/extensions.jl index 417eb6b13..07ce6ac9a 100644 --- a/src/extensions.jl +++ b/src/extensions.jl @@ -116,7 +116,9 @@ Run `using ArchGDAL` to make this method available. $FILENAME_KEYWORD $SUFFIX_KEYWORD - +- `missingval`: the missing value to use during warping, will default to + `Rasters.missingval(A). +- `maskingval`: the missing value to mask with after warping Any additional keywords are passed to `ArchGDAL.Dataset`. 
## Example diff --git a/src/methods/burning/array_init.jl b/src/methods/burning/array_init.jl index 10ddd5937..9b6150f67 100644 --- a/src/methods/burning/array_init.jl +++ b/src/methods/burning/array_init.jl @@ -7,18 +7,28 @@ _init_bools(to, T::Type; kw...) = _init_bools(to, T, nothing; kw...) _init_bools(to::AbstractRasterSeries, T::Type, data; kw...) = _init_bools(first(to), T, data; kw...) _init_bools(to::AbstractRasterStack, T::Type, data; kw...) = _init_bools(first(to), T, data; kw...) _init_bools(to::AbstractRaster, T::Type, data; kw...) = _init_bools(to, dims(to), T, data; kw...) -_init_bools(to::Extents.Extent, T::Type, data; kw...) = _init_bools(to, _extent2dims(to; kw...), T, data; kw...) _init_bools(to::DimTuple, T::Type, data; kw...) = _init_bools(to, to, T, data; kw...) -function _init_bools(to::Nothing, T::Type, data; geometrycolumn=nothing,kw...) +function _init_bools(to::Nothing, T::Type, data; + geometrycolumn=nothing, + collapse=nokw, + res=nokw, + size=nokw, + kw... +) # Get the extent of the geometries - ext = _extent(data; geometrycolumn, kw...) + ext = _extent(data; geometrycolumn) isnothing(ext) && throw(ArgumentError("no recognised dimensions, extent or geometry")) + return _init_bools(ext, T, data; collapse, res, size) +end +function _init_bools(to::Extents.Extent, T::Type, data; + collapse=nokw, size=nokw, res=nokw, sampling=nokw, kw... +) # Convert the extent to dims (there must be `res` or `size` in `kw`) - dims = _extent2dims(ext; kw...) - return _init_bools(to, dims, T, data; kw...) + ext = _extent2dims(to; size, res, sampling, kw...) + _init_bools(to, ext, T, data; kw...) end -function _init_bools(to, dims::DimTuple, T::Type, data; collapse::Union{Bool,Nothing}=nothing, kw...) - if isnothing(data) || isnothing(collapse) || collapse +function _init_bools(to, dims::DimTuple, T::Type, data; collapse::Union{Bool,Nothing,NoKW}=nokw, kw...) 
+ if isnothing(data) || isnothing(collapse) || isnokw(collapse) || collapse _alloc_bools(to, dims, T; kw...) else n = if Base.IteratorSize(data) isa Base.HasShape diff --git a/src/methods/rasterize.jl b/src/methods/rasterize.jl index 042920072..c391018d2 100644 --- a/src/methods/rasterize.jl +++ b/src/methods/rasterize.jl @@ -72,9 +72,10 @@ RasterCreator(to::Nothing, data; kw...) = RasterCreator(_extent(data; kw...); kw RasterCreator(to, data; kw...) = RasterCreator(_extent(to); kw...) function RasterCreator(to::Extents.Extent; res::Union{Nothing,Real,NTuple{<:Any,<:Real}}=nothing, - size::Union{Nothing,Int,NTuple{<:Any,Int}}=nothing, kw... + size::Union{Nothing,Int,NTuple{<:Any,Int}}=nothing, + kw... ) - to_as_dims = _extent2dims(to; size, res, kw...) + to_as_dims = _extent2dims(to; size, res) return RasterCreator(to_as_dims; kw...) end diff --git a/src/modifieddiskarray.jl b/src/modifieddiskarray.jl index fa6e78ead..8f14a9745 100644 --- a/src/modifieddiskarray.jl +++ b/src/modifieddiskarray.jl @@ -4,26 +4,34 @@ struct NoMod{Mi} <: AbstractModifications end NoMod() = NoMod(nothing) NoMod(::NoKW) = NoMod(nothing) -struct Mod{Mi,Ma,S,O,F} <: AbstractModifications +struct Mod{T,Mi,Ma,S,O,F} <: AbstractModifications missingval::Mi maskingval::Ma scale::S offset::O coerce::F - function Mod(missingval, maskingval, scale, offset, coerce) + function Mod(::Type{T}, missingval, maskingval, scale, offset, coerce) where T + maskingval = maskingval === missingval ? nothing : maskingval if isnokw(coerce) || isnothing(coerce) coerce = convert end vals = map(_nokw2nothing, (missingval, maskingval, scale, offset)) - new{map(typeof, vals)...,typeof(coerce)}(vals..., coerce) + T1 = _resolve_mod_eltype(T, vals...) + new{T1,map(typeof, vals)...,typeof(coerce)}(vals..., coerce) end end +function _resolve_mod_eltype(::Type{T}, missingval, maskingval, scale, offset) where T + T1 = isnothing(maskingval) ? T : promote_type(T, typeof(maskingval)) + T2 = isnothing(scale) ? 
T1 : promote_type(T1, typeof(scale)) + T3 = isnothing(offset) ? T2 : promote_type(T2, typeof(offset)) + return T3 +end + missingval(m::Mod) = m.missingval -maskingval(m::Mod) = m.maskingval +maskingval(m::Mod) = isnothing(m.maskingval) ? m.missingval : m.maskingval missingval(m::NoMod) = m.missingval -maskingval(m::NoMod) = nothing - +maskingval(m::NoMod) = missingval(m) struct ModifiedDiskArray{T,N,V,M} <: DiskArrays.AbstractDiskArray{T,N} var::V @@ -55,16 +63,11 @@ function DiskArrays.writeblock!( end Base.@assume_effects :foldable function _applymod(x, m::Mod) - tm = if isnothing(m.maskingval) - x + if _ismissing(x, missingval(m)) + maskingval(m) else - if _ismissing(x, m.missingval) - return m.maskingval - else - x - end + _scaleoffset(x, m) end - return _scaleoffset(tm, m) end _ismissing(x, mv) = isequal(x, mv) @@ -123,15 +126,15 @@ function _stack_mods(metadata::Vector, missingval, maskingval; scaled::Bool, coe end end -function _mod(metadata, missingval, maskingval; scaled::Bool, coerce) +function _mod(T, metadata, missingval, maskingval; scaled::Bool, coerce) scale, offset = _get_scale_offset(metadata, scaled) - _mod(missingval, maskingval, scale, offset, coerce) + _mod(T, missingval, maskingval, scale, offset, coerce) end -function _mod(missingval, maskingval, scale, offset, coerce) +function _mod(::Type{T}, missingval, maskingval, scale, offset, coerce) where T if isnothing(maskingval) && isnothing(scale) && isnothing(offset) return NoMod(missingval) else - return Mod(missingval, maskingval, scale, offset, coerce) + return Mod(T, missingval, maskingval, scale, offset, coerce) end end @@ -163,12 +166,11 @@ function _writer_mod(::Type{T}; missingval, maskingval, scale, offset, coerce) w else maskingval end - return _mod(missingval1, maskingval1, scale, offset, coerce) + return _mod(T, missingval1, maskingval1, scale, offset, coerce) end _mod_eltype(::AbstractArray{T}, ::NoMod) where T = T -_mod_eltype(::AbstractArray{T}, m::Mod) where T = - 
Base.promote_op(_applymod, T, typeof(m)) +_mod_eltype(::AbstractArray, m::Mod{T}) where T = T _mod_inverse_eltype(::AbstractArray{T}, ::NoMod) where T = T _mod_inverse_eltype(::AbstractArray{T}, m::Mod) where T = diff --git a/src/stack.jl b/src/stack.jl index 8ba37437e..c94522b83 100644 --- a/src/stack.jl +++ b/src/stack.jl @@ -524,7 +524,6 @@ function _layer_stack(filename; lazy=false, kw... ) - @show maskingval _maybewarn_replace_missing(replace_missing) data, field_kw = _open(filename; source) do ds layers = _layers(ds, name, group) diff --git a/src/utils.jl b/src/utils.jl index 260bf6b49..56fc3b05b 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,10 +1,30 @@ +# File paths, urls and strings + filter_ext(path, ext::AbstractString) = filter(fn -> splitext(fn)[2] == ext, readdir(path; join=true)) filter_ext(path, exts::Union{Tuple,AbstractArray}) = filter(fn -> splitext(fn)[2] in exts, readdir(path; join=true)) filter_ext(path, ext::Nothing) = readdir(path; join=true) +_maybe_add_suffix(filename::Nothing, suffix) = nothing +_maybe_add_suffix(filename::Nothing, suffix::Union{Nothing,NoKW}) = nothing +_maybe_add_suffix(filename, suffix::Union{Nothing,NoKW}) = filename +function _maybe_add_suffix(filename, suffix) + base, ext = splitext(filename) + if string(suffix) == "" + filename + else + return string(base, "_", suffix, ext) + end +end + +# Modified from IsURL.jl, many thanks to @zlatanvasovic +const WINDOWSREGEX = r"^[a-zA-Z]:[\\]" +const URLREGEX = r"^[a-zA-Z][a-zA-Z\d+\-.]*:" + +_isurl(str::AbstractString) = !occursin(WINDOWSREGEX, str) && occursin(URLREGEX, str) + cleankeys(name) = (_cleankey(name),) function cleankeys(keys::Union{NamedTuple,Tuple,AbstractArray}) Tuple(map(_cleankey, keys, ntuple(i -> i, length(keys)))) @@ -18,34 +38,6 @@ function _cleankey(name::Union{Symbol,AbstractString,Name,NoName}, i=1) end end -nolookup_to_sampled(A) = rebuild(A; dims=nolookup_to_sampled(dims(A))) -nolookup_to_sampled(dims::DimTuple) = map(nolookup_to_sampled, dims) 
-nolookup_to_sampled(d::Dimension) = - lookup(d) isa NoLookup ? set(d, Sampled(; sampling=Points())) : d - -# function _maybe_use_type_missingval(A::AbstractRaster{T}, source::Source, missingval=nokw) where T -# if ismissing(Rasters.missingval(A)) -# newmissingval = missingval isa NoKW ? _type_missingval(Missings.nonmissingtype(T)) : missingval -# A1 = replace_missing(A, newmissingval) -# @warn "`missing` cant be written with $(SOURCE2SYMBOL[source]), missinval for `$(eltype(A1))` of `$newmissingval` used instead" -# return A1 -# else -# return A -# end -# end - -# Create a standardised Metadata object of source T, containing a `Dict{String,Any}` -_metadatadict(s::Source, p1::Pair, pairs::Pair...) = - _metadatadict(s, (p1, pairs...)) -_metadatadict(::S) where S<:Source = Metadata{S}(Dict{String,Any}()) -function _metadatadict(::S, pairs) where S<:Source - dict = Dict{String,Any}() - for (k, v) in pairs - dict[String(k)] = v - end - return Metadata{S}(dict) -end - # We often need to convert the locus and the lookup in the same step, # as doing it in the wrong order can give errors. # function convert_locus_lookup(M1::Type{<:Lookup}, L1::Type{<:Locus}, dim::Dimension) @@ -74,18 +66,12 @@ end # _convert_by_lookup(::Type{Projected}, dim) = shiftlocus(Center(), convertlookup(Projected, dim)) -_unwrap(::Val{X}) where X = X -_unwrap(x) = x +# Missing values -_missingval_or_missing(x) = _maybe_nothing_to_missing(missingval(x)) +_missingval_or_missing(x) = _maybe_to_missing(missingval(x)) -_maybe_nothing_to_missing(::Nothing) = missing -_maybe_nothing_to_missing(missingval) = missingval - -maybe_eps(dims::DimTuple) = map(maybe_eps, dims) -maybe_eps(dim::Dimension) = maybe_eps(eltype(dim)) -maybe_eps(::Type) = nothing -maybe_eps(T::Type{<:AbstractFloat}) = _default_atol(T) +_maybe_to_missing(::Union{Nothing,NoKW}) = missing +_maybe_to_missing(missingval) = missingval _writeable_missing(filename::Nothing, T; kw...) 
= missing _writeable_missing(filename::AbstractString, T; kw...) = _writeable_missing(T; kw...) @@ -100,57 +86,48 @@ function _writeable_missing(T; verbose=true) return missingval end -# Map filename suffix over a stack -function mapargs(f, st::AbstractRasterStack, args...) - layers = map(values(st), args...) do A, mappedargs... - f(A, mappedargs...) - end - return DD.rebuild_from_arrays(st, Tuple(layers)) -end +_type_missingval(::Type{T}) where T = typemin(T) +_type_missingval(::Type{T}) where T<:Unsigned = typemax(T) -_without_mapped_crs(f, x) = _without_mapped_crs(f, x, mappedcrs(x)) -_without_mapped_crs(f, x, ::Nothing) = f(x) -function _without_mapped_crs(f, dims::DimTuple, mappedcrs::GeoFormat) - dims1 = setmappedcrs(dims, nothing) - x = f(dims1) - if x isa DimTuple - x = setmappedcrs(x, mappedcrs) - end - return x -end -function _without_mapped_crs(f, A::AbstractRaster, mappedcrs::GeoFormat) - A = setmappedcrs(A, nothing) - x = f(A) - if x isa AbstractRaster - x = setmappedcrs(x, mappedcrs) - end - return x -end -function _without_mapped_crs(f, st::AbstractRasterStack, mappedcrs::GeoFormat) - st1 = map(A -> setmappedcrs(A, nothing), st) - x = f(st1) - if x isa AbstractRasterStack - x = map(A -> setmappedcrs(A, mappedcrs(st)), x) +_fix_missingval(::Type, ::Union{NoKW,Nothing}) = nothing +_fix_missingval(::AbstractArray, ::Nothing) = nothing +_fix_missingval(A::AbstractArray, ::NoKW) = _fix_missingval(A, Rasters.missingval(A)) +_fix_missingval(::AbstractArray{T}, missingval) where T = _fix_missingval(T, missingval) +function _fix_missingval(::Type{T}, missingval::M) where {T,M} + T1 = nonmissingtype(T) + if missingval isa T + missingval + elseif hasmethod(convert, Tuple{Type{T1},M}) && isreal(missingval) && + missingval <= typemax(T1) && missingval >= typemin(T1) + if T1 <: Integer && !isinteger(missingval) + nothing + else + convert(T, missingval) + end + else + nothing end - return x end + +# Extents + function _extent2dims(to::Extents.Extent; - 
size=nothing, res=nothing, crs=nothing, - sampling=Intervals(Start()), + size=nokw, res=nokw, crs=nokw, sampling=nokw, ) - _extent2dims(to, size, res; crs, sampling=_match_to_extent(to, sampling)) + sampling = _match_to_extent(to, isnokw(sampling) ? Intervals(Start()) : sampling) + _extent2dims(to, size, res; crs, sampling) end function _extent2dims(to::Extents.Extent, size::Union{Nothing,NoKW}, res::Union{Nothing,NoKW}; kw...) - isnothing(res) && throw(ArgumentError("Pass either `size` or `res` keywords or a `Tuple` of `Dimension`s for `to`.")) -end -function _extent2dims(to::Extents.Extent, size, res; kw...) - isnothing(res) || _size_and_res_error() + throw(ArgumentError("Pass either `size` or `res` keywords or a `Tuple` of `Dimension`s for `to`.")) end +_extent2dims(to::Extents.Extent, size, res; kw...) = _size_and_res_error() function _extent2dims(to::Extents.Extent, size::Union{Nothing,NoKW}, res; kw...) _extent2dims(to, size, _match_to_extent(to, res); kw...) end -function _extent2dims(to::Extents.Extent, size::Union{Nothing,NoKW}, res::Tuple; sampling, kw...) +function _extent2dims(to::Extents.Extent, size::Union{Nothing,NoKW}, res::Tuple; + sampling::Tuple, kw... +) ranges = map(values(to), res, sampling) do (start, stop), step, s if s isa Points range(; start, step, stop) @@ -167,11 +144,10 @@ function _extent2dims(to::Extents.Extent, size::Union{Nothing,NoKW}, res::Tuple; end return _extent2dims(to, ranges; sampling, kw...) end -function _extent2dims(to::Extents.Extent, size, res::Union{Nothing,NoKW}; kw...) +_extent2dims(to::Extents.Extent, size, res::Union{Nothing,NoKW}; kw...) = _extent2dims(to, _match_to_extent(to, size), res; kw...) -end function _extent2dims(to::Extents.Extent, size::Tuple, res::Union{Nothing,NoKW}; - sampling, kw... 
+ sampling::Tuple, crs ) ranges = map(values(to), size, sampling) do (start, stop), length, s if s isa Points @@ -180,9 +156,10 @@ function _extent2dims(to::Extents.Extent, size::Tuple, res::Union{Nothing,NoKW}; range(; start, stop, length=length+1)[1:end-1] end end - return _extent2dims(to, ranges; sampling, kw...) + return _extent2dims(to, ranges; sampling, crs) end -function _extent2dims(::Extents.Extent{K}, ranges; crs, sampling) where K +function _extent2dims(::Extents.Extent{K}, ranges; crs, sampling::Tuple) where K + crs = isnokw(crs) ? nothing : crs emptydims = map(name2dim, K) order = ForwardOrdered() lookups = map(emptydims, ranges, sampling) do d, range, s @@ -209,14 +186,7 @@ function _match_to_extent(::Extents.Extent{K}, x) where K end end -function _as_intervals(ds::Tuple) - # Rasterization only makes sense on Sampled Intervals - interval_dims = map(dims(ds, DEFAULT_POINT_ORDER)) do d - l = parent(d) - rebuild(d, rebuild(l; sampling=Intervals(locus(l)))) - end - return setdims(ds, interval_dims) -end +# Geometries # get geometries from what may be a table with a geometrycolumn or an interable of geometries # if it has no geometry column and does not iterate valid geometries, error informatively @@ -269,43 +239,9 @@ function _check_geometries(geoms) end # to distinguish between objects returned by _get_geometries and other objects struct IterableOfGeometries end -_warn_disk() = @warn "Disk-based objects may be very slow here. User `read` first." - -_filenotfound_error(filename) = throw(ArgumentError("file \"$filename\" not found")) -_progress(args...; kw...) = ProgressMeter.Progress(args...; color=:blue, barlen=50, kw...) - -# Function barrier for splatted vector broadcast -@noinline _do_broadcast!(f, x, args...) = broadcast!(f, x, args...) - -_size_and_res_error() = throw(ArgumentError("Both `size` and `res` keywords are passed, but only one can be used")) - -_no_crs_error() = throw(ArgumentError("The provided object does not have a CRS. 
Use `setcrs` to set one.")) - -_type_missingval(::Type{T}) where T = typemin(T) -_type_missingval(::Type{T}) where T<:Unsigned = typemax(T) -# Modified from IsURL.jl, many thanks to @zlatanvasovic -const WINDOWSREGEX = r"^[a-zA-Z]:[\\]" -const URLREGEX = r"^[a-zA-Z][a-zA-Z\d+\-.]*:" - -_isurl(str::AbstractString) = !occursin(WINDOWSREGEX, str) && occursin(URLREGEX, str) - -# Run `f` threaded or not, w -function _run(f, range::OrdinalRange, threaded::Bool, progress::Bool, desc::String) - p = progress ? _progress(length(range); desc) : nothing - if threaded - Threads.@threads :static for i in range - f(i) - isnothing(p) || ProgressMeter.next!(p) - end - else - for i in range - f(i) - isnothing(p) || ProgressMeter.next!(p) - end - end -end +# Chunking # NoKW means true @inline function _chunks_to_tuple(template, dims, chunks::Bool) @@ -348,7 +284,6 @@ end @inline _chunks_to_tuple(template, dimorder, chunks::Nothing) = nothing @inline _chunks_to_tuple(template, dims, chunks::NoKW) = nothing - _checkregular(A::AbstractRange) = true function _checkregular(A::AbstractArray) step = stepof(A) @@ -360,17 +295,8 @@ function _checkregular(A::AbstractArray) return true end -_maybe_add_suffix(filename::Nothing, suffix) = nothing -_maybe_add_suffix(filename::Nothing, suffix::Union{Nothing,NoKW}) = nothing -_maybe_add_suffix(filename, suffix::Union{Nothing,NoKW}) = filename -function _maybe_add_suffix(filename, suffix) - base, ext = splitext(filename) - if string(suffix) == "" - filename - else - return string(base, "_", suffix, ext) - end -end + +# Memory function _checkobjmem(obj) f = bytes -> """ @@ -396,8 +322,112 @@ function _no_memory_error(f, bytes) return error(msg) end + +# Lookups + +function _as_intervals(ds::Tuple) + # Rasterization only makes sense on Sampled Intervals + interval_dims = map(dims(ds, DEFAULT_POINT_ORDER)) do d + l = parent(d) + rebuild(d, rebuild(l; sampling=Intervals(locus(l)))) + end + return setdims(ds, interval_dims) +end + +nolookup_to_sampled(A) 
= rebuild(A; dims=nolookup_to_sampled(dims(A))) +nolookup_to_sampled(dims::DimTuple) = map(nolookup_to_sampled, dims) +nolookup_to_sampled(d::Dimension) = + lookup(d) isa NoLookup ? set(d, Sampled(; sampling=Points())) : d + + +# Metadata + +# Create a standardised Metadata object of source T, containing a `Dict{String,Any}` +_metadatadict(s::Source, p1::Pair, pairs::Pair...) = + _metadatadict(s, (p1, pairs...)) +_metadatadict(::S) where S<:Source = Metadata{S}(Dict{String,Any}()) +function _metadatadict(::S, pairs) where S<:Source + dict = Dict{String,Any}() + for (k, v) in pairs + dict[String(k)] = v + end + return Metadata{S}(dict) +end + + +# Other + +_progress(args...; kw...) = ProgressMeter.Progress(args...; color=:blue, barlen=50, kw...) + +# Function barrier for splatted vector broadcast +@noinline _do_broadcast!(f, x, args...) = broadcast!(f, x, args...) + +# Run `f` threaded or not, w +function _run(f, range::OrdinalRange, threaded::Bool, progress::Bool, desc::String) + p = progress ? _progress(length(range); desc) : nothing + if threaded + Threads.@threads :static for i in range + f(i) + isnothing(p) || ProgressMeter.next!(p) + end + else + for i in range + f(i) + isnothing(p) || ProgressMeter.next!(p) + end + end +end + +_unwrap(::Val{X}) where X = X +_unwrap(x) = x + +# Map filename suffix over a stack +function mapargs(f, st::AbstractRasterStack, args...) + layers = map(values(st), args...) do A, mappedargs... + f(A, mappedargs...) 
+ end + return DD.rebuild_from_arrays(st, Tuple(layers)) +end + +_without_mapped_crs(f, x) = _without_mapped_crs(f, x, mappedcrs(x)) +_without_mapped_crs(f, x, ::Nothing) = f(x) +function _without_mapped_crs(f, dims::DimTuple, mappedcrs::GeoFormat) + dims1 = setmappedcrs(dims, nothing) + x = f(dims1) + if x isa DimTuple + x = setmappedcrs(x, mappedcrs) + end + return x +end +function _without_mapped_crs(f, A::AbstractRaster, mappedcrs::GeoFormat) + A = setmappedcrs(A, nothing) + x = f(A) + if x isa AbstractRaster + x = setmappedcrs(x, mappedcrs) + end + return x +end +function _without_mapped_crs(f, st::AbstractRasterStack, mappedcrs::GeoFormat) + st1 = map(A -> setmappedcrs(A, nothing), st) + x = f(st1) + if x isa AbstractRasterStack + x = map(A -> setmappedcrs(A, mappedcrs(st)), x) + end + return x +end + + +# Warnings and errors + _maybewarn_replace_missing(replace_missing::NoKW) = nothing function _maybewarn_replace_missing(replace_missing) @warn "`replace_missing` keyword no longer used. Set `maskingval` to nothing for no replacement, to `missing` to mask `missingval` with `missing`, or any other value" end +@noinline _warn_disk() = @warn "Disk-based objects may be very slow here. User `read` first." + +_filenotfound_error(filename) = throw(ArgumentError("file \"$filename\" not found")) + +_size_and_res_error() = throw(ArgumentError("Both `size` and `res` keywords are passed, but only one can be used")) + +_no_crs_error() = throw(ArgumentError("The provided object does not have a CRS. 
Use `setcrs` to set one.")) diff --git a/test/resample.jl b/test/resample.jl index 5b9e03081..d08449512 100644 --- a/test/resample.jl +++ b/test/resample.jl @@ -23,36 +23,8 @@ include(joinpath(dirname(pathof(Rasters)), "../test/test_utils.jl")) end end - maskingval = missing - for maskingval in (nothing, missing, Rasters.nokw) - # Resample cea.tif using resample - cea = Raster(raster_path; missingval=0x00, name=:cea, maskingval) - raster_output = resample(cea; res=output_res, crs=output_crs, method, maskingval) - disk_output = resample(cea; res=output_res, crs=output_crs, method, filename="resample.tif") - - cea_permuted = permutedims(Raster(raster_path), (Y, X); missingval=0x00, name=:cea_permuted, maskingval) - permuted_output = resample(cea_permuted, output_res; crs=output_crs, method) - - AG_output1 = if maskingval === missing || maskingval === Rasters.nokw - replace(AG_output, 0x00 => missing) - else - AG_output - end - # Compare ArchGDAL, resample and permuted resample - AG_output1 - .=== - raster_output - @test all(AG_output1 .=== - raster_output .=== - read(disk_output .=== permutedims(permuted_output, (X, Y))) - @test abs(step(dims(raster_output, Y))) ≈ - abs(step(dims(raster_output, X))) ≈ - abs(step(dims(disk_output, X))) ≈ - abs(step(dims(permuted_output, X))) ≈ output_res - @test name(cea) == name(raster_output) - - rm("resample.tif") - end + cea = Raster(raster_path; missingval=0x00, name=:cea, maskingval=nothing) + raster_output = resample(cea; res=output_res, crs=output_crs, method, missingval=0x00, maskingval=nothing) @testset "missingval propagates" begin @test missingval(resample(cea; res=output_res, crs=output_crs, method)) == 0x00 @@ -73,9 +45,7 @@ include(joinpath(dirname(pathof(Rasters)), "../test/test_utils.jl")) resampled = resample(cea; method) @test crs(cea) == crs(resampled) @test cea == resampled - # There is some floating point error here after Rasters -> GDAL -> Rasterss... 
- # Should we correct it by detecting almost identical extent and using the original? - # @test_broken extent(cea) == extent(resampled) + @test extent(cea) == extent(resampled) end @testset "only `res` kw changes the array size predictably" begin @@ -186,4 +156,29 @@ include(joinpath(dirname(pathof(Rasters)), "../test/test_utils.jl")) @test dims(resampled_3D, (1,2)) == to @test dims(resampled_3D, Z) == Z(1:2) end + + for maskingval in (nothing, missing, Rasters.nokw) + # Resample cea.tif using resample + cea = Raster(raster_path; missingval=0x00, name=:cea, maskingval) + raster_output = resample(cea; res=output_res, crs=output_crs, method, missingval=0x00, maskingval) + disk_output = resample(cea; res=output_res, crs=output_crs, method, missingval=0x00, maskingval, filename="resample.tif") + + cea_permuted = permutedims(Raster(raster_path; missingval=0x00, name=:cea_permuted, maskingval), (Y, X)) + permuted_output = resample(cea_permuted, output_res; missingval=0x00, maskingval, crs=output_crs, method) + + AG_output1 = if maskingval === missing + replace(AG_output, 0x00 => missing) + else + AG_output + end + # Compare ArchGDAL, resample and permuted resample + @test all(AG_output1 .=== raster_output .=== read(disk_output) .=== permutedims(permuted_output, (X, Y))) + @test abs(step(dims(raster_output, Y))) ≈ + abs(step(dims(raster_output, X))) ≈ + abs(step(dims(disk_output, X))) ≈ + abs(step(dims(permuted_output, X))) ≈ output_res + @test name(cea) == name(raster_output) + + rm("resample.tif") + end end diff --git a/test/warp.jl b/test/warp.jl index b49653013..df64ccf2b 100644 --- a/test/warp.jl +++ b/test/warp.jl @@ -7,19 +7,25 @@ gdalpath = maybedownload(url) @testset "warp" begin # test that warp actually does *something* - r = Raster(gdalpath)[:,:,1] + r = Raster(gdalpath) crs_ = crs(r).val + warped = warp(r, Dict(:t_srs => "EPSG:25832"); missingval=nothing) + @test warped isa Raster + @test size(warped) == (720, 721) + # the crs is way off, the image is rotated 
- all four corners should be black + missingval(warped) === nothing + @test warped[1, 1] === warped[1, end] === warped[end, 1] === warped[end, end] === 0x00 + warped = warp(r, Dict(:t_srs => "EPSG:25832")) @test warped isa Raster - @test size(warped) == (720,721) + @test size(warped) == (720, 721) # the crs is way off, the image is rotated - all four corners should be black - @test warped[1,1] == warped[1,end] == warped[end,1] == warped[end,end] == 0 + missingval(warped) === nothing + @test warped[1, 1] === warped[1, end] === warped[end, 1] === warped[end, end] === 0xff # now compute mean squared error of the back transformation - warped_back = warp(warped, Dict(:t_srs => crs_)) - # get rid of black border - ex = extrema(findall(>(0), warped_back)) - cropped = warped_back[ex[1]:ex[2]] + warped_back = Rasters.trim(warp(warped, Dict(:t_srs => crs_), res=map(step, lookup(r)))) # subtracting UInts brings us into hell -> Int - diff_ = Int.(cropped[2:end-1, 2:end-1]) .- r + # we also need to shrink the range because of some bleed during warp + diff_ = Int.(warped_back[2:end-1, 2:end-1]) .- r @test sum(x->x^2, diff_) / prod(size(diff_)) < 600 -end \ No newline at end of file +end From 7a87d8180cceab141e9d39ccfc15bdce00d256ae Mon Sep 17 00:00:00 2001 From: rafaqz Date: Sat, 10 Aug 2024 15:20:38 +0200 Subject: [PATCH 09/38] bugfixes --- ext/RastersArchGDALExt/RastersArchGDALExt.jl | 2 +- ext/RastersArchGDALExt/gdal_source.jl | 19 +- ext/RastersArchGDALExt/resample.jl | 7 +- ext/RastersNCDatasetsExt/ncdatasets_source.jl | 8 +- src/array.jl | 4 +- src/create.jl | 192 +++++++++++------- src/methods/burning/array_init.jl | 12 +- src/methods/crop_extend.jl | 39 ++-- src/methods/mask.jl | 32 +-- src/modifieddiskarray.jl | 38 ++-- src/nokw.jl | 2 + src/show.jl | 3 +- src/sources/commondatamodel.jl | 6 +- src/sources/grd.jl | 57 ++++-- src/stack.jl | 12 +- src/utils.jl | 14 +- src/write.jl | 8 +- test/array.jl | 3 +- test/create.jl | 176 ++++++++++++++++ test/runtests.jl | 79 
+++---- test/sources/gdal.jl | 17 -- 21 files changed, 491 insertions(+), 239 deletions(-) create mode 100644 test/create.jl diff --git a/ext/RastersArchGDALExt/RastersArchGDALExt.jl b/ext/RastersArchGDALExt/RastersArchGDALExt.jl index b4e250e96..e7292ecae 100644 --- a/ext/RastersArchGDALExt/RastersArchGDALExt.jl +++ b/ext/RastersArchGDALExt/RastersArchGDALExt.jl @@ -22,7 +22,7 @@ using Rasters: GDALsource, AbstractProjected, AbstractRaster, AbstractRasterStac GDAL_EMPTY_TRANSFORM, GDAL_TOPLEFT_X, GDAL_WE_RES, GDAL_ROT1, GDAL_TOPLEFT_Y, GDAL_ROT2, GDAL_NS_RES, _no_crs_error -import Rasters: reproject, resample, warp, cellsize, nokw +import Rasters: reproject, resample, warp, cellsize, nokw, isnokw, isnokwornothing const RA = Rasters const DD = DimensionalData diff --git a/ext/RastersArchGDALExt/gdal_source.jl b/ext/RastersArchGDALExt/gdal_source.jl index 1ae397bf5..1aff1971f 100644 --- a/ext/RastersArchGDALExt/gdal_source.jl +++ b/ext/RastersArchGDALExt/gdal_source.jl @@ -60,12 +60,11 @@ function Base.write( kw... ) where T RA.check_can_write(filename, force) - A1 = _maybe_correct_to_write(A, missingval) + A1 = _maybe_correct_to_write(A) mod = RA._writer_mod(eltype; missingval, maskingval, scale, offset, coerce) _create_with_driver(filename, dims(A1), T; missingval, _block_template=A1, scale, offset, verbose, kw... ) do dataset - verbose && _maybe_warn_south_up(A, verbose, "Writing South-up. Use `reverse(myrast; dims=Y)` first to write conventional North-up") if write open(A1; write=true) do O RA._maybe_modify(AG.RasterDataset(dataset), mod) .= parent(O) @@ -76,7 +75,9 @@ function Base.write( end function RA._open(f, ::GDALsource, filename::AbstractString; - write=false, mod=NoMod(), kw... + write=false, + mod=RA.NoMod(), + kw... ) # Check the file actually exists because the GDAL error is unhelpful if !isfile(filename) @@ -95,11 +96,11 @@ function RA._open(f, ::GDALsource, filename::AbstractString; end flags = write ? 
AG.OF_UPDATE : AG.OF_READONLY return AG.readraster(filename; flags) do A - C = RA._maybe_modify(A, mod) - RA.cleanreturn(f(C)) + A1 = RA._maybe_modify(A, mod) + RA.cleanreturn(f(A1)) end end -RA._open(f, ::GDALsource, A::AG.RasterDataset; mod=NoMod(), kw...) = +RA._open(f, ::GDALsource, A::AG.RasterDataset; mod=RA.NoMod(), kw...) = RA.cleanreturn(f(RA._maybe_modify(A, mod))) @@ -372,9 +373,9 @@ function _create_with_driver(f, filename, dims::Tuple, T; offset=nokw, kw... ) - verbose && _maybe_warn_south_up(dims, verbose, "Creating a South-up raster. Use `reverse(myrast; dims=Y)` first to write conventional North-up") + verbose && _maybe_warn_south_up(dims, verbose, "Creating a South-up raster. You may wish to reverse the `Y` dimension to use conventional North-up") - missingval = RA.isnokw(missingval) || ismissing(missingval) ? RA._writeable_missing(T; verbose) : missingval + missingval = ismissing(missingval) ? RA._writeable_missing(T; verbose) : missingval _gdal_validate(dims) x, y = map(DD.dims(dims, (XDim, YDim))) do d @@ -545,7 +546,7 @@ function _set_dataset_properties!(dataset::AG.Dataset, dims::Tuple, missingval, gt = RA.dims2geotransform(x, y) AG.setgeotransform!(dataset, gt) - if !isnothing(missingval) + if !RA.isnokwornothing(missingval) bands = hasdim(dims, Band) ? axes(DD.dims(dims, Band), 1) : 1 for i in bands rasterband = AG.getband(dataset, i) diff --git a/ext/RastersArchGDALExt/resample.jl b/ext/RastersArchGDALExt/resample.jl index 1a42d1a98..c4bd64243 100644 --- a/ext/RastersArchGDALExt/resample.jl +++ b/ext/RastersArchGDALExt/resample.jl @@ -7,7 +7,12 @@ function resample(xs::Union{Tuple,NamedTuple}; to=first(xs), kw...) map(x -> resample(x; to, kw...), xs) end function resample(A::RasterStackOrArray; - to=nothing, res=nothing, crs=nothing, size=nothing, method=:near, kw... + to=nothing, + res=nothing, + crs=nothing, + size=nothing, + method=:near, + kw... 
) (isnothing(size) || isnothing(res)) || _size_and_res_error() diff --git a/ext/RastersNCDatasetsExt/ncdatasets_source.jl b/ext/RastersNCDatasetsExt/ncdatasets_source.jl index eb16a9299..434de5060 100644 --- a/ext/RastersNCDatasetsExt/ncdatasets_source.jl +++ b/ext/RastersNCDatasetsExt/ncdatasets_source.jl @@ -39,7 +39,7 @@ function Base.write(filename::AbstractString, ::NCDsource, s::AbstractRasterStac ds = NCD.Dataset(filename, mode; attrib=RA._attribdict(metadata(s))) try if missingval isa NamedTuple - map(k -> _writevar!(ds, s[k]; missinval=missingval[k], kw...), keys(s)) + map(k -> _writevar!(ds, s[k]; missingval=missingval[k], kw...), keys(s)) else map(k -> _writevar!(ds, s[k]; missingval, kw...), keys(s)) end @@ -153,6 +153,12 @@ end RA._sourcetrait(::NCD.Dataset) = NCDsource() RA._sourcetrait(::NCD.Variable) = NCDsource() +@inline function RA.get_scale(metadata::Metadata{NCDsource}, scaled::Bool) + scale = scaled ? get(metadata, "scale_factor", nothing) : nothing + offset = scaled ? get(metadata, "add_offset", nothing) : nothing + return scale, offset +end + # precompilation # const _NCDVar = NCDatasets.CFVariable{Union{Missing, Float32}, 3, NCDatasets.Variable{Float32, 3, NCDatasets.NCDataset}, NCDatasets.Attributes{NCDatasets.NCDataset{Nothing}}, NamedTuple{(:fillvalue, :scale_factor, :add_offset, :calendar, :time_origin, :time_factor), Tuple{Float32, Nothing, Nothing, Nothing, Nothing, Nothing}}} diff --git a/src/array.jl b/src/array.jl index 04132a4a5..6103cec9d 100644 --- a/src/array.jl +++ b/src/array.jl @@ -323,10 +323,10 @@ function Raster(ds, filename::AbstractString; source = _sourcetrait(filename, source) data1, dims1, metadata1, missingval2 = _open(source, ds; name=name1, group, mod=NoMod()) do var metadata1 = isnokw(metadata) ? _metadata(var) : metadata - missingval1 = _fix_missingval(var, missingval) + missingval1 = isnokwornothing(missingval) ? 
Rasters.missingval(var, metadata1) : missingval maskingval1 = isnokw(maskingval) && !isnothing(missingval1) ? missing : maskingval # If maskingval is `nothing` use missingval as missingval - missingval2 = isnothing(maskingval1) ? missingval1 : maskingval1 + missingval2 = isnokwornothing(maskingval1) ? missingval1 : maskingval1 mod = isnokw(mod) ? _mod(eltype(var), metadata1, missingval1, maskingval1; scaled, coerce) : mod data = if lazy FileArray{typeof(source)}(var, filename; diff --git a/src/create.jl b/src/create.jl index 0449457d8..3b8e97957 100644 --- a/src/create.jl +++ b/src/create.jl @@ -1,18 +1,21 @@ - +const TypeNamedTuple = NamedTuple{<:Any,<:Tuple{Vararg{Type}}} """ create([filename], template::Raster; kw...) - create([filename], T, template; kw...) + create([filename], type, template; kw...) + +Create a new, uninitialised [`Raster`](@ref) or [`RasterStack`](@ref). + +If `filename` is a `String` it will be created on disk, and opened lazily. +If it is `nothing` or not passed, an in-memory `Raster` will be created. -Create a new Raster. If `filename` is a `String` it will be created on disk, -and opened lazily. If it is `nothing` of not passed, a regular in-memory `Raster` -will be created. When written to disk, the values will be `missingval`, -if in-memory values will be `undef`. +If `type` is a `Type`, the return value is a `Raster`. The `eltype` will usually be `T`, except +where `scale` and/or `offset` keywords are used or a `missingval` of a different type is specified, +in which case `T` will depend on the type promotion of `scale`, `offset` and `missingval` with `T`. +`maskingval` will also affect the `eltype` of the opened raster if you `create` to a file. -The return value is a `Raster`. The `eltype` will usually be `T`, except -where `scale` and/or `offset` keywords are used, in which case `T` will -depend on the tyepe promotion of `scale` and `offset` with `T`. -`maskingval` will also affect the `eltype`. 
+If `type` is a `NamedTuple` of types, the result will be a `RasterStack`. In this case `fill` and +`missingval` can be single values (for all layers) or `NamedTuple` with the same names to specify per-layer. ## Arguments @@ -21,7 +24,8 @@ depend on the tyepe promotion of `scale` and `offset` with `T`. - `template`: a `Raster`, `Tuple` of `Dimension` or `Extents.Extent` to use as a template. If an `Extent` is used, a `size` or `res` keyword must be passed. If a `T` argument is not used, it is taken from the `template` eltype. -- `T`: the element type to use in the created array. +- `type`: the element type to use in the created array. A `NamedTuple` of types + will create a `RasterStack` ## Keywords @@ -29,9 +33,10 @@ $NAME_KEYWORD $REFDIMS_KEYWORD $METADATA_KEYWORD $WRITE_MISSINGVAL_KEYWORD -- `fillval`: A value to fill the array with. By default this will be - `missingval`. If there is no `missingval` set or `fillval` is set to nothing - disk values will remain undefined. +- `fill`: A value to fill the array with, before `scale` and `offset` are applied. + If there is no `fill`, raster values may remain undefined. They may be set to + `missingval` on disk, but this is not guaranteed. It is often more efficient to + use `fill` than to fill manually after `create`. $MASKINGVAL_KEYWORD $SOURCE_KEYWORD - `lazy`: A `Bool` specifying if to load data lazily from disk. For `create` @@ -46,10 +51,10 @@ $RES_KEYWORD $SIZE_KEYWORD $CRS_KEYWORD $CHUNKS_KEYWORD -- `reverse_y`: usually we want to write `Y` dimensions in reverse. - When building dimensions from an `Extents.Extent` we do this by - default, unless `reverse_y=false`. With template `Raster` or dimensions, - the existing order is used. +- `reverse_y`: often we want to write `Y` dimensions in reverse. + When building dimensions from an `Extents.Extent` and `size` or `res` we can do this by + using `reverse_y=true`. Using a negative value in `res` will achieve the same result. 
+ With a template `Raster` or a `Tuple` of `Dimension`, the existing order is used. ## Example @@ -114,18 +119,36 @@ RasterStack("created.nc") └───────────────────────────────────────────────────────────────────────────────────────────┘ ``` """ -create(A::AbstractRaster; kw...) = create(nothing, A; kw...) -create(T::Union{Type,NamedTuple}, dims::Tuple; kw...) = create(nothing, T, dims; kw...) -create(T::Union{Type,NamedTuple}, extent::Extents.Extent; kw...) = create(nothing, T, dims; kw...) -create(filename::Union{AbstractString,Nothing}, A::AbstractRaster{T}; kw...) where T = - create(filename, T, A; kw...) -function create(filename::Union{AbstractString,Nothing}, T::Union{Type,NamedTuple}, A::AbstractRaster; - name=name(A), - metadata=metadata(A), - missingval=missingval(A), +create(A::Union{AbstractRaster,AbstractRasterStack}; kw...) = create(nothing, A; kw...) +create(T::Union{Type,TypeNamedTuple}, A::Union{Tuple,Extents.Extent,AbstractRaster,AbstractRasterStack}; kw...) = + create(nothing, T, A; kw...) +function create(filename::Union{AbstractString,Nothing}, A::AbstractRaster{T}; + missingval=missingval(A), # Only take missingval here when types are not specified + kw... +) where T + create(filename, T, A; missingval, kw...) +end +function create(filename::Union{AbstractString,Nothing}, st::AbstractRasterStack; + missingval=missingval(st), # Only take missingval here when types are not specified kw... ) - return create(filename, T, dims(A); parent=parent(A), name, metadata, missingval, kw...) + create(filename, map(eltype, layers(st)), st; missingval, kw...) +end +create(filename::Union{AbstractString,Nothing}, T::Union{Type,TypeNamedTuple}, A::AbstractRaster; kw...) = + create(filename, T, dims(A); parent=parent(A), kw...) +function create(filename::Union{AbstractString,Nothing}, T::NamedTuple{K1}, st::AbstractRasterStack{K2}; + metadata=metadata(st), + layerdims=nokw, + layermetadata=nokw, + kw... 
+) where {K1,K2} + if all(map(in(K2), K1)) + layerdims = isnokw(layerdims) ? DD.layerdims(st)[K1] : layerdims + layermetadata = isnokw(layermetadata) ? DD.layermetadata(st)[K1] : layermetadata + end + return create(filename, T, dims(st); + parent=first(parent(st)), metadata, missingval, layerdims, layermetadata, kw... + ) end function create(filename::AbstractString, T::Union{Type,NamedTuple}, dims::Tuple; lazy=true, @@ -139,16 +162,16 @@ function create(filename::AbstractString, T::Union{Type,NamedTuple}, dims::Tuple # This calls `create` in the /sources file for this `source` return create(filename, source, T, dims; lazy, missingval, kw...) end -function create(filename::AbstractString, T::Union{Type,NamedTuple}, extent::Extents.Extent; +function create(filename::Union{AbstractString,Nothing}, T::Union{Type,NamedTuple}, extent::Extents.Extent; res=nokw, size=nokw, crs=nothing, sampling=Points(), - reverse_y=true, + reverse_y=nokw, kw... ) ds = _extent2dims(extent; size, res, crs, sampling) - ds = if reverse_y && hasdim(ds, Y()) + ds = if reverse_y isa Bool && reverse_y && hasdim(ds, Y()) DD.setdims(ds, reverse(dims(ds, Y()))) else ds @@ -156,71 +179,92 @@ function create(filename::AbstractString, T::Union{Type,NamedTuple}, extent::Ext return create(filename, T, ds; kw...) end function create(filename::Nothing, ::Type{T}, dims::Tuple; + missingval=nokw, + maskingval=nothing, + fill=nokw, parent=nokw, + verbose=true, + # Not used but here for consistency suffix=nokw, force=false, - missingval, + chunks=nokw, kw... ) where T - eltype = isnothing(missingval) ? T : promote_type(T, typeof(missingval)) + if verbose + isnokw(chunks) || @warn "`chunks` of `$chunks` found. But `chunks` are not used for in-memory rasters" + end + missingval = isnokw(maskingval) || isnothing(maskingval) ? missingval : maskingval + eltype = isnokw(missingval) || isnothing(missingval) ? 
T : promote_type(T, typeof(missingval)) data = if isnokw(parent) || isnothing(parent) Array{eltype}(undef, dims) else similar(parent, eltype, size(dims)) end + if !(isnokw(fill) || isnothing(fill)) + fill!(data, fill) + end return Raster(data, dims; missingval, kw...) end function create(filename::Nothing, types::NamedTuple, dims::Tuple; - suffix=nokw, + suffix=keys(types), force=false, - missingval, + chunks=nokw, + verbose=true, + parent=nokw, + missingval=nokw, + maskingval=nothing, + fill=nokw, + layerdims=nokw, + layermetadata=nokw, kw... ) - layers = map(types) do T - # eltype = isnothing(missingval) ? T : promote_type(T, typeof(missingval)) - data = if isnokw(parent) || isnothing(parent) - Array{eltype}(undef, dims) - else - similar(parent, eltype, size(dims)) - end + missingval = isnokw(maskingval) || isnothing(maskingval) ? missingval : maskingval + layerdims = isnokw(layerdims) ? map(_ -> basedims(dims), types) : layerdims + layermetadata = layermetadata isa NamedTuple ? layermetadata : map(_ -> layermetadata, types) + layerfill = fill isa NamedTuple ? fill : map(_ -> fill, types) + layermissingvals = missingval isa NamedTuple ? missingval : map(_ -> missingval, types) + layers = map(types, layermissingvals, layerfill, layerdims, layermetadata) do T, lmv, lfv, ld, lm + create(nothing, T, DD.dims(dims, ld); parent, missingval=lmv, fill=lfv, metadata=lm) end - return RasterStack(layers, dims; missingval, kw...) + return RasterStack(layers; kw...) end function create(filename::AbstractString, source::Source, ::Type{T}, dims::DimTuple; name=nokw, missingval=nokw, - maskingval=missing, - fillval=nokw, + maskingval=nothing, + fill=nokw, metadata=nokw, chunks=nokw, scale=nokw, offset=nokw, - dropband=!hasdim(dims, Band), + dropband=!hasdim(dims, Band()), lazy=true, verbose=true, force=false, coerce=nokw, + kw... 
) where T eltype = Missings.nonmissingtype(T) - if isnokw(fillval) || isnothing(fillval) + + if isnokw(fill) || isnothing(fill) write = false # Leave fill undefined A = FillArrays.Zeros{eltype}(map(length, dims)) else - fillval isa T || throw(ArgumentError("fillval must be of type $T, got $fillval")) + fill isa T || throw(ArgumentError("fill must be of type $T, got $fill")) write = true # Write fill to disk - A = FillArrays.Fill{eltype}(fillval, map(length, dims)) + A = FillArrays.Fill{eltype}(fill, map(length, dims)) end # Create layers of zero arrays rast = Raster(A, dims; name, missingval) Rasters.write(filename, source, rast; - eltype, chunks, metadata, scale, offset, missingval, verbose, force, coerce, write + eltype, chunks, metadata, scale, offset, missingval, verbose, force, coerce, write, kw... ) return Raster(filename; source, lazy, metadata, missingval, maskingval, dropband, coerce) end function create(filename::AbstractString, source::Source, layertypes::NamedTuple, dims::DimTuple; - name=keys(layertypes), missingval=nokw, - maskingval=missing, + maskingval=nothing, + fill=nokw, metadata=nokw, layerdims=nokw, layermetadata=nokw, @@ -232,43 +276,37 @@ function create(filename::AbstractString, source::Source, layertypes::NamedTuple verbose=true, force=false, coerce=nokw, + kw... ) - layers = map(layertypes) do x - if x isa Type - eltype = Missings.nonmissingtype(x) - size = map(length, dims) - elseif x isa Pair{<:Type} - eltype = Missings.nonmissingtype(x[1]) - ds = x[2] - size = map(length, DD.dims(dims, DD._astuple(ds))) - else - throw(ArgumentError("Must be a Type or a Pair of Type and Dimension/Symbol")) - end - FillArrays.Zeros{eltype}(size) - end - layerdims = if isnokw(layerdims) + write = Ref(false) + fill = fill isa NamedTuple ? 
fill : map(_ -> fill, layertypes) + layerdims = if isnokw(layerdims) map(layertypes) do x if x isa Type DD.basedims(dims) else - ds = DD._astuple(DD.basedims(x[2])) + DD._astuple(DD.basedims(x[2])) end end else layerdims end - # if isnokw(fillval) || isnothing(fillval) - # write = false # Leave fill undefined - # A = FillArrays.Zeros{eltype}(map(length, dims)) - # else - # fillval isa T || throw(ArgumentError("fillval must be of type $T, got $fillval")) - # write = true # Write fill to disk - # A = FillArrays.Fill{eltype}(fillval, map(length, dims)) - # end + layers = map(layertypes, layerdims, fill) do T, ld, f + lks = lookup(dims, ld) + eltype = Missings.nonmissingtype(T) + size = map(length, lks) + if isnokwornothing(f) + A = FillArrays.Zeros{eltype}(size) + else + write[] = true # Write fill to disk + A = FillArrays.Fill{eltype}(f, size) + end + end # Create layers of zero arrays stack = RasterStack(layers, dims; layerdims, layermetadata, missingval) fn = Rasters.write(filename, stack; - chunks, metadata, scale, offset, missingval, maskingval, verbose, force, coerce, write=false + chunks, metadata, scale, offset, missingval, maskingval, verbose, force, coerce, write=write[], kw... ) - return RasterStack(fn; source, lazy, metadata, layerdims, maskingval, dropband, coerce) + st = RasterStack(fn; source, lazy, metadata, layerdims, maskingval, dropband, coerce) + return st end diff --git a/src/methods/burning/array_init.jl b/src/methods/burning/array_init.jl index 9b6150f67..f8f1e1fc8 100644 --- a/src/methods/burning/array_init.jl +++ b/src/methods/burning/array_init.jl @@ -1,5 +1,5 @@ -# Like `create` but without disk writes, mostly for Bool/Union{Missing,Boo}, +# Like `create` but without disk writes, mostly for Bool or Union{Missing,Bool}, # and uses `similar` where possible # TODO merge this with `create` somehow _init_bools(to; kw...) = _init_bools(to, BitArray; kw...) 
@@ -25,10 +25,12 @@ function _init_bools(to::Extents.Extent, T::Type, data; ) # Convert the extent to dims (there must be `res` or `size` in `kw`) ext = _extent2dims(to; size, res, sampling, kw...) - _init_bools(to, ext, T, data; kw...) + _init_bools(to, ext, T, data; collapse, kw...) end -function _init_bools(to, dims::DimTuple, T::Type, data; collapse::Union{Bool,Nothing,NoKW}=nokw, kw...) - if isnothing(data) || isnothing(collapse) || isnokw(collapse) || collapse +function _init_bools(to, dims::DimTuple, T::Type, data; + collapse::Union{Bool,Nothing,NoKW}=nokw, kw... +) + if isnothing(data) || isnokwornothing(collapse) || collapse _alloc_bools(to, dims, T; kw...) else n = if Base.IteratorSize(data) isa Base.HasShape @@ -47,7 +49,7 @@ function _alloc_bools(to, dims::DimTuple, ::Type{BitArray}; missingval::Bool=fal return Raster(vals, dims; missingval, metadata) end function _alloc_bools(to, dims::DimTuple, ::Type{<:Array{T}}; missingval=false, metadata=NoMetadata(), kw...) where T - # Use an `Array` + # Use an Array data = fill!(Raster{T}(undef, dims), missingval) return rebuild(data; missingval, metadata) end diff --git a/src/methods/crop_extend.jl b/src/methods/crop_extend.jl index b651bfd3e..4fd409619 100644 --- a/src/methods/crop_extend.jl +++ b/src/methods/crop_extend.jl @@ -9,7 +9,7 @@ to match the size of the object `to`, or smallest of any dimensions that are sha # Keywords -- `to`: the object to crop to. This can be $OBJ_ARGUMENT +- `to`: the object to crop to. This can be $OBJ_ARGUMENT If no `to` keyword is passed, the smallest shared area of all `xs` is used. - `touches`: `true` or `false`. Whether to use `Touches` wraper on the object extent. @@ -158,12 +158,12 @@ function extend end function extend(l1::RasterStackOrArray, l2::RasterStackOrArray, ls::RasterStackOrArray...; kw...) extend((l1, l2, ls...); kw...) end -function extend(xs; to=nothing) +function extend(xs; to=nothing, kw...) 
if isnothing(to) to = _subsetbounds((min, max), xs) - map(l -> _extend_to(l, to), xs) + map(l -> _extend_to(l, to, kw...), xs) else - map(l -> extend(l; to), xs) + map(l -> extend(l; to, kw...), xs) end end extend(x::RasterStackOrArray; to=dims(x), kw...) = _extend_to(x, to; kw...) @@ -177,11 +177,13 @@ end _extend_to(x::RasterStackOrArray, to::Dimension; kw...) = _extend_to(x, (to,); kw...) function _extend_to(A::AbstractRaster, to::DimTuple; - filename=nothing, - suffix=nothing, + filename=nothing, + suffix=nothing, missingval=(isnothing(missingval(A)) ? nokw : missingval(A)), + fill=nokw, touches=false, - force=false + force=false, + verbose=true, ) others = otherdims(to, A) # Allow not specifying all dimensions @@ -197,10 +199,26 @@ function _extend_to(A::AbstractRaster, to::DimTuple; end others1 = otherdims(to, A) final_to = (set(dims(A), map(=>, dims(A, to), to)...)..., others1...) + + # If we are writing to disk swap missingval to something writeable + if ismissing(missingval) + missingval = _writeable_missing(filename, eltype(A); verbose) + end + # If no `fill` is passed use `missingval` or zero + if isnokw(fill) + fill = isnokwornothing(missingval) ? zero(Missings.nonmissingtype(eltype(A))) : missingval + end # Create a new extended array newA = create(filename, eltype(A), final_to; - suffix, parent=parent(A), missingval, - name=name(A), metadata=metadata(A), force + suffix, + parent=parent(A), + missingval, + name=name(A), + metadata=metadata(A), + maskingval=Rasters.missingval(A), + fill, + force, + verbose, ) # Input checks map(dims(A, to), dims(newA, to)) do d1, d2 @@ -219,9 +237,6 @@ function _extend_to(A::AbstractRaster, to::DimTuple; A = replace_missing(A, Rasters.missingval(newA)) end open(newA; write=true) do O - # Fill it with missing/nodata values - O .= Rasters.missingval(O) - # Copy the original data to the new array # Somehow this is slow from disk? 
broadcast_dims!(identity, view(O, rangedims...), A) end diff --git a/src/methods/mask.jl b/src/methods/mask.jl index 55b0f3732..f5fa87608 100644 --- a/src/methods/mask.jl +++ b/src/methods/mask.jl @@ -4,6 +4,13 @@ const INVERT_KEYWORD = """ masked, and areas missing in `with` are masked. """ +const COLLAPSE_KEYWORD = """ +- `collapse`: if `true`, collapse all geometry masks into a single mask. Otherwise + return a Raster with an additional `geometry` dimension, so that each slice + along this axis is the mask of the `geometry` opbject of each row of the + table, feature in the feature collection, or just each geometry in the iterable. +""" + """ mask(A:AbstractRaster; with, missingval=missingval(A)) mask(x; with) @@ -236,18 +243,9 @@ $GEOMETRYCOLUMN_KEYWORD $THREADED_KEYWORD $PROGRESS_KEYWORD -And specifically for `shape=:polygon`: - -- `boundary`: include pixels where the `:center` is inside the polygon, where - the line `:touches` the pixel, or that are completely `:inside` inside the polygon. - The default is `:center`. - For tabular data, feature collections and other iterables -- `collapse`: if `true`, collapse all geometry masks into a single mask. Otherwise - return a Raster with an additional `geometry` dimension, so that each slice - along this axis is the mask of the `geometry` opbject of each row of the - table, feature in the feature collection, or just each geometry in the iterable. +$COLLAPSE_KEYWORD # Example @@ -338,9 +336,10 @@ function boolmask!(dest::AbstractRaster, data; threaded=false, allocs=_burning_allocs(dest; threaded), geometrycolumn=nothing, + collapse=nokw, kw... 
) - if hasdim(dest, :geometry) + if collapse === false && hasdim(dest, :geometry) geoms = _get_geometries(data, geometrycolumn) range = eachindex(geoms) _run(range, threaded, progress, "Burning each geometry to a BitArray slice...") do i @@ -351,16 +350,18 @@ function boolmask!(dest::AbstractRaster, data; burn_geometry!(slice, geom; kw..., fill=!invert, allocs=_get_alloc(allocs)) return nothing end - else + elseif isnokw(collapse) || collapse === true burn_geometry!(dest, data; kw..., allocs, lock, progress, threaded, geometrycolumn, fill=!invert) + else + throw(ArgumentError("`collapse` must be `false` or not passed if there is no `:geometry` dimension in `dest`")) end return dest end """ missingmask(obj::Raster; kw...) - missingmask(obj; [to, res, size, collapse]) - missingmask(obj::RasterStack; alllayers = true, kw...) + missingmask(obj; [to, res, size]) + missingmask(obj::RasterStack; alllayers=true, kw...) Create a mask array of `missing` and `true` values, from another `Raster`. `AbstractRasterStack` or `AbstractRasterSeries` are also accepted- @@ -376,6 +377,7 @@ The array returned from calling `missingmask` on a `AbstractRaster` is a - `obj`: $OBJ_ARGUMENT # Keywords + - `alllayers`: if `true` a mask is taken for all layers, otherwise only the first layer is used. Defaults to `true` $INVERT_KEYWORD $GEOM_KEYWORDS @@ -423,8 +425,8 @@ function missingmask!(dest::AbstractRaster, src::AbstractRaster; end end function missingmask!(dest::AbstractRaster, geom; kw...) - B = boolmask!(dest, geom; kw...) # boolmask! handles `invert` keyword here + B = boolmask!(dest, geom; kw...) 
dest .= _false_to_missing.(B) return dest end diff --git a/src/modifieddiskarray.jl b/src/modifieddiskarray.jl index 8f14a9745..bb4abf676 100644 --- a/src/modifieddiskarray.jl +++ b/src/modifieddiskarray.jl @@ -101,33 +101,33 @@ _scaleoffset_inv(x, ::Nothing, offset) = x - offset _scaleoffset_inv(x, ::Nothing, ::Nothing) = x -function _stack_mods(metadata::Vector, missingval::Vector, maskingval; scaled, coerce) - map(metadata, missingval) do md, mv - scale, offset = _get_scale_offset(md, scaled) - _mod(mv, maskingval, scale, offset, coerce) +function _stack_mods(eltypes::Vector, metadata::Vector, missingval::Vector, maskingval; scaled, coerce) + map(eltypes, metadata, missingval) do T, md, mv + scale, offset = get_scale(md, scaled) + _mod(T, mv, maskingval, scale, offset, coerce) end end -function _stack_mods(metadata::Vector, missingval, maskingval::Vector; scaled::Bool, coerce) - map(metadata, maskingval) do md, mk - scale, offset = _get_scale_offset(md, scaled) - _mod(missingval, mk, scale, offset, coerce) +function _stack_mods(eltypes::Vector, metadata::Vector, missingval, maskingval::Vector; scaled::Bool, coerce) + map(eltypes, metadata, maskingval) do T, md, mk + scale, offset = get_scale(md, scaled) + _mod(T, missingval, mk, scale, offset, coerce) end end -function _stack_mods(metadata::Vector, missingval::Vector, maskingval::Vector; scaled::Bool, coerce) - map(metadata, missingval, maskingval) do md, mv, mk - scale, offset = _get_scale_offset(md, scaled) +function _stack_mods(eltypes::Vector, metadata::Vector, missingval::Vector, maskingval::Vector; scaled::Bool, coerce) + map(eltypes, metadata, missingval, maskingval) do T, md, mv, mk + scale, offset = get_scale(md, scaled) _mod(mv, mk, scale, offset, coerce) end end -function _stack_mods(metadata::Vector, missingval, maskingval; scaled::Bool, coerce) - map(metadata) do md - scale, offset = _get_scale_offset(md, scaled) - _mod(missingval, maskingval, scale, offset, coerce) +function 
_stack_mods(eltypes::Vector, metadata::Vector, missingval, maskingval; scaled::Bool, coerce) + map(eltypes, metadata) do T, md + scale, offset = get_scale(md, scaled) + _mod(T, missingval, maskingval, scale, offset, coerce) end end -function _mod(T, metadata, missingval, maskingval; scaled::Bool, coerce) - scale, offset = _get_scale_offset(metadata, scaled) +function _mod(::Type{T}, metadata, missingval, maskingval; scaled::Bool, coerce) where T + scale, offset = get_scale(metadata, scaled) _mod(T, missingval, maskingval, scale, offset, coerce) end function _mod(::Type{T}, missingval, maskingval, scale, offset, coerce) where T @@ -138,8 +138,8 @@ function _mod(::Type{T}, missingval, maskingval, scale, offset, coerce) where T end end -@inline _get_scale_offset(metadata::NoKW, scaled) = (nothing, nothing) -@inline function _get_scale_offset(metadata, scaled) +@inline get_scale(metadata::NoKW, scaled::Bool) = nothing, nothing +@inline function get_scale(metadata, scaled::Bool) scale = scaled ? get(metadata, "scale", nothing) : nothing offset = scaled ? 
get(metadata, "offset", nothing) : nothing return scale, offset diff --git a/src/nokw.jl b/src/nokw.jl index dae5894ae..89e0828e1 100644 --- a/src/nokw.jl +++ b/src/nokw.jl @@ -5,6 +5,8 @@ struct NoKW end const nokw = NoKW() @inline isnokw(::NoKW) = true @inline isnokw(_) = false +@inline isnokwornothing(::Union{NoKW,Nothing}) = true +@inline isnokwornothing(_) = false _nokw2nothing(::NoKW) = nothing _nokw2nothing(x) = x diff --git a/src/show.jl b/src/show.jl index 670f5566d..634aea6f0 100644 --- a/src/show.jl +++ b/src/show.jl @@ -19,9 +19,8 @@ function print_geo(io, mime, A; blockwidth) DD.print_block_separator(io, "raster", blockwidth) printstyled(io, "\n extent: "; color=:light_black) show(io, mime, Extents.extent(A)) - println(io) if missingval(A) !== nothing - printstyled(io, " missingval: "; color=:light_black) + printstyled(io, "\n missingval: "; color=:light_black) show(io, mime, missingval(A)) end if crs(A) !== nothing diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index 2170a2f20..802e570b4 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -69,8 +69,8 @@ _getgroup(ds, group::Pair) = _getgroup(ds.group[String(group[1])], group[2]) filekey(ds::AbstractDataset, name::Union{String,Symbol}) = Symbol(name) filekey(ds::AbstractDataset, name) = _firstname(ds, name) -missingval(var::AbstractDataset) = missing -missingval(var::AbstractVariable{T}) where T = missing isa T ? missing : nothing +missingval(var::AbstractVariable, args...) 
= get(CDM.attribs(var), "_FillValue", nothing) +missingval(var::AbstractVariable, md::Metadata{<:CDMsource}) = get(md, "_FillValue", nothing) cleanreturn(A::AbstractVariable) = Array(A) haslayers(::CDMsource) = true defaultcrs(::CDMsource) = EPSG(4326) @@ -162,8 +162,6 @@ function _layermetadata(ds::AbstractDataset; layers) end end -_fix_missingval(::CDM.AbstractVariable, ::Nothing, metadata) = get(metadata, "_FillValue", nothing) - # Utils ######################################################################## diff --git a/src/sources/grd.jl b/src/sources/grd.jl index d9fd4be3d..ca898dff2 100644 --- a/src/sources/grd.jl +++ b/src/sources/grd.jl @@ -59,7 +59,12 @@ DiskArrays.haschunks(::GRDdataset) = DiskArrays.Unchunked() function _dims(A::RasterDiskArray{GRDsource}, crs=nokw, mappedcrs=nokw) attrib = A.attrib.attrib - crs = crs isa NoKW ? ProjString(attrib["projection"]) : crs + crs = if crs isa NoKW + str = attrib["projection"] + str == "" ? nothing : ProjString(str) + else + crs + end mappedcrs = mappedcrs isa NoKW ? nothing : mappedcrs xsize, ysize, nbands = size(A) @@ -115,7 +120,7 @@ function _metadata(A::RasterDiskArray{GRDsource}, args...) metadata end -function missingval(A::RasterDiskArray{GRDsource,T}) where T +function missingval(A::RasterDiskArray{GRDsource,T}, args...) where T if haskey(A.attrib.attrib, "nodatavalue") ndv = A.attrib.attrib["nodatavalue"] ndv === "nothing" && return nothing @@ -155,11 +160,14 @@ function Base.write(filename::String, ::GRDsource, A::AbstractRaster; force=false, missingval=nokw, chunks=nokw, + scale=nokw, + offset=nokw, kw... ) + isnokwornothing(scale) && isnokwornothing(offset) || throw(ArgumentError("Cant write scale or offset to .grd files")) chunks isa NoKW || @warn "specifying chunks not supported for .grd files" check_can_write(filename, force) - A = _maybe_use_type_missingval(A, GRDsource(), missingval) + missingval = ismissing(missingval) ? 
RA._writeable_missing(T; verbose) : missingval if hasdim(A, Band) correctedA = permutedims(A, (X, Y, Band)) |> a -> reorder(a, (X(GRD_X_ORDER), Y(GRD_Y_ORDER), Band(GRD_BAND_ORDER))) @@ -194,7 +202,7 @@ function _write_grd(filename, T, dims, missingval, minvalue, maxvalue, name) ncols, nrows = length(x), length(y) xmin, xmax = bounds(x) ymin, ymax = bounds(y) - proj = convert(String, convert(ProjString, crs(x))) + proj = isnothing(crs(x)) ? "" : convert(String, convert(ProjString, crs(x))) datatype = REVGRD_DATATYPE_TRANSLATION[T] nodatavalue = missingval @@ -227,23 +235,23 @@ function _write_grd(filename, T, dims, missingval, minvalue, maxvalue, name) end end -function create(filename, ::GRDsource, T::Type, dims::DD.DimTuple; - name="layer", metadata=nothing, missingval=nothing, lazy=true, -) - # Remove extension - basename = splitext(filename)[1] - minvalue = maxvalue = zero(T) - sze = map(length, DD.dims(dims, (XDim, YDim, Band))) +# function create(filename, ::GRDsource, T::Type, dims::DD.DimTuple; +# name="layer", metadata=nothing, missingval=nothing, lazy=true, +# ) +# # Remove extension +# basename = splitext(filename)[1] +# minvalue = maxvalue = zero(T) +# sze = map(length, DD.dims(dims, (XDim, YDim, Band))) - # Metadata: grd file - _write_grd(basename, T, dims, missingval, minvalue, maxvalue, name) +# # Metadata: grd file +# _write_grd(basename, T, dims, missingval, minvalue, maxvalue, name) - # Data: gri file - open(basename * ".gri", write=true) do IO - write(IO, FillArrays.Zeros(sze)) - end - return Raster(filename; source=GRDsource(), lazy) -end +# # Data: gri file +# open(basename * ".gri", write=true) do IO +# write(IO, FillArrays.Zeros(sze)) +# end +# return Raster(filename; source=GRDsource(), lazy) +# end # AbstractRasterStack methods @@ -253,15 +261,22 @@ function Base.open(f::Function, A::FileArray{GRDsource}, args...; write=A.write) _mmapgrd(mm -> f(RasterDiskArray{GRDsource}(mm, A.eachchunk, A.haschunks)), A; write) end -function _open(f, 
::GRDsource, filename::AbstractString; write=false, kw...) +function _open(f, ::GRDsource, filename::AbstractString; + mod=RA.NoMod(), + write=false, + kw... +) isfile(filename) || _filenotfound_error(filename) attr = GRDdataset(filename) _mmapgrd(attr; write) do mm A = RasterDiskArray{GRDsource}(mm, DA.eachchunk(attr), DA.haschunks(attr), attr) - f(A) + A1 = _maybe_modify(A, mod) + f(A1) end end _open(f, ::GRDsource, attrib::GRDdataset; kw...) = f(attrib) +_open(f, ::GRDsource, A::RasterDiskArray; mod=RA.NoMod(), kw...) = + cleanreturn(f(_maybe_modify(A, mod))) haslayers(::GRDsource) = false diff --git a/src/stack.jl b/src/stack.jl index c94522b83..269a0193d 100644 --- a/src/stack.jl +++ b/src/stack.jl @@ -387,8 +387,7 @@ function RasterStack(filenames::NamedTuple{K,<:Tuple{<:AbstractString,Vararg}}; fn = collect(filenames) layermetadata = layermetadata isa NamedTuple ? collect(layermetadata) : map(_ -> NoKW(), fn) layerdims = layerdims isa NamedTuple ? collect(layerdims) : map(_ -> NoKW(), fn) - mods = _stack_mods(layermetadata, missingval1, maskingval1; scaled, coerce) - layers = map(K, fn, layermetadata, layerdims, mods) do name, fn, md, d, mod + layers = map(K, fn, layermetadata, layerdims) do name, fn, md, d, mod Raster(fn; source=_sourcetrait(fn, source), name, metadata=md, dims=d, mod, kw...) end return RasterStack(NamedTuple{K}(layers); resize, metadata) @@ -538,12 +537,15 @@ function _layer_stack(filename; else layermetadata isa NamedTuple ? 
collect(layermetadata) : map(_ -> NoKW(), fn) end - missingval1 = if missingval isa NamedTuple + missingval1 = if isnokw(missingval) + map(Rasters.missingval, layers.vars, layermetadata1) + elseif missingval isa NamedTuple collect(missingval) else missingval end - mods = _stack_mods(layermetadata1, missingval1, maskingval; scaled, coerce) + eltypes = map(eltype, layers.vars) + mods = _stack_mods(eltypes, layermetadata1, missingval1, maskingval; scaled, coerce) name = Tuple(map(Symbol, layers.names)) NT = NamedTuple{name} data = if lazy @@ -563,12 +565,10 @@ function _layer_stack(filename; nothing elseif isnothing(Rasters.maskingval(mod)) Rasters.missingval(mod) - @show mod Rasters.missingval(mod) else Rasters.maskingval(mod) end end |> NT - @show missingval return data, (; dims, refdims, layerdims, metadata, layermetadata=NT(layermetadata1), missingval) end return RasterStack(data; field_kw..., kw...) diff --git a/src/utils.jl b/src/utils.jl index 56fc3b05b..904c7dd30 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -129,16 +129,18 @@ function _extent2dims(to::Extents.Extent, size::Union{Nothing,NoKW}, res::Tuple; sampling::Tuple, kw... ) ranges = map(values(to), res, sampling) do (start, stop), step, s - if s isa Points + r = if step >= zero(step) range(; start, step, stop) else - r = range(; start, step, stop) + range(; start=stop, step, stop=start) + end + r = if s isa Intervals if locus(s) isa Start r[1:end-1] elseif locus(s) isa End r[2:end] else # Center - r .+ step / 2 + r .+ abs(step) / 2 end end end @@ -161,8 +163,8 @@ end function _extent2dims(::Extents.Extent{K}, ranges; crs, sampling::Tuple) where K crs = isnokw(crs) ? 
nothing : crs emptydims = map(name2dim, K) - order = ForwardOrdered() lookups = map(emptydims, ranges, sampling) do d, range, s + order = Lookups.orderof(range) span = Regular(step(range)) if d isa SpatialDim && !isnothing(crs) Projected(range; sampling=s, order, span, crs) @@ -244,14 +246,14 @@ struct IterableOfGeometries end # Chunking # NoKW means true -@inline function _chunks_to_tuple(template, dims, chunks::Bool) +@inline function _chunks_to_tuple(template, dimorder, chunks::Bool) if chunks == true if template isa AbstractArray && DA.haschunks(template) == DA.Chunked() # Get chunks from the template DA.max_chunksize(DA.eachchunk(template)) else # Use defaults - _chunks_to_tuple(template, dims, (X(512), Y(512))) + _chunks_to_tuple(template, dimorder, (X(512), Y(512))) end else nothing diff --git a/src/write.jl b/src/write.jl index 03e9644e8..5e3984c9c 100644 --- a/src/write.jl +++ b/src/write.jl @@ -35,10 +35,12 @@ Other keyword arguments are passed to the `write` method for the backend. $FORCE_KEYWORD - `driver`: A GDAL driver name `String` or a GDAL driver retrieved via `ArchGDAL.getdriver(drivername)`. By default `driver` is guessed from the filename extension. + - `options::Dict{String,String}`: A dictionary containing the dataset creation options passed to the driver. For example: `Dict("COMPRESS" => "DEFLATE")`. -Valid `options` for each specific `driver` can be found at: https://gdal.org/drivers/raster/index.html +Valid `driver` names and the `options` for each can be found at: +[https://gdal.org/drivers/raster/index.html](https://gdal.org/drivers/raster/index.html) ## Source comments @@ -83,7 +85,7 @@ function Base.write( filename::AbstractString, source::Source, A::Union{AbstractRaster,AbstractRasterStack}; kw... ) missing_package = SOURCE2PACKAGENAME[source] - error("Missing package extension for $source. Run `using $missing_package` before useing `write` for this file.") + error("Missing package extension for $source. 
Run `using $missing_package` before using `write` for this file extension.") end """ @@ -196,10 +198,12 @@ end haslayers(T) = false # This is used in source `write` methods +check_can_write(filename::Union{Nothing,NoKW}, force) = true function check_can_write(filename, force) if !check_can_write(Bool, filename, force) throw(ArgumentError("filename already exists at $filename. use the keyword `force=true` to write anyway")) end return true end +check_can_write(::Type{Bool}, filename::Union{Nothing,NoKW}, force) = true check_can_write(::Type{Bool}, filename, force) = (force || !isfile(filename)) diff --git a/test/array.jl b/test/array.jl index f9c3c9d07..e7c4ab231 100644 --- a/test/array.jl +++ b/test/array.jl @@ -1,6 +1,7 @@ using Rasters, Test, Dates, DiskArrays using Rasters.Lookups, Rasters.Dimensions using Rasters: isdisk, ismem, filename +using ArchGDAL data1 = cumsum(cumsum(ones(10, 11); dims=1); dims=2) data2 = 2cumsum(cumsum(ones(10, 11, 1); dims=1); dims=2) @@ -77,7 +78,7 @@ end @testset "collect and Array" begin - @test collect(ga1) isa Array + @test collect(ga1) isa Array{Float64,2} @test collect(ga1) == data1 @test Array(ga1) isa Array{Float64,2} @test Array(ga1) == data1 diff --git a/test/create.jl b/test/create.jl new file mode 100644 index 000000000..3eaf98058 --- /dev/null +++ b/test/create.jl @@ -0,0 +1,176 @@ +using Rasters, Test, Dates, DiskArrays, Extents, ArchGDAL, NCDatasets +using Rasters.Lookups, Rasters.Dimensions +using Rasters: isdisk, ismem, filename + +@testset "create Raster" begin + rast = Rasters.create(Int32, Extents.Extent(X=(0, 10), Y=(0, 5)); + size=(1024, 1024), + crs=EPSG(4326), + chunks=(X=128, Y=128), + force=true, + name=:testname + ) + @test crs(rast) == EPSG(4326) + @test size(rast) == (1024, 1024) + @test Rasters.name(rast) == :testname + @test missingval(rast) === nothing + @test ispoints(rast) + + rast = @test_nowarn Rasters.create(Float64, Extents.Extent(X=(0, 10), Y=(0, 5), Ti=(DateTime(2001), DateTime(2002))); + 
res=(X=0.2, Y=0.1, Ti=Month(1)), + crs=EPSG(4326), + force=true, + sampling=Intervals(Start()), + name=:testname, + missingval=missing, + reverse_y=false, + ) + @test crs(rast) == EPSG(4326) + @test size(rast) == (50, 50, 12) + @test Rasters.name(rast) == :testname + @test missingval(rast) === missing + @test isintervals(rast) + @test map(step, lookup(rast)) == (0.2, 0.1, Month(1)) + + D = (Ti(DateTime(2000):Month(1):DateTime(2000, 12); sampling=Intervals(Start())), X(0.0:0.01:10.0), Y(0.0:0.01:10)) + rast = Rasters.create(Int32, D; fill=1, missingval=missing, crs=EPSG(4326), name=:testname) + map(length, Rasters.dims(rast)) + @test crs(rast) == EPSG(4326) + @test size(rast) == (12, 1001, 1001) + @test Rasters.name(rast) == :testname + @test missingval(rast) === missing + @test isintervals(rast, Ti) + @test ispoints(rast, (X, Y)) + @test map(step, lookup(rast)) == (Month(1), 0.01, 0.01) + @test all(x -> x === Int32(1), rast) + + rast1 = Rasters.create(rast) + @test dims(rast1) == dims(rast) + @test eltype(rast1) == eltype(rast) +end + + +@testset "create RasterStack" begin + st = Rasters.create((a=Int32, b=Float64, c=Bool), Extents.Extent(X=(0, 10), Y=(0, 5)); + size=(X=1024, Y=1024), + sampling=(X=Points(), Y=Intervals()), + crs=EPSG(4326), + force=true, + verbose=false, + missingval=(a=Int32(-9999), b=Float64(-9999), c=false), + fill=(a=Int32(-9999), b=0, c=false), + ) + @test crs(st) == EPSG(4326) + @test size(st) == (1024, 1024) + @test Rasters.name(st) == (:a, :b, :c) + @test eltype(st) === @NamedTuple{a::Int32,b::Float64, c::Bool} + @test missingval(st) === (a=Int32(-9999), b=-9999.0, c=false) + @test ispoints(st, X) + @test isintervals(st, Y) + @test all(x -> x === Int32(-9999), st.a) + @test all(x -> x === 0.0, st.b) + @test all(x -> x === false, st.c) + + st2 = Rasters.create((a=UInt8, b=Float32), st; + layerdims=(a=(X(), Y()), b=(Y(),)), + missingval=(a=UInt8(0), b=1.0f0) + ) + @test basedims(st2.a) == (X(), Y()) + @test basedims(st2.b) == (Y(),) + @test 
eltype(st2) === @NamedTuple{a::UInt8, b::Float32} + @test missingval(st2) === (a=UInt8(0), b=1.0f0) + + + @testset "from template with new dims" begin + st1 = Rasters.create(st; + layerdims=(a=(X, Y), b=(Y,), c=(X,)), + ) + @test crs(st1) == EPSG(4326) + @test size(st1) == (1024, 1024) + @test Rasters.name(st1) == (:a, :b, :c) + @test missingval(st1) === (a=Int32(-9999), b=-9999.0, c=false) + @test ispoints(st1, X) + @test isintervals(st1, Y) + @test basedims(st1.a) == (X(), Y()) + @test basedims(st1.b) == (Y(),) + @test basedims(st1.c) == (X(),) + end + @testset "from template with new layers" begin + st1 = Rasters.create((c=UInt8, d=Int16), st; + missingval=(c=0x00, d=Int16(1)), + ) + @test crs(st1) == EPSG(4326) + @test size(st1) == (1024, 1024) + @test Rasters.name(st1) == (:c, :d) + @test eltype(st1) == @NamedTuple{c::UInt8,d::Int16} + @test missingval(st1) === (c=0x00, d=Int16(1)) + end + @testset "from template with new dims and layers" begin + st1 = Rasters.create((c=UInt8, d=Int16), st; + layerdims=(c=(X, Y), d=(Y,)), + missingval=(c=UInt8(0), d=Int16(1)), + ) + @test crs(st1) == EPSG(4326) + @test size(st1) == (1024, 1024) + @test Rasters.name(st1) == (:c, :d) + @test missingval(st1) === (c=0x00, d=Int16(1)) + end +end + +ext = ".nc" +for ext in (".nc", ".tif", ".grd") + @testset "create $ext" begin + fn = "created$ext" + created = Rasters.create(fn, UInt8, (X(1:10), Y(1:10)); + missingval=0xff, + fill=0x01, + force=true + ) + @test all(Raster(fn; maskingval=nothing) .=== 0x01) + @test missingval(created) === 0xff + + if ext == ".grd" + created = Rasters.create(fn, Int16, (X(1:10), Y(1:10)); + missingval=typemax(Int16), + force=true, + ); + open(created; write=true) do O + O .= 2 + nothing + end + @test all(Raster(fn) .=== Int16(2)) + @test missingval(Raster(fn; maskingval=nothing)) === typemax(Int16) + else + @time created = Rasters.create(fn, Int16, (X(1:10), Y(1:10)); + missingval=typemax(Int16), + scale=0.1, + offset=5.0, + force=true, + ); + 
open(created; write=true) do O + O .= 2 + nothing + end + @test all(Raster(fn) .=== 2.0) + @test all(Raster(fn; scaled=false) .== Int16(-30)) + @test missingval(Raster(fn; maskingval=nothing, scaled=false)) === typemax(Int16) + end + end +end + + +@testset "create .nc stack" begin + created = Rasters.create("created.nc", (a=UInt8, b=Float32), (X(1:10), Y(1:10)); + missingval=(a=0xff, b=typemax(Float32)), + fill=(a=0x01, b=1.0f0), + layerdims=(a=(X,), b=(X, Y)), + force=true, + ) + @test missingval(created) == (a=0xff, b=typemax(Float32)) + @test size(created.a) == (10,) + @test size(created.b) == (10, 10) + @test all(created.a .=== 0x01) + @test all(created.b .=== 1.0f0) + st = RasterStack("created.nc"; maskingval=nothing) + @test missingval(st) == (a=0xff, b=typemax(Float32)) +end diff --git a/test/runtests.jl b/test/runtests.jl index 745e9b825..34b8a6354 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,45 +1,48 @@ using Rasters, Test, Aqua, SafeTestsets -@testset "Aqua" begin - # Aqua.test_ambiguities([Rasters, Base, Core]) - Aqua.test_unbound_args(Rasters) - Aqua.test_stale_deps(Rasters) - Aqua.test_undefined_exports(Rasters) - Aqua.test_project_extras(Rasters) - # Aqua.test_deps_compat(Rasters) # This breaks GDAL downstream tests - # Aqua.test_project_toml_formatting(Rasters) # This seems to change between versions for extensions +@testset "Rasters" begin + @testset "Aqua" begin + # Aqua.test_ambiguities([Rasters, Base, Core]) + Aqua.test_unbound_args(Rasters) + Aqua.test_stale_deps(Rasters) + Aqua.test_undefined_exports(Rasters) + Aqua.test_project_extras(Rasters) + # Aqua.test_deps_compat(Rasters) # This breaks GDAL downstream tests + # Aqua.test_project_toml_formatting(Rasters) # This seems to change between versions for extensions + end + @time @safetestset "extensions" begin include("extensions.jl") end -end + @time @safetestset "methods" begin include("methods.jl") end + @time @safetestset "array" begin include("array.jl") end + @time 
@safetestset "stack" begin include("stack.jl") end + @time @safetestset "series" begin include("series.jl") end + @time @safetestset "create" begin include("create.jl") end + @time @safetestset "utils" begin include("utils.jl") end + @time @safetestset "set" begin include("set.jl") end + @time @safetestset "aggregate" begin include("aggregate.jl") end + @time @safetestset "rasterize" begin include("rasterize.jl") end + @time @safetestset "adapt" begin include("adapt.jl") end + @time @safetestset "reproject" begin include("reproject.jl") end + @time @safetestset "warp" begin include("warp.jl") end + @time @safetestset "resample" begin include("resample.jl") end + @time @safetestset "cellsize" begin include("cellsize.jl") end -@time @safetestset "methods" begin include("methods.jl") end -@time @safetestset "array" begin include("array.jl") end -@time @safetestset "stack" begin include("stack.jl") end -@time @safetestset "series" begin include("series.jl") end -@time @safetestset "utils" begin include("utils.jl") end -@time @safetestset "set" begin include("set.jl") end -@time @safetestset "aggregate" begin include("aggregate.jl") end -@time @safetestset "rasterize" begin include("rasterize.jl") end -@time @safetestset "adapt" begin include("adapt.jl") end -@time @safetestset "reproject" begin include("reproject.jl") end -@time @safetestset "warp" begin include("warp.jl") end -@time @safetestset "resample" begin include("resample.jl") end -@time @safetestset "cellsize" begin include("cellsize.jl") end + # CommondataModel sources + @time @safetestset "ncdatasets" begin include("sources/ncdatasets.jl") end + if !Sys.iswindows() + # GRIBDatasets doesn't work on Windows for now + @time @safetestset "gribdatasets" begin include("sources/gribdatasets.jl") end + end -# CommondataModel sources -@time @safetestset "ncdatasets" begin include("sources/ncdatasets.jl") end -if !Sys.iswindows() - # GRIBDatasets doesn't work on Windows for now - @time @safetestset "gribdatasets" 
begin include("sources/gribdatasets.jl") end -end - -# Only test SMAP locally for now, also RasterDataSources because CI downloads keep breaking -if !haskey(ENV, "CI") - @time @safetestset "rasterdatasources" begin include("sources/rasterdatasources.jl") end -end + # Only test SMAP locally for now, also RasterDataSources because CI downloads keep breaking + if !haskey(ENV, "CI") + @time @safetestset "rasterdatasources" begin include("sources/rasterdatasources.jl") end + end -if !Sys.iswindows() - # GDAL Environment vars need to be set manually for windows, so skip for now - @time @safetestset "gdal" begin include("sources/gdal.jl") end - @time @safetestset "grd" begin include("sources/grd.jl") end + if !Sys.iswindows() + # GDAL Environment vars need to be set manually for windows, so skip for now + @time @safetestset "gdal" begin include("sources/gdal.jl") end + @time @safetestset "grd" begin include("sources/grd.jl") end + end + @time @safetestset "plot recipes" begin include("plotrecipes.jl") end end -@time @safetestset "plot recipes" begin include("plotrecipes.jl") end diff --git a/test/sources/gdal.jl b/test/sources/gdal.jl index 1cb86f436..493443ed3 100644 --- a/test/sources/gdal.jl +++ b/test/sources/gdal.jl @@ -80,23 +80,6 @@ gdalpath = maybedownload(url) @test A == A2 == A3 end - @testset "create" begin - created = Rasters.create("created.tif", Int16, (X(1:10), Y(1:10)); - missingval=255, maskingval=missing, scale=0.1, offset=5.0, force=true, coerce=trunc - ) - open(created; write=true) do O - O .= 2.0 - end - read(created) - @test all(Raster("created.tif") .== 2.0) - @test all(Raster("created.tif"; scaled=false) .=== -30) - created = Rasters.create("created.tif", UInt8, (X(1:10), Y(1:10)); - missingval=255, maskingval=UInt8(0), force=true - ) - read(created) - rm("created.tif") - end - @testset "custom filename" begin gdal_custom = replace(gdalpath, "tif" => "foo") cp(gdalpath, gdal_custom, force=true) From a7d9b190214d23f7c1104a21d3fd03ccd7446abd Mon Sep 
17 00:00:00 2001 From: rafaqz Date: Mon, 12 Aug 2024 00:43:26 +0200 Subject: [PATCH 10/38] bugfix GRIB --- .../gribdatasets_source.jl | 11 +++++---- ext/RastersNCDatasetsExt/ncdatasets_source.jl | 24 +++++++++++-------- src/methods/rasterize.jl | 8 ++++--- src/sources/commondatamodel.jl | 20 ++++++++++------ src/stack.jl | 3 ++- src/utils.jl | 13 +++++----- test/sources/gribdatasets.jl | 7 ++++-- 7 files changed, 53 insertions(+), 33 deletions(-) diff --git a/ext/RastersGRIBDatasetsExt/gribdatasets_source.jl b/ext/RastersGRIBDatasetsExt/gribdatasets_source.jl index 25df03172..f3824cbda 100644 --- a/ext/RastersGRIBDatasetsExt/gribdatasets_source.jl +++ b/ext/RastersGRIBDatasetsExt/gribdatasets_source.jl @@ -1,4 +1,5 @@ const GDS = GRIBDatasets +const CDM = CommonDataModel function RA.OpenStack(fs::RA.FileStack{GRIBsource,K}) where K RA.OpenStack{GRIBsource,K}(GDS.GRIBDataset(RA.filename(fs))) @@ -13,9 +14,11 @@ function RA._open(f, ::GRIBsource, filename::AbstractString; write=false, kw...) RA._open(f, GRIBsource(), ds; kw...) end -# Hack to get the inner DiskArrays chunks as they are not exposed at the top level -RA._get_eachchunk(var::GDS.Variable) = DiskArrays.eachchunk(var.values) -RA._get_haschunks(var::GDS.Variable) = DiskArrays.haschunks(var.values) - RA._sourcetrait(::GDS.Variable) = GRIBsource() RA._sourcetrait(::GDS.GRIBDataset) = GRIBsource() + +function RA.missingval(var::GDS.Variable{T}, args...) 
where T + mv = GDS.missing_value(var) + T1 = promote_type(typeof(mv), T) + return T1(mv) +end diff --git a/ext/RastersNCDatasetsExt/ncdatasets_source.jl b/ext/RastersNCDatasetsExt/ncdatasets_source.jl index 434de5060..00f86229a 100644 --- a/ext/RastersNCDatasetsExt/ncdatasets_source.jl +++ b/ext/RastersNCDatasetsExt/ncdatasets_source.jl @@ -49,10 +49,11 @@ function Base.write(filename::AbstractString, ::NCDsource, s::AbstractRasterStac return filename end +Base.close(os::RA.OpenStack{NCDsource}) = NCD.close(RA.dataset(os)) + function RA.OpenStack(fs::RA.FileStack{NCDsource,K}) where K RA.OpenStack{NCDsource,K}(NCD.Dataset(RA.filename(fs))) end -Base.close(os::RA.OpenStack{NCDsource}) = NCD.close(RA.dataset(os)) function RA._open(f, ::NCDsource, filename::AbstractString; write=false, kw...) isfile(filename) || RA._isurl(filename) || RA._filenotfound_error(filename) @@ -62,6 +63,18 @@ function RA._open(f, ::NCDsource, filename::AbstractString; write=false, kw...) end end +RA._sourcetrait(::NCD.Dataset) = NCDsource() +RA._sourcetrait(::NCD.Variable) = NCDsource() + +@inline function RA.get_scale(metadata::Metadata{NCDsource}, scaled::Bool) + scale = scaled ? get(metadata, "scale_factor", nothing) : nothing + offset = scaled ? get(metadata, "add_offset", nothing) : nothing + return scale, offset +end + +RA.missingval(var::NCD.Variable, args...) = missingval(CDM.attribs(var), "_FillValue", nothing) +RA.missingval(var::NCD.Variable, md::Metadata{<:NCDsource}) = get(md, "_FillValue", nothing) + # Add a var array to a dataset before writing it. function _writevar!(ds::AbstractDataset, A::AbstractRaster{T,N}; verbose=true, @@ -150,15 +163,6 @@ function _def_dim_var!(ds::AbstractDataset, dim::Dimension) return nothing end -RA._sourcetrait(::NCD.Dataset) = NCDsource() -RA._sourcetrait(::NCD.Variable) = NCDsource() - -@inline function RA.get_scale(metadata::Metadata{NCDsource}, scaled::Bool) - scale = scaled ? 
get(metadata, "scale_factor", nothing) : nothing - offset = scaled ? get(metadata, "add_offset", nothing) : nothing - return scale, offset -end - # precompilation # const _NCDVar = NCDatasets.CFVariable{Union{Missing, Float32}, 3, NCDatasets.Variable{Float32, 3, NCDatasets.NCDataset}, NCDatasets.Attributes{NCDatasets.NCDataset{Nothing}}, NamedTuple{(:fillvalue, :scale_factor, :add_offset, :calendar, :time_origin, :time_factor), Tuple{Float32, Nothing, Nothing, Nothing, Nothing, Nothing}}} diff --git a/src/methods/rasterize.jl b/src/methods/rasterize.jl index c391018d2..3cebe1b38 100644 --- a/src/methods/rasterize.jl +++ b/src/methods/rasterize.jl @@ -30,7 +30,7 @@ _reduce_init(::typeof(sum), ::Type{T}, missingval) where T = zero(nonmissingtype _reduce_init(::typeof(prod), ::Type{T}, missingval) where T = oneunit(nonmissingtype(T)) _reduce_init(::typeof(minimum), ::Type{T}, missingval) where T = typemax(nonmissingtype(T)) _reduce_init(::typeof(maximum), ::Type{T}, missingval) where T = typemin(nonmissingtype(T)) -_reduce_init(::typeof(last), ::Type{T}, missingval) where T = _maybe_nothing_to_missing(missingval) +_reduce_init(::typeof(last), ::Type{T}, missingval) where T = _maybe_to_missing(missingval) struct FillChooser{F,I,M} fill::F @@ -73,10 +73,12 @@ RasterCreator(to, data; kw...) = RasterCreator(_extent(to); kw...) function RasterCreator(to::Extents.Extent; res::Union{Nothing,Real,NTuple{<:Any,<:Real}}=nothing, size::Union{Nothing,Int,NTuple{<:Any,Int}}=nothing, + crs=nokw, + mappedcrs=nokw, kw... ) - to_as_dims = _extent2dims(to; size, res) - return RasterCreator(to_as_dims; kw...) + to_as_dims = _extent2dims(to; size, res, crs, mappedcrs) + return RasterCreator(to_as_dims; crs, mappedcrs, kw...) 
end diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index 802e570b4..f627a1cbf 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -53,10 +53,18 @@ function FileStack{source}(ds::AbstractDataset, filename::AbstractString; end function _open(f, ::CDMsource, ds::AbstractDataset; - name=nokw, group=nothing, mod=NoMod(), kw... + name=nokw, + group=nothing, + mod=NoMod(), + kw... ) g = _getgroup(ds, group) - x = isnokw(name) ? g : _maybe_modify(CDM.variable(g, _firstname(g, name)), mod) + x = if isnokw(name) + g + else + v = CDM.variable(g, string(_name_or_firstname(g, name))) + _maybe_modify(v, mod) + end return cleanreturn(f(x)) end _open(f, ::CDMsource, var::AbstractArray; mod=NoMod(), kw...) = @@ -68,9 +76,7 @@ _getgroup(ds, group::Union{Symbol,AbstractString}) = ds.group[String(group)] _getgroup(ds, group::Pair) = _getgroup(ds.group[String(group[1])], group[2]) filekey(ds::AbstractDataset, name::Union{String,Symbol}) = Symbol(name) -filekey(ds::AbstractDataset, name) = _firstname(ds, name) -missingval(var::AbstractVariable, args...) 
= get(CDM.attribs(var), "_FillValue", nothing) -missingval(var::AbstractVariable, md::Metadata{<:CDMsource}) = get(md, "_FillValue", nothing) +filekey(ds::AbstractDataset, name) = _name_or_firstname(ds, name) cleanreturn(A::AbstractVariable) = Array(A) haslayers(::CDMsource) = true defaultcrs(::CDMsource) = EPSG(4326) @@ -166,8 +172,8 @@ end # Utils ######################################################################## # TODO don't load all keys here with _layers -_firstname(ds::AbstractDataset, name) = Symbol(name) -function _firstname(ds::AbstractDataset, name::Union{Nothing,NoKW}=nokw) +_name_or_firstname(ds::AbstractDataset, name) = Symbol(name) +function _name_or_firstname(ds::AbstractDataset, name::Union{Nothing,NoKW}=nokw) names = _nondimnames(ds) if length(names) > 0 return Symbol(first(names)) diff --git a/src/stack.jl b/src/stack.jl index 269a0193d..6a6f103c4 100644 --- a/src/stack.jl +++ b/src/stack.jl @@ -544,8 +544,9 @@ function _layer_stack(filename; else missingval end + maskingval1 = isnokw(maskingval) && !isnothing(missingval1) ? missing : maskingval eltypes = map(eltype, layers.vars) - mods = _stack_mods(eltypes, layermetadata1, missingval1, maskingval; scaled, coerce) + mods = _stack_mods(eltypes, layermetadata1, missingval1, maskingval1; scaled, coerce) name = Tuple(map(Symbol, layers.names)) NT = NamedTuple{name} data = if lazy diff --git a/src/utils.jl b/src/utils.jl index 904c7dd30..b50478612 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -113,10 +113,10 @@ end # Extents function _extent2dims(to::Extents.Extent; - size=nokw, res=nokw, crs=nokw, sampling=nokw, + size=nokw, res=nokw, crs=nokw, mappedcrs=nokw, sampling=nokw, ) sampling = _match_to_extent(to, isnokw(sampling) ? Intervals(Start()) : sampling) - _extent2dims(to, size, res; crs, sampling) + _extent2dims(to, size, res; crs, mappedcrs, sampling) end function _extent2dims(to::Extents.Extent, size::Union{Nothing,NoKW}, res::Union{Nothing,NoKW}; kw...) 
throw(ArgumentError("Pass either `size` or `res` keywords or a `Tuple` of `Dimension`s for `to`.")) @@ -149,7 +149,7 @@ end _extent2dims(to::Extents.Extent, size, res::Union{Nothing,NoKW}; kw...) = _extent2dims(to, _match_to_extent(to, size), res; kw...) function _extent2dims(to::Extents.Extent, size::Tuple, res::Union{Nothing,NoKW}; - sampling::Tuple, crs + sampling::Tuple, crs, mappedcrs ) ranges = map(values(to), size, sampling) do (start, stop), length, s if s isa Points @@ -158,16 +158,17 @@ function _extent2dims(to::Extents.Extent, size::Tuple, res::Union{Nothing,NoKW}; range(; start, stop, length=length+1)[1:end-1] end end - return _extent2dims(to, ranges; sampling, crs) + return _extent2dims(to, ranges; sampling, crs, mappedcrs) end -function _extent2dims(::Extents.Extent{K}, ranges; crs, sampling::Tuple) where K +function _extent2dims(::Extents.Extent{K}, ranges; crs, mappedcrs, sampling::Tuple) where K crs = isnokw(crs) ? nothing : crs + mappedcrs = isnokw(mappedcrs) ? nothing : mappedcrs emptydims = map(name2dim, K) lookups = map(emptydims, ranges, sampling) do d, range, s order = Lookups.orderof(range) span = Regular(step(range)) if d isa SpatialDim && !isnothing(crs) - Projected(range; sampling=s, order, span, crs) + Projected(range; sampling=s, order, span, crs, mappedcrs) else Sampled(range; sampling=s, order, span) end diff --git a/test/sources/gribdatasets.jl b/test/sources/gribdatasets.jl index 3779e68d7..255984b3d 100644 --- a/test/sources/gribdatasets.jl +++ b/test/sources/gribdatasets.jl @@ -25,6 +25,9 @@ gribexamples_dir = abspath(joinpath(dirname(pathof(GRIBDatasets)), "..", "test", era5 = joinpath(gribexamples_dir, "era5-levels-members.grib") +ds = GRIBDatasets.GRIBDataset(era5) +v = ds[:z] + @testset "Raster" begin @time gribarray = Raster(era5) @time lazyarray = Raster(era5; lazy=true) @@ -87,8 +90,8 @@ era5 = joinpath(gribexamples_dir, "era5-levels-members.grib") end @testset "cf attributes" begin - z = lazystack[:z] - @test 
metadata(z)["standard_name"] == "geopotential" + z = lazystack.z + @test metadata(z)["cfName"] == "geopotential" @test metadata(lazystack)["Conventions"] == "CF-1.7" x = dims(lazystack, :X) From ee50b637844dd2918a34db5a690b3a5ffe924296 Mon Sep 17 00:00:00 2001 From: rafaqz Date: Tue, 13 Aug 2024 11:35:36 +0200 Subject: [PATCH 11/38] bugfixes --- ext/RastersArchGDALExt/gdal_source.jl | 55 ++++---- ext/RastersNCDatasetsExt/ncdatasets_source.jl | 62 +++++++-- src/create.jl | 54 ++++---- src/methods/mosaic.jl | 46 +++++-- src/modifieddiskarray.jl | 129 ++++++++++++++---- src/sources/commondatamodel.jl | 8 +- src/sources/grd.jl | 100 ++++++++------ src/stack.jl | 37 ++--- src/utils.jl | 5 +- src/write.jl | 63 +++++++-- test/sources/gdal.jl | 42 +++--- test/sources/grd.jl | 62 +++++---- test/sources/ncdatasets.jl | 48 +++---- 13 files changed, 452 insertions(+), 259 deletions(-) diff --git a/ext/RastersArchGDALExt/gdal_source.jl b/ext/RastersArchGDALExt/gdal_source.jl index 1aff1971f..ad4a092ae 100644 --- a/ext/RastersArchGDALExt/gdal_source.jl +++ b/ext/RastersArchGDALExt/gdal_source.jl @@ -46,28 +46,39 @@ function Base.write(filename::AbstractString, ::GDALsource, A::AbstractRasterSta ext = splitext(filename)[2] throw(ArgumentError("Cant write a RasterStack to $ext with gdal")) end -function Base.write( - filename::AbstractString, ::GDALsource, A::AbstractRaster{T}; +function Base.write(filename::AbstractString, ::GDALsource, A::AbstractRaster{T}; force=false, verbose=true, + write=true, missingval=nokw, - maskingval=RA.missingval(A), + maskingval=nokw, scale=nokw, offset=nokw, coerce=nokw, eltype=Missings.nonmissingtype(T), - write=true, kw... ) where T RA.check_can_write(filename, force) - A1 = _maybe_correct_to_write(A) - mod = RA._writer_mod(eltype; missingval, maskingval, scale, offset, coerce) - _create_with_driver(filename, dims(A1), T; + A1 = _maybe_permute_to_gdal(A) + + # Missing values + maskingval = isnokw(maskingval) ? 
RA.missingval(A) : maskingval + missingval = isnokw(missingval) ? maskingval : missingval + missingval = if ismissing(missingval) + # See if there is a missing value in metadata + # But only use it if its the right type + RA._writeable_missing(eltype; verbose=true) + else + missingval + end + + _create_with_driver(filename, dims(A1), eltype; missingval, _block_template=A1, scale, offset, verbose, kw... - ) do dataset + ) do dataset if write + mod = RA._writer_mod(eltype; missingval, maskingval, scale, offset, coerce) open(A1; write=true) do O - RA._maybe_modify(AG.RasterDataset(dataset), mod) .= parent(O) + AG.RasterDataset(dataset) .= RA._maybe_modify(parent(O), mod; invert=true) end end end @@ -96,8 +107,7 @@ function RA._open(f, ::GDALsource, filename::AbstractString; end flags = write ? AG.OF_UPDATE : AG.OF_READONLY return AG.readraster(filename; flags) do A - A1 = RA._maybe_modify(A, mod) - RA.cleanreturn(f(A1)) + RA.cleanreturn(f(RA._maybe_modify(A, mod))) end end RA._open(f, ::GDALsource, A::AG.RasterDataset; mod=RA.NoMod(), kw...) = @@ -300,7 +310,7 @@ function AG.RasterDataset(f::Function, A::AbstractRaster; maskingval=Rasters.missingval(A), kw... ) - A1 = _maybe_correct_to_write(A) + A1 = _maybe_permute_to_gdal(A) return _create_with_driver(filename, dims(A1), eltype; _block_template=A1, missingval, scale, offset, verbose, kw... ) do dataset @@ -335,19 +345,11 @@ function _missingval_from_gdal(T::Type{<:Integer}, x::Integer; verbose=true) end _missingval_from_gdal(T, x) = x -# Fix array and dimension configuration before writing with GDAL -_maybe_correct_to_write(A::AbstractDimArray, args...) = - _maybe_correct_to_write(lookup(A, X()), A, args...) -_maybe_correct_to_write(::Lookup, A::AbstractDimArray, args...) = A -function _maybe_correct_to_write( - lookup::Union{AbstractSampled,NoLookup}, A::AbstractDimArray, args... -) - _maybe_permute_to_gdal(A) +function _check_driver(::Nothing, driver) + isnokwornothing(driver) || isempty(driver) ? 
"MEM" : driver end - -_check_driver(filename::Nothing, driver) = "MEM" function _check_driver(filename::AbstractString, driver) - if isempty(driver) + if isnokwornothing(driver) || isempty(driver) if isempty(filename) driver = "MEM" else @@ -365,15 +367,16 @@ end function _create_with_driver(f, filename, dims::Tuple, T; verbose=true, missingval=nokw, - options=Dict{String,String}(), - driver="", - _block_template=nothing, + options=nokw, + driver=nokw, chunks=nokw, scale=nokw, offset=nokw, + _block_template=nothing, kw... ) verbose && _maybe_warn_south_up(dims, verbose, "Creating a South-up raster. You may wish to reverse the `Y` dimension to use conventional North-up") + options = isnokwornothing(options) ? Dict{String,String}() : options missingval = ismissing(missingval) ? RA._writeable_missing(T; verbose) : missingval _gdal_validate(dims) diff --git a/ext/RastersNCDatasetsExt/ncdatasets_source.jl b/ext/RastersNCDatasetsExt/ncdatasets_source.jl index 00f86229a..d9690398b 100644 --- a/ext/RastersNCDatasetsExt/ncdatasets_source.jl +++ b/ext/RastersNCDatasetsExt/ncdatasets_source.jl @@ -28,6 +28,7 @@ function Base.write(filename::AbstractString, ::NCDsource, s::AbstractRasterStac append=false, force=false, missingval=nokw, + maskingval=nokw, kw... ) mode = if append @@ -37,11 +38,16 @@ function Base.write(filename::AbstractString, ::NCDsource, s::AbstractRasterStac "c" end ds = NCD.Dataset(filename, mode; attrib=RA._attribdict(metadata(s))) + + maskingval = RA._stack_nt(s, isnokw(maskingval) ? Rasters.missingval(s) : maskingval) + missingval = RA._stack_missingvals(s, isnokw(missingval) ? maskingval : missingval) try - if missingval isa NamedTuple - map(k -> _writevar!(ds, s[k]; missingval=missingval[k], kw...), keys(s)) - else - map(k -> _writevar!(ds, s[k]; missingval, kw...), keys(s)) + map(keys(s)) do k + _writevar!(ds, s[k]; + missingval=missingval[k], + maskingval=maskingval[k], + kw... 
+ ) end finally close(ds) @@ -72,8 +78,27 @@ RA._sourcetrait(::NCD.Variable) = NCDsource() return scale, offset end -RA.missingval(var::NCD.Variable, args...) = missingval(CDM.attribs(var), "_FillValue", nothing) -RA.missingval(var::NCD.Variable, md::Metadata{<:NCDsource}) = get(md, "_FillValue", nothing) +RA.missingval(var::NCD.Variable, args...) = _mv(CDM.attribs(var)) +RA.missingval(var::NCD.Variable, md::Metadata{<:NCDsource}) = _mv(md) + +# TODO: handle multiple missing values +function _mv(md) + fv = get(md, "_FillValue", nothing) + mv = get(md, "missing_value", nothing) + if isnothing(fv) + if mv isa Vector + length(mv) > 1 && @warn "'missing_value' $mv has multiple values. Currently we only uses the first." + return first(mv) + else + return mv + end + else + if isnothing(mv) + fv == mv || @warn "Both '_FillValue' $fv and 'missing_value' $mv were found. Currently we only use the first." + end + return fv + end +end # Add a var array to a dataset before writing it. function _writevar!(ds::AbstractDataset, A::AbstractRaster{T,N}; @@ -89,6 +114,8 @@ function _writevar!(ds::AbstractDataset, A::AbstractRaster{T,N}; eltype=Missings.nonmissingtype(T), write=true, name=DD.name(A), + options=nokw, + driver=nokw, kw... ) where {T,N} eltype <: NCDAllowedType || throw(ArgumentError(""" @@ -97,7 +124,25 @@ function _writevar!(ds::AbstractDataset, A::AbstractRaster{T,N}; """ )) _def_dim_var!(ds, A) - metadata = isnokw(metadata) ? NoMetadata() : metadata + metadata = if isnokw(metadata) + DD.metadata(A) + elseif isnothing(metadata) + NoMetadata() + else + metadata + end + + maskingval = isnokw(maskingval) ? RA.missingval(A) : maskingval + missingval = isnokw(missingval) ? RA.missingval(A) : missingval + missingval = if ismissing(missingval) + # See if there is a missing value in metadata + mv = _mv(metadata) + # But only use it if its the right type + mv isa eltype ? 
mv : RA._writeable_missing(eltype; verbose=true) + else + missingval + end + attrib = RA._attribdict(metadata) # Scale and offset scale = if isnokw(scale) || isnothing(scale) @@ -129,9 +174,8 @@ function _writevar!(ds::AbstractDataset, A::AbstractRaster{T,N}; var = NCD.defVar(ds, key, eltype, dimnames; attrib=attrib, chunksizes, kw...) if write - modvar = RA._maybe_modify(var, mod) # Write with a DiskArays.jl broadcast - modvar .= A + RA._maybe_modify(var.var, mod) .= A end return nothing diff --git a/src/create.jl b/src/create.jl index 3b8e97957..12c7bc332 100644 --- a/src/create.jl +++ b/src/create.jl @@ -188,13 +188,16 @@ function create(filename::Nothing, ::Type{T}, dims::Tuple; suffix=nokw, force=false, chunks=nokw, + driver=nokw, + options=nokw, kw... ) where T if verbose isnokw(chunks) || @warn "`chunks` of `$chunks` found. But `chunks` are not used for in-memory rasters" end - missingval = isnokw(maskingval) || isnothing(maskingval) ? missingval : maskingval - eltype = isnokw(missingval) || isnothing(missingval) ? T : promote_type(T, typeof(missingval)) + # maskingval determines missingval here as we don't use both + missingval = isnokwornothing(maskingval) ? missingval : maskingval + eltype = isnokwornothing(missingval) ? T : promote_type(T, typeof(missingval)) data = if isnokw(parent) || isnothing(parent) Array{eltype}(undef, dims) else @@ -210,28 +213,33 @@ function create(filename::Nothing, types::NamedTuple, dims::Tuple; force=false, chunks=nokw, verbose=true, + driver=nokw, + options=nokw, parent=nokw, missingval=nokw, - maskingval=nothing, + maskingval=nokw, fill=nokw, layerdims=nokw, layermetadata=nokw, kw... ) - missingval = isnokw(maskingval) || isnothing(maskingval) ? missingval : maskingval + missingval = isnokwornothing(missingval) ? maskingval : missingval layerdims = isnokw(layerdims) ? map(_ -> basedims(dims), types) : layerdims layermetadata = layermetadata isa NamedTuple ? 
layermetadata : map(_ -> layermetadata, types) layerfill = fill isa NamedTuple ? fill : map(_ -> fill, types) layermissingvals = missingval isa NamedTuple ? missingval : map(_ -> missingval, types) - layers = map(types, layermissingvals, layerfill, layerdims, layermetadata) do T, lmv, lfv, ld, lm - create(nothing, T, DD.dims(dims, ld); parent, missingval=lmv, fill=lfv, metadata=lm) + layermaskingvals = maskingval isa NamedTuple ? maskingval : map(_ -> maskingval, types) + layers = map(types, layermissingvals, layermaskingvals, layerfill, layerdims, layermetadata) do T, lmv, lma, lfv, ld, lm + create(nothing, T, DD.dims(dims, ld); + parent, missingval=lmv, maskingval=lma, fill=lfv, metadata=lm, driver, options, + ) end return RasterStack(layers; kw...) end function create(filename::AbstractString, source::Source, ::Type{T}, dims::DimTuple; name=nokw, missingval=nokw, - maskingval=nothing, + maskingval=nokw, fill=nokw, metadata=nokw, chunks=nokw, @@ -257,13 +265,17 @@ function create(filename::AbstractString, source::Source, ::Type{T}, dims::DimTu # Create layers of zero arrays rast = Raster(A, dims; name, missingval) Rasters.write(filename, source, rast; - eltype, chunks, metadata, scale, offset, missingval, verbose, force, coerce, write, kw... + eltype, chunks, metadata, scale, offset, missingval, maskingval, verbose, force, coerce, write, kw... 
) - return Raster(filename; source, lazy, metadata, missingval, maskingval, dropband, coerce) + # Don't pass in `missingval`, read it again from disk in case it changed + return Raster(filename; source, lazy, metadata, maskingval, dropband, coerce) end function create(filename::AbstractString, source::Source, layertypes::NamedTuple, dims::DimTuple; + lazy=true, + verbose=true, + force=false, missingval=nokw, - maskingval=nothing, + maskingval=nokw, fill=nokw, metadata=nokw, layerdims=nokw, @@ -272,25 +284,20 @@ function create(filename::AbstractString, source::Source, layertypes::NamedTuple scale=nokw, offset=nokw, dropband=!hasdim(dims, Band), - lazy=true, - verbose=true, - force=false, coerce=nokw, kw... ) - write = Ref(false) - fill = fill isa NamedTuple ? fill : map(_ -> fill, layertypes) - layerdims = if isnokw(layerdims) - map(layertypes) do x - if x isa Type - DD.basedims(dims) - else - DD._astuple(DD.basedims(x[2])) - end - end + layerdims = if isnokwornothing(layerdims) + # Use the same dims for all layers by default + map(_ -> DD.basedims(dims), layertypes) else layerdims end + # Define no-allocation layers with FillArrays + # We need a fill value for each layer + fill = fill isa NamedTuple ? fill : map(_ -> fill, layertypes) + # We update `write` in the closure below + write = Ref(false) layers = map(layertypes, layerdims, fill) do T, ld, f lks = lookup(dims, ld) eltype = Missings.nonmissingtype(T) @@ -307,6 +314,7 @@ function create(filename::AbstractString, source::Source, layertypes::NamedTuple fn = Rasters.write(filename, stack; chunks, metadata, scale, offset, missingval, maskingval, verbose, force, coerce, write=write[], kw... 
) + # Don't pass in `missingval`, read it again from disk in case it changed st = RasterStack(fn; source, lazy, metadata, layerdims, maskingval, dropband, coerce) return st end diff --git a/src/methods/mosaic.jl b/src/methods/mosaic.jl index 9228cbf94..3a63b1e45 100644 --- a/src/methods/mosaic.jl +++ b/src/methods/mosaic.jl @@ -65,22 +65,45 @@ function mosaic(f::Function, r1::RasterStackOrArray, rs::RasterStackOrArray...; mosaic(f, (r1, rs...); kw...) end mosaic(f::Function, regions; kw...) = _mosaic(f, first(regions), regions; kw...) -function _mosaic(f::Function, ::AbstractRaster, regions; - missingval=missingval(first(regions)), filename=nothing, suffix=nothing, kw... +function _mosaic(f::Function, A1::AbstractRaster, regions; + missingval=nokw, + maskingval=nokw, + filename=nothing, + suffix=nothing, + driver=nokw, + options=nokw, + force=false, + kw... ) - missingval = missingval isa Nothing ? missing : missingval + maskingval = isnokw(maskingval) ? Rasters.missingval(first(regions)) : maskingval + missingval = isnokw(missingval) ? Rasters.missingval(first(regions)) : missingval + # missingval is not ooptional here + missingval = ismissing(missingval) || isnothing(missingval) ? _type_missingval(eltype(A1)) : missingval T = Base.promote_type(typeof(missingval), Base.promote_eltype(regions...)) dims = _mosaic(Tuple(map(DD.dims, regions))) l1 = first(regions) - A = create(filename, T, dims; name=name(l1), missingval, metadata=metadata(l1)) - open(A; write=true) do a - mosaic!(f, a, regions; missingval, kw...) + + A = create(filename, T, dims; + name=name(l1), + fill=missingval, + metadata=metadata(l1), + missingval, + maskingval, + driver, + options, + force + ) + open(A; write=true) do O + mosaic!(f, O, regions; missingval, kw...) end return A end function _mosaic(f::Function, ::AbstractRasterStack, regions; - filename=nothing, suffix=keys(first(regions)), kw... + filename=nothing, + suffix=keys(first(regions)), + kw... 
) + # TODO make this write inside a single netcdf layers = map(suffix, map(values, regions)...) do s, A... mosaic(f, A...; filename, suffix=s, kw...) end @@ -135,12 +158,15 @@ nothing $EXPERIMENTAL """ -mosaic!(f::Function, x::RasterStackOrArray, regions::RasterStackOrArray...; kw...) = mosaic!(f, x, regions; kw...) +mosaic!(f::Function, x::RasterStackOrArray, regions::RasterStackOrArray...; kw...) = + mosaic!(f, x, regions; kw...) function mosaic!(f::Function, A::AbstractRaster{T}, regions; - missingval=missingval(A), atol=_default_atol(T) + missingval=Rasters.missingval(A), + atol=_default_atol(T) ) where T + isnokwornothing(missingval) && throw(ArgumentError("destination array must have a `missingval`")) _without_mapped_crs(A) do A1 - broadcast!(A1, DimKeys(A1; atol)) do ds + broadcast!(A1, DimSelectors(A1; atol)) do ds # Get all the regions that have this point ls = foldl(regions; init=()) do acc, l if DD.hasselection(l, ds) diff --git a/src/modifieddiskarray.jl b/src/modifieddiskarray.jl index bb4abf676..5f581d83d 100644 --- a/src/modifieddiskarray.jl +++ b/src/modifieddiskarray.jl @@ -1,10 +1,16 @@ abstract type AbstractModifications end -struct NoMod{Mi} <: AbstractModifications +struct NoMod{T,Mi} <: AbstractModifications missingval::Mi end -NoMod() = NoMod(nothing) -NoMod(::NoKW) = NoMod(nothing) -struct Mod{T,Mi,Ma,S,O,F} <: AbstractModifications +NoMod{T}(missingval::Mi) where {T,Mi} = NoMod{T,Mi}(missingval) +NoMod() = NoMod{Any}() +NoMod{T}() where T = NoMod{T}(nothing) +NoMod{T}(::NoKW) where T = NoMod{T}(nothing) + +Base.eltype(::NoMod{T}) where T = T +source_eltype(::NoMod{T}) where T = T + +struct Mod{T1,T2,Mi,Ma,S,O,F} <: AbstractModifications missingval::Mi maskingval::Ma scale::S @@ -12,14 +18,17 @@ struct Mod{T,Mi,Ma,S,O,F} <: AbstractModifications coerce::F function Mod(::Type{T}, missingval, maskingval, scale, offset, coerce) where T maskingval = maskingval === missingval ? 
nothing : maskingval - if isnokw(coerce) || isnothing(coerce) + if isnokw(coerce) || isnothing(coerce) coerce = convert end vals = map(_nokw2nothing, (missingval, maskingval, scale, offset)) T1 = _resolve_mod_eltype(T, vals...) - new{T1,map(typeof, vals)...,typeof(coerce)}(vals..., coerce) + new{T1,T,map(typeof, vals)...,typeof(coerce)}(vals..., coerce) end end +Base.eltype(::Mod{T1}) where T1 = T1 +source_eltype(::Mod{<:Any,T2}) where T2 = T2 + function _resolve_mod_eltype(::Type{T}, missingval, maskingval, scale, offset) where T T1 = isnothing(maskingval) ? T : promote_type(T, typeof(maskingval)) @@ -33,13 +42,13 @@ maskingval(m::Mod) = isnothing(m.maskingval) ? m.missingval : m.maskingval missingval(m::NoMod) = m.missingval maskingval(m::NoMod) = missingval(m) -struct ModifiedDiskArray{T,N,V,M} <: DiskArrays.AbstractDiskArray{T,N} +struct ModifiedDiskArray{I,T,N,V,M} <: DiskArrays.AbstractDiskArray{T,N} var::V mod::M end -function ModifiedDiskArray(v::V, m::M) where {V<:AbstractArray{<:Any,N},M} where N - T = _mod_eltype(v, m) - return ModifiedDiskArray{T,N,V,M}(v, m) +function ModifiedDiskArray(v::V, m::M; invert=false) where {V<:AbstractArray{<:Any,N},M} where N + T = invert ? source_eltype(m) : eltype(m) + return ModifiedDiskArray{invert,T,N,V,M}(v, m) end Base.parent(A::ModifiedDiskArray) = A.var @@ -50,15 +59,53 @@ maskingval(A::ModifiedDiskArray) = A.maskingval DiskArrays.haschunks(A::ModifiedDiskArray) = DiskArrays.haschunks(parent(A)) DiskArrays.eachchunk(A::ModifiedDiskArray) = DiskArrays.eachchunk(parent(A)) -function DiskArrays.readblock!(A::ModifiedDiskArray, out_block, I::AbstractVector...) - broadcast!(_applymod, out_block, A.var[I...], (A.mod,)) +function DiskArrays.readblock!( + A::ModifiedDiskArray{false,<:Any,0}, out_block, I::AbstractVector... +) + out_block[] = _applymod(parent(A)[I...], A.mod) + return nothing +end +function DiskArrays.readblock!( + A::ModifiedDiskArray{true,T,<:Any,0}, out_block, I::AbstractVector... 
+) where T + out_block[] = _invertmod(Val{T}(), parent(A)[I...], A.mod) + return nothing +end +function DiskArrays.readblock!( + A::ModifiedDiskArray{false}, out_block, I::AbstractVector... +) + out_block .= _applymod.(parent(A)[I...], (A.mod,)) + return nothing +end +function DiskArrays.readblock!( + A::ModifiedDiskArray{true,T}, out_block, I::AbstractVector... +) where T + out_block .= _invertmod.(Ref(Val{T}()), view(parent(A), I...), Ref(A.mod)) return nothing end function DiskArrays.writeblock!( - A::ModifiedDiskArray{<:Any,<:Any,<:AbstractArray{T}}, in_block, I::AbstractVector... + A::ModifiedDiskArray{false,<:Any,0,<:AbstractArray{T}}, block, I::AbstractVector... +) where T + A.var[I...] = _invertmod(Val{source_eltype(A.mod)}(), block[], A.mod) + return nothing +end +function DiskArrays.writeblock!( + A::ModifiedDiskArray{true,<:Any,0,<:AbstractArray{T}}, _block, I::AbstractVector... ) where T - A.var[I...] = _invertmod.((Val{T}(),), in_block, (A.mod,)) + A.var[I...] = _applymod(Val{eltype(A.mod)}(), block[], A.mod) + return nothing +end +function DiskArrays.writeblock!( + A::ModifiedDiskArray{false,<:Any,<:Any,<:AbstractArray{T}}, block, I::AbstractVector... +) where T + A.var[I...] .= _invertmod.(Val{source_eltype(A.mod)}(), block, Ref(A.mod)) + return nothing +end +function DiskArrays.writeblock!( + A::ModifiedDiskArray{true,<:Any,<:Any,<:AbstractArray{T}}, _block, I::AbstractVector... +) where T +A.var[I...] 
.= _applymod.((Val{eltype(A.mod)}(),), block, (A.mod,)) return nothing end @@ -69,6 +116,7 @@ Base.@assume_effects :foldable function _applymod(x, m::Mod) _scaleoffset(x, m) end end +_applymod(x, m::NoMod) = x _ismissing(x, mv) = isequal(x, mv) _ismissing(_, ::Nothing) = false @@ -91,35 +139,49 @@ Base.@assume_effects :foldable function _invertmod(::Val{T}, x, m::Mod) where T end return _scaleoffset_inv(T, tm, m) end +_invertmod(v, x, m::NoMod) = x _scaleoffset_inv(::Type{T}, x, m::Mod) where T = _scaleoffset_inv(m.coerce, T, x, m) -_scaleoffset_inv(coerce::Base.Callable, ::Type{T}, x, m::Mod) where T = - coerce(T, _scaleoffset_inv(x, m.scale, m.offset)) -_scaleoffset_inv(x, scale, offset) = (x - offset) / scale -_scaleoffset_inv(x, scale, ::Nothing) = x / scale -_scaleoffset_inv(x, ::Nothing, offset) = x - offset -_scaleoffset_inv(x, ::Nothing, ::Nothing) = x +_scaleoffset_inv(coerce::Base.Callable, ::Type{T}, x, m::Mod) where T = + coerce(T, _scaleoffset_inv1(x, m.scale, m.offset)) +_scaleoffset_inv1(x, scale, offset) = (x - offset) / scale +_scaleoffset_inv1(x, scale, ::Nothing) = x / scale +_scaleoffset_inv1(x, ::Nothing, offset) = x - offset +_scaleoffset_inv1(x, ::Nothing, ::Nothing) = x -function _stack_mods(eltypes::Vector, metadata::Vector, missingval::Vector, maskingval; scaled, coerce) + +function _stack_mods( + eltypes::Vector, metadata::Vector, missingval::Vector, maskingval; + scaled, coerce +) map(eltypes, metadata, missingval) do T, md, mv scale, offset = get_scale(md, scaled) _mod(T, mv, maskingval, scale, offset, coerce) end end -function _stack_mods(eltypes::Vector, metadata::Vector, missingval, maskingval::Vector; scaled::Bool, coerce) +function _stack_mods( + eltypes::Vector, metadata::Vector, missingval, maskingval::Vector; + scaled::Bool, coerce +) map(eltypes, metadata, maskingval) do T, md, mk scale, offset = get_scale(md, scaled) _mod(T, missingval, mk, scale, offset, coerce) end end -function _stack_mods(eltypes::Vector, metadata::Vector, 
missingval::Vector, maskingval::Vector; scaled::Bool, coerce) +function _stack_mods( + eltypes::Vector, metadata::Vector, missingval::Vector, maskingval::Vector; + scaled::Bool, coerce +) map(eltypes, metadata, missingval, maskingval) do T, md, mv, mk scale, offset = get_scale(md, scaled) _mod(mv, mk, scale, offset, coerce) end end -function _stack_mods(eltypes::Vector, metadata::Vector, missingval, maskingval; scaled::Bool, coerce) +function _stack_mods( + eltypes::Vector, metadata::Vector, missingval, maskingval; + scaled::Bool, coerce +) map(eltypes, metadata) do T, md scale, offset = get_scale(md, scaled) _mod(T, missingval, maskingval, scale, offset, coerce) @@ -131,8 +193,15 @@ function _mod(::Type{T}, metadata, missingval, maskingval; scaled::Bool, coerce) _mod(T, missingval, maskingval, scale, offset, coerce) end function _mod(::Type{T}, missingval, maskingval, scale, offset, coerce) where T - if isnothing(maskingval) && isnothing(scale) && isnothing(offset) - return NoMod(missingval) + maskingval = if isnokw(maskingval) + # If there is no missingval dont mask + isnokwornothing(missingval) ? nothing : missing + else + # Unless maskingval was passed explicitly + maskingval === missingval ? 
nothing : maskingval + end + if isnokwornothing(maskingval) && isnokwornothing(scale) && isnokwornothing(offset) + return NoMod{T}(missingval) else return Mod(T, missingval, maskingval, scale, offset, coerce) end @@ -150,10 +219,10 @@ function _writer_mod(::Type{T}; missingval, maskingval, scale, offset, coerce) w if isnokw(maskingval) || isnothing(maskingval) nothing else - _type_missingval(T) + _type_missingval(T) end elseif ismissing(missingval) - _type_missingval(T) + _type_missingval(T) else missingval end @@ -176,5 +245,5 @@ _mod_inverse_eltype(::AbstractArray{T}, ::NoMod) where T = T _mod_inverse_eltype(::AbstractArray{T}, m::Mod) where T = Base.promote_op(_invertmod, typeof(m.coerce), T, typeof(m)) -_maybe_modify(var, m::Mod) = ModifiedDiskArray(var, m) -_maybe_modify(var, ::NoMod) = var +_maybe_modify(var, m::Mod; kw...) = ModifiedDiskArray(var, m; kw...) +_maybe_modify(var, ::NoMod; kw...) = var diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index f627a1cbf..ffc4b756b 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -59,7 +59,7 @@ function _open(f, ::CDMsource, ds::AbstractDataset; kw... ) g = _getgroup(ds, group) - x = if isnokw(name) + x = if isnokw(name) g else v = CDM.variable(g, string(_name_or_firstname(g, name))) @@ -119,8 +119,10 @@ function _layers(ds::AbstractDataset, ::NoKW=nokw, ::NoKW=nokw) attrs=attrs[bitinds], ) end -function _layers(ds::AbstractDataset, names, ::NoKW) - vars = map(k -> CDM.variable(ds, k), names) +_layers(ds::AbstractDataset, names, ::NoKW) = + _layers(ds, collect(names), nokw) +function _layers(ds::AbstractDataset, names::Vector, ::NoKW) + vars = map(n -> CDM.variable(ds, n), names) attrs = map(CDM.attribs, vars) (; names, vars, attrs) end diff --git a/src/sources/grd.jl b/src/sources/grd.jl index ca898dff2..dfb4cef5d 100644 --- a/src/sources/grd.jl +++ b/src/sources/grd.jl @@ -121,13 +121,17 @@ function _metadata(A::RasterDiskArray{GRDsource}, args...) 
end function missingval(A::RasterDiskArray{GRDsource,T}, args...) where T - if haskey(A.attrib.attrib, "nodatavalue") - ndv = A.attrib.attrib["nodatavalue"] + _grd_mv(T, A.attrib.attrib) +end + +function _grd_mv(::Type{T}, md; verbose=true) where T + if haskey(md, "nodatavalue") + ndv = md["nodatavalue"] ndv === "nothing" && return nothing try return parse(T, ndv) catch - @warn "nodatavalue $(ndv) is not convertible to data type $T. `missingval` set to `nothing`." + verbose && @warn "nodatavalue $(ndv) is not convertible to data type $T. `missingval` set to `nothing`." return nothing end else @@ -158,16 +162,37 @@ Returns the base of `filename` with a `.grd` extension. """ function Base.write(filename::String, ::GRDsource, A::AbstractRaster; force=false, + verbose=true, + write=true, missingval=nokw, + maskingval=nokw, chunks=nokw, scale=nokw, offset=nokw, + coerce=nokw, + eltype=Missings.nonmissingtype(eltype(A)), kw... ) + check_can_write(filename, force) + haskey(REVGRD_DATATYPE_TRANSLATION, eltype) || throw(ArgumentError(""" + Element type $eltype cannot be written to grd file. Convert it to one of $(keys(REVGRD_DATATYPE_TRANSLATION)), + usually by broadcasting the desired type constructor over the `Raster`, e.g. `newrast = Float32.(rast)`")) + """ + )) isnokwornothing(scale) && isnokwornothing(offset) || throw(ArgumentError("Cant write scale or offset to .grd files")) chunks isa NoKW || @warn "specifying chunks not supported for .grd files" - check_can_write(filename, force) - missingval = ismissing(missingval) ? RA._writeable_missing(T; verbose) : missingval + + missingval = isnokw(missingval) ? Rasters.missingval(A) : missingval + maskingval = isnokw(maskingval) ? Rasters.missingval(A) : maskingval + missingval = if ismissing(missingval) || isnothing(missingval) && !isnothing(maskingval) + # See if there is a missing value in metadata + mv = _grd_mv(eltype, metadata(A); verbose=false) + # Otherwise define one + isnothing(mv) ? 
_writeable_missing(eltype; verbose) : mv + else + missingval + end + if hasdim(A, Band) correctedA = permutedims(A, (X, Y, Band)) |> a -> reorder(a, (X(GRD_X_ORDER), Y(GRD_Y_ORDER), Band(GRD_BAND_ORDER))) @@ -179,19 +204,32 @@ function Base.write(filename::String, ::GRDsource, A::AbstractRaster; end # Remove extension filename = splitext(filename)[1] - minvalue = minimum(skipmissing(A)) - maxvalue = maximum(skipmissing(A)) - _write_grd(filename, eltype(A), dims(A), Rasters.missingval(A), minvalue, maxvalue, name(A)) - # Data: gri file - open(filename * ".gri", write=true) do IO - write(IO, parent(correctedA)) - end + # Data: write a raw gri file from the array + mod = _writer_mod(eltype; missingval, maskingval, scale, offset, coerce) + gri_filename = filename * ".gri" + isfile(gri_filename) && rm(gri_filename) + _write_gri(gri_filename, Val{source_eltype(mod)}(), mod, parent(correctedA)) + + _write_grd(filename, eltype, dims(A), missingval, name(A)) return filename * ".grd" end -function _write_grd(filename, T, dims, missingval, minvalue, maxvalue, name) +function _write_gri(filename, v, ::NoMod, A::Array) + open(filename; write=true, lock=false) do io + write(io, A) + end +end +function _write_gri(filename, v, mod, A) + open(filename; write=true, lock=false) do io + for x in A # We are modifying the source array so invert the modifications + write(io, _invertmod(v, x, mod)) + end + end +end + +function _write_grd(filename, T, dims, missingval, name) filename = splitext(filename)[1] x, y = map(DD.dims(dims, (X(), Y()))) do d @@ -226,8 +264,6 @@ function _write_grd(filename, T, dims, missingval, minvalue, maxvalue, name) nodatavalue= $nodatavalue byteorder= little nbands= $nbands - minvalue= $minvalue - maxvalue= $maxvalue [description] layername= $name """ @@ -235,32 +271,7 @@ function _write_grd(filename, T, dims, missingval, minvalue, maxvalue, name) end end -# function create(filename, ::GRDsource, T::Type, dims::DD.DimTuple; -# name="layer", metadata=nothing, 
missingval=nothing, lazy=true, -# ) -# # Remove extension -# basename = splitext(filename)[1] -# minvalue = maxvalue = zero(T) -# sze = map(length, DD.dims(dims, (XDim, YDim, Band))) - -# # Metadata: grd file -# _write_grd(basename, T, dims, missingval, minvalue, maxvalue, name) - -# # Data: gri file -# open(basename * ".gri", write=true) do IO -# write(IO, FillArrays.Zeros(sze)) -# end -# return Raster(filename; source=GRDsource(), lazy) -# end - -# AbstractRasterStack methods - -# Custom `open` because the data and metadata objects are separate -# Here we _mmapgrd instead of `_open` -function Base.open(f::Function, A::FileArray{GRDsource}, args...; write=A.write) - _mmapgrd(mm -> f(RasterDiskArray{GRDsource}(mm, A.eachchunk, A.haschunks)), A; write) -end - +# Rasters methods function _open(f, ::GRDsource, filename::AbstractString; mod=RA.NoMod(), write=false, @@ -275,11 +286,16 @@ function _open(f, ::GRDsource, filename::AbstractString; end end _open(f, ::GRDsource, attrib::GRDdataset; kw...) = f(attrib) -_open(f, ::GRDsource, A::RasterDiskArray; mod=RA.NoMod(), kw...) = +function _open(f, ::GRDsource, A::RasterDiskArray; + mod=RA.NoMod(), + kw... +) cleanreturn(f(_maybe_modify(A, mod))) +end haslayers(::GRDsource) = false + # Utils ######################################################################## function _mmapgrd(f, x::Union{FileArray,GRDdataset}; kw...) diff --git a/src/stack.jl b/src/stack.jl index 6a6f103c4..8d4ad2025 100644 --- a/src/stack.jl +++ b/src/stack.jl @@ -362,33 +362,24 @@ function RasterStack( end function RasterStack(filenames::NamedTuple{K,<:Tuple{<:AbstractString,Vararg}}; source=nokw, - missingval=nokw, metadata=nokw, resize=nokw, layermetadata::Union{NoKW,NamedTuple{K}}=nokw, layerdims::Union{NoKW,NamedTuple{K}}=nokw, + missingval=nokw, maskingval=nokw, - scaled=true, - coerce=convert, kw... 
) where K - missingval1 = if missingval isa NamedTuple - keys(missingval) == K || throw(ArgumentError("missingval keys $(keys(missingval)) do not match filename keywords $K")) - collect(missingval) - else - missingval - end - maskingval1 = if maskingval isa NamedTuple - keys(maskingval) == K || throw(ArgumentError("maskingval keys $(keys(maskingval)) do not match filename keywords $K")) - collect(maskingval) - else - maskingval - end + missingval1 = collect(_stack_nt(filenames, missingval)) + maskingval1 = collect(_stack_nt(filenames, maskingval)) fn = collect(filenames) layermetadata = layermetadata isa NamedTuple ? collect(layermetadata) : map(_ -> NoKW(), fn) layerdims = layerdims isa NamedTuple ? collect(layerdims) : map(_ -> NoKW(), fn) - layers = map(K, fn, layermetadata, layerdims) do name, fn, md, d, mod - Raster(fn; source=_sourcetrait(fn, source), name, metadata=md, dims=d, mod, kw...) + layers = map(K, fn, layermetadata, layerdims, missingval1, maskingval1) do name, fn, md, d, mv, ma + Raster(fn; + source=_sourcetrait(fn, source), + dims=d, name, metadata=md, missingval=mv, maskingval=ma, kw... + ) end return RasterStack(NamedTuple{K}(layers); resize, metadata) end @@ -436,7 +427,7 @@ function RasterStack(filename::AbstractString; else # With bands actings as layers raster = Raster(filename; - source, lazy, scaled, coerce, missingval, maskingval, dropband=false + source, lazy, missingval, maskingval, scaled, coerce, dropband=false ) RasterStack(raster; kw...) end @@ -514,7 +505,6 @@ function _layer_stack(filename; layerdims=nokw, missingval=nokw, maskingval=nokw, - replace_missing=nokw, crs=nokw, mappedcrs=nokw, coerce=convert, @@ -523,7 +513,6 @@ function _layer_stack(filename; lazy=false, kw... 
) - _maybewarn_replace_missing(replace_missing) data, field_kw = _open(filename; source) do ds layers = _layers(ds, name, group) # Create a Dict of dimkey => Dimension to use in `dim` and `layerdims` @@ -537,18 +526,18 @@ function _layer_stack(filename; else layermetadata isa NamedTuple ? collect(layermetadata) : map(_ -> NoKW(), fn) end + name = Tuple(map(Symbol, layers.names)) + NT = NamedTuple{name} missingval1 = if isnokw(missingval) map(Rasters.missingval, layers.vars, layermetadata1) elseif missingval isa NamedTuple + keys(missingval1) == name || throw(ArgumentError("`missingval` names $(keys(missingval)) do not match layer names $name")) collect(missingval) else missingval end - maskingval1 = isnokw(maskingval) && !isnothing(missingval1) ? missing : maskingval eltypes = map(eltype, layers.vars) - mods = _stack_mods(eltypes, layermetadata1, missingval1, maskingval1; scaled, coerce) - name = Tuple(map(Symbol, layers.names)) - NT = NamedTuple{name} + mods = _stack_mods(eltypes, layermetadata1, missingval1, maskingval; scaled, coerce) data = if lazy vars = ntuple(i -> layers.vars[i], length(name)) mods = ntuple(i -> mods[i], length(name)) diff --git a/src/utils.jl b/src/utils.jl index b50478612..d89c2ca7a 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -86,8 +86,9 @@ function _writeable_missing(T; verbose=true) return missingval end -_type_missingval(::Type{T}) where T = typemin(T) -_type_missingval(::Type{T}) where T<:Unsigned = typemax(T) +_type_missingval(::Type{T}) where T = _type_missingval1(Missings.nonmissingtype(T)) +_type_missingval1(::Type{T}) where T = typemin(T) +_type_missingval1(::Type{T}) where T<:Unsigned = typemax(T) _fix_missingval(::Type, ::Union{NoKW,Nothing}) = nothing _fix_missingval(::AbstractArray, ::Nothing) = nothing diff --git a/src/write.jl b/src/write.jl index 5e3984c9c..46fdf4794 100644 --- a/src/write.jl +++ b/src/write.jl @@ -1,16 +1,16 @@ const CHUNKS_KEYWORD = """ -- `chunks`: a `NTuple{N,Int}` specifying the chunk size for each 
dimension. - To specify only specific dimensions, a Tuple of `Dimension` wrapping `Int` +- `chunks`: a `NTuple{N,Int}` specifying the chunk size for each dimension. + To specify only specific dimensions, a Tuple of `Dimension` wrapping `Int` or a `NamedTuple` of `Int` can be used. Other dimensions will have a chunk - size of `1`. `true` can be used to mean: use the original + size of `1`. `true` can be used to mean: use the original chunk size of the lazy `Raster` being written or X and Y of 256 by 256. `false` means don't use chunks at all. """ const MISSINGVAL_KEYWORD = """ - `missingval`: set the missing value (i.e. FillValue / nodataval) of the written raster, - as Julias `missing` cannot be stored. If not passed in, `missingval` will be detected + as Julias `missing` cannot be stored. If not passed in, `missingval` will be detected from metadata or a default will be chosen. """ @@ -33,13 +33,13 @@ Other keyword arguments are passed to the `write` method for the backend. ## GDAL Keywords $FORCE_KEYWORD -- `driver`: A GDAL driver name `String` or a GDAL driver retrieved via `ArchGDAL.getdriver(drivername)`. +- `driver`: A GDAL driver name `String` or a GDAL driver retrieved via `ArchGDAL.getdriver(drivername)`. By default `driver` is guessed from the filename extension. -- `options::Dict{String,String}`: A dictionary containing the dataset creation options passed to the driver. - For example: `Dict("COMPRESS" => "DEFLATE")`. +- `options::Dict{String,String}`: A dictionary containing the dataset creation options passed to the driver. + For example: `Dict("COMPRESS" => "DEFLATE")`. -Valid `driver` names and the `options` for each can be found at: +Valid `driver` names and the `options` for each can be found at: [https://gdal.org/drivers/raster/index.html](https://gdal.org/drivers/raster/index.html) @@ -52,7 +52,7 @@ Returns the base of `filename` with a `.grd` extension. 
### GDAL (tiff, and everything else) -Used if you `write` a `Raster` with a `filename` extension that no other backend can write. +Used if you `write` a `Raster` with a `filename` extension that no other backend can write. GDAL is the fallback, and writes a lot of file types, but is not guaranteed to work. """ @@ -73,11 +73,15 @@ $SOURCE_WRITE_DOCSTRING Returns `filename`. """ -function Base.write( filename::AbstractString, A::AbstractRaster; - source=_sourcetrait(filename), +function Base.write(filename::AbstractString, A::AbstractRaster; + source=_sourcetrait(filename), + missingval=nokw, + maskingval=nokw, kw... ) - write(filename, _sourcetrait(source), A; kw...) + missingval = isnokw(missingval) ? Rasters.missingval(A) : missingval + maskingval = isnokw(maskingval) ? missingval : maskingval + write(filename, _sourcetrait(source), A; missingval, maskingval, kw...) end Base.write(A::AbstractRaster; kw...) = write(filename(A), A; kw...) # Fallback @@ -115,11 +119,15 @@ function Base.write(path::AbstractString, s::AbstractRasterStack; ext=nothing, source=_sourcetrait(path, ext), verbose=true, + missingval=nokw, + maskingval=nokw, kw... ) source = _sourcetrait(source) + maskingval = _stack_nt(s, isnokw(maskingval) ? Rasters.missingval(s) : maskingval) + missingval = _stack_missingvals(s, isnokw(missingval) ? maskingval : missingval) if haslayers(source) - write(path, source, s; kw...) + write(path, source, s; missingval, maskingval, kw...) else # Otherwise write separate files for each layer if isnothing(ext) @@ -138,13 +146,38 @@ function Base.write(path::AbstractString, s::AbstractRasterStack; if verbose @warn string("Cannot write complete stacks to \"", ext, "\", writing layers as individual files") end - map(keys(s), suffix1) do key, suf + map(keys(s), suffix1, missingval, maskingval) do key, suf, mv, ma fn = string(base, suf, ext) - write(fn, source, s[key]; kw...) + write(fn, source, s[key]; missingval=mv, maskingval=ma, kw...) 
end |> NamedTuple{keys(s)} end end +_stack_missingvals(::RasterStack{<:Any,T}, x) where T = _stack_missingvals(T, x) +function _stack_missingvals(::Type{T}, missingval::NamedTuple{K}) where {K,T<:NamedTuple{K}} + map(_types(T), values(missingval)) do t, mv + ismissing(mv) ? _type_missingval(t) : mv + end |> NamedTuple{K} +end +_stack_missingvals(::Type{T}, missingval::NamedTuple{K1}) where {K1,T<:NamedTuple{K2}} where K2 = + throw(ArgumentError("stack keys $K1 do not match misssingval keys $K2")) +_stack_missingvals(::Type{T}, missingval::Missing) where T<:NamedTuple{K} where K = + NamedTuple{K}(map(_type_missingval, _types(T))) +_stack_missingvals(::Type{T}, missingval) where T = + _stack_nt(T, missingval) + +_stack_nt(::NamedTuple{K}, x) where K = NamedTuple{K}(map(_ -> x, K)) +_stack_nt(::RasterStack{<:Any,T}, x) where T = _stack_nt(T, x) +_stack_nt(::Type{T}, x::NamedTuple{K}) where {K,T<:NamedTuple{K}} = x +_stack_nt(::Type{T}, x::NamedTuple{K1}) where {K1,T<:NamedTuple{K2}} where K2 = + throw(ArgumentError("stack keys $K1 do not match misssingval keys $K2")) +_stack_nt(::Type{T}, x) where T<:NamedTuple{K} where K = + NamedTuple{K}(map(_ -> x, K)) + +@generated function _types(::Type{<:NamedTuple{K,T}}) where {K,T} + Tuple(T.parameters) +end + """ Base.write(filepath::AbstractString, s::AbstractRasterSeries; kw...) 
diff --git a/test/sources/gdal.jl b/test/sources/gdal.jl index 493443ed3..1a3aac041 100644 --- a/test/sources/gdal.jl +++ b/test/sources/gdal.jl @@ -27,24 +27,19 @@ gdalpath = maybedownload(url) @testset "cf" begin # This file has no scale/offset so cf does nothing - @time cfarray = Raster(gdalpath) + @time cfarray = Raster(gdalpath; missingval=0x00) @time cf_nomask_array = Raster(gdalpath; maskingval=nothing) @time nocfarray = Raster(gdalpath; scaled=false) - @time lazycfarray = Raster(gdalpath; lazy=true) + @time lazycfarray = Raster(gdalpath; lazy=true, missingval=0x00) @time lazynocfarray = Raster(gdalpath; lazy=true, scaled=false) - @time lazynocfnomaskarray = Raster(gdalpath; lazy=true, scaled=false, maskingval=nothing) - @test parent(cfarray) isa Array{UInt8,2} + @test parent(cfarray) isa Base.ReshapedArray{Union{UInt8,Missing},2} @test parent(cf_nomask_array) isa Array{UInt8,2} @test parent(nocfarray) isa Array{UInt8,2} open(lazycfarray) do A - @test parent(A) isa DiskArrays.SubDiskArray{UInt8} - @test parent(parent(A)) isa Rasters.ModifiedDiskArray{UInt8} + @test parent(A) isa DiskArrays.SubDiskArray{Union{Missing,UInt8}} + @test parent(parent(A)) isa Rasters.ModifiedDiskArray{false,Union{Missing,UInt8}} end open(lazynocfarray) do A - @test parent(A) isa DiskArrays.SubDiskArray{UInt8} - @test parent(parent(A)) isa Rasters.ModifiedDiskArray{UInt8} - end - open(lazynocfnomaskarray) do A @test parent(A) isa DiskArrays.SubDiskArray{UInt8} @test parent(parent(A)) isa ArchGDAL.RasterDataset{UInt8} end @@ -200,7 +195,7 @@ gdalpath = maybedownload(url) @testset "trim, crop, extend" begin a = read(replace_missing(gdalarray, zero(eltype(gdalarray)))) a[X(1:100)] .= missingval(a) - trimmed = trim(a) + trimmed = Rasters.trim(a) @test size(trimmed) == (414, 514) cropped = Rasters.crop(a; to=trimmed) @test size(cropped) == (414, 514) @@ -278,13 +273,21 @@ gdalpath = maybedownload(url) @time gdalarray = Raster(gdalpath; name=:test) A1 = gdalarray[X(1:300), Y(1:200)] A2 = 
gdalarray[X(57:500), Y(101:301)] - tempfile = tempname() * ".tif" - Afile = mosaic(first, A1, A2; missingval=0x00, atol=1e-8, filename=tempfile) + tempfile1 = tempname() * ".tif" + tempfile2 = tempname() * ".tif" + tempfile3 = tempname() * ".tif" + Afile = mosaic(first, A1, A2; missingval=0x00, atol=1e-8, filename=tempfile1) + Afile2 = mosaic(first, A1, A2; + missingval=0x00, atol=1e-8, filename=tempfile2, maskingval=missing + ) + @test missingval(Afile2) === missing Amem = mosaic(first, A1, A2; missingval=0x00, atol=1e-8) Atest = gdalarray[X(1:500), Y(1:301)] Atest[X(1:56), Y(201:301)] .= 0x00 Atest[X(301:500), Y(1:100)] .= 0x00 - @test all(Atest .=== Amem .=== Afile) + @test all(Atest .=== Amem .=== Afile .== replace_missing(Afile2, 0x00)) + Afile3 = mosaic(first, A1, A2; atol=1e-8, filename=tempfile3) + @test missingval(Afile3) === 0xff end end # methods @@ -464,10 +467,11 @@ gdalpath = maybedownload(url) end @testset "write missing" begin - A = read(replace_missing(gdalarray, missing)) + A = replace_missing(gdalarray, missing) filename = tempname() * ".tif" write(filename, A) - @test missingval(Raster(filename)) === typemax(UInt8) + @test missingval(Raster(filename)) === missing + @test missingval(Raster(filename; maskingval=nothing)) === typemax(UInt8) rm(filename) end @@ -513,11 +517,11 @@ gdalpath = maybedownload(url) gdalarray[Y(1)] |> plot end - @testset "nodatavalue type matches the array type" begin + @testset "unmasked missingval type matches the array type" begin # Handle WorldClim/ucdavis unreliability A = nothing try - A = Raster(WorldClim{Climate}, :tavg; res="10m", month=1) + A = Raster(WorldClim{Climate}, :tavg; res="10m", month=1, maskingval=nothing) catch end if !isnothing(A) @@ -921,7 +925,7 @@ end mv = zero(eltype(gdalser[1])) ser = read(replace_missing(gdalser, mv)) ser = map(A -> (view(A, X(1:100)) .= mv; A), ser) - trimmed = trim(ser) + trimmed = Rasters.trim(ser) @test size(trimmed[1]) == (414, 514) cropped = crop(ser; to=trimmed[1]) @test 
size(cropped[1]) == (414, 514) diff --git a/test/sources/grd.jl b/test/sources/grd.jl index 41ca11eca..ed8f4f367 100644 --- a/test/sources/grd.jl +++ b/test/sources/grd.jl @@ -29,11 +29,13 @@ grdpath = stem * ".gri" @test parent(eagerarray) isa Array end - @testset "replace_missing keyword" begin - # Eager is the default - @time missingarray = Raster(grdpath; replace_missing=true) + @testset "maskingval keyword" begin + @time missingarray = Raster(grdpath) @test missingval(missingarray) === missing @test eltype(missingarray) === Union{Missing,Float32} + @time missingarray = Raster(grdpath; maskingval=nothing) + @test missingval(missingarray) === -3.4f38 + @test eltype(missingarray) === Float32 end @testset "open" begin @@ -60,7 +62,7 @@ grdpath = stem * ".gri" end @testset "array properties" begin - @test grdarray isa Raster{Float32,3} + @test grdarray isa Raster{Union{Missing,Float32},3} end @testset "dimensions" begin @@ -74,7 +76,7 @@ grdpath = stem * ".gri" @testset "other fields" begin proj = ProjString("+proj=merc +datum=WGS84") @test name(grdarray) == Symbol("red:green:blue") - @test missingval(grdarray) == -3.4f38 + @test missingval(grdarray) === missing @test metadata(grdarray) isa Metadata{GRDsource,Dict{String,Any}} @test label(grdarray) == "red:green:blue" @test units(grdarray) == nothing @@ -100,7 +102,7 @@ grdpath = stem * ".gri" @test mappedcrs(customgrdarray) == EPSG(4326) @test mappedcrs(dims(customgrdarray, Y)) == EPSG(4326) @test mappedcrs(dims(customgrdarray, X)) == EPSG(4326) - @test parent(customgrdarray) isa DiskArrays.BroadcastDiskArray + @test parent(customgrdarray) isa Rasters.FileArray @test eltype(customgrdarray) == Union{Float32,Missing} # Needs to be separate as it overrides crs/mappedcrs dimsgrdarray = Raster(grdpath; @@ -110,9 +112,9 @@ grdpath = stem * ".gri" end @testset "getindex" begin - @test grdarray[Band(1)] isa Raster{Float32,2} - @test grdarray[Y(1), Band(1)] isa Raster{Float32,1} - @test grdarray[X(1), Band(1)] isa 
Raster{Float32,1} + @test grdarray[Band(1)] isa Raster{Union{Missing,Float32},2} + @test grdarray[Y(1), Band(1)] isa Raster{Union{Missing,Float32},1} + @test grdarray[X(1), Band(1)] isa Raster{Union{Missing,Float32},1} @test grdarray[X(50), Y(30), Band(1)] == 115.0f0 @test grdarray[1, 1, 1] == 255.0f0 @test grdarray[Y(At(20.0; atol=1e10)), X(At(20; atol=1e10)), Band(3)] == 255.0f0 @@ -130,9 +132,9 @@ grdpath = stem * ".gri" @test size(cropped) == (81, 77, 3) kwcropped = crop(a; to=trimmed, dims=(X,)) @test size(kwcropped) == (81, size(a,Y), 3) - @test all(collect(cropped .== trimmed)) + @test all(collect(cropped .=== trimmed)) extended = extend(cropped; to=a); - @test all(collect(extended .== a)) + @test all(collect(extended .=== a)) end @testset "mask and mask! to disk" begin @@ -176,15 +178,14 @@ grdpath = stem * ".gri" tn = tempname() tempgrd = tn * ".grd" tempgri = tn * ".gri" - cp(stem * ".grd", tempgrd) - cp(stem * ".gri", tempgri) - Afile = mosaic(first, A1, A2; missingval=0.0f0, atol=1e-1, filename=tempgrd) + Afile = mosaic(first, A1, A2; missingval=0.0f0, atol=1e-1, filename=tempgrd, maskingval=nothing) Amem = mosaic(first, A1, A2; missingval=0.0f0, atol=1e-1) Atest = grdarray[X(1:80), Y(1:60)] Atest[X(1:26), Y(31:60)] .= 0.0f0 Atest[X(41:80), Y(1:24)] .= 0.0f0 @test size(Atest) == size(Afile) == size(Amem) @test all(Atest .=== Amem .== Afile) + read(Atest .- Afile) end @testset "rasterize" begin @@ -200,37 +201,37 @@ grdpath = stem * ".gri" @testset "selectors" begin geoA = grdarray[Y(Contains(3)), X(:), Band(1)] - @test geoA isa Raster{Float32,1} + @test geoA isa Raster{Union{Missing,Float32},1} @test grdarray[X(Contains(20)), Y(Contains(10)), Band(1)] isa Float32 end @testset "conversion to Raster" begin geoA = grdarray[X(1:50), Y(1:1), Band(1)] @test size(geoA) == (50, 1) - @test eltype(geoA) <: Float32 - @time geoA isa Raster{Float32,1} + @test eltype(geoA) <: Union{Missing,Float32} + @time geoA isa Raster{Union{Missing,Float32},1} @test dims(geoA) 
isa Tuple{<:X,Y} @test refdims(geoA) isa Tuple{<:Band} @test metadata(geoA) == metadata(grdarray) - @test missingval(geoA) == -3.4f38 + @test missingval(geoA) === missing @test name(geoA) == Symbol("red:green:blue") end @testset "write" begin @testset "2d" begin filename2 = tempname() * ".gri" - write(filename2, grdarray[Band(1)]; force = true) + write(filename2, grdarray[Band(1)]; force=true) saved = Raster(filename2) # 1 band is added again on save @test size(saved) == size(grdarray[Band(1)]) @test parent(saved) == parent(grdarray[Band(1)]) - @test (@allocations write(filename2, grdarray[Band(1)]; force = true)) < 1e3 + @test_broken (@allocations write(filename2, view(grdarray, Band(1)); force = true)) < 1e3 end @testset "3d with subset" begin geoA = grdarray[1:100, 1:50, 1:2] filename = tempname() * ".grd" - write(filename, GRDsource(), geoA; force = true) + write(filename, GRDsource(), geoA; force=true) saved = Raster(filename) @test size(saved) == size(geoA) @test refdims(saved) == () @@ -250,7 +251,7 @@ grdpath = stem * ".gri" @test all(parent(saved) .=== parent(geoA)) @test saved isa typeof(geoA) @test parent(saved) == parent(geoA) - @test (@allocations write(filename, GRDsource(), geoA; force = true)) < 1e3 + @test_broken (@allocations write(filename, GRDsource(), geoA; force = true)) < 1e3 end @testset "to netcdf" begin @@ -264,8 +265,7 @@ grdpath = stem * ".gri" @test index(saved, Y) ≈ index(grdarray, Y) .+ 0.5 @test bounds(saved, Y) == bounds(grdarray, Y) @test bounds(saved, X) == bounds(grdarray, X) - @test (@allocations write(filename2, grdarray[Band(1)]; force = true)) < 1e3 - + @test_broken (@allocations write(filename2, grdarray[Band(1)]; force = true)) < 1e3 end @testset "to gdal" begin @@ -273,11 +273,11 @@ grdpath = stem * ".gri" gdalfilename = tempname() * ".tif" write(gdalfilename, GDALsource(), grdarray[Band(1)]; force = true) @test (@allocations write(gdalfilename, GDALsource(), grdarray[Band(1)]; force = true)) < 1e4 - gdalarray = 
Raster(gdalfilename) + gdalarray = Raster(gdalfilename; maskingval=nothing) # @test convert(ProjString, crs(gdalarray)) == convert(ProjString, EPSG(4326)) @test val(dims(gdalarray, X)) ≈ val(dims(grdarray, X)) @test val(dims(gdalarray, Y)) ≈ val(dims(grdarray, Y)) - @test Raster(gdalarray) ≈ permutedims(grdarray[Band(1)], [X(), Y()]) + @test gdalarray ≈ replace_missing(permutedims(grdarray[Band(1)], [X(), Y()]), typemin(Int32)) # 3 Bands gdalfilename2 = tempname() * ".tif" write(gdalfilename2, grdarray) @@ -290,8 +290,10 @@ grdpath = stem * ".gri" A = replace_missing(grdarray, missing) filename = tempname() * ".grd" write(filename, A) - @test missingval(Raster(filename)) === typemin(Float32) - rm(filename) + @test missingval(Raster(filename)) === missing + filename = tempname() * ".grd" + write(filename, A) + @test missingval(Raster(filename; maskingval=nothing)) === typemin(Float32) end end @@ -347,7 +349,7 @@ end @testset "child array properties" begin @test size(grdstack[:a]) == size(Raster(grdstack[:a])) == (101, 77, 3) - @test grdstack[:a] isa Raster{Float32,3} + @test grdstack[:a] isa Raster{Union{Missing,Float32},3} end # Stack Constructors @@ -414,7 +416,7 @@ end @testset "child array properties" begin @test size(grdstack[:Band_3]) == size(Raster(grdstack[:Band_3])) == (101, 77) - @test grdstack[:Band_1] isa Raster{Float32,2} + @test grdstack[:Band_1] isa Raster{Union{Missing,Float32},2} end # Stack Constructors diff --git a/test/sources/ncdatasets.jl b/test/sources/ncdatasets.jl index 011af58ec..db05060be 100644 --- a/test/sources/ncdatasets.jl +++ b/test/sources/ncdatasets.jl @@ -31,14 +31,8 @@ stackkeys = ( ) @testset "grid mapping" begin - using ProfileView - using SnoopCompile - @profview 1 + 2 - stack = - tinf = @snoopi_deep RasterStack(joinpath(testdir, "data/grid_mapping_test.nc")) - fg = flamegraph(tinf) - ProfileView.view(fg) - @test metadata(stack.mask)["grid_mapping"] == Dict{String, Any}( + st = RasterStack(joinpath(testdir, 
"data/grid_mapping_test.nc")) + @test metadata(st.mask)["grid_mapping"] == Dict{String, Any}( "straight_vertical_longitude_from_pole" => 0.0, "false_easting" => 0.0, "standard_parallel" => -71.0, @@ -52,7 +46,6 @@ end @testset "Raster" begin @time ncarray = Raster(ncsingle) - @time lazyarray = Raster(ncsingle; lazy=true) @time eagerarray = Raster(ncsingle; lazy=false) @test_throws ArgumentError Raster("notafile.nc") @@ -65,7 +58,7 @@ end @time read(lazyarray); end - @testset "cf" begin + @testset "cf" begin @time cfarray = Raster(ncsingle) @time cfarray = Raster(ncsingle) @time cf_nomask_array = Raster(ncsingle; maskingval=nothing) @time nocfarray = Raster(ncsingle; scaled=false) @@ -81,10 +74,10 @@ end @test parent(cfarray) isa Array{Union{Float32,Missing}} @test parent(nocfarray) isa Array{Union{Float32,Missing}} open(lazycfarray) do A - @test parent(A) isa Rasters.ModifiedDiskArray{Union{Missing,Float32}} + @test parent(A) isa Rasters.ModifiedDiskArray{false,Union{Missing,Float32}} end open(lazynocfarray) do A - @test parent(A) isa Rasters.ModifiedDiskArray{Union{Missing,Float32}} + @test parent(A) isa Rasters.ModifiedDiskArray{false,Union{Missing,Float32}} end open(lazynocf_nomask_array) do A @test parent(parent(A)) isa NCDatasets.Variable{Float32} @@ -210,7 +203,7 @@ end A1 = ncarray[X(1:80), Y(1:100)] A2 = ncarray[X(50:150), Y(90:150)] tempfile = tempname() * ".nc" - Afile = mosaic(first, read(A1), read(A2); missingval=missing, atol=1e-7, filename=tempfile) + Afile = mosaic(first, read(A1), read(A2); missingval=missing, atol=1e-7, filename=tempfile, force=true) Amem = mosaic(first, A1, A2; missingval=missing, atol=1e-7) Atest = ncarray[X(1:150), Y(1:150)] Atest[X(1:49), Y(101:150)] .= missing @@ -288,7 +281,8 @@ end all(s .== g) end |> all @test metadata(saved) == metadata(ncarray) - @test_broken all(metadata(dims(saved))[2] == metadata.(dims(ncarray))[2]) + # Dimension names are renamed so metadata is different + @test_broken all( metadata(dims(saved)) == 
metadata.(dims(ncarray))) @test Rasters.name(saved) == Rasters.name(ncarray) @test all(lookup.(dims(saved)) .== lookup.(dims(ncarray))) @test all(order.(dims(saved)) .== order.(dims(ncarray))) @@ -354,7 +348,7 @@ end nccleaned = replace_missing(ncarray[Ti(1)], -9999.0) write(gdalfilename, nccleaned; force=true) @test (@allocations write(gdalfilename, nccleaned; force=true)) < 1e4 - gdalarray = Raster(gdalfilename) + gdalarray = Raster(gdalfilename; maskingval=nothing) # gdalarray WKT is missing one AUTHORITY # @test_broken crs(gdalarray) == convert(WellKnownText, EPSG(4326)) # But the Proj representation is the same @@ -365,18 +359,19 @@ end @test index(gdalarray, X) .+ 1.0 ≈ index(nccleaned, X) @test gdalarray ≈ nccleaned end + @testset "to grd" begin nccleaned = replace_missing(ncarray[Ti(1)], -9999.0) write("testgrd.gri", nccleaned; force=true) @test (@allocations write("testgrd.gri", nccleaned; force=true)) < 1e4 - grdarray = Raster("testgrd.gri"); + grdarray = Raster("testgrd.gri", maskingval=nothing); @test crs(grdarray) == convert(ProjString, EPSG(4326)) @test bounds(grdarray) == bounds(nccleaned) @test index(grdarray, Y) ≈ reverse(index(nccleaned, Y)) .- 0.5 @test index(grdarray, X) ≈ index(nccleaned, X) .- 1.0 @test reverse(grdarray; dims=Y) ≈ nccleaned - # rm("testgrd.gri") - # rm("testgrd.grd") + rm("testgrd.gri") + rm("testgrd.grd") end @testset "write points" begin @@ -463,7 +458,7 @@ end @testset "load ncstack" begin @test ncstack isa RasterStack - @test ismissing(missingval(ncstack)) + @test isnothing(missingval(ncstack)) @test dims(ncstack[:abso4]) == dims(ncstack, (X, Y, Ti)) @test refdims(ncstack) == () # Loads child as a regular Raster @@ -574,14 +569,15 @@ end rm("test_2.nc") end -# Groups if !haskey(ENV, "CI") - path = joinpath(testdir, "data/SMAP_L4_SM_gph_20160101T223000_Vv4011_001.h5") - stack = RasterStack(path; group="Geophysical_Data") - lazy_stack = RasterStack(path; group="Geophysical_Data", lazy=true) - rast = Raster(path; 
name=:surface_temp, group="Geophysical_Data") - lazy_rast = Raster(path; name=:surface_temp, group="Geophysical_Data", lazy=true) - @test all(stack[:surface_temp] .=== read(lazy_stack[:surface_temp]) .=== rast .=== read(lazy_rast)) + @testset "HDF5 with Groups" begin + path = joinpath(testdir, "data/SMAP_L4_SM_gph_20160101T223000_Vv4011_001.h5") + stack = RasterStack(path; group="Geophysical_Data") + lazy_stack = RasterStack(path; group="Geophysical_Data", lazy=true) + rast = Raster(path; name=:surface_temp, group="Geophysical_Data") + lazy_rast = Raster(path; name=:surface_temp, group="Geophysical_Data", lazy=true) + @test all(stack[:surface_temp] .=== read(lazy_stack[:surface_temp]) .=== rast .=== read(lazy_rast)) + end end nothing From 1a0c0d68ff461bb8ec3a0b0eb0b9de063a3f73d1 Mon Sep 17 00:00:00 2001 From: rafaqz Date: Tue, 13 Aug 2024 22:40:50 +0200 Subject: [PATCH 12/38] add raw keyword --- src/array.jl | 47 ++++++++++++-------------------- src/methods/shared_docstrings.jl | 7 +++++ src/modifieddiskarray.jl | 47 ++++++++++++++++++-------------- src/stack.jl | 47 ++++++++++++++++---------------- src/utils.jl | 30 ++++++++++++++++++-- test/sources/ncdatasets.jl | 13 +++++++-- 6 files changed, 112 insertions(+), 79 deletions(-) diff --git a/src/array.jl b/src/array.jl index 6103cec9d..86be8cfd3 100644 --- a/src/array.jl +++ b/src/array.jl @@ -217,14 +217,13 @@ $METADATA_KEYWORD $CONSTRUCTOR_CRS_KEYWORD $CONSTRUCTOR_MAPPEDCRS_KEYWORD $REFDIMS_KEYWORD -$SCALED_KEYWORD When a filepath `String` is used: $DROPBAND_KEYWORD $LAZY_KEYWORD $SOURCE_KEYWORD -- `write`: defines the default `write` keyword value when calling `open` on the Raster. `false` by default. - Only makes sense to use when `lazy=true`. 
+$SCALED_KEYWORD +$RAW_KEYWORD When A is an `AbstractDimArray`: - `data`: can replace the data in an existing `AbstractRaster` @@ -311,24 +310,24 @@ function Raster(ds, filename::AbstractString; source=nokw, replace_missing=nokw, coerce=convert, - scaled=true, + scaled=nokw, write=false, lazy=false, dropband=true, checkmem=CHECKMEM[], mod=nokw, + raw=false, )::Raster - _maybewarn_replace_missing(replace_missing) + scaled, maskingval = _raw_check(raw, scaled, maskingval) + _maybe_warn_replace_missing(replace_missing) name1 = filekey(ds, name) source = _sourcetrait(filename, source) - data1, dims1, metadata1, missingval2 = _open(source, ds; name=name1, group, mod=NoMod()) do var - metadata1 = isnokw(metadata) ? _metadata(var) : metadata - missingval1 = isnokwornothing(missingval) ? Rasters.missingval(var, metadata1) : missingval + data_out, dims_out, metadata_out, missingval_out = _open(source, ds; name=name1, group, mod=NoMod()) do var + metadata_out = isnokw(metadata) ? _metadata(var) : metadata + missingval1 = isnokw(missingval) ? Rasters.missingval(var, metadata_out) : missingval maskingval1 = isnokw(maskingval) && !isnothing(missingval1) ? missing : maskingval - # If maskingval is `nothing` use missingval as missingval - missingval2 = isnokwornothing(maskingval1) ? missingval1 : maskingval1 - mod = isnokw(mod) ? _mod(eltype(var), metadata1, missingval1, maskingval1; scaled, coerce) : mod - data = if lazy + mod = isnokw(mod) ? _mod(eltype(var), metadata_out, missingval1, maskingval1; scaled, coerce) : mod + data_out = if lazy FileArray{typeof(source)}(var, filename; name=name1, group, mod, write ) @@ -338,27 +337,17 @@ function Raster(ds, filename::AbstractString; x = Array(modvar) x isa AbstractArray ? x : fill(x) # Catch an NCDatasets bug end - dims1 = isnokw(dims) ? _dims(var, crs, mappedcrs) : format(dims, data) - data, dims1, metadata1, missingval2 + # If maskingval is `nothing` use missingval as missingval + dims_out = isnokw(dims) ? 
_dims(var, crs, mappedcrs) : format(dims, data) + missingval_out = isnokwornothing(maskingval1) ? missingval1 : maskingval1 + data_out, dims_out, metadata_out, missingval_out end - name2 = name1 isa Union{NoKW,Nothing} ? Symbol("") : Symbol(name1) - raster = Raster(data1, dims1, refdims, name2, metadata1, missingval2) - return dropband ? _drop_single_band(raster, lazy) : raster + name_out = name1 isa Union{NoKW,Nothing} ? Symbol("") : Symbol(name1) + raster = Raster(data_out, dims_out, refdims, name_out, metadata_out, missingval_out) + return _maybe_drop_single_band(raster, dropband, lazy) end filekey(ds, name) = name filekey(filename::String) = Symbol(splitext(basename(filename))[1]) DD.dimconstructor(::Tuple{<:Dimension{<:AbstractProjected},Vararg{<:Dimension}}) = Raster - -function _drop_single_band(raster, lazy::Bool) - if hasdim(raster, Band()) && size(raster, Band()) < 2 - if lazy - return view(raster, Band(1)) # TODO fix dropdims in DiskArrays - else - return dropdims(raster; dims=Band()) - end - else - return raster - end -end diff --git a/src/methods/shared_docstrings.jl b/src/methods/shared_docstrings.jl index d6e880e64..729edf9bf 100644 --- a/src/methods/shared_docstrings.jl +++ b/src/methods/shared_docstrings.jl @@ -134,6 +134,13 @@ const OFFSET_KEYWORD = """ - `offset`: set `offset` for `x * scale + offset` transformations. """ +const RAW_KEYWORD = """ +- `raw`: Turn of all scaling and masking and load the raw values from disk. + `false` by default. If `true`, `scaled` will be set to `false` and `maskingval` + will be set to `nothing`. A warning will be printed if `scaled` or `maskingval` + are manually set to another value. +""" + const SCALED_KEYWORD = """ - `scaled`: apply scale and offset as `x * scale + offset`. `true` by default. This is common where data has been convert to e.g. UInt8 to save disk space. 
diff --git a/src/modifieddiskarray.jl b/src/modifieddiskarray.jl index 5f581d83d..59449e91e 100644 --- a/src/modifieddiskarray.jl +++ b/src/modifieddiskarray.jl @@ -26,6 +26,7 @@ struct Mod{T1,T2,Mi,Ma,S,O,F} <: AbstractModifications new{T1,T,map(typeof, vals)...,typeof(coerce)}(vals..., coerce) end end + Base.eltype(::Mod{T1}) where T1 = T1 source_eltype(::Mod{<:Any,T2}) where T2 = T2 @@ -62,50 +63,53 @@ DiskArrays.eachchunk(A::ModifiedDiskArray) = DiskArrays.eachchunk(parent(A)) function DiskArrays.readblock!( A::ModifiedDiskArray{false,<:Any,0}, out_block, I::AbstractVector... ) - out_block[] = _applymod(parent(A)[I...], A.mod) - return nothing + out_block[] = _applymod(parent(A)[I...][], A.mod) + return out_block end function DiskArrays.readblock!( A::ModifiedDiskArray{true,T,<:Any,0}, out_block, I::AbstractVector... ) where T out_block[] = _invertmod(Val{T}(), parent(A)[I...], A.mod) - return nothing + return out_block end function DiskArrays.readblock!( A::ModifiedDiskArray{false}, out_block, I::AbstractVector... ) - out_block .= _applymod.(parent(A)[I...], (A.mod,)) - return nothing + inner_block = similar(out_block, eltype(parent(A))) + DiskArrays.readblock!(parent(A), inner_block, I...) + out_block .= _applymod.(inner_block, (A.mod,)) + return out_block end function DiskArrays.readblock!( A::ModifiedDiskArray{true,T}, out_block, I::AbstractVector... ) where T - out_block .= _invertmod.(Ref(Val{T}()), view(parent(A), I...), Ref(A.mod)) - return nothing + out_block .= _invertmod.((Val{T}(),), parent(A)[I...], (A.mod,)) + return out_block end function DiskArrays.writeblock!( A::ModifiedDiskArray{false,<:Any,0,<:AbstractArray{T}}, block, I::AbstractVector... ) where T - A.var[I...] = _invertmod(Val{source_eltype(A.mod)}(), block[], A.mod) + + parent(A)[I...] = _invertmod(Val{source_eltype(A.mod)}(), block[], A.mod) return nothing end function DiskArrays.writeblock!( A::ModifiedDiskArray{true,<:Any,0,<:AbstractArray{T}}, _block, I::AbstractVector... 
) where T - A.var[I...] = _applymod(Val{eltype(A.mod)}(), block[], A.mod) + parent(A)[I...] = _applymod(Val{eltype(A.mod)}(), block[], A.mod) return nothing end function DiskArrays.writeblock!( - A::ModifiedDiskArray{false,<:Any,<:Any,<:AbstractArray{T}}, block, I::AbstractVector... + A::ModifiedDiskArray{<:Any,<:Any,<:Any,<:AbstractArray{T}}, block, I::AbstractVector... ) where T - A.var[I...] .= _invertmod.(Val{source_eltype(A.mod)}(), block, Ref(A.mod)) + parent(A)[I...] = _invertmod.((Val{source_eltype(A.mod)}(),), block, (A.mod,)) return nothing end function DiskArrays.writeblock!( A::ModifiedDiskArray{true,<:Any,<:Any,<:AbstractArray{T}}, _block, I::AbstractVector... ) where T -A.var[I...] .= _applymod.((Val{eltype(A.mod)}(),), block, (A.mod,)) + parent(A)[I...] = _applymod.((Val{eltype(A.mod)}(),), block, (A.mod,)) return nothing end @@ -116,7 +120,7 @@ Base.@assume_effects :foldable function _applymod(x, m::Mod) _scaleoffset(x, m) end end -_applymod(x, m::NoMod) = x +Base.@assume_effects :foldable _applymod(x, m::NoMod) = x _ismissing(x, mv) = isequal(x, mv) _ismissing(_, ::Nothing) = false @@ -139,16 +143,17 @@ Base.@assume_effects :foldable function _invertmod(::Val{T}, x, m::Mod) where T end return _scaleoffset_inv(T, tm, m) end -_invertmod(v, x, m::NoMod) = x +Base.@assume_effects :foldable _invertmod(v, x, m::NoMod) = x -_scaleoffset_inv(::Type{T}, x, m::Mod) where T = _scaleoffset_inv(m.coerce, T, x, m) -_scaleoffset_inv(coerce::Base.Callable, ::Type{T}, x, m::Mod) where T = - coerce(T, _scaleoffset_inv1(x, m.scale, m.offset)) +Base.@assume_effects :foldable _scaleoffset_inv(::Type{T}, x, m::Mod) where T = + _scaleoffset_inv(m.coerce, T, x, m)::T +Base.@assume_effects :foldable _scaleoffset_inv(coerce::Base.Callable, ::Type{T}, x, m::Mod) where T = + coerce(T, _scaleoffset_inv1(x, m.scale, m.offset))::T -_scaleoffset_inv1(x, scale, offset) = (x - offset) / scale -_scaleoffset_inv1(x, scale, ::Nothing) = x / scale -_scaleoffset_inv1(x, ::Nothing, 
offset) = x - offset -_scaleoffset_inv1(x, ::Nothing, ::Nothing) = x +Base.@assume_effects :foldable _scaleoffset_inv1(x, scale, offset) = (x - offset) / scale +Base.@assume_effects :foldable _scaleoffset_inv1(x, scale, ::Nothing) = x / scale +Base.@assume_effects :foldable _scaleoffset_inv1(x, ::Nothing, offset) = x - offset +Base.@assume_effects :foldable _scaleoffset_inv1(x, ::Nothing, ::Nothing) = x function _stack_mods( diff --git a/src/stack.jl b/src/stack.jl index 8d4ad2025..a8b1735e1 100644 --- a/src/stack.jl +++ b/src/stack.jl @@ -166,7 +166,6 @@ $GROUP_KEYWORD - `missingval`: a single value for all layers or a `NamedTuple` of missingval for each layer. `nothing` specifies no missing value. $MASKINGVAL_KEYWORD -$SCALED_KEYWORD $CONSTRUCTOR_CRS_KEYWORD $CONSTRUCTOR_MAPPEDCRS_KEYWORD - `refdims`: `Tuple` of `Dimension` that the stack was sliced from. @@ -175,6 +174,8 @@ For when one or multiple filepaths are used: $DROPBAND_KEYWORD $LAZY_KEYWORD +$RAW_KEYWORD +$SCALED_KEYWORD $SOURCE_KEYWORD For when a single `Raster` is used: @@ -368,17 +369,23 @@ function RasterStack(filenames::NamedTuple{K,<:Tuple{<:AbstractString,Vararg}}; layerdims::Union{NoKW,NamedTuple{K}}=nokw, missingval=nokw, maskingval=nokw, + replace_missing=nokw, + scaled=nokw, + raw=false, kw... ) where K - missingval1 = collect(_stack_nt(filenames, missingval)) - maskingval1 = collect(_stack_nt(filenames, maskingval)) + _maybe_warn_replace_missing(replace_missing) + scaled, maskingval = _raw_check(raw, scaled, maskingval) + + layermissingval = collect(_stack_nt(filenames, missingval)) + layermaskingval = collect(_stack_nt(filenames, maskingval)) fn = collect(filenames) layermetadata = layermetadata isa NamedTuple ? collect(layermetadata) : map(_ -> NoKW(), fn) layerdims = layerdims isa NamedTuple ? 
collect(layerdims) : map(_ -> NoKW(), fn) - layers = map(K, fn, layermetadata, layerdims, missingval1, maskingval1) do name, fn, md, d, mv, ma + layers = map(K, fn, layermetadata, layerdims, layermissingval, layermaskingval) do name, fn, md, d, mv, ma Raster(fn; source=_sourcetrait(fn, source), - dims=d, name, metadata=md, missingval=mv, maskingval=ma, kw... + dims=d, name, metadata=md, missingval=mv, maskingval=ma, scaled, kw... ) end return RasterStack(NamedTuple{K}(layers); resize, metadata) @@ -387,15 +394,20 @@ end function RasterStack(filename::AbstractString; lazy::Bool=false, dropband::Bool=true, + raw::Bool=false, source::Union{Symbol,Source,NoKW}=nokw, missingval=nokw, maskingval=nokw, name=nokw, - group=nokw, - scaled=true, - coerce=convert, + group::Union{Symbol,AbstractString,NoKW}=nokw, + scaled::Union{Bool,NoKW}=nokw, + coerce=nokw, + replace_missing=nokw, kw... ) + _maybe_warn_replace_missing(replace_missing) + scaled, maskingval = _raw_check(raw, scaled, maskingval) + source = _sourcetrait(filename, source) st = if isdir(filename) # Load as a whole directory @@ -427,22 +439,14 @@ function RasterStack(filename::AbstractString; else # With bands actings as layers raster = Raster(filename; - source, lazy, missingval, maskingval, scaled, coerce, dropband=false + source, lazy, missingval, maskingval, scaled, coerce, dropband=false, ) RasterStack(raster; kw...) end end # Maybe drop the Band dimension - if dropband && hasdim(st, Band()) && size(st, Band()) == 1 - if lazy - return view(st, Band(1)) # TODO fix dropdims in DiskArrays - else - return dropdims(st; dims=Band()) - end - else - return st - end + return _maybe_drop_single_band(st, dropband, lazy) end function DD.modify(f, s::AbstractRasterStack{<:FileStack{<:Any,K}}) where K @@ -508,7 +512,7 @@ function _layer_stack(filename; crs=nokw, mappedcrs=nokw, coerce=convert, - scaled=true, + scaled=nokw, checkmem=true, lazy=false, kw... 
@@ -564,11 +568,6 @@ function _layer_stack(filename; return RasterStack(data; field_kw..., kw...) end - # _return_lifted(NamedTuple{name}, source, dims, refdims, layerdims, metadata, layermetadata1, missingval, lazy, layers, mods, checkmem, group) -# function _return_lifted( -# ::Type{NT}, source, dims, refdims, layerdims, metadata, layermetadata, missingval, lazy, layers, mods, checkmem, group -# ) where NT<:NamedTuple{K} where K -# end # Try to sort the dimensions by layer dimension into a sensible # order that applies without permutation, preferencing the layers diff --git a/src/utils.jl b/src/utils.jl index d89c2ca7a..f617a7920 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -299,6 +299,32 @@ function _checkregular(A::AbstractArray) return true end +# Constructor helpers + +function _raw_check(raw, scaled, maskingval) + if raw + scaled isa Bool && scaled && @warn "`scaled=true` set to `false` because of `raw=true`" + isnokwornothing(maskingval) || @warn "`maskingval=$maskingval` set to `nothing` because of `raw=true`" + return false, nothing + else + scaled = isnokw(scaled) ? true : scaled + return scaled, maskingval + end +end + +function _maybe_drop_single_band(x, dropband::Bool, lazy::Bool) + dropband || return x + if hasdim(x, Band()) && size(x, Band()) < 2 + if lazy + return view(x, Band(1)) # TODO fix dropdims in DiskArrays + else + return dropdims(x; dims=Band()) + end + else + return x + end +end + # Memory @@ -423,8 +449,8 @@ end # Warnings and erros -_maybewarn_replace_missing(replace_missing::NoKW) = nothing -function _maybewarn_replace_missing(replace_missing) +_maybe_warn_replace_missing(replace_missing::NoKW) = nothing +function _maybe_warn_replace_missing(replace_missing) @warn "`replace_missing` keyword no longer used. 
Set `maskingval` to nothing for no replacement, to `missing` to mask `missingval` with `missing`, or any other value" end diff --git a/test/sources/ncdatasets.jl b/test/sources/ncdatasets.jl index db05060be..0885a71fe 100644 --- a/test/sources/ncdatasets.jl +++ b/test/sources/ncdatasets.jl @@ -45,7 +45,8 @@ stackkeys = ( end @testset "Raster" begin - @time ncarray = Raster(ncsingle) + @time ncarray = Raster(ncsingle); + @time ncarray = Raster(ncsingle; maskingval=nothing); @time lazyarray = Raster(ncsingle; lazy=true) @time eagerarray = Raster(ncsingle; lazy=false) @test_throws ArgumentError Raster("notafile.nc") @@ -58,11 +59,12 @@ end @time read(lazyarray); end - @testset "cf" begin @time cfarray = Raster(ncsingle) + @testset "scaling and maskign" begin @time cfarray = Raster(ncsingle) @time cfarray = Raster(ncsingle) @time cf_nomask_array = Raster(ncsingle; maskingval=nothing) @time nocfarray = Raster(ncsingle; scaled=false) @time nocf_nomask_array = Raster(ncsingle; scaled=false, maskingval=nothing) + @time raw_array = Raster(ncsingle; raw=true) @time lazycfarray = Raster(ncsingle; lazy=true, scaled=false) @time lazynocfarray = Raster(ncsingle; lazy=true, scaled=false) @time lazynocf_nomask_array = Raster(ncsingle; lazy=true, scaled=false, maskingval=nothing) @@ -70,9 +72,12 @@ end @test missingval(nocfarray) === missing @test missingval(cf_nomask_array) === 1.0f20 @test missingval(nocf_nomask_array) === 1.0f20 + @test missingval(raw_array) === 1.0f20 @test all(skipmissing(cfarray) .=== skipmissing(nocfarray)) @test parent(cfarray) isa Array{Union{Float32,Missing}} @test parent(nocfarray) isa Array{Union{Float32,Missing}} + @test parent(nocf_nomask_array) isa Array{Float32} + @test parent(raw_array) isa Array{Float32} open(lazycfarray) do A @test parent(A) isa Rasters.ModifiedDiskArray{false,Union{Missing,Float32}} end @@ -110,7 +115,9 @@ end @testset "handle empty variables" begin st = RasterStack((empty=view(ncarray, 1, 1, 1), full=ncarray)) empty_test = 
tempname() * ".nc" - write(empty_test, st) + using ProfileView + @profview write(empty_test, st) + rast = Raster(empty_test) st = RasterStack(empty_test) @test name(rast) == name(st[:empty]) == :empty From ce3b152485843a476bc05eb0e718d8199ba56c38 Mon Sep 17 00:00:00 2001 From: rafaqz Date: Tue, 13 Aug 2024 23:06:18 +0200 Subject: [PATCH 13/38] clean up extensions --- .../GeometryOpsDimensionalDataExt.jl | 20 ++++++ ext/RastersArchGDALExt/RastersArchGDALExt.jl | 9 +-- ext/RastersArchGDALExt/gdal_source.jl | 2 - .../RastersCoordinateTransformationsExt.jl | 9 +-- .../RastersGRIBDatasetsExt.jl | 26 +------ .../gribdatasets_source.jl | 3 - ext/RastersMakieExt/RastersMakieExt.jl | 7 +- .../RastersNCDatasetsExt.jl | 24 +++---- ext/RastersNCDatasetsExt/ncdatasets_source.jl | 13 ++-- .../RastersZarrDatasetsExt.jl | 13 ++++ .../zarrdatasets_source.jl | 14 ++++ test/sources/commondatamodel.jl | 24 +++++++ test/sources/zarr.jl | 71 +++++++++++++++++++ 13 files changed, 168 insertions(+), 67 deletions(-) create mode 100644 ext/GeometryOpsDimensionalDataExt/GeometryOpsDimensionalDataExt.jl create mode 100644 ext/RastersZarrDatasetsExt/RastersZarrDatasetsExt.jl create mode 100644 ext/RastersZarrDatasetsExt/zarrdatasets_source.jl create mode 100644 test/sources/commondatamodel.jl create mode 100644 test/sources/zarr.jl diff --git a/ext/GeometryOpsDimensionalDataExt/GeometryOpsDimensionalDataExt.jl b/ext/GeometryOpsDimensionalDataExt/GeometryOpsDimensionalDataExt.jl new file mode 100644 index 000000000..e0132a5df --- /dev/null +++ b/ext/GeometryOpsDimensionalDataExt/GeometryOpsDimensionalDataExt.jl @@ -0,0 +1,20 @@ +module GeometryOpsDimensionalDataExt + +import DimensionalData as DD +import GeometryOps as GO +import GeoInterface as GI + +function GO.polygonize(A::DD.AbstractDimArray; dims=(DD.X(), DD.Y()), crs=GI.crs(A), kw...) 
+ lookups = DD.lookup(A, dims) + bounds_vecs = if DD.isintervals(lookups) + map(DD.intervalbounds, lookups) + else + @warn "`polygonsize` is not possible for `Points` sampling, as polygons cover space by definition. Treating as `Intervals`, but this may not be appropriate" + map(lookups) do l + Dd.intervalbounds(DD.set(l, DD.Intervals())) + end + end + GO.polygonize(bounds_vecs..., DD.AbstractDimArray; crs, kw...) +end + +end diff --git a/ext/RastersArchGDALExt/RastersArchGDALExt.jl b/ext/RastersArchGDALExt/RastersArchGDALExt.jl index e7292ecae..bf855bb9d 100644 --- a/ext/RastersArchGDALExt/RastersArchGDALExt.jl +++ b/ext/RastersArchGDALExt/RastersArchGDALExt.jl @@ -1,10 +1,7 @@ module RastersArchGDALExt -@static if isdefined(Base, :get_extension) # julia < 1.9 - using Rasters, ArchGDAL, CommonDataModel -else - using ..Rasters, ..ArchGDAL, ..CommonDataModel -end +using Rasters +using ArchGDAL import DiskArrays, Extents, @@ -24,12 +21,12 @@ using Rasters: GDALsource, AbstractProjected, AbstractRaster, AbstractRasterStac import Rasters: reproject, resample, warp, cellsize, nokw, isnokw, isnokwornothing +const AG = ArchGDAL const RA = Rasters const DD = DimensionalData const DA = DiskArrays const GI = GeoInterface const LA = Lookups -const CDM = CommonDataModel include("cellsize.jl") include("gdal_source.jl") diff --git a/ext/RastersArchGDALExt/gdal_source.jl b/ext/RastersArchGDALExt/gdal_source.jl index ad4a092ae..9d41731a4 100644 --- a/ext/RastersArchGDALExt/gdal_source.jl +++ b/ext/RastersArchGDALExt/gdal_source.jl @@ -1,5 +1,3 @@ -const AG = ArchGDAL - const GDAL_LOCUS = Start() const GDAL_DIM_ORDER = (X(), Y(), Band()) diff --git a/ext/RastersCoordinateTransformationsExt/RastersCoordinateTransformationsExt.jl b/ext/RastersCoordinateTransformationsExt/RastersCoordinateTransformationsExt.jl index 39d0ee745..1643f7a1e 100644 --- a/ext/RastersCoordinateTransformationsExt/RastersCoordinateTransformationsExt.jl +++ 
b/ext/RastersCoordinateTransformationsExt/RastersCoordinateTransformationsExt.jl @@ -1,11 +1,7 @@ module RastersCoordinateTransformationsExt -@static if isdefined(Base, :get_extension) # julia < 1.9 - using Rasters, CoordinateTransformations -else - using ..Rasters, ..CoordinateTransformations -end - +using Rasters +using CoordinateTransformations using DimensionalData using Rasters.Lookups using Rasters.Dimensions @@ -16,7 +12,6 @@ const RA = Rasters const DD = DimensionalData const LA = Lookups - include("affineprojected.jl") end # module diff --git a/ext/RastersGRIBDatasetsExt/RastersGRIBDatasetsExt.jl b/ext/RastersGRIBDatasetsExt/RastersGRIBDatasetsExt.jl index 59c69048f..ca81de017 100644 --- a/ext/RastersGRIBDatasetsExt/RastersGRIBDatasetsExt.jl +++ b/ext/RastersGRIBDatasetsExt/RastersGRIBDatasetsExt.jl @@ -1,32 +1,12 @@ module RastersGRIBDatasetsExt -@static if isdefined(Base, :get_extension) # julia < 1.9 - using Rasters, GRIBDatasets, CommonDataModel -else - using ..Rasters, ..GRIBDatasets, ..CommonDataModel -end - -import DiskArrays, - FillArrays, - Extents, - GeoInterface, - Missings +using Rasters +using GRIBDatasets -using Dates, - DimensionalData, - GeoFormatTypes - -using Rasters.Lookups -using Rasters.Dimensions using Rasters: GRIBsource -using CommonDataModel: AbstractDataset - const RA = Rasters -const DD = DimensionalData -const DA = DiskArrays -const GI = GeoInterface -const LA = Lookups +const GDS = GRIBDatasets include("gribdatasets_source.jl") diff --git a/ext/RastersGRIBDatasetsExt/gribdatasets_source.jl b/ext/RastersGRIBDatasetsExt/gribdatasets_source.jl index f3824cbda..72f63fd11 100644 --- a/ext/RastersGRIBDatasetsExt/gribdatasets_source.jl +++ b/ext/RastersGRIBDatasetsExt/gribdatasets_source.jl @@ -1,6 +1,3 @@ -const GDS = GRIBDatasets -const CDM = CommonDataModel - function RA.OpenStack(fs::RA.FileStack{GRIBsource,K}) where K RA.OpenStack{GRIBsource,K}(GDS.GRIBDataset(RA.filename(fs))) end diff --git 
a/ext/RastersMakieExt/RastersMakieExt.jl b/ext/RastersMakieExt/RastersMakieExt.jl index 0c71302e7..4ddae96c3 100644 --- a/ext/RastersMakieExt/RastersMakieExt.jl +++ b/ext/RastersMakieExt/RastersMakieExt.jl @@ -1,10 +1,7 @@ module RastersMakieExt -@static if isdefined(Base, :get_extension) # julia < 1.9 - using Makie, Rasters -else - using ..Makie, ..Rasters -end +using Makie +using Rasters using Rasters.DimensionalData using Rasters.Dimensions diff --git a/ext/RastersNCDatasetsExt/RastersNCDatasetsExt.jl b/ext/RastersNCDatasetsExt/RastersNCDatasetsExt.jl index b8e50adc5..1a8cdae86 100644 --- a/ext/RastersNCDatasetsExt/RastersNCDatasetsExt.jl +++ b/ext/RastersNCDatasetsExt/RastersNCDatasetsExt.jl @@ -1,20 +1,12 @@ module RastersNCDatasetsExt -@static if isdefined(Base, :get_extension) # julia < 1.9 - using Rasters, NCDatasets, CommonDataModel -else - using ..Rasters, ..NCDatasets, ..CommonDataModel -end - -import DiskArrays, - FillArrays, - Extents, - GeoInterface, - Missings +using Rasters +using NCDatasets +using CommonDataModel +using Dates +using DimensionalData, -using Dates, - DimensionalData, - GeoFormatTypes +import Missings using Rasters.Lookups using Rasters.Dimensions @@ -22,10 +14,10 @@ using Rasters: CDMsource, NCDsource, NoKW, nokw, isnokw using CommonDataModel: AbstractDataset +const NCD = NCDatasets +const CDM = CommonDataModel const RA = Rasters const DD = DimensionalData -const DA = DiskArrays -const GI = GeoInterface const LA = Lookups include("ncdatasets_source.jl") diff --git a/ext/RastersNCDatasetsExt/ncdatasets_source.jl b/ext/RastersNCDatasetsExt/ncdatasets_source.jl index d9690398b..a57c8f5bf 100644 --- a/ext/RastersNCDatasetsExt/ncdatasets_source.jl +++ b/ext/RastersNCDatasetsExt/ncdatasets_source.jl @@ -1,9 +1,12 @@ -const NCD = NCDatasets - -const UNNAMED_NCD_FILE_KEY = "unnamed" - const NCDAllowedType = Union{Int8,UInt8,Int16,UInt16,Int32,UInt32,Int64,UInt64,Float32,Float64,Char,String} +function RA._check_allowed_type(::RA.NCDsource, 
eltyp) + eltyp <: NCDAllowedType || throw(ArgumentError(""" + Element type $eltyp cannot be written to NetCDF. Convert it to one of $(Base.uniontypes(NCDAllowedType)), + usually by broadcasting the desired type constructor over the `Raster`, e.g. `newrast = Float32.(rast)`")) + """ + )) +end function Base.write(filename::AbstractString, ::NCDsource, A::AbstractRaster; append=false, force=false, @@ -18,7 +21,7 @@ function Base.write(filename::AbstractString, ::NCDsource, A::AbstractRaster; mode = !isfile(filename) || !append ? "c" : "a"; ds = NCD.Dataset(filename, mode; attrib=RA._attribdict(metadata(A))) try - _writevar!(ds, A; kw...) + RA._writevar!(ds, A; kw...) finally close(ds) end diff --git a/ext/RastersZarrDatasetsExt/RastersZarrDatasetsExt.jl b/ext/RastersZarrDatasetsExt/RastersZarrDatasetsExt.jl new file mode 100644 index 000000000..bb44066aa --- /dev/null +++ b/ext/RastersZarrDatasetsExt/RastersZarrDatasetsExt.jl @@ -0,0 +1,13 @@ +module RastersZarrDatasetsExt + +using Rasters +using ZarrDatasets + +using ZarrDatasets: ZarrDatasets as ZD +using Rasters: Zarrsource + +const RA = Rasters + +include("zarrdatasets_source.jl") + +end diff --git a/ext/RastersZarrDatasetsExt/zarrdatasets_source.jl b/ext/RastersZarrDatasetsExt/zarrdatasets_source.jl new file mode 100644 index 000000000..e308ca4bf --- /dev/null +++ b/ext/RastersZarrDatasetsExt/zarrdatasets_source.jl @@ -0,0 +1,14 @@ +function RA.OpenStack(fs::RA.FileStack{Zarrsource,K}) where K + RA.OpenStack{Zarrsource,K}(ZD.ZarrDataset(RA.filename(fs))) +end + +# In ZarrDatasets, the file is open for reading the values and closed afterwards. +Base.close(os::RA.OpenStack{Zarrsource}) = nothing + +function RA._open(f, ::Zarrsource, filename::AbstractString; write=false, kw...) + ds = ZarrDatasets.ZarrDataset(filename) + RA._open(f, Zarrsource(), ds; kw...) 
+end + +RA._sourcetrait(::ZD.ZarrVariable) = Zarrsource() +RA._sourcetrait(::ZD.ZarrDataset) = Zarrsource() diff --git a/test/sources/commondatamodel.jl b/test/sources/commondatamodel.jl new file mode 100644 index 000000000..451d18b31 --- /dev/null +++ b/test/sources/commondatamodel.jl @@ -0,0 +1,24 @@ +using Rasters, NCDatasets, Test +import Rasters: ForwardOrdered, ReverseOrdered, Regular +@testset "step" begin + # test if regular indices are correctly rounded + f32_indices = range(0.075f0, 10.075f0; step = 0.05f0) |> collect + @test Rasters._cdmspan(f32_indices, ForwardOrdered())[1] === Regular(0.05) + + f32_indices_rev = range(10.075f0, 0.075f0; step = -0.05f0) |> collect + @test Rasters._cdmspan(f32_indices_rev, ReverseOrdered())[1] === Regular(-0.05) + + # test if regular indices are not rounded when they should not + indices_one_third = range(0, 10; length = 31) |> collect + @test Rasters._cdmspan(indices_one_third, ForwardOrdered())[1] === Regular(1/3) + + # test when reading a file + ras = Raster(rand(X(f32_indices), Y(indices_one_third))) + tempfile = tempname() * ".nc" + write(tempfile, ras) + ras_read = Raster(tempfile) + steps = step.(dims(ras_read)) + @test steps[1] == 0.05 + @test steps[2] == 1/3 + +end \ No newline at end of file diff --git a/test/sources/zarr.jl b/test/sources/zarr.jl new file mode 100644 index 000000000..ef4c2d29c --- /dev/null +++ b/test/sources/zarr.jl @@ -0,0 +1,71 @@ +using Rasters, Zarr +using ZarrDatasets +using Rasters: FileArray, FileStack, Zarrsource, crs, bounds, name, trim + +path = "https://s3.bgc-jena.mpg.de:9000/esdl-esdc-v3.0.2/esdc-16d-2.5deg-46x72x1440-3.0.2.zarr" + +@testset "Zarr Raster open" begin + + +zraster = Raster(path, name="air_temperature_2m") +lazyarray = Raster(path, lazy=true, name="air_temperature_2m") +eagerarray = Raster(path, lazy=false, name="air_temperature_2m") +@test_throws ArgumentError Raster("notafile.zarr/") + +@testset "lazyness" begin + # Eager is the default + @test parent(zraster) isa 
Array + @test parent(lazyarray) isa FileArray + @test parent(eagerarray) isa Array +end +@testset "read" begin + @time A = read(lazyarray); + @test A isa Raster + @test parent(A) isa Array + A2 = copy(A) .= 0 + @time read!(ncarray, A2); + A3 = copy(A) .= 0 + @time read!(ncsingle, A3) + @test all(A .=== A2) + @test all(A .=== A3) +end + +@testset "array properties" begin + @test name.(dims(zraster)) == (:X, :Y, :Ti) + @test length(dims(zraster, X)) == 144 + @test index(zraster,X) == collect(-178.75:2.5:178.75) + # TODO the spatial bounds are strange, because the data is point data + # We should find a dataset that has actual intervals + @test bounds(zraster) == ( + (-178.75, 178.75), + (-88.75, 88,75), + (DateTime("1979-01-09T00:00:00"), DateTime("2021-12-27T00:00:00")), + ) +end +@testset "dimensions" begin + @test ndims(zraster) == 3 + @test length.(dims(zraster)) == (144, 72, 989) + @test dims(zraster) isa Tuple{<:X,<:Y,<:Ti} + @test refdims(zraster) == () + @test val.(span(ncarray)) == (2.5, 2.5, (nothing, nothing)) + @test typeof(lookup(ncarray)) <: Tuple{<:Mapped,<:Mapped,<:Sampled} +end +@testset "other fields" begin + @test ismissing(missingval(zraster)) + @test metadata(r)["original_name"] == "t2m" + @test metadata(zraster) isa Metadata{<:Rasters.CDMsource, Dict{String, Any}} + @test name(zraster) == :air_temperature_2m +end + +@testset "indexing" begin + @test zraster[Ti(1)] isa Raster{<:Any,2} + @test zraster[Y(1), Ti(1)] isa Raster{<:Any,1} + @test zraster[X(1), Ti(1)] isa Raster{<:Any,1} + @test zraster[X(1), Y(1), Ti(1)] == -28.866226f0 == parent(zraster)[1,1,1] + @test zraster[X(30), Y(30), Ti(1)] isa Float32 + # Alaska + @test zraster[Y(Near(-88.75)), X(Near(-178.74)), Ti(1)] ==-28.866226f0 + @test zraster[Ti(At(DateTime(1979,1,9))), X(At(-178.75)), Y(At(-88.75))] == -28.866226f0 +end + +end From 329e92e98f2db24d87f60a7e4866aab1ed2a5ecd Mon Sep 17 00:00:00 2001 From: rafaqz Date: Tue, 13 Aug 2024 23:08:19 +0200 Subject: [PATCH 14/38] updates --- 
Project.toml | 11 ++-- src/methods/zonal.jl | 27 +++++++--- src/sources/commondatamodel.jl | 97 ++++++++++++++++++++++++++++++++-- src/sources/sources.jl | 11 ++-- src/stack.jl | 2 +- src/write.jl | 2 +- test/methods.jl | 12 +++++ 7 files changed, 142 insertions(+), 20 deletions(-) diff --git a/Project.toml b/Project.toml index 33f025717..c7b44f79e 100644 --- a/Project.toml +++ b/Project.toml @@ -28,10 +28,10 @@ Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46" ArchGDAL = "c9ce4bd3-c3d5-55b8-8973-c0e20141b8c3" CoordinateTransformations = "150eb455-5306-5404-9cee-2592286d6298" GRIBDatasets = "82be9cdb-ee19-4151-bdb3-b400788d9abc" -HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f" Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" NCDatasets = "85f8d34a-cbdd-5861-8df4-14fed0d494ab" RasterDataSources = "3cb90ccd-e1b6-4867-9617-4276c8b2ca36" +ZarrDatasets = "519a4cdf-1362-424a-9ea1-b1d782dbb24b" [extensions] RastersArchGDALExt = "ArchGDAL" @@ -40,10 +40,11 @@ RastersGRIBDatasetsExt = "GRIBDatasets" RastersMakieExt = "Makie" RastersNCDatasetsExt = "NCDatasets" RastersRasterDataSourcesExt = "RasterDataSources" +RastersZarrDatasetsExt = "ZarrDatasets" [compat] -Aqua = "0.8" Adapt = "2, 3.0, 4" +Aqua = "0.8" ArchGDAL = "0.9, 0.10" CFTime = "0.1" ColorTypes = "0.10, 0.11" @@ -58,7 +59,6 @@ FillArrays = "0.12, 0.13, 1" Flatten = "0.4" GRIBDatasets = "0.2, 0.3" GeoFormatTypes = "0.4" -GeometryBasics = "0.4" GeoInterface = "1" Makie = "0.19, 0.20, 0.21" Missings = "0.4, 1" @@ -73,6 +73,7 @@ Setfield = "0.6, 0.7, 0.8, 1" Shapefile = "0.10, 0.11" Statistics = "1" Test = "1" +ZarrDatasets = "0.1" julia = "1.10" [extras] @@ -81,8 +82,8 @@ ArchGDAL = "c9ce4bd3-c3d5-55b8-8973-c0e20141b8c3" CFTime = "179af706-886a-5703-950a-314cd64e0468" CoordinateTransformations = "150eb455-5306-5404-9cee-2592286d6298" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" -GeometryBasics = "5c1252a2-5f33-56bf-86c9-59e7332b4326" GRIBDatasets = "82be9cdb-ee19-4151-bdb3-b400788d9abc" +GeometryBasics = 
"5c1252a2-5f33-56bf-86c9-59e7332b4326" Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" NCDatasets = "85f8d34a-cbdd-5861-8df4-14fed0d494ab" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" @@ -93,4 +94,4 @@ Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Aqua", "ArchGDAL", "CFTime", "CoordinateTransformations", "DataFrames", "GeometryBasics", "GRIBDatasets", "NCDatasets", "Plots", "RasterDataSources", "SafeTestsets", "Shapefile", "Statistics", "Test"] +test = ["Aqua", "ArchGDAL", "CFTime", "CoordinateTransformations", "DataFrames", "GeometryBasics", "GRIBDatasets", "NCDatasets", "Plots", "RasterDataSources", "SafeTestsets", "Shapefile", "Statistics", "Test", "ZarrDatasets"] diff --git a/src/methods/zonal.jl b/src/methods/zonal.jl index b0b26e650..54496d2ff 100644 --- a/src/methods/zonal.jl +++ b/src/methods/zonal.jl @@ -108,16 +108,31 @@ function _zonal(f, x::RasterStackOrArray, ::Nothing, data; progress=true, thread geoms = _get_geometries(data, geometrycolumn) n = length(geoms) n == 0 && return [] - zs = _alloc_zonal(f, x, first(geoms), n; kw...) - _run(1:n, threaded, progress, "Applying $f to each geometry...") do i + zs, start_index = _alloc_zonal(f, x, geoms, n; kw...) + start_index == n + 1 && return zs + _run(start_index:n, threaded, progress, "Applying $f to each geometry...") do i zs[i] = _zonal(f, x, geoms[i]; kw...) end return zs end -function _alloc_zonal(f, x, geom, n; kw...) - z1 = _zonal(f, x, geom; kw...) +function _alloc_zonal(f, x, geoms, n; kw...) + # Find first non-missing entry and count number of missing entries + n_missing::Int = 0 + z1 = _zonal(f, x, first(geoms); kw...) + for geom in geoms + z1 = _zonal(f, x, geom; kw...) 
+ if !ismissing(z1) + break + end + n_missing += 1 + end zs = Vector{Union{Missing,typeof(z1)}}(undef, n) - zs[1] = z1 - return zs + zs[1:n_missing] .= missing + # Exit early when all elements are missing + if n_missing == n + return zs, n_missing + 1 + end + zs[n_missing + 1] = z1 + return zs, n_missing + 1 end diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index ffc4b756b..bbc0d48ff 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -96,12 +96,14 @@ function _nondimnames(ds) end union(dimnames, boundsnames)::Vector{String} else - dimnames::Vector{String} + collect(dimnames)::Vector{String} end - nondim = setdiff(keys(ds), toremove) + # Maybe this should be fixed in ZarrDatasets but it works with this patch. + nondim = collect(setdiff(keys(ds), toremove)) return nondim end + function _layers(ds::AbstractDataset, ::NoKW=nokw, ::NoKW=nokw) nondim = _nondimnames(ds) grid_mapping = String[] @@ -274,7 +276,14 @@ function _cdmlookup( else boundskey = var.attrib["bounds"] boundsmatrix = Array(ds[boundskey]) - Explicit(boundsmatrix), Intervals(Center()) + locus = if mapreduce(==, &, view(boundsmatrix, 1, :), index) + Start() + elseif mapreduce(==, &, view(boundsmatrix, 2, :), index) + End() + else + Center() + end + Explicit(boundsmatrix), Intervals(locus) end end @@ -314,7 +323,15 @@ end function _cdmspan(index, order) # Handle a length 1 index length(index) == 1 && return Regular(zero(eltype(index))), Points() - step = index[2] - index[1] + + step = if eltype(index) <: AbstractFloat + # Calculate step, avoiding as many floating point errors as possible + st = Base.step(Base.range(Float64(first(index)), Float64(last(index)); length = length(index))) + st_rd = round(st, digits = Base.floor(Int,-log10(eps(eltype(index))))) # round to nearest digit within machine epsilon + isapprox(st_rd, st; atol = eps(eltype(index))) ? 
st_rd : st # keep the rounded number if it is very close to the original + else + index[2] - index[1] + end for i in 2:length(index)-1 # If any step sizes don't match, its Irregular if !(index[i+1] - index[i] ≈ step) @@ -408,3 +425,75 @@ function _cdmshiftlocus(lookup::AbstractSampled, dim::Dimension) end _unuseddimerror(dimname) = error("Dataset contains unused dimension $dimname") + + +# Add a var array to a dataset before writing it. +function _writevar!(ds::AbstractDataset, A::AbstractRaster{T,N}; + verbose=true, + missingval=nokw, + chunks=nokw, + chunksizes=_chunks_to_tuple(A, dims(A), chunks), + kw... +) where {T,N} + missingval = missingval isa NoKW ? Rasters.missingval(A) : missingval + _def_dim_var!(ds, A) + attrib = _attribdict(metadata(A)) + # Set _FillValue + eltyp = Missings.nonmissingtype(T) + _check_allowed_type(_sourcetrait(ds), eltyp) + if ismissing(missingval) + fillval = if haskey(attrib, "_FillValue") && attrib["_FillValue"] isa eltyp + attrib["_FillValue"] + else + CDM.fillvalue(eltyp) + end + attrib["_FillValue"] = fillval + A = replace_missing(A, fillval) + elseif Rasters.missingval(A) isa T + attrib["_FillValue"] = missingval + else + verbose && !(missingval isa Nothing) && @warn "`missingval` $(missingval) is not the same type as your data $T." + end + + key = if string(DD.name(A)) == "" + UNNAMED_CDM_FILE_KEY + else + string(DD.name(A)) + end + + dimnames = lowercase.(string.(map(name, dims(A)))) + var = CDM.defVar(ds, key, eltyp, dimnames; attrib=attrib, chunksizes, kw...) 
|> CFDiskArray + + # Write with a DiskArrays.jl broadcast + var .= A + + return nothing +end + +_check_allowed_type(trait, eltyp) = nothing + +_def_dim_var!(ds::AbstractDataset, A) = map(d -> _def_dim_var!(ds, d), dims(A)) +function _def_dim_var!(ds::AbstractDataset, dim::Dimension) + dimname = lowercase(string(DD.name(dim))) + haskey(ds.dim, dimname) && return nothing + CDM.defDim(ds, dimname, length(dim)) + lookup(dim) isa NoLookup && return nothing + + # Shift index before conversion to Mapped + dim = _cdmshiftlocus(dim) + if dim isa Y || dim isa X + dim = convertlookup(Mapped, dim) + end + # Attributes + attrib = _attribdict(metadata(dim)) + _cdm_set_axis_attrib!(attrib, dim) + # Bounds variables + if sampling(dim) isa Intervals + bounds = Dimensions.dim2boundsmatrix(dim) + boundskey = get(metadata(dim), :bounds, string(dimname, "_bnds")) + push!(attrib, "bounds" => boundskey) + CDM.defVar(ds, boundskey, bounds, ("bnds", dimname)) + end + CDM.defVar(ds, dimname, Vector(index(dim)), (dimname,); attrib=attrib) + return nothing +end diff --git a/src/sources/sources.jl b/src/sources/sources.jl index 0118f1312..4c981478d 100644 --- a/src/sources/sources.jl +++ b/src/sources/sources.jl @@ -1,13 +1,14 @@ # Source dispatch singletons abstract type Source end -struct GRDsource <: Source end -struct GDALsource <: Source end - abstract type CDMsource <: Source end struct GRIBsource <: CDMsource end struct NCDsource <: CDMsource end +struct Zarrsource <: CDMsource end + +struct GRDsource <: Source end +struct GDALsource <: Source end # Deprecations const CDMfile = CDMsource @@ -21,6 +22,7 @@ const SYMBOL2SOURCE = Dict( :grd => GRDsource(), :netcdf => NCDsource(), :grib => GRIBsource(), + :zarr => Zarrsource(), ) const SOURCE2SYMBOL = Dict(map(reverse, collect(pairs(SYMBOL2SOURCE)))) @@ -30,11 +32,13 @@ const SOURCE2EXT = Dict( GRDsource() => (".grd", ".gri"), NCDsource() => (".nc", ".nc4", ".h5",), GRIBsource() => (".grib",), + Zarrsource() => (".zarr", ".zarr/"), ) const 
SOURCE2PACKAGENAME = Dict( GDALsource() => "ArchGDAL", NCDsource() => "NCDatasets", GRIBsource() => "GRIBDatasets", + Zarrsource() => "ZarrDatasets", ) const EXT2SOURCE = Dict( @@ -44,6 +48,7 @@ const EXT2SOURCE = Dict( ".nc4" => NCDsource(), ".h5" => NCDsource(), ".grib" => GRIBsource(), + ".zarr" => Zarrsource(), ) # exception to be raised when backend extension is not satisfied diff --git a/src/stack.jl b/src/stack.jl index a8b1735e1..7782e6b5d 100644 --- a/src/stack.jl +++ b/src/stack.jl @@ -409,7 +409,7 @@ function RasterStack(filename::AbstractString; scaled, maskingval = _raw_check(raw, scaled, maskingval) source = _sourcetrait(filename, source) - st = if isdir(filename) + st = if isdir(filename) && !(source isa Zarrsource) # Load as a whole directory filenames = readdir(filename) length(filenames) > 0 || throw(ArgumentError("No files in directory $filename")) diff --git a/src/write.jl b/src/write.jl index 46fdf4794..c97519afa 100644 --- a/src/write.jl +++ b/src/write.jl @@ -239,4 +239,4 @@ function check_can_write(filename, force) return true end check_can_write(::Type{Bool}, filename::Union{Nothing,NoKW}, force) = true -check_can_write(::Type{Bool}, filename, force) = (force || !isfile(filename)) +check_can_write(::Type{Bool}, filename, force) = (force || (!isfile(filename) && !isdir(filename))) diff --git a/test/methods.jl b/test/methods.jl index c683a6691..de563362e 100644 --- a/test/methods.jl +++ b/test/methods.jl @@ -301,6 +301,18 @@ end sum(st) end +@testset "zonal return missing" begin + a = Raster((1:26) * (1:31)', (X(-20:5), Y(0:30))) + out_bounds_pointvec = [(-40.0, -40.0), (-40.0, -35.0), (-35.0, -35.0), (-35.0, -40.0)] + out_bounds_polygon = ArchGDAL.createpolygon(out_bounds_pointvec) + @test ismissing(zonal(sum, a; of=[polygon, out_bounds_polygon, polygon])[2]) && + ismissing(zonal(sum, a; of=[out_bounds_polygon, polygon])[1]) && + ismissing(zonal(sum, a; of=(geometry=out_bounds_polygon, x=:a, y=:b))) && + ismissing(zonal(sum, a; 
of=[(geometry=out_bounds_polygon, x=:a, y=:b)])[1]) + @test zonal(sum, a; of=[out_bounds_polygon, out_bounds_polygon, polygon])[3] == + sum(skipmissing(mask(a; with=polygon))) +end + @testset "classify" begin A1 = [missing 1; 2 3] ga1 = Raster(A1, (X, Y); missingval=missing) From 981dd69119c8fa64a5c60946c3890f34841722d2 Mon Sep 17 00:00:00 2001 From: rafaqz Date: Wed, 14 Aug 2024 01:42:33 +0200 Subject: [PATCH 15/38] mosaic fixes and standardisation --- ext/RastersArchGDALExt/gdal_source.jl | 3 - .../RastersNCDatasetsExt.jl | 2 +- ext/RastersNCDatasetsExt/ncdatasets_source.jl | 127 ++---------------- .../zarrdatasets_source.jl | 22 +++ src/create.jl | 2 +- src/methods/mosaic.jl | 19 +-- src/modifieddiskarray.jl | 4 +- src/sources/commondatamodel.jl | 85 ++++++++---- test/sources/ncdatasets.jl | 16 +-- test/sources/zarr.jl | 10 +- 10 files changed, 119 insertions(+), 171 deletions(-) diff --git a/ext/RastersArchGDALExt/gdal_source.jl b/ext/RastersArchGDALExt/gdal_source.jl index 9d41731a4..96b75c837 100644 --- a/ext/RastersArchGDALExt/gdal_source.jl +++ b/ext/RastersArchGDALExt/gdal_source.jl @@ -116,9 +116,6 @@ RA._open(f, ::GDALsource, A::AG.RasterDataset; mod=RA.NoMod(), kw...) 
= # These methods are type piracy on DimensionalData/ArchGDAL and may have to move some day - -RA._dims(var::CDM.CFVariable{<:Any,<:Any,<:AG.RasterDataset}, crs=nokw, mappedcrs=nokw) = - RA._dims(var.var, crs, mappedcrs) # We allow passing in crs and mappedcrs manually function RA._dims(raster::AG.RasterDataset, crs=nokw, mappedcrs=nokw) gt_dims = try diff --git a/ext/RastersNCDatasetsExt/RastersNCDatasetsExt.jl b/ext/RastersNCDatasetsExt/RastersNCDatasetsExt.jl index 1a8cdae86..8513b5c27 100644 --- a/ext/RastersNCDatasetsExt/RastersNCDatasetsExt.jl +++ b/ext/RastersNCDatasetsExt/RastersNCDatasetsExt.jl @@ -4,7 +4,7 @@ using Rasters using NCDatasets using CommonDataModel using Dates -using DimensionalData, +using DimensionalData import Missings diff --git a/ext/RastersNCDatasetsExt/ncdatasets_source.jl b/ext/RastersNCDatasetsExt/ncdatasets_source.jl index a57c8f5bf..92a6bcf4e 100644 --- a/ext/RastersNCDatasetsExt/ncdatasets_source.jl +++ b/ext/RastersNCDatasetsExt/ncdatasets_source.jl @@ -7,7 +7,8 @@ function RA._check_allowed_type(::RA.NCDsource, eltyp) """ )) end -function Base.write(filename::AbstractString, ::NCDsource, A::AbstractRaster; + +function Base.write(filename::AbstractString, source::NCDsource, A::AbstractRaster; append=false, force=false, kw... @@ -21,13 +22,13 @@ function Base.write(filename::AbstractString, ::NCDsource, A::AbstractRaster; mode = !isfile(filename) || !append ? "c" : "a"; ds = NCD.Dataset(filename, mode; attrib=RA._attribdict(metadata(A))) try - RA._writevar!(ds, A; kw...) + RA._writevar!(ds, source, A; kw...) finally close(ds) end return filename end -function Base.write(filename::AbstractString, ::NCDsource, s::AbstractRasterStack; +function Base.write(filename::AbstractString, source::NCDsource, s::AbstractRasterStack; append=false, force=false, missingval=nokw, @@ -46,7 +47,7 @@ function Base.write(filename::AbstractString, ::NCDsource, s::AbstractRasterStac missingval = RA._stack_missingvals(s, isnokw(missingval) ? 
maskingval : missingval) try map(keys(s)) do k - _writevar!(ds, s[k]; + RA._writevar!(ds, source, s[k]; missingval=missingval[k], maskingval=maskingval[k], kw... @@ -81,11 +82,12 @@ RA._sourcetrait(::NCD.Variable) = NCDsource() return scale, offset end -RA.missingval(var::NCD.Variable, args...) = _mv(CDM.attribs(var)) -RA.missingval(var::NCD.Variable, md::Metadata{<:NCDsource}) = _mv(md) +RA.missingval(var::NCD.Variable, args...) = + RA.missingval(RA.Metadata{NCDsource}(CDM.attribs(var))) +RA.missingval(var::NCD.Variable, md::RA.Metadata{<:NCDsource}) = RA.missingval(md) -# TODO: handle multiple missing values -function _mv(md) +function RA.missingval(md::RA.Metadata{NCDsource}) + # TODO: handle multiple missing values fv = get(md, "_FillValue", nothing) mv = get(md, "missing_value", nothing) if isnothing(fv) @@ -96,120 +98,13 @@ function _mv(md) return mv end else - if isnothing(mv) + if !isnothing(mv) fv == mv || @warn "Both '_FillValue' $fv and 'missing_value' $mv were found. Currently we only use the first." end return fv end end -# Add a var array to a dataset before writing it. -function _writevar!(ds::AbstractDataset, A::AbstractRaster{T,N}; - verbose=true, - missingval=nokw, - maskingval=nokw, - metadata=nokw, - chunks=nokw, - chunksizes=RA._chunks_to_tuple(A, dims(A), chunks), - scale=nokw, - offset=nokw, - coerce=convert, - eltype=Missings.nonmissingtype(T), - write=true, - name=DD.name(A), - options=nokw, - driver=nokw, - kw... -) where {T,N} - eltype <: NCDAllowedType || throw(ArgumentError(""" - Element type $eltype cannot be written to NetCDF. Convert it to one of $(Base.uniontypes(NCDAllowedType)), - usually by broadcasting the desired type constructor over the `Raster`, e.g. `newrast = Float32.(rast)`")) - """ - )) - _def_dim_var!(ds, A) - metadata = if isnokw(metadata) - DD.metadata(A) - elseif isnothing(metadata) - NoMetadata() - else - metadata - end - - maskingval = isnokw(maskingval) ? 
RA.missingval(A) : maskingval - missingval = isnokw(missingval) ? RA.missingval(A) : missingval - missingval = if ismissing(missingval) - # See if there is a missing value in metadata - mv = _mv(metadata) - # But only use it if its the right type - mv isa eltype ? mv : RA._writeable_missing(eltype; verbose=true) - else - missingval - end - - attrib = RA._attribdict(metadata) - # Scale and offset - scale = if isnokw(scale) || isnothing(scale) - delete!(attrib, "scale_factor") - nothing - else - attrib["scale_factor"] = scale - end - offset = if isnokw(offset) || isnothing(offset) - delete!(attrib, "add_offset") - nothing - else - attrib["add_offset"] = offset - end - - mod = RA._writer_mod(eltype; missingval, maskingval, scale, offset, coerce) - - if !isnothing(mod.missingval) - attrib["_FillValue"] = missingval - end - - key = if isnokw(name) || string(name) == "" - UNNAMED_NCD_FILE_KEY - else - string(name) - end - - dimnames = lowercase.(string.(map(RA.name, dims(A)))) - var = NCD.defVar(ds, key, eltype, dimnames; attrib=attrib, chunksizes, kw...) 
- - if write - # Write with a DiskArays.jl broadcast - RA._maybe_modify(var.var, mod) .= A - end - - return nothing -end - -_def_dim_var!(ds::AbstractDataset, A) = map(d -> _def_dim_var!(ds, d), dims(A)) -function _def_dim_var!(ds::AbstractDataset, dim::Dimension) - dimname = lowercase(string(DD.name(dim))) - haskey(ds.dim, dimname) && return nothing - NCD.defDim(ds, dimname, length(dim)) - lookup(dim) isa NoLookup && return nothing - - # Shift index before conversion to Mapped - dim = RA._cdmshiftlocus(dim) - if dim isa Y || dim isa X - dim = convertlookup(Mapped, dim) - end - # Attributes - attrib = RA._attribdict(metadata(dim)) - RA._cdm_set_axis_attrib!(attrib, dim) - # Bounds variables - if sampling(dim) isa Intervals - bounds = Dimensions.dim2boundsmatrix(dim) - boundskey = get(metadata(dim), :bounds, string(dimname, "_bnds")) - push!(attrib, "bounds" => boundskey) - NCD.defVar(ds, boundskey, bounds, ("bnds", dimname)) - end - NCD.defVar(ds, dimname, Vector(index(dim)), (dimname,); attrib=attrib) - return nothing -end - # precompilation # const _NCDVar = NCDatasets.CFVariable{Union{Missing, Float32}, 3, NCDatasets.Variable{Float32, 3, NCDatasets.NCDataset}, NCDatasets.Attributes{NCDatasets.NCDataset{Nothing}}, NamedTuple{(:fillvalue, :scale_factor, :add_offset, :calendar, :time_origin, :time_factor), Tuple{Float32, Nothing, Nothing, Nothing, Nothing, Nothing}}} diff --git a/ext/RastersZarrDatasetsExt/zarrdatasets_source.jl b/ext/RastersZarrDatasetsExt/zarrdatasets_source.jl index e308ca4bf..e71277737 100644 --- a/ext/RastersZarrDatasetsExt/zarrdatasets_source.jl +++ b/ext/RastersZarrDatasetsExt/zarrdatasets_source.jl @@ -12,3 +12,25 @@ end RA._sourcetrait(::ZD.ZarrVariable) = Zarrsource() RA._sourcetrait(::ZD.ZarrDataset) = Zarrsource() + +RA.missingval(var::ZD.ZarrVariable, args...) 
= RA.missingval(RA.Metadata{Zarrsource}(CDM.attribs(var))) +RA.missingval(var::ZD.ZarrVariable, md::RA.Metadata{<:Zarrsource}) = RA.missingval(md) + +# TODO: handle multiple missing values +function RA.missingval(md::RA.Metadata{<:Zarrsource}) + fv = get(md, "_FillValue", nothing) + mv = get(md, "missing_value", nothing) + if isnothing(fv) + if mv isa Vector + length(mv) > 1 && @warn "'missing_value' $mv has multiple values. Currently we only uses the first." + return first(mv) + else + return mv + end + else + if !isnothing(mv) + fv == mv || @warn "Both '_FillValue' $fv and 'missing_value' $mv were found. Currently we only use the first." + end + return fv + end +end diff --git a/src/create.jl b/src/create.jl index 12c7bc332..608979979 100644 --- a/src/create.jl +++ b/src/create.jl @@ -258,7 +258,7 @@ function create(filename::AbstractString, source::Source, ::Type{T}, dims::DimTu write = false # Leave fill undefined A = FillArrays.Zeros{eltype}(map(length, dims)) else - fill isa T || throw(ArgumentError("fill must be of type $T, got $fill")) + fill isa eltype || throw(ArgumentError("fill must be of type $eltype, got $fill")) write = true # Write fill to disk A = FillArrays.Fill{eltype}(fill, map(length, dims)) end diff --git a/src/methods/mosaic.jl b/src/methods/mosaic.jl index 3a63b1e45..3de4868b6 100644 --- a/src/methods/mosaic.jl +++ b/src/methods/mosaic.jl @@ -78,18 +78,19 @@ function _mosaic(f::Function, A1::AbstractRaster, regions; maskingval = isnokw(maskingval) ? Rasters.missingval(first(regions)) : maskingval missingval = isnokw(missingval) ? Rasters.missingval(first(regions)) : missingval # missingval is not ooptional here - missingval = ismissing(missingval) || isnothing(missingval) ? 
_type_missingval(eltype(A1)) : missingval + if !isnothing(filename) && (ismissing(missingval) || isnothing(missingval)) + missingval = _type_missingval(eltype(A1)) + end T = Base.promote_type(typeof(missingval), Base.promote_eltype(regions...)) dims = _mosaic(Tuple(map(DD.dims, regions))) l1 = first(regions) - A = create(filename, T, dims; - name=name(l1), - fill=missingval, - metadata=metadata(l1), - missingval, - maskingval, - driver, + A = create(filename, T, dims; + name=name(l1), + fill=missingval, + missingval, + maskingval, + driver, options, force ) @@ -161,7 +162,7 @@ $EXPERIMENTAL mosaic!(f::Function, x::RasterStackOrArray, regions::RasterStackOrArray...; kw...) = mosaic!(f, x, regions; kw...) function mosaic!(f::Function, A::AbstractRaster{T}, regions; - missingval=Rasters.missingval(A), + missingval=Rasters.missingval(A), atol=_default_atol(T) ) where T isnokwornothing(missingval) && throw(ArgumentError("destination array must have a `missingval`")) diff --git a/src/modifieddiskarray.jl b/src/modifieddiskarray.jl index 59449e91e..a4070ef80 100644 --- a/src/modifieddiskarray.jl +++ b/src/modifieddiskarray.jl @@ -75,9 +75,7 @@ end function DiskArrays.readblock!( A::ModifiedDiskArray{false}, out_block, I::AbstractVector... ) - inner_block = similar(out_block, eltype(parent(A))) - DiskArrays.readblock!(parent(A), inner_block, I...) - out_block .= _applymod.(inner_block, (A.mod,)) + out_block .= _applymod.(parent(A)[I...], (A.mod,)) return out_block end function DiskArrays.readblock!( diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index bbc0d48ff..5fa525dbe 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -1,5 +1,7 @@ const CDM = CommonDataModel +const UNNAMED_FILE_KEY = "unnamed" + const CDM_DIM_MAP = Dict( "lat" => Y, "latitude" => Y, @@ -426,46 +428,79 @@ end _unuseddimerror(dimname) = error("Dataset contains unused dimension $dimname") - # Add a var array to a dataset before writing it. 
-function _writevar!(ds::AbstractDataset, A::AbstractRaster{T,N}; +function _writevar!(ds::AbstractDataset, source::CDMsource, A::AbstractRaster{T,N}; verbose=true, missingval=nokw, + maskingval=nokw, + metadata=nokw, chunks=nokw, chunksizes=_chunks_to_tuple(A, dims(A), chunks), + scale=nokw, + offset=nokw, + coerce=convert, + eltype=Missings.nonmissingtype(T), + write=true, + name=DD.name(A), + options=nokw, + driver=nokw, kw... ) where {T,N} - missingval = missingval isa NoKW ? Rasters.missingval(A) : missingval + _check_allowed_type(source, eltype) _def_dim_var!(ds, A) - attrib = _attribdict(metadata(A)) - # Set _FillValue - eltyp = Missings.nonmissingtype(T) - _check_allowed_type(_sourcetrait(ds), eltyp) - if ismissing(missingval) - fillval = if haskey(attrib, "_FillValue") && attrib["_FillValue"] isa eltyp - attrib["_FillValue"] - else - CDM.fillvalue(eltyp) - end - attrib["_FillValue"] = fillval - A = replace_missing(A, fillval) - elseif Rasters.missingval(A) isa T - attrib["_FillValue"] = missingval + metadata = if isnokw(metadata) + DD.metadata(A) + elseif isnothing(metadata) + NoMetadata() + else + metadata + end + + maskingval = isnokw(maskingval) ? Rasters.missingval(A) : maskingval + missingval = isnokw(missingval) ? Rasters.missingval(A) : missingval + missingval = if ismissing(missingval) + # See if there is a missing value in metadata + mv = _mv(metadata) + # But only use it if its the right type + mv isa eltype ? mv : _writeable_missing(eltype; verbose=true) + else + missingval + end + + attrib = _attribdict(metadata) + # Scale and offset + scale = if isnokw(scale) || isnothing(scale) + delete!(attrib, "scale_factor") + nothing else - verbose && !(missingval isa Nothing) && @warn "`missingval` $(missingval) is not the same type as your data $T." 
+ attrib["scale_factor"] = scale end + offset = if isnokw(offset) || isnothing(offset) + delete!(attrib, "add_offset") + nothing + else + attrib["add_offset"] = offset + end + + mod = _writer_mod(eltype; missingval, maskingval, scale, offset, coerce) - key = if string(DD.name(A)) == "" - UNNAMED_CDM_FILE_KEY + if !isnothing(mod.missingval) + attrib["_FillValue"] = missingval + end + + key = if isnokw(name) || string(name) == "" + UNNAMED_FILE_KEY else - string(DD.name(A)) + string(name) end - dimnames = lowercase.(string.(map(name, dims(A)))) - var = CDM.defVar(ds, key, eltyp, dimnames; attrib=attrib, chunksizes, kw...) |> CFDiskArray + dimnames = lowercase.(string.(map(Rasters.name, dims(A)))) + var = CDM.defVar(ds, key, eltype, dimnames; attrib=attrib, chunksizes, kw...) - # Write with a DiskArrays.jl broadcast - var .= A + if write + # Write with a DiskArays.jl broadcast + _maybe_modify(var.var, mod) .= A + end return nothing end diff --git a/test/sources/ncdatasets.jl b/test/sources/ncdatasets.jl index 0885a71fe..b505ab396 100644 --- a/test/sources/ncdatasets.jl +++ b/test/sources/ncdatasets.jl @@ -46,7 +46,6 @@ end @testset "Raster" begin @time ncarray = Raster(ncsingle); - @time ncarray = Raster(ncsingle; maskingval=nothing); @time lazyarray = Raster(ncsingle; lazy=true) @time eagerarray = Raster(ncsingle; lazy=false) @test_throws ArgumentError Raster("notafile.nc") @@ -115,14 +114,14 @@ end @testset "handle empty variables" begin st = RasterStack((empty=view(ncarray, 1, 1, 1), full=ncarray)) empty_test = tempname() * ".nc" - using ProfileView - @profview write(empty_test, st) + write(empty_test, st) rast = Raster(empty_test) st = RasterStack(empty_test) - @test name(rast) == name(st[:empty]) == :empty - @test size(rast) == size(st[:empty]) == () - @test all(st[:full] .=== ncarray) + @test name(rast) == name(st.empty) == :empty + @test size(rast) == size(st.empty) == () + @test st.empty[] === missing + @test all(st.full .=== ncarray) st = 
RasterStack(empty_test; lazy=true) end @@ -210,11 +209,12 @@ end A1 = ncarray[X(1:80), Y(1:100)] A2 = ncarray[X(50:150), Y(90:150)] tempfile = tempname() * ".nc" - Afile = mosaic(first, read(A1), read(A2); missingval=missing, atol=1e-7, filename=tempfile, force=true) - Amem = mosaic(first, A1, A2; missingval=missing, atol=1e-7) + Afile = mosaic(first, read(A1), read(A2); atol=1e-7, filename=tempfile, force=true) + Amem = mosaic(first, A1, A2; atol=1e-7) Atest = ncarray[X(1:150), Y(1:150)] Atest[X(1:49), Y(101:150)] .= missing Atest[X(81:150), Y(1:89)] .= missing + read(Afile) @test all(Atest .=== Afile .=== Amem) end @testset "slice" begin diff --git a/test/sources/zarr.jl b/test/sources/zarr.jl index ef4c2d29c..c362defb1 100644 --- a/test/sources/zarr.jl +++ b/test/sources/zarr.jl @@ -1,15 +1,15 @@ -using Rasters, Zarr +using Rasters +using Zarr using ZarrDatasets using Rasters: FileArray, FileStack, Zarrsource, crs, bounds, name, trim path = "https://s3.bgc-jena.mpg.de:9000/esdl-esdc-v3.0.2/esdc-16d-2.5deg-46x72x1440-3.0.2.zarr" @testset "Zarr Raster open" begin - -zraster = Raster(path, name="air_temperature_2m") -lazyarray = Raster(path, lazy=true, name="air_temperature_2m") -eagerarray = Raster(path, lazy=false, name="air_temperature_2m") +zraster = Raster(path; name="air_temperature_2m") +lazyarray = Raster(path; lazy=true, name="air_temperature_2m") +eagerarray = Raster(path; lazy=false, name="air_temperature_2m") @test_throws ArgumentError Raster("notafile.zarr/") @testset "lazyness" begin From b7e63f2ef2c592a3dfc7c73c569830d027dc24a8 Mon Sep 17 00:00:00 2001 From: rafaqz Date: Thu, 15 Aug 2024 02:16:21 +0200 Subject: [PATCH 16/38] bugfixes --- ext/RastersArchGDALExt/gdal_source.jl | 2 +- src/array.jl | 4 ++-- src/sources/commondatamodel.jl | 2 +- src/sources/grd.jl | 14 ++++++++++++-- test/sources/grd.jl | 6 +++--- test/sources/rasterdatasources.jl | 23 ++++++++++++++++------- 6 files changed, 35 insertions(+), 16 deletions(-) diff --git 
a/ext/RastersArchGDALExt/gdal_source.jl b/ext/RastersArchGDALExt/gdal_source.jl index 96b75c837..45a7d69c4 100644 --- a/ext/RastersArchGDALExt/gdal_source.jl +++ b/ext/RastersArchGDALExt/gdal_source.jl @@ -255,7 +255,7 @@ function RA.Raster(ds::AG.RasterDataset; else Raster(Array(RA._maybe_modify(ds, mod)), dims; kw...) end - return dropband ? RA._drop_single_band(raster, lazy) : raster + return RA._maybe_drop_single_band(raster, dropband, lazy) end RA.missingval(ds::AG.Dataset, args...) = RA.missingval(AG.RasterDataset(ds)) diff --git a/src/array.jl b/src/array.jl index 86be8cfd3..813f07f8d 100644 --- a/src/array.jl +++ b/src/array.jl @@ -42,7 +42,7 @@ abstract type AbstractRaster{T,N,D,A} <: AbstractDimArray{T,N,D,A} end Returns the value representing missing data in the dataset """ function missingval end -missingval(_) = missing +missingval(_) = nothing missingval(::AbstractArray{T}) where T = Missing <: T ? missing : nothing missingval(A::AbstractRaster) = A.missingval @@ -338,7 +338,7 @@ function Raster(ds, filename::AbstractString; x isa AbstractArray ? x : fill(x) # Catch an NCDatasets bug end # If maskingval is `nothing` use missingval as missingval - dims_out = isnokw(dims) ? _dims(var, crs, mappedcrs) : format(dims, data) + dims_out = isnokw(dims) ? _dims(var, crs, mappedcrs) : format(dims, data_out) missingval_out = isnokwornothing(maskingval1) ? missingval1 : maskingval1 data_out, dims_out, metadata_out, missingval_out end diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index 5fa525dbe..d6ef02a8a 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -460,7 +460,7 @@ function _writevar!(ds::AbstractDataset, source::CDMsource, A::AbstractRaster{T, missingval = isnokw(missingval) ? 
Rasters.missingval(A) : missingval missingval = if ismissing(missingval) # See if there is a missing value in metadata - mv = _mv(metadata) + mv = Rasters.missingval(metadata) # But only use it if its the right type mv isa eltype ? mv : _writeable_missing(eltype; verbose=true) else diff --git a/src/sources/grd.jl b/src/sources/grd.jl index dfb4cef5d..0bfe5f3a6 100644 --- a/src/sources/grd.jl +++ b/src/sources/grd.jl @@ -221,13 +221,23 @@ function _write_gri(filename, v, ::NoMod, A::Array) write(io, A) end end -function _write_gri(filename, v, mod, A) +function _write_gri(filename, v, mod, A::AbstractArray) open(filename; write=true, lock=false) do io for x in A # We are modifying the source array so invert the modifications write(io, _invertmod(v, x, mod)) end end end +# Specialise to avoid `Ref` allocations +function _write_gri(filename, v, mod, A::AbstractArray{Union{Int16,UInt16,Int32,UInt32,Int64,UInt64,Float16,Float32,Float64}}) + open(filename; write=true, lock=false) do io + ref = Ref(first(A)) + for x in A # We are modifying the source array so invert the modifications + ref[] = _invertmod(v, x, mod) + write(io, ref) + end + end +end function _write_grd(filename, T, dims, missingval, name) filename = splitext(filename)[1] @@ -245,7 +255,7 @@ function _write_grd(filename, T, dims, missingval, name) nodatavalue = missingval # Metadata: grd file - open(filename * ".grd"; write=true) do IO + open(filename * ".grd"; write=true, lock=false) do IO write(IO, """ [general] diff --git a/test/sources/grd.jl b/test/sources/grd.jl index ed8f4f367..a64ac2491 100644 --- a/test/sources/grd.jl +++ b/test/sources/grd.jl @@ -225,7 +225,7 @@ grdpath = stem * ".gri" # 1 band is added again on save @test size(saved) == size(grdarray[Band(1)]) @test parent(saved) == parent(grdarray[Band(1)]) - @test_broken (@allocations write(filename2, view(grdarray, Band(1)); force = true)) < 1e3 + @test (@allocations write(filename2, view(grdarray, Band(1)); force = true)) < 1e3 end @testset 
"3d with subset" begin @@ -251,7 +251,7 @@ grdpath = stem * ".gri" @test all(parent(saved) .=== parent(geoA)) @test saved isa typeof(geoA) @test parent(saved) == parent(geoA) - @test_broken (@allocations write(filename, GRDsource(), geoA; force = true)) < 1e3 + @test (@allocations write(filename, GRDsource(), geoA; force = true)) < 1e3 end @testset "to netcdf" begin @@ -265,7 +265,7 @@ grdpath = stem * ".gri" @test index(saved, Y) ≈ index(grdarray, Y) .+ 0.5 @test bounds(saved, Y) == bounds(grdarray, Y) @test bounds(saved, X) == bounds(grdarray, X) - @test_broken (@allocations write(filename2, grdarray[Band(1)]; force = true)) < 1e3 + @test (@allocations write(filename2, grdarray[Band(1)]; force = true)) < 1e3 end @testset "to gdal" begin diff --git a/test/sources/rasterdatasources.jl b/test/sources/rasterdatasources.jl index e142a239c..7a450f657 100644 --- a/test/sources/rasterdatasources.jl +++ b/test/sources/rasterdatasources.jl @@ -13,12 +13,12 @@ using Rasters, RasterDataSources, Test, Dates, ArchGDAL, NCDatasets @testset "load WorldClim Climate" begin # Weather time-series - ser = RasterSeries(WorldClim{Climate}, :prec; res="10m", month=Jan:March, mappedcrs=EPSG(4326)) + ser = RasterSeries(WorldClim{Climate}, :prec; res="10m", month=Jan:March, mappedcrs=EPSG(4326), raw=true) # Select Australia, using regular lat/lon selectors A = ser[month=Jan] @test A isa Raster A[Y(Between(-10, -45)), X(Between(110, 160))] - st = RasterStack(WorldClim{Climate}, (:prec, :tmax); month=1) + st = RasterStack(WorldClim{Climate}, (:prec, :tmax); month=1, raw=true) @test st[:prec] == A @test missingval(st) == (prec=-32768, tmax=-3.4f38) @test st isa RasterStack{(:prec,:tmax),@NamedTuple{prec::Int16,tmax::Float32},2} @@ -28,8 +28,9 @@ end A = Raster(WorldClim{BioClim}, :Bio_1; mappedcrs=EPSG(4326)) A[Y(Between(-10, -45)), X(Between(110, 160))] @test A isa Raster + @test missingval(A) === missing st = RasterStack(WorldClim{BioClim}, (1, 2)) - st[:bio1] + @test all(st.bio1 .=== A) 
@test st isa RasterStack @test A isa Raster end @@ -39,19 +40,27 @@ end @test Rasters.name(A) == :bio1 st = RasterStack(CHELSA{BioClim}, (:bio1, :BIO2); lazy=true) @test keys(st) == (:bio1, :bio2) - @test A isa Raster + @test A isa Raster{Float64,2} @test st isa RasterStack - @test st[:bio2] isa Raster + @test st.bio2 isa Raster{Float64,2} + + A = Raster(CHELSA{BioClim}, 1; lazy=true, raw=true) + st = RasterStack(CHELSA{BioClim}, (:bio1, :BIO2); lazy=true, raw=true) + @test A isa Raster{UInt16,2} + @test st isa RasterStack{(:bio1, :bio2),@NamedTuple{bio1::UInt16, bio2::UInt16}} + @test st.bio2 isa Raster{UInt16,2} + # Allow forcing keywords st = RasterStack(CHELSA{BioClim}, (1, 2); lazy=true, missingval=-Int16(9999), + maskingval=nothing, metadata=Rasters.NoMetadata(), crs=nothing, mappedcrs=EPSG(4326), ) - @test missingval(st) === Int16(-9999) - @test missingval(st.bio1) == Int16(-9999) + @test missingval(st) === -9999.0 + @test missingval(st.bio1) == -9999.0 @test metadata(st) == Rasters.NoMetadata() end From 1c20275fe9a4a8a8d283d08642d96deef8a282e4 Mon Sep 17 00:00:00 2001 From: rafaqz Date: Fri, 16 Aug 2024 20:33:06 +0200 Subject: [PATCH 17/38] closured in create --- ext/RastersArchGDALExt/gdal_source.jl | 10 ++++- ext/RastersNCDatasetsExt/ncdatasets_source.jl | 5 ++- src/create.jl | 37 +++++++++++----- src/methods/classify.jl | 6 +-- src/methods/crop_extend.jl | 44 +++++++++---------- src/methods/mask.jl | 6 +-- src/methods/mosaic.jl | 19 ++++---- src/methods/rasterize.jl | 9 ++-- src/methods/replace_missing.jl | 11 +++-- src/sources/commondatamodel.jl | 7 ++- src/sources/grd.jl | 22 +++++----- src/write.jl | 1 + test/create.jl | 17 +++++++ test/methods.jl | 8 ++-- test/sources/grd.jl | 2 +- 15 files changed, 123 insertions(+), 81 deletions(-) diff --git a/ext/RastersArchGDALExt/gdal_source.jl b/ext/RastersArchGDALExt/gdal_source.jl index 45a7d69c4..e0345094d 100644 --- a/ext/RastersArchGDALExt/gdal_source.jl +++ b/ext/RastersArchGDALExt/gdal_source.jl @@ 
-54,9 +54,11 @@ function Base.write(filename::AbstractString, ::GDALsource, A::AbstractRaster{T} offset=nokw, coerce=nokw, eltype=Missings.nonmissingtype(T), + f=identity, kw... ) where T RA.check_can_write(filename, force) + write = f === identity ? write : true A1 = _maybe_permute_to_gdal(A) # Missing values @@ -76,7 +78,13 @@ function Base.write(filename::AbstractString, ::GDALsource, A::AbstractRaster{T} if write mod = RA._writer_mod(eltype; missingval, maskingval, scale, offset, coerce) open(A1; write=true) do O - AG.RasterDataset(dataset) .= RA._maybe_modify(parent(O), mod; invert=true) + R = RA._maybe_modify(AG.RasterDataset(dataset), mod) + R .= parent(O) + if hasdim(A, Band()) + f(R) + else + f(view(R, :, :, 1)) + end end end end diff --git a/ext/RastersNCDatasetsExt/ncdatasets_source.jl b/ext/RastersNCDatasetsExt/ncdatasets_source.jl index 92a6bcf4e..3b0b81bf4 100644 --- a/ext/RastersNCDatasetsExt/ncdatasets_source.jl +++ b/ext/RastersNCDatasetsExt/ncdatasets_source.jl @@ -28,13 +28,13 @@ function Base.write(filename::AbstractString, source::NCDsource, A::AbstractRast end return filename end -function Base.write(filename::AbstractString, source::NCDsource, s::AbstractRasterStack; +function Base.write(filename::AbstractString, source::Source, s::AbstractRasterStack{K,T}; append=false, force=false, missingval=nokw, maskingval=nokw, kw... -) +) where {Source<:NCDsource,K,T} mode = if append isfile(filename) ? "a" : "c" else @@ -53,6 +53,7 @@ function Base.write(filename::AbstractString, source::NCDsource, s::AbstractRast kw... ) end + f(RA.OpenStack{Source,K,T}(ds)) finally close(ds) end diff --git a/src/create.jl b/src/create.jl index 608979979..5d4aa7946 100644 --- a/src/create.jl +++ b/src/create.jl @@ -119,6 +119,7 @@ RasterStack("created.nc") └───────────────────────────────────────────────────────────────────────────────────────────┘ ``` """ +create(f::Base.Callable, args...; kw...) 
= create(args...; kw..., f) create(A::Union{AbstractRaster,AbstractRasterStack}; kw...) = create(nothing, A; kw...) create(T::Union{Type,TypeNamedTuple}, A::Union{Tuple,Extents.Extent,AbstractRaster,AbstractRasterStack}; kw...) = create(nothing, T, A; kw...) @@ -170,13 +171,13 @@ function create(filename::Union{AbstractString,Nothing}, T::Union{Type,NamedTupl reverse_y=nokw, kw... ) - ds = _extent2dims(extent; size, res, crs, sampling) - ds = if reverse_y isa Bool && reverse_y && hasdim(ds, Y()) - DD.setdims(ds, reverse(dims(ds, Y()))) + dims = _extent2dims(extent; size, res, crs, sampling) + dims = if reverse_y isa Bool && reverse_y && hasdim(ds, Y()) + DD.setdims(ds, reverse(DD.dims(ds, Y()))) else - ds + dims end - return create(filename, T, ds; kw...) + return create(filename, T, dims; kw...) end function create(filename::Nothing, ::Type{T}, dims::Tuple; missingval=nokw, @@ -190,6 +191,7 @@ function create(filename::Nothing, ::Type{T}, dims::Tuple; chunks=nokw, driver=nokw, options=nokw, + f=identity, kw... ) where T if verbose @@ -203,10 +205,15 @@ function create(filename::Nothing, ::Type{T}, dims::Tuple; else similar(parent, eltype, size(dims)) end + # Maybe fill the array if !(isnokw(fill) || isnothing(fill)) fill!(data, fill) end - return Raster(data, dims; missingval, kw...) + + # Apply `f` before returning + rast = Raster(data, dims; missingval, kw...) + f(rast) + return rast end function create(filename::Nothing, types::NamedTuple, dims::Tuple; suffix=keys(types), @@ -221,6 +228,7 @@ function create(filename::Nothing, types::NamedTuple, dims::Tuple; fill=nokw, layerdims=nokw, layermetadata=nokw, + f=identity, kw... ) missingval = isnokwornothing(missingval) ? maskingval : missingval @@ -234,7 +242,9 @@ function create(filename::Nothing, types::NamedTuple, dims::Tuple; parent, missingval=lmv, maskingval=lma, fill=lfv, metadata=lm, driver, options, ) end - return RasterStack(layers; kw...) + st = RasterStack(layers; kw...) 
+ f(st) + return st end function create(filename::AbstractString, source::Source, ::Type{T}, dims::DimTuple; name=nokw, @@ -250,6 +260,7 @@ function create(filename::AbstractString, source::Source, ::Type{T}, dims::DimTu verbose=true, force=false, coerce=nokw, + f=identity, kw... ) where T eltype = Missings.nonmissingtype(T) @@ -264,9 +275,12 @@ function create(filename::AbstractString, source::Source, ::Type{T}, dims::DimTu end # Create layers of zero arrays rast = Raster(A, dims; name, missingval) - Rasters.write(filename, source, rast; + Rasters.write(f, filename, source, rast; eltype, chunks, metadata, scale, offset, missingval, maskingval, verbose, force, coerce, write, kw... - ) + ) do W + # write returns a variable, wrap it as a Raster + f(rebuild(rast, W)) + end # Don't pass in `missingval`, read it again from disk in case it changed return Raster(filename; source, lazy, metadata, maskingval, dropband, coerce) end @@ -285,6 +299,7 @@ function create(filename::AbstractString, source::Source, layertypes::NamedTuple offset=nokw, dropband=!hasdim(dims, Band), coerce=nokw, + f=identity, kw... ) layerdims = if isnokwornothing(layerdims) @@ -313,7 +328,9 @@ function create(filename::AbstractString, source::Source, layertypes::NamedTuple stack = RasterStack(layers, dims; layerdims, layermetadata, missingval) fn = Rasters.write(filename, stack; chunks, metadata, scale, offset, missingval, maskingval, verbose, force, coerce, write=write[], kw... 
- ) + ) do W + f(rebuild(stack; data=W)) + end # Don't pass in `missingval`, read it again from disk in case it changed st = RasterStack(fn; source, lazy, metadata, layerdims, maskingval, dropband, coerce) return st diff --git a/src/methods/classify.jl b/src/methods/classify.jl index 22a587944..300770bd1 100644 --- a/src/methods/classify.jl +++ b/src/methods/classify.jl @@ -58,11 +58,9 @@ function classify(A::AbstractRaster, pairs::Union{Tuple,AbstractArray}; # We use `Val{T}` to force type stability through the closure valT = Val{T}() f(x) = _convert_val(valT, _classify(x, pairs, lower, upper, others, Rasters.missingval(A), missingval)) - A1 = create(filename, T, A; suffix, missingval) - open(A1; write=true) do O - broadcast!(f, O, A) + return create(filename, T, A; suffix, missingval) do C + broadcast!(f, C, A) end - return A1 end function classify(xs::AbstractRasterStack, pairs...; suffix=keys(xs), kw...) mapargs(xs, suffix) do x, s diff --git a/src/methods/crop_extend.jl b/src/methods/crop_extend.jl index 4fd409619..fd7fe4970 100644 --- a/src/methods/crop_extend.jl +++ b/src/methods/crop_extend.jl @@ -178,12 +178,11 @@ _extend_to(x::RasterStackOrArray, to::Dimension; kw...) = _extend_to(x, (to,); k function _extend_to(A::AbstractRaster, to::DimTuple; filename=nothing, - suffix=nothing, missingval=(isnothing(missingval(A)) ? nokw : missingval(A)), fill=nokw, touches=false, - force=false, verbose=true, + kw... ) others = otherdims(to, A) # Allow not specifying all dimensions @@ -209,38 +208,35 @@ function _extend_to(A::AbstractRaster, to::DimTuple; fill = isnokwornothing(missingval) ? 
zero(Missings.nonmissingtype(eltype(A))) : missingval end # Create a new extended array - newA = create(filename, eltype(A), final_to; - suffix, + return create(filename, eltype(A), final_to; parent=parent(A), missingval, name=name(A), metadata=metadata(A), maskingval=Rasters.missingval(A), - fill, - force, verbose, - ) - # Input checks - map(dims(A, to), dims(newA, to)) do d1, d2 - if lookup(d1) isa Union{AbstractSampled,NoLookup} - b1, b2 = bounds(d1), bounds(d2) - b1[1] >= b2[1] || throw(ArgumentError("Lower bound of $(basetypeof(d1)) lookup of `$(b2[1])` are not larger than the original `$(b1[1])`")) - b1[2] <= b2[2] || throw(ArgumentError("Upper bound of $(basetypeof(d2)) lookup of `$(b2[2])` is not larger than the original `$(b1[2])`")) - elseif lookup(d1) isa Categorical - map(lookup(d1)) do x - x in d2 || throw(ArgumentError("category $x not in new dimension")) + fill, + kw... + ) do C + # Input checks + map(dims(A, to), dims(C, to)) do d1, d2 + if lookup(d1) isa Union{AbstractSampled,NoLookup} + b1, b2 = bounds(d1), bounds(d2) + b1[1] >= b2[1] || throw(ArgumentError("Lower bound of $(basetypeof(d1)) lookup of `$(b2[1])` are not larger than the original `$(b1[1])`")) + b1[2] <= b2[2] || throw(ArgumentError("Upper bound of $(basetypeof(d2)) lookup of `$(b2[2])` is not larger than the original `$(b1[2])`")) + elseif lookup(d1) isa Categorical + map(lookup(d1)) do x + x in d2 || throw(ArgumentError("category $x not in new dimension")) + end end end - end - # The missingval may have changed for disk-based arrays - if !isequal(missingval, Rasters.missingval(newA)) - A = replace_missing(A, Rasters.missingval(newA)) - end - open(newA; write=true) do O + # The missingval may have changed for disk-based arrays + if !isequal(Rasters.missingval(A), Rasters.missingval(C)) + A = replace_missing(A, Rasters.missingval(C)) + end # Somehow this is slow from disk? 
- broadcast_dims!(identity, view(O, rangedims...), A) + broadcast_dims!(identity, view(C, rangedims...), A) end - return newA end function _extend_to(st::AbstractRasterStack, to::DimTuple; suffix=keys(st), kw...) mapargs((A, s) -> _extend_to(A, to; suffix=s, kw...), st, suffix) diff --git a/src/methods/mask.jl b/src/methods/mask.jl index f5fa87608..232ed3b4e 100644 --- a/src/methods/mask.jl +++ b/src/methods/mask.jl @@ -89,12 +89,10 @@ function _mask(A::AbstractRaster, with::AbstractRaster; filename=nothing, suffix=nothing, missingval=_missingval_or_missing(A), kw... ) missingval = ismissing(missingval) ? missing : convert(eltype(A), missingval) - A1 = create(filename, A; suffix, missingval) - open(A1; write=true) do a + return create(filename, A; suffix, missingval) do C # The values array will be be written to A1 in `mask!` - mask!(a; with, missingval, values=A, kw...) + mask!(C; with, missingval, values=A, kw...) end - return A1 end function _mask(xs::AbstractRasterStack, with::AbstractRaster; suffix=keys(xs), kw...) mapargs((x, s) -> mask(x; with, suffix=s, kw...), xs, suffix) diff --git a/src/methods/mosaic.jl b/src/methods/mosaic.jl index 3de4868b6..f019e3a24 100644 --- a/src/methods/mosaic.jl +++ b/src/methods/mosaic.jl @@ -75,17 +75,22 @@ function _mosaic(f::Function, A1::AbstractRaster, regions; force=false, kw... ) + isnothing(missingval) && throw(ArgumentError("missingval cannot be `nothing` for `mosaic`")) maskingval = isnokw(maskingval) ? Rasters.missingval(first(regions)) : maskingval - missingval = isnokw(missingval) ? Rasters.missingval(first(regions)) : missingval - # missingval is not ooptional here - if !isnothing(filename) && (ismissing(missingval) || isnothing(missingval)) + missingval = if isnokw(missingval) + mv = Rasters.missingval(first(regions)) + isnokwornothing(mv) ? 
missing : mv + else + missingval + end + if !isnothing(filename) && (ismissing(missingval) || isnokwornothing(missingval)) missingval = _type_missingval(eltype(A1)) end T = Base.promote_type(typeof(missingval), Base.promote_eltype(regions...)) dims = _mosaic(Tuple(map(DD.dims, regions))) l1 = first(regions) - A = create(filename, T, dims; + return create(filename, T, dims; name=name(l1), fill=missingval, missingval, @@ -93,11 +98,9 @@ function _mosaic(f::Function, A1::AbstractRaster, regions; driver, options, force - ) - open(A; write=true) do O - mosaic!(f, O, regions; missingval, kw...) + ) do C + mosaic!(f, C, regions; missingval, kw...) end - return A end function _mosaic(f::Function, ::AbstractRasterStack, regions; filename=nothing, diff --git a/src/methods/rasterize.jl b/src/methods/rasterize.jl index 3cebe1b38..d6c07fd28 100644 --- a/src/methods/rasterize.jl +++ b/src/methods/rasterize.jl @@ -476,15 +476,12 @@ function alloc_rasterize(f, r::RasterCreator; metadata=r.metadata, suffix=r.suffix, ) + maskingval = nothing if prod(size(r.to)) == 0 throw(ArgumentError("Destination array is is empty, with size $(size(r.to))). Rasterization is not possible")) end - A = create(r.filename, eltype, r.to; name, missingval, metadata, suffix) - # TODO f should apply to the file when it is initially created - # instead of reopening but we need a `create(f, filename, ...)` method - open(A; write=true) do A - A .= Ref(missingval) - f(A) + A = create(r.filename, fill=missingval, eltype, r.to; name, missingval, maskingval, metadata, suffix) do O + f(O) end return A end diff --git a/src/methods/replace_missing.jl b/src/methods/replace_missing.jl index 58c85b72c..98f0ecb64 100644 --- a/src/methods/replace_missing.jl +++ b/src/methods/replace_missing.jl @@ -23,7 +23,7 @@ missing """ replace_missing(x; missingval=missing, kw...) = replace_missing(x, missingval; kw...) 
function replace_missing(A::AbstractRaster{T}, missingval::MV; - filename=nothing, suffix=nothing + filename=nothing, kw... ) where {T,MV} MT = if ismissing(missingval) promote_type(T, Missing) @@ -32,18 +32,17 @@ function replace_missing(A::AbstractRaster{T}, missingval::MV; end old_missingval = Rasters.missingval(A) missingval = convert(MT, missingval) + maskingval = nothing repmissing(x) = isequal(x, old_missingval) || ismissing(x) ? missingval : x # Disk-backed arrays need to be lazy, memory-backed don't. # But in both cases we make sure we return an array with the missingval # in the eltype, even if there are no missing values in the array. if !isnothing(filename) - A1 = create(filename, MT, dims(A); - parent=parent(A), suffix, missingval, name=name(A), metadata=metadata(A) - ) - open(A1; write=true) do O + return create(filename, MT, dims(A); + parent=parent(A), missingval, maskingval, name=name(A), metadata=metadata(A), kw... + ) do O O .= repmissing.(A) end - return A1 else # We need to force T of Union{T,Missing} for DiskArrays broadcasts if isdisk(A) diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index d6ef02a8a..327ca71f4 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -444,9 +444,11 @@ function _writevar!(ds::AbstractDataset, source::CDMsource, A::AbstractRaster{T, name=DD.name(A), options=nokw, driver=nokw, + f=identity, kw... ) where {T,N} _check_allowed_type(source, eltype) + write = f === identity ? write : true _def_dim_var!(ds, A) metadata = if isnokw(metadata) DD.metadata(A) @@ -498,8 +500,11 @@ function _writevar!(ds::AbstractDataset, source::CDMsource, A::AbstractRaster{T, var = CDM.defVar(ds, key, eltype, dimnames; attrib=attrib, chunksizes, kw...) 
if write + m = _maybe_modify(var.var, mod) # Write with a DiskArays.jl broadcast - _maybe_modify(var.var, mod) .= A + m .= A + # Apply `f` while the variable is open + f(m) end return nothing diff --git a/src/sources/grd.jl b/src/sources/grd.jl index 0bfe5f3a6..51f0a64b9 100644 --- a/src/sources/grd.jl +++ b/src/sources/grd.jl @@ -171,9 +171,11 @@ function Base.write(filename::String, ::GRDsource, A::AbstractRaster; offset=nokw, coerce=nokw, eltype=Missings.nonmissingtype(eltype(A)), + f=identity, kw... ) check_can_write(filename, force) + write = f === identity ? write : true haskey(REVGRD_DATATYPE_TRANSLATION, eltype) || throw(ArgumentError(""" Element type $eltype cannot be written to grd file. Convert it to one of $(keys(REVGRD_DATATYPE_TRANSLATION)), usually by broadcasting the desired type constructor over the `Raster`, e.g. `newrast = Float32.(rast)`")) @@ -210,28 +212,26 @@ function Base.write(filename::String, ::GRDsource, A::AbstractRaster; gri_filename = filename * ".gri" isfile(gri_filename) && rm(gri_filename) _write_gri(gri_filename, Val{source_eltype(mod)}(), mod, parent(correctedA)) - _write_grd(filename, eltype, dims(A), missingval, name(A)) + if write + _mmapgrd(filename, source_eltype(mod), size(A); write=true) do M + f(rebuild(A, _maybe_modify(M, mod))) + end + end + return filename * ".grd" end -function _write_gri(filename, v, ::NoMod, A::Array) +function _write_gri(filename, v, ::NoMod, A::Array{T}) where T open(filename; write=true, lock=false) do io write(io, A) end end function _write_gri(filename, v, mod, A::AbstractArray) open(filename; write=true, lock=false) do io - for x in A # We are modifying the source array so invert the modifications - write(io, _invertmod(v, x, mod)) - end - end -end -# Specialise to avoid `Ref` allocations -function _write_gri(filename, v, mod, A::AbstractArray{Union{Int16,UInt16,Int32,UInt32,Int64,UInt64,Float16,Float32,Float64}}) - open(filename; write=true, lock=false) do io - ref = Ref(first(A)) + # Avoid 
`Ref` allocations + ref = Ref{source_eltype(mod)}(_invertmod(v, first(A), mod)) for x in A # We are modifying the source array so invert the modifications ref[] = _invertmod(v, x, mod) write(io, ref) diff --git a/src/write.jl b/src/write.jl index c97519afa..f3e9524a6 100644 --- a/src/write.jl +++ b/src/write.jl @@ -226,6 +226,7 @@ function Base.write(path::AbstractString, A::AbstractRasterSeries; written_paths end end +Base.write(f::Base.Callable, args...; kw...) = write(args...; f, kw...) # Trait for source data that has stack layers haslayers(T) = false diff --git a/test/create.jl b/test/create.jl index 3eaf98058..61967e8bb 100644 --- a/test/create.jl +++ b/test/create.jl @@ -51,6 +51,7 @@ end @testset "create RasterStack" begin + st = Rasters.create((a=Int32, b=Float64, c=Bool), Extents.Extent(X=(0, 10), Y=(0, 5)); size=(X=1024, Y=1024), sampling=(X=Points(), Y=Intervals()), @@ -123,6 +124,7 @@ for ext in (".nc", ".tif", ".grd") fn = "created$ext" created = Rasters.create(fn, UInt8, (X(1:10), Y(1:10)); missingval=0xff, + maskingval=nothing, fill=0x01, force=true ) @@ -162,6 +164,7 @@ end @testset "create .nc stack" begin created = Rasters.create("created.nc", (a=UInt8, b=Float32), (X(1:10), Y(1:10)); missingval=(a=0xff, b=typemax(Float32)), + maskingval=nothing, fill=(a=0x01, b=1.0f0), layerdims=(a=(X,), b=(X, Y)), force=true, @@ -173,4 +176,18 @@ end @test all(created.b .=== 1.0f0) st = RasterStack("created.nc"; maskingval=nothing) @test missingval(st) == (a=0xff, b=typemax(Float32)) + + created = Rasters.create("created.nc", (a=UInt8, b=Float32), (X(1:10), Y(1:10)); + missingval=(a=0xff, b=typemax(Float32)), + fill=(a=0x01, b=1.0f0), + layerdims=(a=(X,), b=(X, Y)), + force=true, + ) + @test missingval(created) === missing + @test size(created.a) == (10,) + @test size(created.b) == (10, 10) + @test all(created.a .=== 0x01) + @test all(created.b .=== 1.0f0) + st = RasterStack("created.nc"; maskingval=nothing) + @test missingval(st) == (a=0xff, 
b=typemax(Float32)) end diff --git a/test/methods.jl b/test/methods.jl index de563362e..dc8586e1a 100644 --- a/test/methods.jl +++ b/test/methods.jl @@ -57,6 +57,7 @@ gaMi = replace_missing(ga) end) testfile = tempname() * ".tif" dNaN = replace_missing(ga, NaN32; filename=testfile) + read(dNaN) @test all(isequal.(dNaN, [NaN32 7.0f0; 2.0f0 NaN32])) stNaN = replace_missing(st, NaN32; filename="teststack.tif") @test all(map(stNaN[Band(1)], (a=[NaN32 7.0f0; 2.0f0 NaN32], b=[1.0 0.4; 2.0 NaN])) do x, y @@ -213,7 +214,7 @@ end end end -@testset "mask_replace_missing" begin +@testset "mask" begin # Floating point rasters a = Raster([1.0 0.0; 1.0 1.0], dims=(X, Y), missingval=0.0) b = Raster([1.0 1.0; 1.0 0.0], dims=(X, Y), missingval=0.0) @@ -331,7 +332,8 @@ end @test_throws ArgumentError classify(ga1, [1, 2, 3]) end -@testset "points" begin dimz = (X(9.0:1.0:10.0), Y(0.1:0.1:0.2)) +@testset "points" begin + dimz = (X(9.0:1.0:10.0), Y(0.1:0.1:0.2)) rast = Raster([1 2; 3 4], dimz; name=:test) rast2 = Raster([5 6; 7 8], dimz; name=:test2, missingval=5) rast_m = Raster([1 2; 3 missing], dimz; name=:test) @@ -621,7 +623,7 @@ end [missing 0.2 0.1; 1.2 1.1 0.3; 1.4 1.3 missing] - ) + ) # 3 dimensions A1 = Raster(ones(2, 2, 2), (X(2.0:-1.0:1.0), Y(5.0:1.0:6.0), Ti(DateTime(2001):Year(1):DateTime(2002)))) diff --git a/test/sources/grd.jl b/test/sources/grd.jl index a64ac2491..f21034ca0 100644 --- a/test/sources/grd.jl +++ b/test/sources/grd.jl @@ -225,7 +225,7 @@ grdpath = stem * ".gri" # 1 band is added again on save @test size(saved) == size(grdarray[Band(1)]) @test parent(saved) == parent(grdarray[Band(1)]) - @test (@allocations write(filename2, view(grdarray, Band(1)); force = true)) < 1e3 + @test (@allocations write(filename2, view(grdarray, Band(1)); force=true, verbose=false)) < 1e3 end @testset "3d with subset" begin From a034c60be1419dbbb0a34af0ee07a2a3854d33ae Mon Sep 17 00:00:00 2001 From: rafaqz Date: Sat, 17 Aug 2024 03:06:44 +0200 Subject: [PATCH 18/38] bugfixes --- 
ext/RastersNCDatasetsExt/ncdatasets_source.jl | 1 + src/create.jl | 2 +- src/write.jl | 2 +- test/sources/grd.jl | 3 ++- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/ext/RastersNCDatasetsExt/ncdatasets_source.jl b/ext/RastersNCDatasetsExt/ncdatasets_source.jl index 3b0b81bf4..4cec5650c 100644 --- a/ext/RastersNCDatasetsExt/ncdatasets_source.jl +++ b/ext/RastersNCDatasetsExt/ncdatasets_source.jl @@ -33,6 +33,7 @@ function Base.write(filename::AbstractString, source::Source, s::AbstractRasterS force=false, missingval=nokw, maskingval=nokw, + f=identity, kw... ) where {Source<:NCDsource,K,T} mode = if append diff --git a/src/create.jl b/src/create.jl index 5d4aa7946..99233f6ea 100644 --- a/src/create.jl +++ b/src/create.jl @@ -119,7 +119,7 @@ RasterStack("created.nc") └───────────────────────────────────────────────────────────────────────────────────────────┘ ``` """ -create(f::Base.Callable, args...; kw...) = create(args...; kw..., f) +create(f::Function, args...; kw...) = create(args...; kw..., f) create(A::Union{AbstractRaster,AbstractRasterStack}; kw...) = create(nothing, A; kw...) create(T::Union{Type,TypeNamedTuple}, A::Union{Tuple,Extents.Extent,AbstractRaster,AbstractRasterStack}; kw...) = create(nothing, T, A; kw...) diff --git a/src/write.jl b/src/write.jl index f3e9524a6..a093cad40 100644 --- a/src/write.jl +++ b/src/write.jl @@ -226,7 +226,7 @@ function Base.write(path::AbstractString, A::AbstractRasterSeries; written_paths end end -Base.write(f::Base.Callable, args...; kw...) = write(args...; f, kw...) +Base.write(f::Function, args...; kw...) = write(args...; f, kw...) 
# Trait for source data that has stack layers haslayers(T) = false diff --git a/test/sources/grd.jl b/test/sources/grd.jl index f21034ca0..4a709cbf0 100644 --- a/test/sources/grd.jl +++ b/test/sources/grd.jl @@ -225,7 +225,8 @@ grdpath = stem * ".gri" # 1 band is added again on save @test size(saved) == size(grdarray[Band(1)]) @test parent(saved) == parent(grdarray[Band(1)]) - @test (@allocations write(filename2, view(grdarray, Band(1)); force=true, verbose=false)) < 1e3 + write(filename2, grdarray[Band(1)]; force=true, verbose=false) + @test (@allocations write(filename2, grdarray[Band(1)]; force=true, verbose=false)) < 1e3 end @testset "3d with subset" begin From 54056cba5d257f19640aed5ce2affa52eca2f457 Mon Sep 17 00:00:00 2001 From: rafaqz Date: Sat, 17 Aug 2024 03:38:06 +0200 Subject: [PATCH 19/38] fix resample test for nokw --- test/resample.jl | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/test/resample.jl b/test/resample.jl index d08449512..f1678c10b 100644 --- a/test/resample.jl +++ b/test/resample.jl @@ -157,6 +157,7 @@ include(joinpath(dirname(pathof(Rasters)), "../test/test_utils.jl")) @test dims(resampled_3D, Z) == Z(1:2) end + maskingval = Rasters.nokw for maskingval in (nothing, missing, Rasters.nokw) # Resample cea.tif using resample cea = Raster(raster_path; missingval=0x00, name=:cea, maskingval) @@ -166,18 +167,19 @@ include(joinpath(dirname(pathof(Rasters)), "../test/test_utils.jl")) cea_permuted = permutedims(Raster(raster_path; missingval=0x00, name=:cea_permuted, maskingval), (Y, X)) permuted_output = resample(cea_permuted, output_res; missingval=0x00, maskingval, crs=output_crs, method) - AG_output1 = if maskingval === missing - replace(AG_output, 0x00 => missing) - else + AG_output1 = if isnothing(maskingval) AG_output + else + replace(AG_output, 0x00 => missing) end # Compare ArchGDAL, resample and permuted resample @test all(AG_output1 .=== raster_output .=== read(disk_output) .=== permutedims(permuted_output, 
(X, Y))) + @test all(AG_output1 .=== raster_output .=== read(disk_output) .=== permutedims(permuted_output, (X, Y))) @test abs(step(dims(raster_output, Y))) ≈ abs(step(dims(raster_output, X))) ≈ abs(step(dims(disk_output, X))) ≈ abs(step(dims(permuted_output, X))) ≈ output_res - @test name(cea) == name(raster_output) + @test Rasters.name(cea) == Rasters.name(raster_output) rm("resample.tif") end From 6a5772737bb3590fe13714bbceafe23bf966d93a Mon Sep 17 00:00:00 2001 From: rafaqz Date: Sat, 17 Aug 2024 21:56:53 +0200 Subject: [PATCH 20/38] test create with a function --- .../gribdatasets_source.jl | 2 +- ext/RastersNCDatasetsExt/ncdatasets_source.jl | 2 +- .../zarrdatasets_source.jl | 2 +- src/create.jl | 37 +++++++------ src/openstack.jl | 21 ++++++-- src/sources/commondatamodel.jl | 2 - test/create.jl | 54 ++++++++++++++----- 7 files changed, 82 insertions(+), 38 deletions(-) diff --git a/ext/RastersGRIBDatasetsExt/gribdatasets_source.jl b/ext/RastersGRIBDatasetsExt/gribdatasets_source.jl index 72f63fd11..a43292693 100644 --- a/ext/RastersGRIBDatasetsExt/gribdatasets_source.jl +++ b/ext/RastersGRIBDatasetsExt/gribdatasets_source.jl @@ -1,5 +1,5 @@ function RA.OpenStack(fs::RA.FileStack{GRIBsource,K}) where K - RA.OpenStack{GRIBsource,K}(GDS.GRIBDataset(RA.filename(fs))) + RA.OpenStack{GRIBsource,K}(GDS.GRIBDataset(RA.filename(fs)), fs.mods) end # In GRIBDatasets, the file is open for reading the values and closed afterwards. 
diff --git a/ext/RastersNCDatasetsExt/ncdatasets_source.jl b/ext/RastersNCDatasetsExt/ncdatasets_source.jl index 4cec5650c..f890349ba 100644 --- a/ext/RastersNCDatasetsExt/ncdatasets_source.jl +++ b/ext/RastersNCDatasetsExt/ncdatasets_source.jl @@ -64,7 +64,7 @@ end Base.close(os::RA.OpenStack{NCDsource}) = NCD.close(RA.dataset(os)) function RA.OpenStack(fs::RA.FileStack{NCDsource,K}) where K - RA.OpenStack{NCDsource,K}(NCD.Dataset(RA.filename(fs))) + RA.OpenStack{NCDsource,K}(NCD.Dataset(RA.filename(fs)), fs.mods) end function RA._open(f, ::NCDsource, filename::AbstractString; write=false, kw...) diff --git a/ext/RastersZarrDatasetsExt/zarrdatasets_source.jl b/ext/RastersZarrDatasetsExt/zarrdatasets_source.jl index e71277737..667de06f1 100644 --- a/ext/RastersZarrDatasetsExt/zarrdatasets_source.jl +++ b/ext/RastersZarrDatasetsExt/zarrdatasets_source.jl @@ -1,5 +1,5 @@ function RA.OpenStack(fs::RA.FileStack{Zarrsource,K}) where K - RA.OpenStack{Zarrsource,K}(ZD.ZarrDataset(RA.filename(fs))) + RA.OpenStack{Zarrsource,K}(ZD.ZarrDataset(RA.filename(fs)), fs.mods) end # In ZarrDatasets, the file is open for reading the values and closed afterwards. diff --git a/src/create.jl b/src/create.jl index 99233f6ea..8dd0f611a 100644 --- a/src/create.jl +++ b/src/create.jl @@ -1,8 +1,7 @@ const TypeNamedTuple = NamedTuple{<:Any,<:Tuple{Vararg{Type}}} """ - create([filename], template::Raster; kw...) - create([filename], type, template; kw...) + create([f!], [filename], template; kw...) Create a new, uninitialised [`Raster`](@ref) or [`RasterStack`](@ref). @@ -17,6 +16,10 @@ in which case `T` will depend on the tyepe promotion of `scale`, `offset` and `m If types is a `NamedTuple` of types, the result will be a `RasterStack`. In this case `fill` and `missingval` can be single values (for all layers) or `NamedTuple` with the same names to specify per-layer. +`f!` will be applied to the `Raster` or `RasterStack` while it is stil open after creation, +to avoid opening it twice. 
The return value of `f!` is disguarded but modifications +to the `Raster` or the `RasterStack` layers will be written to disk or changd in memory. + ## Arguments - `filename`: a String file path, which will create a file on disk and return it as @@ -73,32 +76,36 @@ We use standard lat/lon (EPSG:4326) as the crs, and force writing if the file ex using Rasters, NCDatasets, ArchGDAL, Extents, Dates using Rasters.Lookups rast = Rasters.create("created.tif", UInt8, Extents.Extent(X=(0, 120), Y=(-80, 80), Band=(0, 12)); - res=(X=1.0, Y=1.0, Band=1), + res=(X=10.0, Y=10.0, Band=1), # size=(X=100, Y=100, Band=12), maskingval=nothing, name=:myraster, crs=EPSG(4326), force=true, + fill=0x01, sampling=(X=Intervals(Start()), Y=Intervals(Start()), Band=Intervals(Start())), -) -using ProfileView -@profview open(rast; write=true) do A - A .= Rasters.Missings.nonmissingtype(eltype(A))(1) - nothing +) do A + # While we have the newly created raster open, we can write to it + A[X=1:10, Y=1:10] .= 0xff end -Raster("created.tif"; maskingval=nothing) -rm("created.tif") -extent = Extents.Extent(X=(0, 120), Y=(-80, 80))#, Band=(1, 3)) -types = (a=UInt8, b=Int32, c=Float64=>Y) -rast = Rasters.create("created.nc", types, extent; +read(rast) +``` + +We can also create a `RasterStack` by passing a `NamedTuple` of types: + +```julia +ext = Extents.Extent(X=(0, 120), Y=(-80, 80))#, Band=(1, 3)) +types = (a=UInt8, b=Int32, c=Float64) +rast = Rasters.create("created.nc", types, ext; # res=(X=1.0, Y=1.0, Band=1), maskingval=nothing, size=(X=100, Y=100), crs=EPSG(4326), force=true, - sampling=(X=Intervals(Start()), Y=Intervals(Start()), Band=Points()), -) + # sampling=(X=Intervals(Start()), Y=Intervals(Start()), Band=Points()), +end + RasterStack("created.nc") ╭───────────────────────────────────────────╮ diff --git a/src/openstack.jl b/src/openstack.jl index 84c3a0f04..0f3bec92c 100644 --- a/src/openstack.jl +++ b/src/openstack.jl @@ -12,15 +12,21 @@ contained in a single file. 
`X` is a backend type like `NCDsource`, and `K` is a tuple of `Symbol` keys. """ -struct OpenStack{X,K,T,DS} +struct OpenStack{X,K,T,DS,M} dataset::DS + mods::M +end +function OpenStack{X,K,T}( + dataset::DS, mods::M=NoMod() +) where {X,K,T,DS,M} + OpenStack{X,K,T,DS,M}(dataset, mods) end -OpenStack{X,K,T}(dataset::DS) where {X,K,T,DS} = OpenStack{X,K,T,DS}(dataset) dataset(os::OpenStack) = os.dataset # OpenStack has `X` and `K` parameter that is not recoverable from fields. -ConstructionBase.constructorof(::Type{<:OpenStack{X,K,T}}) where {X,K,T} = OpenStack{X,K,T} +ConstructionBase.constructorof(::Type{<:OpenStack{X,K,T}}) where {X,K,T} = + OpenStack{X,K,T} DD.data_eltype(::OpenStack{<:Any,<:Any,T}) where T = T @@ -28,4 +34,11 @@ Base.keys(::OpenStack{<:Any,K}) where K = K # TODO test this, and does it make sense to return an iterator here? Base.values(os::OpenStack{<:Any,K}) where K = (os[k] for k in K) # Indexing OpenStack returns memory-backed Raster. -Base.getindex(os::OpenStack, key::Symbol) = dataset(os)[key] +function Base.getindex(os::OpenStack{<:Any,K}, key::Symbol) where K + mods = os.mods + if mods isa AbstractModifications + _maybe_modify(dataset(os)[key], mods) + else + _maybe_modify(dataset(os)[key], NamedTuple{K}(mods)[key]) + end +end diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index 327ca71f4..ed6cff28e 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -33,8 +33,6 @@ const CDM_STANDARD_NAME_MAP = Dict( "time" => Ti, ) -Base.getindex(os::OpenStack{<:CDMsource}, name::Symbol) = CFDiskArray(dataset(os)[name]) - _sourcetrait(var::CDM.CFVariable) = _sourcetrait(var.var) # Rasters methods for CDM types ############################### diff --git a/test/create.jl b/test/create.jl index 61967e8bb..eeedd1b60 100644 --- a/test/create.jl +++ b/test/create.jl @@ -8,8 +8,12 @@ using Rasters: isdisk, ismem, filename crs=EPSG(4326), chunks=(X=128, Y=128), force=true, - name=:testname - ) + 
name=:testname, + fill=Int32(2), + ) do A + A .*= 3 + end + @test all(rast .=== Int32(6)) @test crs(rast) == EPSG(4326) @test size(rast) == (1024, 1024) @test Rasters.name(rast) == :testname @@ -24,7 +28,11 @@ using Rasters: isdisk, ismem, filename name=:testname, missingval=missing, reverse_y=false, - ) + fill=2.0, + ) do A + A .*= 3 + end + @test all(rast .=== 6.0) @test crs(rast) == EPSG(4326) @test size(rast) == (50, 50, 12) @test Rasters.name(rast) == :testname @@ -49,9 +57,7 @@ using Rasters: isdisk, ismem, filename @test eltype(rast1) == eltype(rast) end - @testset "create RasterStack" begin - st = Rasters.create((a=Int32, b=Float64, c=Bool), Extents.Extent(X=(0, 10), Y=(0, 5)); size=(X=1024, Y=1024), sampling=(X=Points(), Y=Intervals()), @@ -60,7 +66,9 @@ end verbose=false, missingval=(a=Int32(-9999), b=Float64(-9999), c=false), fill=(a=Int32(-9999), b=0, c=false), - ) + ) do st + st.c .= true + end @test crs(st) == EPSG(4326) @test size(st) == (1024, 1024) @test Rasters.name(st) == (:a, :b, :c) @@ -70,7 +78,7 @@ end @test isintervals(st, Y) @test all(x -> x === Int32(-9999), st.a) @test all(x -> x === 0.0, st.b) - @test all(x -> x === false, st.c) + @test all(x -> x === true, st.c) st2 = Rasters.create((a=UInt8, b=Float32), st; layerdims=(a=(X(), Y()), b=(Y(),)), @@ -81,7 +89,6 @@ end @test eltype(st2) === @NamedTuple{a::UInt8, b::Float32} @test missingval(st2) === (a=UInt8(0), b=1.0f0) - @testset "from template with new dims" begin st1 = Rasters.create(st; layerdims=(a=(X, Y), b=(Y,), c=(X,)), @@ -96,6 +103,7 @@ end @test basedims(st1.b) == (Y(),) @test basedims(st1.c) == (X(),) end + @testset "from template with new layers" begin st1 = Rasters.create((c=UInt8, d=Int16), st; missingval=(c=0x00, d=Int16(1)), @@ -106,6 +114,7 @@ end @test eltype(st1) == @NamedTuple{c::UInt8,d::Int16} @test missingval(st1) === (c=0x00, d=Int16(1)) end + @testset "from template with new dims and layers" begin st1 = Rasters.create((c=UInt8, d=Int16), st; layerdims=(c=(X, Y), 
d=(Y,)), @@ -147,14 +156,13 @@ for ext in (".nc", ".tif", ".grd") missingval=typemax(Int16), scale=0.1, offset=5.0, + fill=Int16(1), force=true, - ); - open(created; write=true) do O - O .= 2 - nothing + ) do C + C .*= 3 end - @test all(Raster(fn) .=== 2.0) - @test all(Raster(fn; scaled=false) .== Int16(-30)) + @test all(Raster(fn) .=== 3.0) + @test all(Raster(fn; scaled=false) .== Int16(-20)) @test missingval(Raster(fn; maskingval=nothing, scaled=false)) === typemax(Int16) end end @@ -190,4 +198,22 @@ end @test all(created.b .=== 1.0f0) st = RasterStack("created.nc"; maskingval=nothing) @test missingval(st) == (a=0xff, b=typemax(Float32)) + + @testset "with a function" begin + created = Rasters.create("created.nc", (a=UInt8, b=Float32), (X(1:10), Y(1:10)); + missingval=(a=0xff, b=typemax(Float32)), + maskingval=nothing, + fill=(a=0x01, b=1.0f0), + layerdims=(a=(X,), b=(X, Y)), + force=true, + ) do st + map(layers(st)) do A + A .*= 2 + end + end + @test all(read(created.a) .=== 0x02) + @test all(read(created.b) .=== 2.0f0) + end end + + From 8500e4262863f44e4fb8197775a43efa6a2e3f7e Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Tue, 3 Sep 2024 11:10:02 +0200 Subject: [PATCH 21/38] Apply suggestions from code review Co-authored-by: Anshul Singhvi --- src/create.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/create.jl b/src/create.jl index 8dd0f611a..7d751d864 100644 --- a/src/create.jl +++ b/src/create.jl @@ -10,15 +10,15 @@ If it is `nothing` or not passed, an in-memory `Raster` will be created. If type is a `Type` return value is a `Raster`. The `eltype` will usually be `T`, except where `scale` and/or `offset` keywords are used or a `missingval` of a different type is specified, -in which case `T` will depend on the tyepe promotion of `scale`, `offset` and `missingval` with `T`. +in which case `T` will depend on the type promotion of `scale`, `offset` and `missingval` with `T`. 
`maskingval` will also affect the `eltype` of the openeded raster if you `create` to a file. If types is a `NamedTuple` of types, the result will be a `RasterStack`. In this case `fill` and `missingval` can be single values (for all layers) or `NamedTuple` with the same names to specify per-layer. `f!` will be applied to the `Raster` or `RasterStack` while it is stil open after creation, -to avoid opening it twice. The return value of `f!` is disguarded but modifications -to the `Raster` or the `RasterStack` layers will be written to disk or changd in memory. +to avoid opening it twice. The return value of `f!` is disregarded but modifications +to the `Raster` or the `RasterStack` layers will be written to disk or changed in memory. ## Arguments From c2258400942ae680e4706153f4189d1bc9ff419b Mon Sep 17 00:00:00 2001 From: Anshul Singhvi Date: Wed, 11 Sep 2024 05:56:04 -0700 Subject: [PATCH 22/38] Hook up scale and offset to Zarr datasets via CDM (#732) This fixes a strange issue I was having where Rasters.jl read the wrong values from a Kerchunk/Zarr dataset. --- .../zarrdatasets_source.jl | 5 ++ test/sources/zarr.jl | 63 +++++++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/ext/RastersZarrDatasetsExt/zarrdatasets_source.jl b/ext/RastersZarrDatasetsExt/zarrdatasets_source.jl index 667de06f1..ffe723686 100644 --- a/ext/RastersZarrDatasetsExt/zarrdatasets_source.jl +++ b/ext/RastersZarrDatasetsExt/zarrdatasets_source.jl @@ -16,6 +16,11 @@ RA._sourcetrait(::ZD.ZarrDataset) = Zarrsource() RA.missingval(var::ZD.ZarrVariable, args...) = RA.missingval(RA.Metadata{Zarrsource}(CDM.attribs(var))) RA.missingval(var::ZD.ZarrVariable, md::RA.Metadata{<:Zarrsource}) = RA.missingval(md) +@inline function RA.get_scale(metadata::RA.Metadata{<: Zarrsource}, scaled::Bool) + scale = scaled ? get(metadata, "scale_factor", nothing) : nothing + offset = scaled ? 
get(metadata, "add_offset", nothing) : nothing + return scale, offset +end # TODO: handle multiple missing values function RA.missingval(md::RA.Metadata{<:Zarrsource}) fv = get(md, "_FillValue", nothing) diff --git a/test/sources/zarr.jl b/test/sources/zarr.jl index c362defb1..2cac0ead1 100644 --- a/test/sources/zarr.jl +++ b/test/sources/zarr.jl @@ -68,4 +68,67 @@ end @test zraster[Ti(At(DateTime(1979,1,9))), X(At(-178.75)), Y(At(-88.75))] == -28.866226f0 end +@testset "CF conventions" begin + path = tempname() * ".zarr" + global_attrs = Dict( + "Conventions" => "CF-1.7", + "_FillValue" => -999.0, + "units" => "K", + "project" => "GOES", + ) + zg = zgroup(path; attrs = global_attrs) + x_attrs = Dict( + "units" => "rad", + "add_offset" => 0, + "_ARRAY_DIMENSIONS" => ["x"], + "axis" => "X", + "long_name" => "GOES fixed grid projection x-coordinate", + "scale_factor" => 1, + "_Netcdf4Dimid" => 1, + "standard_name" => "projection_x_coordinate", + "NAME" => "x", + ) + xs = zcreate(Float64, zg, "x", 100; attrs = x_attrs) + xs .= LinRange(-5.434177815866809e6, 5.322929839864517e6, 100) + + y_attrs = Dict( + "units" => "rad", + "add_offset" => 5.258454335331338e6, + "_ARRAY_DIMENSIONS" => ["y"], + "axis" => "Y", + "long_name" => "GOES fixed grid projection y-coordinate", + "scale_factor" => -1.0625617981243342e7, + "_Netcdf4Dimid" => 1, + "standard_name" => "projection_y_coordinate", + "NAME" => "y", + ) + ys = zcreate(Float64, zg, "y", 100; attrs = y_attrs) + ys .= LinRange(0, 1, 100) + + val_attrs = Dict( + "_Netcdf4Dimid" => 0, + "scale_factor" => 2.0, + "add_offset" => 180.0, + "_FillValue" => -1, + "units" => "K", + "_ARRAY_DIMENSIONS" => ["y", "x"], + "grid_mapping" => "goes_imager_projection", + "valid_range" => [0, -6], + ) + vals = zcreate(Float64, zg, "values", 100, 100; attrs = val_attrs) + vals .= (data = rand(100, 100)) + vals[1, 1] = 1.0 + vals[end, end] = 0.0 + + + ra = Raster(path) + + @test extrema(ra) == (180.0, 182.0) # test scale and offset + @test 
Rasters.GeoInterface.extent(ra) == Rasters.GeoInterface.Extent(X = (-5.434177815866809e6, 5.322929839864517e6), Y = (-5.367163645912004e6, 5.258454335331338e6)) + @test Rasters.isreverse(ra.dims[2]) + @test Rasters.isforward(ra.dims[1]) + @test extrema(ra.dims[1]) == extrema(xs) + @test extrema(ra.dims[2]) == reverse(extrema(ys)) .* y_attrs["scale_factor"] .+ y_attrs["add_offset"] +end + end From cfa1b951c399f2affabbbd8142bff79707f0fd70 Mon Sep 17 00:00:00 2001 From: rafaqz Date: Thu, 19 Sep 2024 15:31:37 +0200 Subject: [PATCH 23/38] use coalesceval instead of maskingval --- ext/RastersArchGDALExt/gdal_source.jl | 18 +++--- ext/RastersArchGDALExt/warp.jl | 4 +- ext/RastersNCDatasetsExt/ncdatasets_source.jl | 8 +-- src/array.jl | 12 ++-- src/create.jl | 34 +++++----- src/extensions.jl | 2 +- src/methods/crop_extend.jl | 2 +- src/methods/mosaic.jl | 6 +- src/methods/rasterize.jl | 4 +- src/methods/replace_missing.jl | 4 +- src/methods/shared_docstrings.jl | 10 +-- src/modifieddiskarray.jl | 64 +++++++++---------- src/sources/commondatamodel.jl | 6 +- src/sources/grd.jl | 8 +-- src/stack.jl | 28 ++++---- src/utils.jl | 8 +-- src/write.jl | 18 +++--- test/create.jl | 16 ++--- test/resample.jl | 20 +++--- test/sources/gdal.jl | 8 +-- test/sources/grd.jl | 10 +-- test/sources/ncdatasets.jl | 10 +-- test/sources/rasterdatasources.jl | 2 +- 23 files changed, 151 insertions(+), 151 deletions(-) diff --git a/ext/RastersArchGDALExt/gdal_source.jl b/ext/RastersArchGDALExt/gdal_source.jl index e0345094d..58bd7bf41 100644 --- a/ext/RastersArchGDALExt/gdal_source.jl +++ b/ext/RastersArchGDALExt/gdal_source.jl @@ -49,7 +49,7 @@ function Base.write(filename::AbstractString, ::GDALsource, A::AbstractRaster{T} verbose=true, write=true, missingval=nokw, - maskingval=nokw, + coalesceval=nokw, scale=nokw, offset=nokw, coerce=nokw, @@ -62,8 +62,8 @@ function Base.write(filename::AbstractString, ::GDALsource, A::AbstractRaster{T} A1 = _maybe_permute_to_gdal(A) # Missing values - 
maskingval = isnokw(maskingval) ? RA.missingval(A) : maskingval - missingval = isnokw(missingval) ? maskingval : missingval + coalesceval = isnokw(coalesceval) ? RA.missingval(A) : coalesceval + missingval = isnokw(missingval) ? coalesceval : missingval missingval = if ismissing(missingval) # See if there is a missing value in metadata # But only use it if its the right type @@ -76,7 +76,7 @@ function Base.write(filename::AbstractString, ::GDALsource, A::AbstractRaster{T} missingval, _block_template=A1, scale, offset, verbose, kw... ) do dataset if write - mod = RA._writer_mod(eltype; missingval, maskingval, scale, offset, coerce) + mod = RA._writer_mod(eltype; missingval, coalesceval, scale, offset, coerce) open(A1; write=true) do O R = RA._maybe_modify(AG.RasterDataset(dataset), mod) R .= parent(O) @@ -248,15 +248,15 @@ function RA.Raster(ds::AG.RasterDataset; name=nokw, metadata=RA._metadata(ds), missingval=RA.missingval(ds), - maskingval=missing, + coalesceval=missing, lazy=false, dropband=false, scaled=true, coerce=convert, ) filelist = AG.filelist(ds) - mod = RA._mod(eltype(ds), metadata, missingval, maskingval; scaled, coerce) - kw = (; refdims, name, metadata, missingval=Rasters.maskingval(mod)) + mod = RA._mod(eltype(ds), metadata, missingval, coalesceval; scaled, coerce) + kw = (; refdims, name, metadata, missingval=Rasters.coalesceval(mod)) raster = if lazy && length(filelist) > 0 filename = first(filelist) Raster(FileArray{GDALsource}(ds, filename; mod), dims; kw...) @@ -310,7 +310,7 @@ function AG.RasterDataset(f::Function, A::AbstractRaster; verbose=false, eltype=Missings.nonmissingtype(eltype(A)), missingval=Rasters.missingval(A), - maskingval=Rasters.missingval(A), + coalesceval=Rasters.missingval(A), kw... ) A1 = _maybe_permute_to_gdal(A) @@ -318,7 +318,7 @@ function AG.RasterDataset(f::Function, A::AbstractRaster; _block_template=A1, missingval, scale, offset, verbose, kw... 
) do dataset rds = AG.RasterDataset(dataset) - mod = RA._writer_mod(eltype; missingval=RA.missingval(rds), maskingval, scale, offset, coerce) + mod = RA._writer_mod(eltype; missingval=RA.missingval(rds), coalesceval, scale, offset, coerce) open(A1) do O RA._maybe_modify(rds, mod) .= parent(O) end diff --git a/ext/RastersArchGDALExt/warp.jl b/ext/RastersArchGDALExt/warp.jl index 0e555cbab..731967322 100644 --- a/ext/RastersArchGDALExt/warp.jl +++ b/ext/RastersArchGDALExt/warp.jl @@ -18,7 +18,7 @@ function _warp(A::AbstractRaster, flags::Dict; filename=nothing, suffix="", missingval=nokw, - maskingval=Rasters.missingval(A), + coalesceval=Rasters.missingval(A), name=Rasters.name(A), kw... ) @@ -44,7 +44,7 @@ function _warp(A::AbstractRaster, flags::Dict; out = AG.Dataset(A1; filename=tempfile, missingval, kw...) do dataset AG.gdalwarp([dataset], flagvect; warp_kw...) do warped # Read the raster lazily, dropping Band if there is none in `A` - raster = Raster(warped; lazy=true, dropband=!hasdim(A, Band()), name, maskingval) + raster = Raster(warped; lazy=true, dropband=!hasdim(A, Band()), name, coalesceval) # Either read the MEM dataset to an Array, or keep a filename base raster lazy return isnothing(filename) ? read(raster) : raster end diff --git a/ext/RastersNCDatasetsExt/ncdatasets_source.jl b/ext/RastersNCDatasetsExt/ncdatasets_source.jl index f890349ba..c80c86ea8 100644 --- a/ext/RastersNCDatasetsExt/ncdatasets_source.jl +++ b/ext/RastersNCDatasetsExt/ncdatasets_source.jl @@ -32,7 +32,7 @@ function Base.write(filename::AbstractString, source::Source, s::AbstractRasterS append=false, force=false, missingval=nokw, - maskingval=nokw, + coalesceval=nokw, f=identity, kw... ) where {Source<:NCDsource,K,T} @@ -44,13 +44,13 @@ function Base.write(filename::AbstractString, source::Source, s::AbstractRasterS end ds = NCD.Dataset(filename, mode; attrib=RA._attribdict(metadata(s))) - maskingval = RA._stack_nt(s, isnokw(maskingval) ? 
Rasters.missingval(s) : maskingval) - missingval = RA._stack_missingvals(s, isnokw(missingval) ? maskingval : missingval) + coalesceval = RA._stack_nt(s, isnokw(coalesceval) ? Rasters.missingval(s) : coalesceval) + missingval = RA._stack_missingvals(s, isnokw(missingval) ? coalesceval : missingval) try map(keys(s)) do k RA._writevar!(ds, source, s[k]; missingval=missingval[k], - maskingval=maskingval[k], + coalesceval=coalesceval[k], kw... ) end diff --git a/src/array.jl b/src/array.jl index 813f07f8d..130701960 100644 --- a/src/array.jl +++ b/src/array.jl @@ -304,7 +304,7 @@ function Raster(ds, filename::AbstractString; group=nokw, metadata=nokw, missingval=nokw, - maskingval=nokw, + coalesceval=nokw, crs=nokw, mappedcrs=nokw, source=nokw, @@ -318,15 +318,15 @@ function Raster(ds, filename::AbstractString; mod=nokw, raw=false, )::Raster - scaled, maskingval = _raw_check(raw, scaled, maskingval) + scaled, coalesceval = _raw_check(raw, scaled, coalesceval) _maybe_warn_replace_missing(replace_missing) name1 = filekey(ds, name) source = _sourcetrait(filename, source) data_out, dims_out, metadata_out, missingval_out = _open(source, ds; name=name1, group, mod=NoMod()) do var metadata_out = isnokw(metadata) ? _metadata(var) : metadata missingval1 = isnokw(missingval) ? Rasters.missingval(var, metadata_out) : missingval - maskingval1 = isnokw(maskingval) && !isnothing(missingval1) ? missing : maskingval - mod = isnokw(mod) ? _mod(eltype(var), metadata_out, missingval1, maskingval1; scaled, coerce) : mod + coalesceval1 = isnokw(coalesceval) && !isnothing(missingval1) ? missing : coalesceval + mod = isnokw(mod) ? _mod(eltype(var), metadata_out, missingval1, coalesceval1; scaled, coerce) : mod data_out = if lazy FileArray{typeof(source)}(var, filename; name=name1, group, mod, write @@ -337,9 +337,9 @@ function Raster(ds, filename::AbstractString; x = Array(modvar) x isa AbstractArray ? 
x : fill(x) # Catch an NCDatasets bug end - # If maskingval is `nothing` use missingval as missingval + # If coalesceval is `nothing` use missingval as missingval dims_out = isnokw(dims) ? _dims(var, crs, mappedcrs) : format(dims, data_out) - missingval_out = isnokwornothing(maskingval1) ? missingval1 : maskingval1 + missingval_out = isnokwornothing(coalesceval1) ? missingval1 : coalesceval1 data_out, dims_out, metadata_out, missingval_out end name_out = name1 isa Union{NoKW,Nothing} ? Symbol("") : Symbol(name1) diff --git a/src/create.jl b/src/create.jl index 7d751d864..21afd95a4 100644 --- a/src/create.jl +++ b/src/create.jl @@ -11,7 +11,7 @@ If it is `nothing` or not passed, an in-memory `Raster` will be created. If type is a `Type` return value is a `Raster`. The `eltype` will usually be `T`, except where `scale` and/or `offset` keywords are used or a `missingval` of a different type is specified, in which case `T` will depend on the type promotion of `scale`, `offset` and `missingval` with `T`. -`maskingval` will also affect the `eltype` of the openeded raster if you `create` to a file. +`coalesceval` will also affect the `eltype` of the openeded raster if you `create` to a file. If types is a `NamedTuple` of types, the result will be a `RasterStack`. In this case `fill` and `missingval` can be single values (for all layers) or `NamedTuple` with the same names to specify per-layer. 
@@ -78,7 +78,7 @@ using Rasters.Lookups rast = Rasters.create("created.tif", UInt8, Extents.Extent(X=(0, 120), Y=(-80, 80), Band=(0, 12)); res=(X=10.0, Y=10.0, Band=1), # size=(X=100, Y=100, Band=12), - maskingval=nothing, + coalesceval=nothing, name=:myraster, crs=EPSG(4326), force=true, @@ -99,7 +99,7 @@ ext = Extents.Extent(X=(0, 120), Y=(-80, 80))#, Band=(1, 3)) types = (a=UInt8, b=Int32, c=Float64) rast = Rasters.create("created.nc", types, ext; # res=(X=1.0, Y=1.0, Band=1), - maskingval=nothing, + coalesceval=nothing, size=(X=100, Y=100), crs=EPSG(4326), force=true, @@ -188,7 +188,7 @@ function create(filename::Union{AbstractString,Nothing}, T::Union{Type,NamedTupl end function create(filename::Nothing, ::Type{T}, dims::Tuple; missingval=nokw, - maskingval=nothing, + coalesceval=nothing, fill=nokw, parent=nokw, verbose=true, @@ -204,8 +204,8 @@ function create(filename::Nothing, ::Type{T}, dims::Tuple; if verbose isnokw(chunks) || @warn "`chunks` of `$chunks` found. But `chunks` are not used for in-memory rasters" end - # maskingval determines missingval here as we don't use both - missingval = isnokwornothing(maskingval) ? missingval : maskingval + # coalesceval determines missingval here as we don't use both + missingval = isnokwornothing(coalesceval) ? missingval : coalesceval eltype = isnokwornothing(missingval) ? T : promote_type(T, typeof(missingval)) data = if isnokw(parent) || isnothing(parent) Array{eltype}(undef, dims) @@ -231,22 +231,22 @@ function create(filename::Nothing, types::NamedTuple, dims::Tuple; options=nokw, parent=nokw, missingval=nokw, - maskingval=nokw, + coalesceval=nokw, fill=nokw, layerdims=nokw, layermetadata=nokw, f=identity, kw... ) - missingval = isnokwornothing(missingval) ? maskingval : missingval + missingval = isnokwornothing(missingval) ? coalesceval : missingval layerdims = isnokw(layerdims) ? map(_ -> basedims(dims), types) : layerdims layermetadata = layermetadata isa NamedTuple ? 
layermetadata : map(_ -> layermetadata, types) layerfill = fill isa NamedTuple ? fill : map(_ -> fill, types) layermissingvals = missingval isa NamedTuple ? missingval : map(_ -> missingval, types) - layermaskingvals = maskingval isa NamedTuple ? maskingval : map(_ -> maskingval, types) - layers = map(types, layermissingvals, layermaskingvals, layerfill, layerdims, layermetadata) do T, lmv, lma, lfv, ld, lm + layercoalescevals = coalesceval isa NamedTuple ? coalesceval : map(_ -> coalesceval, types) + layers = map(types, layermissingvals, layercoalescevals, layerfill, layerdims, layermetadata) do T, lmv, lma, lfv, ld, lm create(nothing, T, DD.dims(dims, ld); - parent, missingval=lmv, maskingval=lma, fill=lfv, metadata=lm, driver, options, + parent, missingval=lmv, coalesceval=lma, fill=lfv, metadata=lm, driver, options, ) end st = RasterStack(layers; kw...) @@ -256,7 +256,7 @@ end function create(filename::AbstractString, source::Source, ::Type{T}, dims::DimTuple; name=nokw, missingval=nokw, - maskingval=nokw, + coalesceval=nokw, fill=nokw, metadata=nokw, chunks=nokw, @@ -283,20 +283,20 @@ function create(filename::AbstractString, source::Source, ::Type{T}, dims::DimTu # Create layers of zero arrays rast = Raster(A, dims; name, missingval) Rasters.write(f, filename, source, rast; - eltype, chunks, metadata, scale, offset, missingval, maskingval, verbose, force, coerce, write, kw... + eltype, chunks, metadata, scale, offset, missingval, coalesceval, verbose, force, coerce, write, kw... 
) do W # write returns a variable, wrap it as a Raster f(rebuild(rast, W)) end # Don't pass in `missingval`, read it again from disk in case it changed - return Raster(filename; source, lazy, metadata, maskingval, dropband, coerce) + return Raster(filename; source, lazy, metadata, coalesceval, dropband, coerce) end function create(filename::AbstractString, source::Source, layertypes::NamedTuple, dims::DimTuple; lazy=true, verbose=true, force=false, missingval=nokw, - maskingval=nokw, + coalesceval=nokw, fill=nokw, metadata=nokw, layerdims=nokw, @@ -334,11 +334,11 @@ function create(filename::AbstractString, source::Source, layertypes::NamedTuple # Create layers of zero arrays stack = RasterStack(layers, dims; layerdims, layermetadata, missingval) fn = Rasters.write(filename, stack; - chunks, metadata, scale, offset, missingval, maskingval, verbose, force, coerce, write=write[], kw... + chunks, metadata, scale, offset, missingval, coalesceval, verbose, force, coerce, write=write[], kw... ) do W f(rebuild(stack; data=W)) end # Don't pass in `missingval`, read it again from disk in case it changed - st = RasterStack(fn; source, lazy, metadata, layerdims, maskingval, dropband, coerce) + st = RasterStack(fn; source, lazy, metadata, layerdims, coalesceval, dropband, coerce) return st end diff --git a/src/extensions.jl b/src/extensions.jl index 07ce6ac9a..ad7f85f73 100644 --- a/src/extensions.jl +++ b/src/extensions.jl @@ -118,7 +118,7 @@ $FILENAME_KEYWORD $SUFFIX_KEYWORD - `missingval`: the missing value to use during warping, will default to `Rasters.missingval(A). -- `maskingval`: the missing value to mask with after warping +- `coalesceval`: the missing value to mask with after warping Any additional keywords are passed to `ArchGDAL.Dataset`. 
## Example diff --git a/src/methods/crop_extend.jl b/src/methods/crop_extend.jl index fd7fe4970..2754605ca 100644 --- a/src/methods/crop_extend.jl +++ b/src/methods/crop_extend.jl @@ -213,7 +213,7 @@ function _extend_to(A::AbstractRaster, to::DimTuple; missingval, name=name(A), metadata=metadata(A), - maskingval=Rasters.missingval(A), + coalesceval=Rasters.missingval(A), verbose, fill, kw... diff --git a/src/methods/mosaic.jl b/src/methods/mosaic.jl index f019e3a24..295ce3ab1 100644 --- a/src/methods/mosaic.jl +++ b/src/methods/mosaic.jl @@ -67,7 +67,7 @@ end mosaic(f::Function, regions; kw...) = _mosaic(f, first(regions), regions; kw...) function _mosaic(f::Function, A1::AbstractRaster, regions; missingval=nokw, - maskingval=nokw, + coalesceval=nokw, filename=nothing, suffix=nothing, driver=nokw, @@ -76,7 +76,7 @@ function _mosaic(f::Function, A1::AbstractRaster, regions; kw... ) isnothing(missingval) && throw(ArgumentError("missingval cannot be `nothing` for `mosaic`")) - maskingval = isnokw(maskingval) ? Rasters.missingval(first(regions)) : maskingval + coalesceval = isnokw(coalesceval) ? Rasters.missingval(first(regions)) : coalesceval missingval = if isnokw(missingval) mv = Rasters.missingval(first(regions)) isnokwornothing(mv) ? missing : mv @@ -94,7 +94,7 @@ function _mosaic(f::Function, A1::AbstractRaster, regions; name=name(l1), fill=missingval, missingval, - maskingval, + coalesceval, driver, options, force diff --git a/src/methods/rasterize.jl b/src/methods/rasterize.jl index d6c07fd28..ac1bcd5b5 100644 --- a/src/methods/rasterize.jl +++ b/src/methods/rasterize.jl @@ -476,11 +476,11 @@ function alloc_rasterize(f, r::RasterCreator; metadata=r.metadata, suffix=r.suffix, ) - maskingval = nothing + coalesceval = nothing if prod(size(r.to)) == 0 throw(ArgumentError("Destination array is is empty, with size $(size(r.to))). 
Rasterization is not possible")) end - A = create(r.filename, fill=missingval, eltype, r.to; name, missingval, maskingval, metadata, suffix) do O + A = create(r.filename, fill=missingval, eltype, r.to; name, missingval, coalesceval, metadata, suffix) do O f(O) end return A diff --git a/src/methods/replace_missing.jl b/src/methods/replace_missing.jl index 98f0ecb64..3ce2eef9c 100644 --- a/src/methods/replace_missing.jl +++ b/src/methods/replace_missing.jl @@ -32,14 +32,14 @@ function replace_missing(A::AbstractRaster{T}, missingval::MV; end old_missingval = Rasters.missingval(A) missingval = convert(MT, missingval) - maskingval = nothing + coalesceval = nothing repmissing(x) = isequal(x, old_missingval) || ismissing(x) ? missingval : x # Disk-backed arrays need to be lazy, memory-backed don't. # But in both cases we make sure we return an array with the missingval # in the eltype, even if there are no missing values in the array. if !isnothing(filename) return create(filename, MT, dims(A); - parent=parent(A), missingval, maskingval, name=name(A), metadata=metadata(A), kw... + parent=parent(A), missingval, coalesceval, name=name(A), metadata=metadata(A), kw... ) do O O .= repmissing.(A) end diff --git a/src/methods/shared_docstrings.jl b/src/methods/shared_docstrings.jl index 729edf9bf..707449f2b 100644 --- a/src/methods/shared_docstrings.jl +++ b/src/methods/shared_docstrings.jl @@ -136,8 +136,8 @@ const OFFSET_KEYWORD = """ const RAW_KEYWORD = """ - `raw`: Turn of all scaling and masking and load the raw values from disk. - `false` by default. If `true`, `scaled` will be set to `false` and `maskingval` - will be set to `nothing`. A warning will be printed if `scaled` or `maskingval` + `false` by default. If `true`, `scaled` will be set to `false` and `coalesceval` + will be set to `nothing`. A warning will be printed if `scaled` or `coalesceval` are manually set to another value. 
""" @@ -163,9 +163,9 @@ const MISSINGVAL_KEYWORD = """ """ const MASKINGVAL_KEYWORD = """ -- `maskingval`: A value to convert `missingval` to, by default `missing`. If this is set it - will be the return value of `missingval(raster)` - `maskingval` becomes the new `missingval`. - Setting `maskingval` to `nothing` means no masking will occur, and the original `missingval` +- `coalesceval`: A value to convert `missingval` to, by default `missing`. If this is set it + will be the return value of `missingval(raster)` - `coalesceval` becomes the new `missingval`. + Setting `coalesceval` to `nothing` means no masking will occur, and the original `missingval` will be the final `missingval`. This can give better performance than using `missing`. Another efficient option is to use e.g. `zero(eltype(raster))` to replace missing values with zero. """ diff --git a/src/modifieddiskarray.jl b/src/modifieddiskarray.jl index a4070ef80..9ec9bfbdf 100644 --- a/src/modifieddiskarray.jl +++ b/src/modifieddiskarray.jl @@ -12,16 +12,16 @@ source_eltype(::NoMod{T}) where T = T struct Mod{T1,T2,Mi,Ma,S,O,F} <: AbstractModifications missingval::Mi - maskingval::Ma + coalesceval::Ma scale::S offset::O coerce::F - function Mod(::Type{T}, missingval, maskingval, scale, offset, coerce) where T - maskingval = maskingval === missingval ? nothing : maskingval + function Mod(::Type{T}, missingval, coalesceval, scale, offset, coerce) where T + coalesceval = coalesceval === missingval ? nothing : coalesceval if isnokw(coerce) || isnothing(coerce) coerce = convert end - vals = map(_nokw2nothing, (missingval, maskingval, scale, offset)) + vals = map(_nokw2nothing, (missingval, coalesceval, scale, offset)) T1 = _resolve_mod_eltype(T, vals...) 
new{T1,T,map(typeof, vals)...,typeof(coerce)}(vals..., coerce) end @@ -31,17 +31,17 @@ Base.eltype(::Mod{T1}) where T1 = T1 source_eltype(::Mod{<:Any,T2}) where T2 = T2 -function _resolve_mod_eltype(::Type{T}, missingval, maskingval, scale, offset) where T - T1 = isnothing(maskingval) ? T : promote_type(T, typeof(maskingval)) +function _resolve_mod_eltype(::Type{T}, missingval, coalesceval, scale, offset) where T + T1 = isnothing(coalesceval) ? T : promote_type(T, typeof(coalesceval)) T2 = isnothing(scale) ? T1 : promote_type(T1, typeof(scale)) T3 = isnothing(offset) ? T2 : promote_type(T2, typeof(offset)) return T3 end missingval(m::Mod) = m.missingval -maskingval(m::Mod) = isnothing(m.maskingval) ? m.missingval : m.maskingval +coalesceval(m::Mod) = isnothing(m.coalesceval) ? m.missingval : m.coalesceval missingval(m::NoMod) = m.missingval -maskingval(m::NoMod) = missingval(m) +coalesceval(m::NoMod) = missingval(m) struct ModifiedDiskArray{I,T,N,V,M} <: DiskArrays.AbstractDiskArray{T,N} var::V @@ -56,7 +56,7 @@ Base.parent(A::ModifiedDiskArray) = A.var Base.size(A::ModifiedDiskArray, args...) = size(parent(A), args...) 
filename(A::ModifiedDiskArray) = filename(parent(A)) missingval(A::ModifiedDiskArray) = A.missingval -maskingval(A::ModifiedDiskArray) = A.maskingval +coalesceval(A::ModifiedDiskArray) = A.coalesceval DiskArrays.haschunks(A::ModifiedDiskArray) = DiskArrays.haschunks(parent(A)) DiskArrays.eachchunk(A::ModifiedDiskArray) = DiskArrays.eachchunk(parent(A)) @@ -113,7 +113,7 @@ end Base.@assume_effects :foldable function _applymod(x, m::Mod) if _ismissing(x, missingval(m)) - maskingval(m) + coalesceval(m) else _scaleoffset(x, m) end @@ -133,7 +133,7 @@ Base.@assume_effects :foldable function _invertmod(::Val{T}, x, m::Mod) where T tm = if isnothing(m.missingval) x else - if _ismissing(x, m.maskingval) + if _ismissing(x, m.coalesceval) return m.missingval else x @@ -155,58 +155,58 @@ Base.@assume_effects :foldable _scaleoffset_inv1(x, ::Nothing, ::Nothing) = x function _stack_mods( - eltypes::Vector, metadata::Vector, missingval::Vector, maskingval; + eltypes::Vector, metadata::Vector, missingval::Vector, coalesceval; scaled, coerce ) map(eltypes, metadata, missingval) do T, md, mv scale, offset = get_scale(md, scaled) - _mod(T, mv, maskingval, scale, offset, coerce) + _mod(T, mv, coalesceval, scale, offset, coerce) end end function _stack_mods( - eltypes::Vector, metadata::Vector, missingval, maskingval::Vector; + eltypes::Vector, metadata::Vector, missingval, coalesceval::Vector; scaled::Bool, coerce ) - map(eltypes, metadata, maskingval) do T, md, mk + map(eltypes, metadata, coalesceval) do T, md, mk scale, offset = get_scale(md, scaled) _mod(T, missingval, mk, scale, offset, coerce) end end function _stack_mods( - eltypes::Vector, metadata::Vector, missingval::Vector, maskingval::Vector; + eltypes::Vector, metadata::Vector, missingval::Vector, coalesceval::Vector; scaled::Bool, coerce ) - map(eltypes, metadata, missingval, maskingval) do T, md, mv, mk + map(eltypes, metadata, missingval, coalesceval) do T, md, mv, mk scale, offset = get_scale(md, scaled) _mod(mv, mk, 
scale, offset, coerce) end end function _stack_mods( - eltypes::Vector, metadata::Vector, missingval, maskingval; + eltypes::Vector, metadata::Vector, missingval, coalesceval; scaled::Bool, coerce ) map(eltypes, metadata) do T, md scale, offset = get_scale(md, scaled) - _mod(T, missingval, maskingval, scale, offset, coerce) + _mod(T, missingval, coalesceval, scale, offset, coerce) end end -function _mod(::Type{T}, metadata, missingval, maskingval; scaled::Bool, coerce) where T +function _mod(::Type{T}, metadata, missingval, coalesceval; scaled::Bool, coerce) where T scale, offset = get_scale(metadata, scaled) - _mod(T, missingval, maskingval, scale, offset, coerce) + _mod(T, missingval, coalesceval, scale, offset, coerce) end -function _mod(::Type{T}, missingval, maskingval, scale, offset, coerce) where T - maskingval = if isnokw(maskingval) +function _mod(::Type{T}, missingval, coalesceval, scale, offset, coerce) where T + coalesceval = if isnokw(coalesceval) # If there is no missingval dont mask isnokwornothing(missingval) ? nothing : missing else - # Unless maskingval was passed explicitly - maskingval === missingval ? nothing : maskingval + # Unless coalesceval was passed explicitly + coalesceval === missingval ? 
nothing : coalesceval end - if isnokwornothing(maskingval) && isnokwornothing(scale) && isnokwornothing(offset) + if isnokwornothing(coalesceval) && isnokwornothing(scale) && isnokwornothing(offset) return NoMod{T}(missingval) else - return Mod(T, missingval, maskingval, scale, offset, coerce) + return Mod(T, missingval, coalesceval, scale, offset, coerce) end end @@ -217,9 +217,9 @@ end return scale, offset end -function _writer_mod(::Type{T}; missingval, maskingval, scale, offset, coerce) where T +function _writer_mod(::Type{T}; missingval, coalesceval, scale, offset, coerce) where T missingval1 = if isnokw(missingval) || isnothing(missingval) - if isnokw(maskingval) || isnothing(maskingval) + if isnokw(coalesceval) || isnothing(coalesceval) nothing else _type_missingval(T) @@ -229,16 +229,16 @@ function _writer_mod(::Type{T}; missingval, maskingval, scale, offset, coerce) w else missingval end - maskingval1 = if isnokw(maskingval) + coalesceval1 = if isnokw(coalesceval) if Missing <: T missing else nothing end else - maskingval + coalesceval end - return _mod(T, missingval1, maskingval1, scale, offset, coerce) + return _mod(T, missingval1, coalesceval1, scale, offset, coerce) end _mod_eltype(::AbstractArray{T}, ::NoMod) where T = T diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index ed6cff28e..cfb507490 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -430,7 +430,7 @@ _unuseddimerror(dimname) = error("Dataset contains unused dimension $dimname") function _writevar!(ds::AbstractDataset, source::CDMsource, A::AbstractRaster{T,N}; verbose=true, missingval=nokw, - maskingval=nokw, + coalesceval=nokw, metadata=nokw, chunks=nokw, chunksizes=_chunks_to_tuple(A, dims(A), chunks), @@ -456,7 +456,7 @@ function _writevar!(ds::AbstractDataset, source::CDMsource, A::AbstractRaster{T, metadata end - maskingval = isnokw(maskingval) ? Rasters.missingval(A) : maskingval + coalesceval = isnokw(coalesceval) ? 
Rasters.missingval(A) : coalesceval missingval = isnokw(missingval) ? Rasters.missingval(A) : missingval missingval = if ismissing(missingval) # See if there is a missing value in metadata @@ -482,7 +482,7 @@ function _writevar!(ds::AbstractDataset, source::CDMsource, A::AbstractRaster{T, attrib["add_offset"] = offset end - mod = _writer_mod(eltype; missingval, maskingval, scale, offset, coerce) + mod = _writer_mod(eltype; missingval, coalesceval, scale, offset, coerce) if !isnothing(mod.missingval) attrib["_FillValue"] = missingval diff --git a/src/sources/grd.jl b/src/sources/grd.jl index 51f0a64b9..05f6e576d 100644 --- a/src/sources/grd.jl +++ b/src/sources/grd.jl @@ -165,7 +165,7 @@ function Base.write(filename::String, ::GRDsource, A::AbstractRaster; verbose=true, write=true, missingval=nokw, - maskingval=nokw, + coalesceval=nokw, chunks=nokw, scale=nokw, offset=nokw, @@ -185,8 +185,8 @@ function Base.write(filename::String, ::GRDsource, A::AbstractRaster; chunks isa NoKW || @warn "specifying chunks not supported for .grd files" missingval = isnokw(missingval) ? Rasters.missingval(A) : missingval - maskingval = isnokw(maskingval) ? Rasters.missingval(A) : maskingval - missingval = if ismissing(missingval) || isnothing(missingval) && !isnothing(maskingval) + coalesceval = isnokw(coalesceval) ? 
Rasters.missingval(A) : coalesceval + missingval = if ismissing(missingval) || isnothing(missingval) && !isnothing(coalesceval) # See if there is a missing value in metadata mv = _grd_mv(eltype, metadata(A); verbose=false) # Otherwise define one @@ -208,7 +208,7 @@ function Base.write(filename::String, ::GRDsource, A::AbstractRaster; filename = splitext(filename)[1] # Data: write a raw gri file from the array - mod = _writer_mod(eltype; missingval, maskingval, scale, offset, coerce) + mod = _writer_mod(eltype; missingval, coalesceval, scale, offset, coerce) gri_filename = filename * ".gri" isfile(gri_filename) && rm(gri_filename) _write_gri(gri_filename, Val{source_eltype(mod)}(), mod, parent(correctedA)) diff --git a/src/stack.jl b/src/stack.jl index 7782e6b5d..a49e20683 100644 --- a/src/stack.jl +++ b/src/stack.jl @@ -368,24 +368,24 @@ function RasterStack(filenames::NamedTuple{K,<:Tuple{<:AbstractString,Vararg}}; layermetadata::Union{NoKW,NamedTuple{K}}=nokw, layerdims::Union{NoKW,NamedTuple{K}}=nokw, missingval=nokw, - maskingval=nokw, + coalesceval=nokw, replace_missing=nokw, scaled=nokw, raw=false, kw... ) where K _maybe_warn_replace_missing(replace_missing) - scaled, maskingval = _raw_check(raw, scaled, maskingval) + scaled, coalesceval = _raw_check(raw, scaled, coalesceval) layermissingval = collect(_stack_nt(filenames, missingval)) - layermaskingval = collect(_stack_nt(filenames, maskingval)) + layercoalesceval = collect(_stack_nt(filenames, coalesceval)) fn = collect(filenames) layermetadata = layermetadata isa NamedTuple ? collect(layermetadata) : map(_ -> NoKW(), fn) layerdims = layerdims isa NamedTuple ? 
collect(layerdims) : map(_ -> NoKW(), fn) - layers = map(K, fn, layermetadata, layerdims, layermissingval, layermaskingval) do name, fn, md, d, mv, ma + layers = map(K, fn, layermetadata, layerdims, layermissingval, layercoalesceval) do name, fn, md, d, mv, ma Raster(fn; source=_sourcetrait(fn, source), - dims=d, name, metadata=md, missingval=mv, maskingval=ma, scaled, kw... + dims=d, name, metadata=md, missingval=mv, coalesceval=ma, scaled, kw... ) end return RasterStack(NamedTuple{K}(layers); resize, metadata) @@ -397,7 +397,7 @@ function RasterStack(filename::AbstractString; raw::Bool=false, source::Union{Symbol,Source,NoKW}=nokw, missingval=nokw, - maskingval=nokw, + coalesceval=nokw, name=nokw, group::Union{Symbol,AbstractString,NoKW}=nokw, scaled::Union{Bool,NoKW}=nokw, @@ -406,7 +406,7 @@ function RasterStack(filename::AbstractString; kw... ) _maybe_warn_replace_missing(replace_missing) - scaled, maskingval = _raw_check(raw, scaled, maskingval) + scaled, coalesceval = _raw_check(raw, scaled, coalesceval) source = _sourcetrait(filename, source) st = if isdir(filename) && !(source isa Zarrsource) @@ -421,14 +421,14 @@ function RasterStack(filename::AbstractString; name end RasterStack(joinpath.(Ref(filename), filenames); - missingval, maskingval, scaled, coerce, lazy, dropband, group, kw... + missingval, coalesceval, scaled, coerce, lazy, dropband, group, kw... ) else # Load as a single file if haslayers(source) # With multiple named layers l_st = _layer_stack(filename; - source, name, lazy, group, missingval, maskingval, scaled, coerce, kw... + source, name, lazy, group, missingval, coalesceval, scaled, coerce, kw... ) # Maybe split the stack into separate arrays to remove extra dims. 
if !isnokw(name) @@ -439,7 +439,7 @@ function RasterStack(filename::AbstractString; else # With bands actings as layers raster = Raster(filename; - source, lazy, missingval, maskingval, scaled, coerce, dropband=false, + source, lazy, missingval, coalesceval, scaled, coerce, dropband=false, ) RasterStack(raster; kw...) end @@ -508,7 +508,7 @@ function _layer_stack(filename; layermetadata=nokw, layerdims=nokw, missingval=nokw, - maskingval=nokw, + coalesceval=nokw, crs=nokw, mappedcrs=nokw, coerce=convert, @@ -541,7 +541,7 @@ function _layer_stack(filename; missingval end eltypes = map(eltype, layers.vars) - mods = _stack_mods(eltypes, layermetadata1, missingval1, maskingval; scaled, coerce) + mods = _stack_mods(eltypes, layermetadata1, missingval1, coalesceval; scaled, coerce) data = if lazy vars = ntuple(i -> layers.vars[i], length(name)) mods = ntuple(i -> mods[i], length(name)) @@ -557,10 +557,10 @@ function _layer_stack(filename; missingval = map(mods) do mod if isnothing(Rasters.missingval(mod)) nothing - elseif isnothing(Rasters.maskingval(mod)) + elseif isnothing(Rasters.coalesceval(mod)) Rasters.missingval(mod) else - Rasters.maskingval(mod) + Rasters.coalesceval(mod) end end |> NT return data, (; dims, refdims, layerdims, metadata, layermetadata=NT(layermetadata1), missingval) diff --git a/src/utils.jl b/src/utils.jl index f617a7920..229dc44d9 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -301,14 +301,14 @@ end # Constructor helpers -function _raw_check(raw, scaled, maskingval) +function _raw_check(raw, scaled, coalesceval) if raw scaled isa Bool && scaled && @warn "`scaled=true` set to `false` because of `raw=true`" - isnokwornothing(maskingval) || @warn "`maskingval=$maskingval` set to `nothing` because of `raw=true`" + isnokwornothing(coalesceval) || @warn "`coalesceval=$coalesceval` set to `nothing` because of `raw=true`" return false, nothing else scaled = isnokw(scaled) ? 
true : scaled - return scaled, maskingval + return scaled, coalesceval end end @@ -451,7 +451,7 @@ end _maybe_warn_replace_missing(replace_missing::NoKW) = nothing function _maybe_warn_replace_missing(replace_missing) - @warn "`replace_missing` keyword no longer used. Set `maskingval` to nothing for no replacement, to `missing` to mask `missingval` with `missing`, or any other value" + @warn "`replace_missing` keyword no longer used. Set `coalesceval` to nothing for no replacement, to `missing` to mask `missingval` with `missing`, or any other value" end @noinline _warn_disk() = @warn "Disk-based objects may be very slow here. User `read` first." diff --git a/src/write.jl b/src/write.jl index a093cad40..b456c117d 100644 --- a/src/write.jl +++ b/src/write.jl @@ -76,12 +76,12 @@ Returns `filename`. function Base.write(filename::AbstractString, A::AbstractRaster; source=_sourcetrait(filename), missingval=nokw, - maskingval=nokw, + coalesceval=nokw, kw... ) missingval = isnokw(missingval) ? Rasters.missingval(A) : missingval - maskingval = isnokw(maskingval) ? missingval : maskingval - write(filename, _sourcetrait(source), A; missingval, maskingval, kw...) + coalesceval = isnokw(coalesceval) ? missingval : coalesceval + write(filename, _sourcetrait(source), A; missingval, coalesceval, kw...) end Base.write(A::AbstractRaster; kw...) = write(filename(A), A; kw...) # Fallback @@ -120,14 +120,14 @@ function Base.write(path::AbstractString, s::AbstractRasterStack; source=_sourcetrait(path, ext), verbose=true, missingval=nokw, - maskingval=nokw, + coalesceval=nokw, kw... ) source = _sourcetrait(source) - maskingval = _stack_nt(s, isnokw(maskingval) ? Rasters.missingval(s) : maskingval) - missingval = _stack_missingvals(s, isnokw(missingval) ? maskingval : missingval) + coalesceval = _stack_nt(s, isnokw(coalesceval) ? Rasters.missingval(s) : coalesceval) + missingval = _stack_missingvals(s, isnokw(missingval) ? 
coalesceval : missingval) if haslayers(source) - write(path, source, s; missingval, maskingval, kw...) + write(path, source, s; missingval, coalesceval, kw...) else # Otherwise write separate files for each layer if isnothing(ext) @@ -146,9 +146,9 @@ function Base.write(path::AbstractString, s::AbstractRasterStack; if verbose @warn string("Cannot write complete stacks to \"", ext, "\", writing layers as individual files") end - map(keys(s), suffix1, missingval, maskingval) do key, suf, mv, ma + map(keys(s), suffix1, missingval, coalesceval) do key, suf, mv, ma fn = string(base, suf, ext) - write(fn, source, s[key]; missingval=mv, maskingval=ma, kw...) + write(fn, source, s[key]; missingval=mv, coalesceval=ma, kw...) end |> NamedTuple{keys(s)} end end diff --git a/test/create.jl b/test/create.jl index eeedd1b60..687409884 100644 --- a/test/create.jl +++ b/test/create.jl @@ -133,11 +133,11 @@ for ext in (".nc", ".tif", ".grd") fn = "created$ext" created = Rasters.create(fn, UInt8, (X(1:10), Y(1:10)); missingval=0xff, - maskingval=nothing, + coalesceval=nothing, fill=0x01, force=true ) - @test all(Raster(fn; maskingval=nothing) .=== 0x01) + @test all(Raster(fn; coalesceval=nothing) .=== 0x01) @test missingval(created) === 0xff if ext == ".grd" @@ -150,7 +150,7 @@ for ext in (".nc", ".tif", ".grd") nothing end @test all(Raster(fn) .=== Int16(2)) - @test missingval(Raster(fn; maskingval=nothing)) === typemax(Int16) + @test missingval(Raster(fn; coalesceval=nothing)) === typemax(Int16) else @time created = Rasters.create(fn, Int16, (X(1:10), Y(1:10)); missingval=typemax(Int16), @@ -163,7 +163,7 @@ for ext in (".nc", ".tif", ".grd") end @test all(Raster(fn) .=== 3.0) @test all(Raster(fn; scaled=false) .== Int16(-20)) - @test missingval(Raster(fn; maskingval=nothing, scaled=false)) === typemax(Int16) + @test missingval(Raster(fn; coalesceval=nothing, scaled=false)) === typemax(Int16) end end end @@ -172,7 +172,7 @@ end @testset "create .nc stack" begin created = 
Rasters.create("created.nc", (a=UInt8, b=Float32), (X(1:10), Y(1:10)); missingval=(a=0xff, b=typemax(Float32)), - maskingval=nothing, + coalesceval=nothing, fill=(a=0x01, b=1.0f0), layerdims=(a=(X,), b=(X, Y)), force=true, @@ -182,7 +182,7 @@ end @test size(created.b) == (10, 10) @test all(created.a .=== 0x01) @test all(created.b .=== 1.0f0) - st = RasterStack("created.nc"; maskingval=nothing) + st = RasterStack("created.nc"; coalesceval=nothing) @test missingval(st) == (a=0xff, b=typemax(Float32)) created = Rasters.create("created.nc", (a=UInt8, b=Float32), (X(1:10), Y(1:10)); @@ -196,13 +196,13 @@ end @test size(created.b) == (10, 10) @test all(created.a .=== 0x01) @test all(created.b .=== 1.0f0) - st = RasterStack("created.nc"; maskingval=nothing) + st = RasterStack("created.nc"; coalesceval=nothing) @test missingval(st) == (a=0xff, b=typemax(Float32)) @testset "with a function" begin created = Rasters.create("created.nc", (a=UInt8, b=Float32), (X(1:10), Y(1:10)); missingval=(a=0xff, b=typemax(Float32)), - maskingval=nothing, + coalesceval=nothing, fill=(a=0x01, b=1.0f0), layerdims=(a=(X,), b=(X, Y)), force=true, diff --git a/test/resample.jl b/test/resample.jl index f1678c10b..d65b7d760 100644 --- a/test/resample.jl +++ b/test/resample.jl @@ -23,8 +23,8 @@ include(joinpath(dirname(pathof(Rasters)), "../test/test_utils.jl")) end end - cea = Raster(raster_path; missingval=0x00, name=:cea, maskingval=nothing) - raster_output = resample(cea; res=output_res, crs=output_crs, method, missingval=0x00, maskingval=nothing) + cea = Raster(raster_path; missingval=0x00, name=:cea, coalesceval=nothing) + raster_output = resample(cea; res=output_res, crs=output_crs, method, missingval=0x00, coalesceval=nothing) @testset "missingval propagates" begin @test missingval(resample(cea; res=output_res, crs=output_crs, method)) == 0x00 @@ -157,17 +157,17 @@ include(joinpath(dirname(pathof(Rasters)), "../test/test_utils.jl")) @test dims(resampled_3D, Z) == Z(1:2) end - maskingval = 
Rasters.nokw - for maskingval in (nothing, missing, Rasters.nokw) + coalesceval = Rasters.nokw + for coalesceval in (nothing, missing, Rasters.nokw) # Resample cea.tif using resample - cea = Raster(raster_path; missingval=0x00, name=:cea, maskingval) - raster_output = resample(cea; res=output_res, crs=output_crs, method, missingval=0x00, maskingval) - disk_output = resample(cea; res=output_res, crs=output_crs, method, missingval=0x00, maskingval, filename="resample.tif") + cea = Raster(raster_path; missingval=0x00, name=:cea, coalesceval) + raster_output = resample(cea; res=output_res, crs=output_crs, method, missingval=0x00, coalesceval) + disk_output = resample(cea; res=output_res, crs=output_crs, method, missingval=0x00, coalesceval, filename="resample.tif") - cea_permuted = permutedims(Raster(raster_path; missingval=0x00, name=:cea_permuted, maskingval), (Y, X)) - permuted_output = resample(cea_permuted, output_res; missingval=0x00, maskingval, crs=output_crs, method) + cea_permuted = permutedims(Raster(raster_path; missingval=0x00, name=:cea_permuted, coalesceval), (Y, X)) + permuted_output = resample(cea_permuted, output_res; missingval=0x00, coalesceval, crs=output_crs, method) - AG_output1 = if isnothing(maskingval) + AG_output1 = if isnothing(coalesceval) AG_output else replace(AG_output, 0x00 => missing) diff --git a/test/sources/gdal.jl b/test/sources/gdal.jl index 1a3aac041..f7a1342e7 100644 --- a/test/sources/gdal.jl +++ b/test/sources/gdal.jl @@ -28,7 +28,7 @@ gdalpath = maybedownload(url) @testset "cf" begin # This file has no scale/offset so cf does nothing @time cfarray = Raster(gdalpath; missingval=0x00) - @time cf_nomask_array = Raster(gdalpath; maskingval=nothing) + @time cf_nomask_array = Raster(gdalpath; coalesceval=nothing) @time nocfarray = Raster(gdalpath; scaled=false) @time lazycfarray = Raster(gdalpath; lazy=true, missingval=0x00) @time lazynocfarray = Raster(gdalpath; lazy=true, scaled=false) @@ -278,7 +278,7 @@ gdalpath = 
maybedownload(url) tempfile3 = tempname() * ".tif" Afile = mosaic(first, A1, A2; missingval=0x00, atol=1e-8, filename=tempfile1) Afile2 = mosaic(first, A1, A2; - missingval=0x00, atol=1e-8, filename=tempfile2, maskingval=missing + missingval=0x00, atol=1e-8, filename=tempfile2, coalesceval=missing ) @test missingval(Afile2) === missing Amem = mosaic(first, A1, A2; missingval=0x00, atol=1e-8) @@ -471,7 +471,7 @@ gdalpath = maybedownload(url) filename = tempname() * ".tif" write(filename, A) @test missingval(Raster(filename)) === missing - @test missingval(Raster(filename; maskingval=nothing)) === typemax(UInt8) + @test missingval(Raster(filename; coalesceval=nothing)) === typemax(UInt8) rm(filename) end @@ -521,7 +521,7 @@ gdalpath = maybedownload(url) # Handle WorldClim/ucdavis unreliability A = nothing try - A = Raster(WorldClim{Climate}, :tavg; res="10m", month=1, maskingval=nothing) + A = Raster(WorldClim{Climate}, :tavg; res="10m", month=1, coalesceval=nothing) catch end if !isnothing(A) diff --git a/test/sources/grd.jl b/test/sources/grd.jl index 4a709cbf0..663fa6175 100644 --- a/test/sources/grd.jl +++ b/test/sources/grd.jl @@ -29,11 +29,11 @@ grdpath = stem * ".gri" @test parent(eagerarray) isa Array end - @testset "maskingval keyword" begin + @testset "coalesceval keyword" begin @time missingarray = Raster(grdpath) @test missingval(missingarray) === missing @test eltype(missingarray) === Union{Missing,Float32} - @time missingarray = Raster(grdpath; maskingval=nothing) + @time missingarray = Raster(grdpath; coalesceval=nothing) @test missingval(missingarray) === -3.4f38 @test eltype(missingarray) === Float32 end @@ -178,7 +178,7 @@ grdpath = stem * ".gri" tn = tempname() tempgrd = tn * ".grd" tempgri = tn * ".gri" - Afile = mosaic(first, A1, A2; missingval=0.0f0, atol=1e-1, filename=tempgrd, maskingval=nothing) + Afile = mosaic(first, A1, A2; missingval=0.0f0, atol=1e-1, filename=tempgrd, coalesceval=nothing) Amem = mosaic(first, A1, A2; missingval=0.0f0, 
atol=1e-1) Atest = grdarray[X(1:80), Y(1:60)] Atest[X(1:26), Y(31:60)] .= 0.0f0 @@ -274,7 +274,7 @@ grdpath = stem * ".gri" gdalfilename = tempname() * ".tif" write(gdalfilename, GDALsource(), grdarray[Band(1)]; force = true) @test (@allocations write(gdalfilename, GDALsource(), grdarray[Band(1)]; force = true)) < 1e4 - gdalarray = Raster(gdalfilename; maskingval=nothing) + gdalarray = Raster(gdalfilename; coalesceval=nothing) # @test convert(ProjString, crs(gdalarray)) == convert(ProjString, EPSG(4326)) @test val(dims(gdalarray, X)) ≈ val(dims(grdarray, X)) @test val(dims(gdalarray, Y)) ≈ val(dims(grdarray, Y)) @@ -294,7 +294,7 @@ grdpath = stem * ".gri" @test missingval(Raster(filename)) === missing filename = tempname() * ".grd" write(filename, A) - @test missingval(Raster(filename; maskingval=nothing)) === typemin(Float32) + @test missingval(Raster(filename; coalesceval=nothing)) === typemin(Float32) end end diff --git a/test/sources/ncdatasets.jl b/test/sources/ncdatasets.jl index b505ab396..8190702b0 100644 --- a/test/sources/ncdatasets.jl +++ b/test/sources/ncdatasets.jl @@ -60,13 +60,13 @@ end @testset "scaling and maskign" begin @time cfarray = Raster(ncsingle) @time cfarray = Raster(ncsingle) - @time cf_nomask_array = Raster(ncsingle; maskingval=nothing) + @time cf_nomask_array = Raster(ncsingle; coalesceval=nothing) @time nocfarray = Raster(ncsingle; scaled=false) - @time nocf_nomask_array = Raster(ncsingle; scaled=false, maskingval=nothing) + @time nocf_nomask_array = Raster(ncsingle; scaled=false, coalesceval=nothing) @time raw_array = Raster(ncsingle; raw=true) @time lazycfarray = Raster(ncsingle; lazy=true, scaled=false) @time lazynocfarray = Raster(ncsingle; lazy=true, scaled=false) - @time lazynocf_nomask_array = Raster(ncsingle; lazy=true, scaled=false, maskingval=nothing) + @time lazynocf_nomask_array = Raster(ncsingle; lazy=true, scaled=false, coalesceval=nothing) @test missingval(cfarray) === missing @test missingval(nocfarray) === missing 
@test missingval(cf_nomask_array) === 1.0f20 @@ -355,7 +355,7 @@ end nccleaned = replace_missing(ncarray[Ti(1)], -9999.0) write(gdalfilename, nccleaned; force=true) @test (@allocations write(gdalfilename, nccleaned; force=true)) < 1e4 - gdalarray = Raster(gdalfilename; maskingval=nothing) + gdalarray = Raster(gdalfilename; coalesceval=nothing) # gdalarray WKT is missing one AUTHORITY # @test_broken crs(gdalarray) == convert(WellKnownText, EPSG(4326)) # But the Proj representation is the same @@ -371,7 +371,7 @@ end nccleaned = replace_missing(ncarray[Ti(1)], -9999.0) write("testgrd.gri", nccleaned; force=true) @test (@allocations write("testgrd.gri", nccleaned; force=true)) < 1e4 - grdarray = Raster("testgrd.gri", maskingval=nothing); + grdarray = Raster("testgrd.gri", coalesceval=nothing); @test crs(grdarray) == convert(ProjString, EPSG(4326)) @test bounds(grdarray) == bounds(nccleaned) @test index(grdarray, Y) ≈ reverse(index(nccleaned, Y)) .- 0.5 diff --git a/test/sources/rasterdatasources.jl b/test/sources/rasterdatasources.jl index 7a450f657..c95d53f11 100644 --- a/test/sources/rasterdatasources.jl +++ b/test/sources/rasterdatasources.jl @@ -54,7 +54,7 @@ end st = RasterStack(CHELSA{BioClim}, (1, 2); lazy=true, missingval=-Int16(9999), - maskingval=nothing, + coalesceval=nothing, metadata=Rasters.NoMetadata(), crs=nothing, mappedcrs=EPSG(4326), From 5f9be64482e3adf13765f2ef78c6998ada00825e Mon Sep 17 00:00:00 2001 From: rafaqz Date: Tue, 24 Sep 2024 20:48:23 +0200 Subject: [PATCH 24/38] combine missingval and maskingval --- ext/RastersArchGDALExt/gdal_source.jl | 16 ++- ext/RastersArchGDALExt/warp.jl | 4 +- ext/RastersNCDatasetsExt/ncdatasets_source.jl | 8 +- src/array.jl | 47 ++++---- src/create.jl | 27 ++--- src/extensions.jl | 5 +- src/methods/crop_extend.jl | 1 - src/methods/mosaic.jl | 11 +- src/methods/rasterize.jl | 3 +- src/methods/replace_missing.jl | 3 +- src/methods/shared_docstrings.jl | 18 ++-- src/methods/zonal.jl | 8 +- src/modifieddiskarray.jl 
| 100 +++++++----------- src/sources/commondatamodel.jl | 6 +- src/sources/grd.jl | 12 ++- src/stack.jl | 31 ++---- src/utils.jl | 18 +++- src/write.jl | 14 +-- test/create.jl | 15 ++- test/resample.jl | 20 ++-- test/sources/gdal.jl | 8 +- test/sources/grd.jl | 13 ++- test/sources/ncdatasets.jl | 10 +- test/sources/rasterdatasources.jl | 1 - 24 files changed, 178 insertions(+), 221 deletions(-) diff --git a/ext/RastersArchGDALExt/gdal_source.jl b/ext/RastersArchGDALExt/gdal_source.jl index 58bd7bf41..a906ce606 100644 --- a/ext/RastersArchGDALExt/gdal_source.jl +++ b/ext/RastersArchGDALExt/gdal_source.jl @@ -49,7 +49,6 @@ function Base.write(filename::AbstractString, ::GDALsource, A::AbstractRaster{T} verbose=true, write=true, missingval=nokw, - coalesceval=nokw, scale=nokw, offset=nokw, coerce=nokw, @@ -62,8 +61,7 @@ function Base.write(filename::AbstractString, ::GDALsource, A::AbstractRaster{T} A1 = _maybe_permute_to_gdal(A) # Missing values - coalesceval = isnokw(coalesceval) ? RA.missingval(A) : coalesceval - missingval = isnokw(missingval) ? coalesceval : missingval + missingval = isnokw(missingval) ? RA.missingval(A) : missingval missingval = if ismissing(missingval) # See if there is a missing value in metadata # But only use it if its the right type @@ -76,7 +74,7 @@ function Base.write(filename::AbstractString, ::GDALsource, A::AbstractRaster{T} missingval, _block_template=A1, scale, offset, verbose, kw... 
) do dataset if write - mod = RA._writer_mod(eltype; missingval, coalesceval, scale, offset, coerce) + mod = RA._writer_mod(eltype; missingval, scale, offset, coerce) open(A1; write=true) do O R = RA._maybe_modify(AG.RasterDataset(dataset), mod) R .= parent(O) @@ -247,16 +245,15 @@ function RA.Raster(ds::AG.RasterDataset; refdims=(), name=nokw, metadata=RA._metadata(ds), - missingval=RA.missingval(ds), - coalesceval=missing, + missingval=RA.missingval(ds) => missing, lazy=false, dropband=false, scaled=true, coerce=convert, ) filelist = AG.filelist(ds) - mod = RA._mod(eltype(ds), metadata, missingval, coalesceval; scaled, coerce) - kw = (; refdims, name, metadata, missingval=Rasters.coalesceval(mod)) + mod = RA._mod(eltype(ds), metadata, missingval; scaled, coerce) + kw = (; refdims, name, metadata, missingval=Rasters._outer_missingval(mod)) raster = if lazy && length(filelist) > 0 filename = first(filelist) Raster(FileArray{GDALsource}(ds, filename; mod), dims; kw...) @@ -310,7 +307,6 @@ function AG.RasterDataset(f::Function, A::AbstractRaster; verbose=false, eltype=Missings.nonmissingtype(eltype(A)), missingval=Rasters.missingval(A), - coalesceval=Rasters.missingval(A), kw... ) A1 = _maybe_permute_to_gdal(A) @@ -318,7 +314,7 @@ function AG.RasterDataset(f::Function, A::AbstractRaster; _block_template=A1, missingval, scale, offset, verbose, kw... 
) do dataset rds = AG.RasterDataset(dataset) - mod = RA._writer_mod(eltype; missingval=RA.missingval(rds), coalesceval, scale, offset, coerce) + mod = RA._writer_mod(eltype; missingval=RA.missingval(rds), scale, offset, coerce) open(A1) do O RA._maybe_modify(rds, mod) .= parent(O) end diff --git a/ext/RastersArchGDALExt/warp.jl b/ext/RastersArchGDALExt/warp.jl index 731967322..0e555cbab 100644 --- a/ext/RastersArchGDALExt/warp.jl +++ b/ext/RastersArchGDALExt/warp.jl @@ -18,7 +18,7 @@ function _warp(A::AbstractRaster, flags::Dict; filename=nothing, suffix="", missingval=nokw, - coalesceval=Rasters.missingval(A), + maskingval=Rasters.missingval(A), name=Rasters.name(A), kw... ) @@ -44,7 +44,7 @@ function _warp(A::AbstractRaster, flags::Dict; out = AG.Dataset(A1; filename=tempfile, missingval, kw...) do dataset AG.gdalwarp([dataset], flagvect; warp_kw...) do warped # Read the raster lazily, dropping Band if there is none in `A` - raster = Raster(warped; lazy=true, dropband=!hasdim(A, Band()), name, coalesceval) + raster = Raster(warped; lazy=true, dropband=!hasdim(A, Band()), name, maskingval) # Either read the MEM dataset to an Array, or keep a filename base raster lazy return isnothing(filename) ? read(raster) : raster end diff --git a/ext/RastersNCDatasetsExt/ncdatasets_source.jl b/ext/RastersNCDatasetsExt/ncdatasets_source.jl index c80c86ea8..f890349ba 100644 --- a/ext/RastersNCDatasetsExt/ncdatasets_source.jl +++ b/ext/RastersNCDatasetsExt/ncdatasets_source.jl @@ -32,7 +32,7 @@ function Base.write(filename::AbstractString, source::Source, s::AbstractRasterS append=false, force=false, missingval=nokw, - coalesceval=nokw, + maskingval=nokw, f=identity, kw... ) where {Source<:NCDsource,K,T} @@ -44,13 +44,13 @@ function Base.write(filename::AbstractString, source::Source, s::AbstractRasterS end ds = NCD.Dataset(filename, mode; attrib=RA._attribdict(metadata(s))) - coalesceval = RA._stack_nt(s, isnokw(coalesceval) ? 
Rasters.missingval(s) : coalesceval) - missingval = RA._stack_missingvals(s, isnokw(missingval) ? coalesceval : missingval) + maskingval = RA._stack_nt(s, isnokw(maskingval) ? Rasters.missingval(s) : maskingval) + missingval = RA._stack_missingvals(s, isnokw(missingval) ? maskingval : missingval) try map(keys(s)) do k RA._writevar!(ds, source, s[k]; missingval=missingval[k], - coalesceval=coalesceval[k], + maskingval=maskingval[k], kw... ) end diff --git a/src/array.jl b/src/array.jl index 130701960..dbfc84c35 100644 --- a/src/array.jl +++ b/src/array.jl @@ -10,7 +10,7 @@ wish to disable memory checks. This setting can be overridden with the `checkmem` keyword, where applicable. """ -function checkmem!(checkmem::Bool) +function checkmem!(checkmem::Bool) !checkmem || @warn "Setting `checkmem` to `false` globally may lead to out-of-memory errors or system crashes" CHECKMEM[] = checkmem return checkmem @@ -195,12 +195,12 @@ end Raster(A::AbstractDimArray; kw...) Raster(A::AbstractArray, dims; kw...) -A generic [`AbstractRaster`](@ref) for spatial/raster array data. It can hold -either memory-backed arrays or, if `lazy=true`, a [`FileArray`](@ref), -which stores the `String` path to an unopened file. +A generic [`AbstractRaster`](@ref) for spatial/raster array data. It can hold +either memory-backed arrays or, if `lazy=true`, a [`FileArray`](@ref), +which stores the `String` path to an unopened file. -If `lazy=true`, the file will only be opened lazily when it is indexed with `getindex` -or when `read(A)` is called. Broadcasting, taking a view, reversing, and most other +If `lazy=true`, the file will only be opened lazily when it is indexed with `getindex` +or when `read(A)` is called. Broadcasting, taking a view, reversing, and most other methods will _not_ load data from disk; they will be applied later, lazily. # Arguments @@ -210,12 +210,11 @@ methods will _not_ load data from disk; they will be applied later, lazily. 
# Keywords $NAME_KEYWORD -$GROUP_KEYWORD +$GROUP_KEYWORD $MISSINGVAL_KEYWORD -$MASKINGVAL_KEYWORD $METADATA_KEYWORD -$CONSTRUCTOR_CRS_KEYWORD -$CONSTRUCTOR_MAPPEDCRS_KEYWORD +$CONSTRUCTOR_CRS_KEYWORD +$CONSTRUCTOR_MAPPEDCRS_KEYWORD $REFDIMS_KEYWORD When a filepath `String` is used: @@ -288,8 +287,8 @@ function Raster(filename::AbstractString, dims::Tuple{<:Dimension,<:Dimension,Va )::Raster Raster(filename; dims, kw...) end -function Raster(filename::AbstractString; - source=nokw, +function Raster(filename::AbstractString; + source=nokw, kw... ) source = _sourcetrait(filename, source) @@ -304,7 +303,6 @@ function Raster(ds, filename::AbstractString; group=nokw, metadata=nokw, missingval=nokw, - coalesceval=nokw, crs=nokw, mappedcrs=nokw, source=nokw, @@ -318,17 +316,26 @@ function Raster(ds, filename::AbstractString; mod=nokw, raw=false, )::Raster - scaled, coalesceval = _raw_check(raw, scaled, coalesceval) + scaled, missingval = _raw_check(raw, scaled, missingval) _maybe_warn_replace_missing(replace_missing) name1 = filekey(ds, name) source = _sourcetrait(filename, source) data_out, dims_out, metadata_out, missingval_out = _open(source, ds; name=name1, group, mod=NoMod()) do var metadata_out = isnokw(metadata) ? _metadata(var) : metadata - missingval1 = isnokw(missingval) ? Rasters.missingval(var, metadata_out) : missingval - coalesceval1 = isnokw(coalesceval) && !isnothing(missingval1) ? missing : coalesceval - mod = isnokw(mod) ? _mod(eltype(var), metadata_out, missingval1, coalesceval1; scaled, coerce) : mod + missingval_out = if isnokw(missingval) + # Detect missingval and convert it to missing + Rasters.missingval(var, metadata_out) => missing + elseif missingval isa Pair && missingval[1] == Rasters.missingval + # Autodetect first missingval + Rasters.missingval(var, metadata_out) => missingval[2] + else + # Use whatever the user passed in + missingval + end + @show missingval missingval_out + mod = isnokw(mod) ? 
_mod(eltype(var), metadata_out, missingval_out; scaled, coerce) : mod data_out = if lazy - FileArray{typeof(source)}(var, filename; + FileArray{typeof(source)}(var, filename; name=name1, group, mod, write ) else @@ -337,10 +344,8 @@ function Raster(ds, filename::AbstractString; x = Array(modvar) x isa AbstractArray ? x : fill(x) # Catch an NCDatasets bug end - # If coalesceval is `nothing` use missingval as missingval dims_out = isnokw(dims) ? _dims(var, crs, mappedcrs) : format(dims, data_out) - missingval_out = isnokwornothing(coalesceval1) ? missingval1 : coalesceval1 - data_out, dims_out, metadata_out, missingval_out + data_out, dims_out, metadata_out, missingval end name_out = name1 isa Union{NoKW,Nothing} ? Symbol("") : Symbol(name1) raster = Raster(data_out, dims_out, refdims, name_out, metadata_out, missingval_out) diff --git a/src/create.jl b/src/create.jl index 21afd95a4..7edf35bf3 100644 --- a/src/create.jl +++ b/src/create.jl @@ -11,7 +11,8 @@ If it is `nothing` or not passed, an in-memory `Raster` will be created. If type is a `Type` return value is a `Raster`. The `eltype` will usually be `T`, except where `scale` and/or `offset` keywords are used or a `missingval` of a different type is specified, in which case `T` will depend on the type promotion of `scale`, `offset` and `missingval` with `T`. -`coalesceval` will also affect the `eltype` of the openeded raster if you `create` to a file. +If `missingval` is a `Pair` of `on_disk_missingval => user_facing_missingval`, the user facing value +will effect `T`, not the internal on-disk value. If types is a `NamedTuple` of types, the result will be a `RasterStack`. In this case `fill` and `missingval` can be single values (for all layers) or `NamedTuple` with the same names to specify per-layer. @@ -40,7 +41,6 @@ $WRITE_MISSINGVAL_KEYWORD If there is no `fill`, raster values may remain undefined. They may be set to `missingval` on disk, but this is not guaranteed. 
It us often more efficient to use `fill` than to fill manually after `create`. -$MASKINGVAL_KEYWORD $SOURCE_KEYWORD - `lazy`: A `Bool` specifying if to load data lazily from disk. For `create` `lazy=true` is the default, as creating a disk-based file is normally associated @@ -78,7 +78,6 @@ using Rasters.Lookups rast = Rasters.create("created.tif", UInt8, Extents.Extent(X=(0, 120), Y=(-80, 80), Band=(0, 12)); res=(X=10.0, Y=10.0, Band=1), # size=(X=100, Y=100, Band=12), - coalesceval=nothing, name=:myraster, crs=EPSG(4326), force=true, @@ -99,7 +98,6 @@ ext = Extents.Extent(X=(0, 120), Y=(-80, 80))#, Band=(1, 3)) types = (a=UInt8, b=Int32, c=Float64) rast = Rasters.create("created.nc", types, ext; # res=(X=1.0, Y=1.0, Band=1), - coalesceval=nothing, size=(X=100, Y=100), crs=EPSG(4326), force=true, @@ -188,7 +186,6 @@ function create(filename::Union{AbstractString,Nothing}, T::Union{Type,NamedTupl end function create(filename::Nothing, ::Type{T}, dims::Tuple; missingval=nokw, - coalesceval=nothing, fill=nokw, parent=nokw, verbose=true, @@ -204,8 +201,7 @@ function create(filename::Nothing, ::Type{T}, dims::Tuple; if verbose isnokw(chunks) || @warn "`chunks` of `$chunks` found. But `chunks` are not used for in-memory rasters" end - # coalesceval determines missingval here as we don't use both - missingval = isnokwornothing(coalesceval) ? missingval : coalesceval + missingval = missingval isa Pair ? last(missingval) : missingval eltype = isnokwornothing(missingval) ? T : promote_type(T, typeof(missingval)) data = if isnokw(parent) || isnothing(parent) Array{eltype}(undef, dims) @@ -231,22 +227,19 @@ function create(filename::Nothing, types::NamedTuple, dims::Tuple; options=nokw, parent=nokw, missingval=nokw, - coalesceval=nokw, fill=nokw, layerdims=nokw, layermetadata=nokw, f=identity, kw... ) - missingval = isnokwornothing(missingval) ? coalesceval : missingval layerdims = isnokw(layerdims) ? 
map(_ -> basedims(dims), types) : layerdims layermetadata = layermetadata isa NamedTuple ? layermetadata : map(_ -> layermetadata, types) layerfill = fill isa NamedTuple ? fill : map(_ -> fill, types) layermissingvals = missingval isa NamedTuple ? missingval : map(_ -> missingval, types) - layercoalescevals = coalesceval isa NamedTuple ? coalesceval : map(_ -> coalesceval, types) - layers = map(types, layermissingvals, layercoalescevals, layerfill, layerdims, layermetadata) do T, lmv, lma, lfv, ld, lm + layers = map(types, layermissingvals, layerfill, layerdims, layermetadata) do T, lmv, lfv, ld, lm create(nothing, T, DD.dims(dims, ld); - parent, missingval=lmv, coalesceval=lma, fill=lfv, metadata=lm, driver, options, + parent, missingval=lmv, fill=lfv, metadata=lm, driver, options, ) end st = RasterStack(layers; kw...) @@ -256,7 +249,6 @@ end function create(filename::AbstractString, source::Source, ::Type{T}, dims::DimTuple; name=nokw, missingval=nokw, - coalesceval=nokw, fill=nokw, metadata=nokw, chunks=nokw, @@ -283,20 +275,19 @@ function create(filename::AbstractString, source::Source, ::Type{T}, dims::DimTu # Create layers of zero arrays rast = Raster(A, dims; name, missingval) Rasters.write(f, filename, source, rast; - eltype, chunks, metadata, scale, offset, missingval, coalesceval, verbose, force, coerce, write, kw... + eltype, chunks, metadata, scale, offset, missingval, verbose, force, coerce, write, kw... 
) do W # write returns a variable, wrap it as a Raster f(rebuild(rast, W)) end # Don't pass in `missingval`, read it again from disk in case it changed - return Raster(filename; source, lazy, metadata, coalesceval, dropband, coerce) + return Raster(filename; source, lazy, metadata, dropband, coerce) end function create(filename::AbstractString, source::Source, layertypes::NamedTuple, dims::DimTuple; lazy=true, verbose=true, force=false, missingval=nokw, - coalesceval=nokw, fill=nokw, metadata=nokw, layerdims=nokw, @@ -334,11 +325,11 @@ function create(filename::AbstractString, source::Source, layertypes::NamedTuple # Create layers of zero arrays stack = RasterStack(layers, dims; layerdims, layermetadata, missingval) fn = Rasters.write(filename, stack; - chunks, metadata, scale, offset, missingval, coalesceval, verbose, force, coerce, write=write[], kw... + chunks, metadata, scale, offset, missingval, verbose, force, coerce, write=write[], kw... ) do W f(rebuild(stack; data=W)) end # Don't pass in `missingval`, read it again from disk in case it changed - st = RasterStack(fn; source, lazy, metadata, layerdims, coalesceval, dropband, coerce) + st = RasterStack(fn; source, lazy, metadata, layerdims, dropband, coerce) return st end diff --git a/src/extensions.jl b/src/extensions.jl index ad7f85f73..36f20a024 100644 --- a/src/extensions.jl +++ b/src/extensions.jl @@ -117,8 +117,9 @@ Run `using ArchGDAL` to make this method available. $FILENAME_KEYWORD $SUFFIX_KEYWORD - `missingval`: the missing value to use during warping, will default to - `Rasters.missingval(A). -- `coalesceval`: the missing value to mask with after warping + `Rasters.missingval(A). Passing a pair will specify the missing value + to use after warping. + Any additional keywords are passed to `ArchGDAL.Dataset`. 
## Example diff --git a/src/methods/crop_extend.jl b/src/methods/crop_extend.jl index 8e849d178..2026e8451 100644 --- a/src/methods/crop_extend.jl +++ b/src/methods/crop_extend.jl @@ -212,7 +212,6 @@ function _extend_to(A::AbstractRaster, to::DimTuple; missingval, name=name(A), metadata=metadata(A), - coalesceval=Rasters.missingval(A), verbose, fill, kw... diff --git a/src/methods/mosaic.jl b/src/methods/mosaic.jl index 251e189fd..6e4b14ff8 100644 --- a/src/methods/mosaic.jl +++ b/src/methods/mosaic.jl @@ -67,7 +67,6 @@ end mosaic(f::Function, regions; kw...) = _mosaic(f, first(regions), regions; kw...) function _mosaic(f::Function, A1::AbstractRaster, regions; missingval=nokw, - coalesceval=nokw, filename=nothing, suffix=nothing, driver=nokw, @@ -76,7 +75,6 @@ function _mosaic(f::Function, A1::AbstractRaster, regions; kw... ) isnothing(missingval) && throw(ArgumentError("missingval cannot be `nothing` for `mosaic`")) - coalesceval = isnokw(coalesceval) ? Rasters.missingval(first(regions)) : coalesceval missingval = if isnokw(missingval) mv = Rasters.missingval(first(regions)) isnokwornothing(mv) ? 
missing : mv @@ -84,9 +82,13 @@ function _mosaic(f::Function, A1::AbstractRaster, regions; missingval end if !isnothing(filename) && (ismissing(missingval) || isnokwornothing(missingval)) - missingval = _type_missingval(eltype(A1)) + missingval = _type_missingval(eltype(A1)) => missing + end + T = if missingval isa Pair + Base.promote_type(typeof(last(missingval)), Base.promote_eltype(regions...)) + else + Base.promote_type(typeof(missingval), Base.promote_eltype(regions...)) end - T = Base.promote_type(typeof(missingval), Base.promote_eltype(regions...)) dims = _mosaic(Tuple(map(DD.dims, regions))) l1 = first(regions) @@ -94,7 +96,6 @@ function _mosaic(f::Function, A1::AbstractRaster, regions; name=name(l1), fill=missingval, missingval, - coalesceval, driver, options, force diff --git a/src/methods/rasterize.jl b/src/methods/rasterize.jl index b0c4b384f..1e835c6e6 100644 --- a/src/methods/rasterize.jl +++ b/src/methods/rasterize.jl @@ -476,11 +476,10 @@ function alloc_rasterize(f, r::RasterCreator; metadata=r.metadata, suffix=r.suffix, ) - coalesceval = nothing if prod(size(r.to)) == 0 throw(ArgumentError("Destination array is is empty, with size $(size(r.to))). Rasterization is not possible")) end - A = create(r.filename, fill=missingval, eltype, r.to; name, missingval, coalesceval, metadata, suffix) do O + A = create(r.filename, fill=missingval, eltype, r.to; name, missingval => nothing, metadata, suffix) do O f(O) end return A diff --git a/src/methods/replace_missing.jl b/src/methods/replace_missing.jl index 3ce2eef9c..7a820c6e2 100644 --- a/src/methods/replace_missing.jl +++ b/src/methods/replace_missing.jl @@ -32,14 +32,13 @@ function replace_missing(A::AbstractRaster{T}, missingval::MV; end old_missingval = Rasters.missingval(A) missingval = convert(MT, missingval) - coalesceval = nothing repmissing(x) = isequal(x, old_missingval) || ismissing(x) ? missingval : x # Disk-backed arrays need to be lazy, memory-backed don't. 
# But in both cases we make sure we return an array with the missingval # in the eltype, even if there are no missing values in the array. if !isnothing(filename) return create(filename, MT, dims(A); - parent=parent(A), missingval, coalesceval, name=name(A), metadata=metadata(A), kw... + parent=parent(A), missingval, name=name(A), metadata=metadata(A), kw... ) do O O .= repmissing.(A) end diff --git a/src/methods/shared_docstrings.jl b/src/methods/shared_docstrings.jl index 707449f2b..69940e8f4 100644 --- a/src/methods/shared_docstrings.jl +++ b/src/methods/shared_docstrings.jl @@ -136,9 +136,9 @@ const OFFSET_KEYWORD = """ const RAW_KEYWORD = """ - `raw`: Turn of all scaling and masking and load the raw values from disk. - `false` by default. If `true`, `scaled` will be set to `false` and `coalesceval` - will be set to `nothing`. A warning will be printed if `scaled` or `coalesceval` - are manually set to another value. + `false` by default. If `true`, `scaled` will be set to `false` and `missingval` + will to the existing missing value in the file. A warning will be printed if + `scaled` or `missingval` are manually set to another value. """ const SCALED_KEYWORD = """ @@ -160,14 +160,10 @@ const MISSINGVAL_KEYWORD = """ - `missingval`: value representing missing data, normally detected from the file. Set manually when you know the value is not specified or is incorrect. This will *not* change any values in the raster, it simply assigns which value is treated as missing. -""" - -const MASKINGVAL_KEYWORD = """ -- `coalesceval`: A value to convert `missingval` to, by default `missing`. If this is set it - will be the return value of `missingval(raster)` - `coalesceval` becomes the new `missingval`. - Setting `coalesceval` to `nothing` means no masking will occur, and the original `missingval` - will be the final `missingval`. This can give better performance than using `missing`. - Another efficient option is to use e.g. 
`zero(eltype(raster))` to replace missing values with zero. + To specify the outer missing value of a file, use a `Pair`: `missingval=innerval => outerval`. + By default `innerval` will be detected, and `outerval` will be `missing`. + If you want the `innerval` detected automatically, but a custom `outerval`, + pass the `Rasters.missingval` function as the first argument, `missingval=missingval => outerval`. """ const NAME_KEYWORD = """ diff --git a/src/methods/zonal.jl b/src/methods/zonal.jl index 476386688..509a47c33 100644 --- a/src/methods/zonal.jl +++ b/src/methods/zonal.jl @@ -83,7 +83,7 @@ _zonal(f, x::Raster, of::Extents.Extent; skipmissing=true) = _maybe_skipmissing_call(f, crop(x; to=of, touches=true), skipmissing) function _zonal(f, x::RasterStack, ext::Extents.Extent; skipmissing=true) cropped = crop(x; to=ext, touches=true) - prod(size(cropped)) > 0 || return missing + length(cropped) > 0 || return missing return map(cropped) do A _maybe_skipmissing_call(f, A, skipmissing) end @@ -99,7 +99,7 @@ function _zonal(f, x::AbstractRaster, ::GI.AbstractGeometryTrait, geom; skipmissing=true, kw... ) cropped = crop(x; to=geom, touches=true) - prod(size(cropped)) > 0 || return missing + length(cropped) > 0 || return missing masked = mask(cropped; with=geom, kw...) return _maybe_skipmissing_call(f, masked, skipmissing) end @@ -107,10 +107,10 @@ function _zonal(f, st::AbstractRasterStack, ::GI.AbstractGeometryTrait, geom; skipmissing=true, kw... ) cropped = crop(st; to=geom, touches=true) - prod(size(cropped)) > 0 || return map(_ -> missing, st) + length(cropped) > 0 || return map(_ -> missing, st) masked = mask(cropped; with=geom, kw...) 
return map(masked) do A - prod(size(A)) > 0 || return missing + length(A) > 0 || return missing _maybe_skipmissing_call(f, A, skipmissing) end end diff --git a/src/modifieddiskarray.jl b/src/modifieddiskarray.jl index 9ec9bfbdf..a449dc0c8 100644 --- a/src/modifieddiskarray.jl +++ b/src/modifieddiskarray.jl @@ -12,16 +12,15 @@ source_eltype(::NoMod{T}) where T = T struct Mod{T1,T2,Mi,Ma,S,O,F} <: AbstractModifications missingval::Mi - coalesceval::Ma scale::S offset::O coerce::F - function Mod(::Type{T}, missingval, coalesceval, scale, offset, coerce) where T - coalesceval = coalesceval === missingval ? nothing : coalesceval + function Mod{T}(missingval, scale, offset, coerce) where T + missingval = missingval isa Pair && missingval[1] == missingval[2] ? missingval[1] : missingval if isnokw(coerce) || isnothing(coerce) coerce = convert end - vals = map(_nokw2nothing, (missingval, coalesceval, scale, offset)) + vals = map(_nokw2nothing, (missingval, scale, offset)) T1 = _resolve_mod_eltype(T, vals...) new{T1,T,map(typeof, vals)...,typeof(coerce)}(vals..., coerce) end @@ -30,18 +29,24 @@ end Base.eltype(::Mod{T1}) where T1 = T1 source_eltype(::Mod{<:Any,T2}) where T2 = T2 - -function _resolve_mod_eltype(::Type{T}, missingval, coalesceval, scale, offset) where T - T1 = isnothing(coalesceval) ? T : promote_type(T, typeof(coalesceval)) +function _resolve_mod_eltype(::Type{T}, missingval, scale, offset) where T + omv = _outer_missingval(missingval) + T1 = isnothing(omv) ? T : promote_type(T, omv) T2 = isnothing(scale) ? T1 : promote_type(T1, typeof(scale)) T3 = isnothing(offset) ? T2 : promote_type(T2, typeof(offset)) return T3 end missingval(m::Mod) = m.missingval -coalesceval(m::Mod) = isnothing(m.coalesceval) ? 
m.missingval : m.coalesceval missingval(m::NoMod) = m.missingval -coalesceval(m::NoMod) = missingval(m) + +_inner_missingval(m::Mod) = _inner_missingval(m.missingval) +_inner_missingval(mv) = mv +_inner_missingval(mv::Pair) = mv[1] + +_outer_missingval(m::Mod) = _outer_missingval(m.missingval) +_outer_missingval(mv) = mv +_outer_missingval(mv::Pair) = mv[2] struct ModifiedDiskArray{I,T,N,V,M} <: DiskArrays.AbstractDiskArray{T,N} var::V @@ -56,7 +61,6 @@ Base.parent(A::ModifiedDiskArray) = A.var Base.size(A::ModifiedDiskArray, args...) = size(parent(A), args...) filename(A::ModifiedDiskArray) = filename(parent(A)) missingval(A::ModifiedDiskArray) = A.missingval -coalesceval(A::ModifiedDiskArray) = A.coalesceval DiskArrays.haschunks(A::ModifiedDiskArray) = DiskArrays.haschunks(parent(A)) DiskArrays.eachchunk(A::ModifiedDiskArray) = DiskArrays.eachchunk(parent(A)) @@ -112,13 +116,13 @@ function DiskArrays.writeblock!( end Base.@assume_effects :foldable function _applymod(x, m::Mod) - if _ismissing(x, missingval(m)) - coalesceval(m) + if _ismissing(x, _inner_missingval(m)) + _outer_missingval(m) else _scaleoffset(x, m) end end -Base.@assume_effects :foldable _applymod(x, m::NoMod) = x +Base.@assume_effects :foldable _applymod(x, ::NoMod) = x _ismissing(x, mv) = isequal(x, mv) _ismissing(_, ::Nothing) = false @@ -130,20 +134,16 @@ _scaleoffset(x, scale, ::Nothing) = x * scale _scaleoffset(x, ::Nothing, ::Nothing) = x Base.@assume_effects :foldable function _invertmod(::Val{T}, x, m::Mod) where T - tm = if isnothing(m.missingval) - x + tm = if !isnothing(m.missingval) && _ismissing(x, _outer_missingval(m)) + return _inner_missingval(m) else - if _ismissing(x, m.coalesceval) - return m.missingval - else - x - end + x end return _scaleoffset_inv(T, tm, m) end Base.@assume_effects :foldable _invertmod(v, x, m::NoMod) = x -Base.@assume_effects :foldable _scaleoffset_inv(::Type{T}, x, m::Mod) where T = +Base.@assume_effects :foldable _scaleoffset_inv(::Type{T}, x, m::Mod) 
where T = _scaleoffset_inv(m.coerce, T, x, m)::T Base.@assume_effects :foldable _scaleoffset_inv(coerce::Base.Callable, ::Type{T}, x, m::Mod) where T = coerce(T, _scaleoffset_inv1(x, m.scale, m.offset))::T @@ -155,58 +155,51 @@ Base.@assume_effects :foldable _scaleoffset_inv1(x, ::Nothing, ::Nothing) = x function _stack_mods( - eltypes::Vector, metadata::Vector, missingval::Vector, coalesceval; + eltypes::Vector, metadata::Vector, missingval::Vector; scaled, coerce ) map(eltypes, metadata, missingval) do T, md, mv scale, offset = get_scale(md, scaled) - _mod(T, mv, coalesceval, scale, offset, coerce) + _mod(T, mv, scale, offset, coerce) end end function _stack_mods( - eltypes::Vector, metadata::Vector, missingval, coalesceval::Vector; + eltypes::Vector, metadata::Vector, missingval; scaled::Bool, coerce ) - map(eltypes, metadata, coalesceval) do T, md, mk + map(eltypes, metadata) do T, md, mk scale, offset = get_scale(md, scaled) _mod(T, missingval, mk, scale, offset, coerce) end end function _stack_mods( - eltypes::Vector, metadata::Vector, missingval::Vector, coalesceval::Vector; + eltypes::Vector, metadata::Vector, missingval::Vector; scaled::Bool, coerce ) - map(eltypes, metadata, missingval, coalesceval) do T, md, mv, mk + map(eltypes, metadata, missingval) do T, md, mv, mk scale, offset = get_scale(md, scaled) _mod(mv, mk, scale, offset, coerce) end end function _stack_mods( - eltypes::Vector, metadata::Vector, missingval, coalesceval; + eltypes::Vector, metadata::Vector, missingval; scaled::Bool, coerce ) map(eltypes, metadata) do T, md scale, offset = get_scale(md, scaled) - _mod(T, missingval, coalesceval, scale, offset, coerce) + _mod(T, missingval, scale, offset, coerce) end end -function _mod(::Type{T}, metadata, missingval, coalesceval; scaled::Bool, coerce) where T +function _mod(::Type{T}, metadata, missingval; scaled::Bool, coerce) where T scale, offset = get_scale(metadata, scaled) - _mod(T, missingval, coalesceval, scale, offset, coerce) + _mod(T, 
missingval, scale, offset, coerce) end -function _mod(::Type{T}, missingval, coalesceval, scale, offset, coerce) where T - coalesceval = if isnokw(coalesceval) - # If there is no missingval dont mask - isnokwornothing(missingval) ? nothing : missing - else - # Unless coalesceval was passed explicitly - coalesceval === missingval ? nothing : coalesceval - end - if isnokwornothing(coalesceval) && isnokwornothing(scale) && isnokwornothing(offset) +function _mod(::Type{T}, missingval, scale, offset, coerce) where T + if (isnokwornothing(missingval) || !(missingval isa Pair)) && isnokwornothing(scale) && isnokwornothing(offset) return NoMod{T}(missingval) else - return Mod(T, missingval, coalesceval, scale, offset, coerce) + return Mod{T}(missingval, scale, offset, coerce) end end @@ -217,28 +210,15 @@ end return scale, offset end -function _writer_mod(::Type{T}; missingval, coalesceval, scale, offset, coerce) where T - missingval1 = if isnokw(missingval) || isnothing(missingval) - if isnokw(coalesceval) || isnothing(coalesceval) - nothing - else - _type_missingval(T) - end - elseif ismissing(missingval) - _type_missingval(T) +function _writer_mod(::Type{T}; missingval, scale, offset, coerce) where T + missingval1 = if missingval isa Pair + reverse(missingval) + elseif isnokw(missingval) + nothing else missingval end - coalesceval1 = if isnokw(coalesceval) - if Missing <: T - missing - else - nothing - end - else - coalesceval - end - return _mod(T, missingval1, coalesceval1, scale, offset, coerce) + return _mod(T, missingval1, scale, offset, coerce) end _mod_eltype(::AbstractArray{T}, ::NoMod) where T = T diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index cfb507490..ded7d64dd 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -430,7 +430,6 @@ _unuseddimerror(dimname) = error("Dataset contains unused dimension $dimname") function _writevar!(ds::AbstractDataset, source::CDMsource, A::AbstractRaster{T,N}; 
verbose=true, missingval=nokw, - coalesceval=nokw, metadata=nokw, chunks=nokw, chunksizes=_chunks_to_tuple(A, dims(A), chunks), @@ -456,13 +455,12 @@ function _writevar!(ds::AbstractDataset, source::CDMsource, A::AbstractRaster{T, metadata end - coalesceval = isnokw(coalesceval) ? Rasters.missingval(A) : coalesceval missingval = isnokw(missingval) ? Rasters.missingval(A) : missingval missingval = if ismissing(missingval) # See if there is a missing value in metadata mv = Rasters.missingval(metadata) # But only use it if its the right type - mv isa eltype ? mv : _writeable_missing(eltype; verbose=true) + mv isa eltype ? mv : _writeable_missing(eltype; verbose=true) => missing else missingval end @@ -482,7 +480,7 @@ function _writevar!(ds::AbstractDataset, source::CDMsource, A::AbstractRaster{T, attrib["add_offset"] = offset end - mod = _writer_mod(eltype; missingval, coalesceval, scale, offset, coerce) + mod = _writer_mod(eltype; missingval, scale, offset, coerce) if !isnothing(mod.missingval) attrib["_FillValue"] = missingval diff --git a/src/sources/grd.jl b/src/sources/grd.jl index 05f6e576d..2b53d7b46 100644 --- a/src/sources/grd.jl +++ b/src/sources/grd.jl @@ -165,7 +165,6 @@ function Base.write(filename::String, ::GRDsource, A::AbstractRaster; verbose=true, write=true, missingval=nokw, - coalesceval=nokw, chunks=nokw, scale=nokw, offset=nokw, @@ -185,12 +184,15 @@ function Base.write(filename::String, ::GRDsource, A::AbstractRaster; chunks isa NoKW || @warn "specifying chunks not supported for .grd files" missingval = isnokw(missingval) ? Rasters.missingval(A) : missingval - coalesceval = isnokw(coalesceval) ? Rasters.missingval(A) : coalesceval - missingval = if ismissing(missingval) || isnothing(missingval) && !isnothing(coalesceval) + missingval = if ismissing(missingval) # See if there is a missing value in metadata mv = _grd_mv(eltype, metadata(A); verbose=false) # Otherwise define one - isnothing(mv) ? 
_writeable_missing(eltype; verbose) : mv + (isnothing(mv) ? _writeable_missing(eltype; verbose) : mv) => missing + elseif missingval isa Pair && first(missingval) == Rasters.missingval + mv = _grd_mv(eltype, metadata(A); verbose=false) + # Otherwise define one + (isnothing(mv) ? _writeable_missing(eltype; verbose) : mv) => missingval[2] else missingval end @@ -208,7 +210,7 @@ function Base.write(filename::String, ::GRDsource, A::AbstractRaster; filename = splitext(filename)[1] # Data: write a raw gri file from the array - mod = _writer_mod(eltype; missingval, coalesceval, scale, offset, coerce) + mod = _writer_mod(eltype; missingval, scale, offset, coerce) gri_filename = filename * ".gri" isfile(gri_filename) && rm(gri_filename) _write_gri(gri_filename, Val{source_eltype(mod)}(), mod, parent(correctedA)) diff --git a/src/stack.jl b/src/stack.jl index f38b2e864..963f2f85d 100644 --- a/src/stack.jl +++ b/src/stack.jl @@ -165,7 +165,6 @@ $GROUP_KEYWORD - `metadata`: A `Dict` or `DimensionalData.Metadata` object. - `missingval`: a single value for all layers or a `NamedTuple` of missingval for each layer. `nothing` specifies no missing value. -$MASKINGVAL_KEYWORD $CONSTRUCTOR_CRS_KEYWORD $CONSTRUCTOR_MAPPEDCRS_KEYWORD - `refdims`: `Tuple` of `Dimension` that the stack was sliced from. @@ -368,24 +367,22 @@ function RasterStack(filenames::NamedTuple{K,<:Tuple{<:AbstractString,Vararg}}; layermetadata::Union{NoKW,NamedTuple{K}}=nokw, layerdims::Union{NoKW,NamedTuple{K}}=nokw, missingval=nokw, - coalesceval=nokw, replace_missing=nokw, scaled=nokw, raw=false, kw... ) where K _maybe_warn_replace_missing(replace_missing) - scaled, coalesceval = _raw_check(raw, scaled, coalesceval) + scaled, missingval = _raw_check(raw, scaled, missingval) layermissingval = collect(_stack_nt(filenames, missingval)) - layercoalesceval = collect(_stack_nt(filenames, coalesceval)) fn = collect(filenames) layermetadata = layermetadata isa NamedTuple ? 
collect(layermetadata) : map(_ -> NoKW(), fn) layerdims = layerdims isa NamedTuple ? collect(layerdims) : map(_ -> NoKW(), fn) - layers = map(K, fn, layermetadata, layerdims, layermissingval, layercoalesceval) do name, fn, md, d, mv, ma + layers = map(K, fn, layermetadata, layerdims, layermissingval) do name, fn, md, d, mv Raster(fn; source=_sourcetrait(fn, source), - dims=d, name, metadata=md, missingval=mv, coalesceval=ma, scaled, kw... + dims=d, name, metadata=md, missingval=mv, scaled, kw... ) end return RasterStack(NamedTuple{K}(layers); resize, metadata) @@ -397,7 +394,6 @@ function RasterStack(filename::AbstractString; raw::Bool=false, source::Union{Symbol,Source,NoKW}=nokw, missingval=nokw, - coalesceval=nokw, name=nokw, group::Union{Symbol,AbstractString,NoKW}=nokw, scaled::Union{Bool,NoKW}=nokw, @@ -406,7 +402,7 @@ function RasterStack(filename::AbstractString; kw... ) _maybe_warn_replace_missing(replace_missing) - scaled, coalesceval = _raw_check(raw, scaled, coalesceval) + scaled, missingval = _raw_check(raw, scaled, missingval) source = _sourcetrait(filename, source) st = if isdir(filename) && !(source isa Zarrsource) @@ -421,14 +417,14 @@ function RasterStack(filename::AbstractString; name end RasterStack(joinpath.(Ref(filename), filenames); - missingval, coalesceval, scaled, coerce, lazy, dropband, group, kw... + missingval, scaled, coerce, lazy, dropband, group, kw... ) else # Load as a single file if haslayers(source) # With multiple named layers l_st = _layer_stack(filename; - source, name, lazy, group, missingval, coalesceval, scaled, coerce, kw... + source, name, lazy, group, missingval, scaled, coerce, kw... ) # Maybe split the stack into separate arrays to remove extra dims. 
if !isnokw(name) @@ -439,7 +435,7 @@ function RasterStack(filename::AbstractString; else # With bands actings as layers raster = Raster(filename; - source, lazy, missingval, coalesceval, scaled, coerce, dropband=false, + source, lazy, missingval, scaled, coerce, dropband=false, ) RasterStack(raster; kw...) end @@ -508,7 +504,6 @@ function _layer_stack(filename; layermetadata=nokw, layerdims=nokw, missingval=nokw, - coalesceval=nokw, crs=nokw, mappedcrs=nokw, coerce=convert, @@ -541,7 +536,7 @@ function _layer_stack(filename; missingval end eltypes = map(eltype, layers.vars) - mods = _stack_mods(eltypes, layermetadata1, missingval1, coalesceval; scaled, coerce) + mods = _stack_mods(eltypes, layermetadata1, missingval1; scaled, coerce) data = if lazy vars = ntuple(i -> layers.vars[i], length(name)) mods = ntuple(i -> mods[i], length(name)) @@ -554,15 +549,7 @@ function _layer_stack(filename; x isa AbstractArray ? x : fill(x) # Catch an NCDatasets bug end |> NT end - missingval = map(mods) do mod - if isnothing(Rasters.missingval(mod)) - nothing - elseif isnothing(Rasters.coalesceval(mod)) - Rasters.missingval(mod) - else - Rasters.coalesceval(mod) - end - end |> NT + missingval = map(_outer_missingval, mods) |> NT return data, (; dims, refdims, layerdims, metadata, layermetadata=NT(layermetadata1), missingval) end return RasterStack(data; field_kw..., kw...) 
diff --git a/src/utils.jl b/src/utils.jl index 8e9cc8c44..29c613b0b 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -314,14 +314,18 @@ end # Constructor helpers -function _raw_check(raw, scaled, coalesceval) +function _raw_check(raw, scaled, missingval) if raw scaled isa Bool && scaled && @warn "`scaled=true` set to `false` because of `raw=true`" - isnokwornothing(coalesceval) || @warn "`coalesceval=$coalesceval` set to `nothing` because of `raw=true`" - return false, nothing + if missingval isa Pair + @warn "`missingval=$missingval` target value is not used because of `raw=true`" + return false, Rasters.missingval + else + return false, missingval + end else scaled = isnokw(scaled) ? true : scaled - return scaled, coalesceval + return scaled, missingval end end @@ -464,7 +468,11 @@ end _maybe_warn_replace_missing(replace_missing::NoKW) = nothing function _maybe_warn_replace_missing(replace_missing) - @warn "`replace_missing` keyword no longer used. Set `coalesceval` to nothing for no replacement, to `missing` to mask `missingval` with `missing`, or any other value" + @warn """ + `replace_missing` keyword no longer used. Rasters now automatically replaces `missingval` with `missing`. + Set `missingval=Rasters.missingval`, to keep the internal missing value, or to replace with some value besides + `missing` use e.g. `missingval=Rasters.missingval => NaN` for `NaN`. + """ end @noinline _warn_disk() = @warn "Disk-based objects may be very slow here. User `read` first." diff --git a/src/write.jl b/src/write.jl index 7faaafb59..0ad84007b 100644 --- a/src/write.jl +++ b/src/write.jl @@ -61,12 +61,10 @@ Returns `filename`. function Base.write(filename::AbstractString, A::AbstractRaster; source=_sourcetrait(filename), missingval=nokw, - coalesceval=nokw, kw... ) missingval = isnokw(missingval) ? Rasters.missingval(A) : missingval - coalesceval = isnokw(coalesceval) ? missingval : coalesceval - write(filename, _sourcetrait(source), A; missingval, coalesceval, kw...) 
+ write(filename, _sourcetrait(source), A; missingval, kw...) end Base.write(A::AbstractRaster; kw...) = write(filename(A), A; kw...) # Fallback @@ -105,14 +103,12 @@ function Base.write(path::AbstractString, s::AbstractRasterStack; source=_sourcetrait(path, ext), verbose=true, missingval=nokw, - coalesceval=nokw, kw... ) source = _sourcetrait(source) - coalesceval = _stack_nt(s, isnokw(coalesceval) ? Rasters.missingval(s) : coalesceval) - missingval = _stack_missingvals(s, isnokw(missingval) ? coalesceval : missingval) + missingval = _stack_missingvals(s, missingval) if haslayers(source) - write(path, source, s; missingval, coalesceval, kw...) + write(path, source, s; missingval, kw...) else # Otherwise write separate files for each layer if isnothing(ext) @@ -131,9 +127,9 @@ function Base.write(path::AbstractString, s::AbstractRasterStack; if verbose @warn string("Cannot write complete stacks to \"", ext, "\", writing layers as individual files") end - map(keys(s), suffix1, missingval, coalesceval) do key, suf, mv, ma + map(keys(s), suffix1, missingval) do key, suf, mv fn = string(base, suf, ext) - write(fn, source, s[key]; missingval=mv, coalesceval=ma, kw...) + write(fn, source, s[key]; missingval=mv, kw...) 
end |> NamedTuple{keys(s)} end end diff --git a/test/create.jl b/test/create.jl index 687409884..931271c6d 100644 --- a/test/create.jl +++ b/test/create.jl @@ -132,12 +132,11 @@ for ext in (".nc", ".tif", ".grd") @testset "create $ext" begin fn = "created$ext" created = Rasters.create(fn, UInt8, (X(1:10), Y(1:10)); - missingval=0xff, - coalesceval=nothing, + missingval=0xff=>nothing, fill=0x01, force=true ) - @test all(Raster(fn; coalesceval=nothing) .=== 0x01) + @test all(Raster(fn; missingval=missingval=>nothing) .=== 0x01) @test missingval(created) === 0xff if ext == ".grd" @@ -150,7 +149,7 @@ for ext in (".nc", ".tif", ".grd") nothing end @test all(Raster(fn) .=== Int16(2)) - @test missingval(Raster(fn; coalesceval=nothing)) === typemax(Int16) + @test missingval(Raster(fn; missingval=missingval=>nothing)) === typemax(Int16) else @time created = Rasters.create(fn, Int16, (X(1:10), Y(1:10)); missingval=typemax(Int16), @@ -163,7 +162,7 @@ for ext in (".nc", ".tif", ".grd") end @test all(Raster(fn) .=== 3.0) @test all(Raster(fn; scaled=false) .== Int16(-20)) - @test missingval(Raster(fn; coalesceval=nothing, scaled=false)) === typemax(Int16) + @test missingval(Raster(fn; missingval=missingval=>nothing, scaled=false)) === typemax(Int16) end end end @@ -172,7 +171,6 @@ end @testset "create .nc stack" begin created = Rasters.create("created.nc", (a=UInt8, b=Float32), (X(1:10), Y(1:10)); missingval=(a=0xff, b=typemax(Float32)), - coalesceval=nothing, fill=(a=0x01, b=1.0f0), layerdims=(a=(X,), b=(X, Y)), force=true, @@ -182,7 +180,7 @@ end @test size(created.b) == (10, 10) @test all(created.a .=== 0x01) @test all(created.b .=== 1.0f0) - st = RasterStack("created.nc"; coalesceval=nothing) + st = RasterStack("created.nc"; missingval) @test missingval(st) == (a=0xff, b=typemax(Float32)) created = Rasters.create("created.nc", (a=UInt8, b=Float32), (X(1:10), Y(1:10)); @@ -196,13 +194,12 @@ end @test size(created.b) == (10, 10) @test all(created.a .=== 0x01) @test 
all(created.b .=== 1.0f0) - st = RasterStack("created.nc"; coalesceval=nothing) + st = RasterStack("created.nc"; missingval) @test missingval(st) == (a=0xff, b=typemax(Float32)) @testset "with a function" begin created = Rasters.create("created.nc", (a=UInt8, b=Float32), (X(1:10), Y(1:10)); missingval=(a=0xff, b=typemax(Float32)), - coalesceval=nothing, fill=(a=0x01, b=1.0f0), layerdims=(a=(X,), b=(X, Y)), force=true, diff --git a/test/resample.jl b/test/resample.jl index d65b7d760..d1234d349 100644 --- a/test/resample.jl +++ b/test/resample.jl @@ -23,8 +23,8 @@ include(joinpath(dirname(pathof(Rasters)), "../test/test_utils.jl")) end end - cea = Raster(raster_path; missingval=0x00, name=:cea, coalesceval=nothing) - raster_output = resample(cea; res=output_res, crs=output_crs, method, missingval=0x00, coalesceval=nothing) + cea = Raster(raster_path; missingval=0x00, name=:cea, missingval) + raster_output = resample(cea; res=output_res, crs=output_crs, method, missingval=0x00) @testset "missingval propagates" begin @test missingval(resample(cea; res=output_res, crs=output_crs, method)) == 0x00 @@ -157,17 +157,17 @@ include(joinpath(dirname(pathof(Rasters)), "../test/test_utils.jl")) @test dims(resampled_3D, Z) == Z(1:2) end - coalesceval = Rasters.nokw - for coalesceval in (nothing, missing, Rasters.nokw) + maskingval = Rasters.nokw + for maskingval in (nothing, missing, Rasters.nokw) # Resample cea.tif using resample - cea = Raster(raster_path; missingval=0x00, name=:cea, coalesceval) - raster_output = resample(cea; res=output_res, crs=output_crs, method, missingval=0x00, coalesceval) - disk_output = resample(cea; res=output_res, crs=output_crs, method, missingval=0x00, coalesceval, filename="resample.tif") + cea = Raster(raster_path; missingval=0x00=>maskingval, name=:cea) + raster_output = resample(cea; res=output_res, crs=output_crs, method, missingval=0x00 => maskingval) + disk_output = resample(cea; res=output_res, crs=output_crs, method, missingval=0x00 => 
maskingval, filename="resample.tif") - cea_permuted = permutedims(Raster(raster_path; missingval=0x00, name=:cea_permuted, coalesceval), (Y, X)) - permuted_output = resample(cea_permuted, output_res; missingval=0x00, coalesceval, crs=output_crs, method) + cea_permuted = permutedims(Raster(raster_path; missingval=0x00 => maskingval, name=:cea_permuted), (Y, X)) + permuted_output = resample(cea_permuted, output_res; missingval=0x00 => maskingval, crs=output_crs, method) - AG_output1 = if isnothing(coalesceval) + AG_output1 = if isnothing(maskingval) AG_output else replace(AG_output, 0x00 => missing) diff --git a/test/sources/gdal.jl b/test/sources/gdal.jl index c5d611de2..f3986e659 100644 --- a/test/sources/gdal.jl +++ b/test/sources/gdal.jl @@ -28,7 +28,7 @@ gdalpath = maybedownload(url) @testset "cf" begin # This file has no scale/offset so cf does nothing @time cfarray = Raster(gdalpath; missingval=0x00) - @time cf_nomask_array = Raster(gdalpath; coalesceval=nothing) + @time cf_nomask_array = Raster(gdalpath; maskingval=nothing) @time nocfarray = Raster(gdalpath; scaled=false) @time lazycfarray = Raster(gdalpath; lazy=true, missingval=0x00) @time lazynocfarray = Raster(gdalpath; lazy=true, scaled=false) @@ -278,7 +278,7 @@ gdalpath = maybedownload(url) tempfile3 = tempname() * ".tif" Afile = mosaic(first, A1, A2; missingval=0x00, atol=1e-8, filename=tempfile1) Afile2 = mosaic(first, A1, A2; - missingval=0x00, atol=1e-8, filename=tempfile2, coalesceval=missing + missingval=0x00, atol=1e-8, filename=tempfile2, maskingval=missing ) @test missingval(Afile2) === missing Amem = mosaic(first, A1, A2; missingval=0x00, atol=1e-8) @@ -471,7 +471,7 @@ gdalpath = maybedownload(url) filename = tempname() * ".tif" write(filename, A) @test missingval(Raster(filename)) === missing - @test missingval(Raster(filename; coalesceval=nothing)) === typemax(UInt8) + @test missingval(Raster(filename; maskingval=nothing)) === typemax(UInt8) rm(filename) end @@ -521,7 +521,7 @@ gdalpath = 
maybedownload(url) # Handle WorldClim/ucdavis unreliability A = nothing try - A = Raster(WorldClim{Climate}, :tavg; res="10m", month=1, coalesceval=nothing) + A = Raster(WorldClim{Climate}, :tavg; res="10m", month=1, maskingval=nothing) catch end if !isnothing(A) diff --git a/test/sources/grd.jl b/test/sources/grd.jl index 663fa6175..dedf915f6 100644 --- a/test/sources/grd.jl +++ b/test/sources/grd.jl @@ -29,13 +29,16 @@ grdpath = stem * ".gri" @test parent(eagerarray) isa Array end - @testset "coalesceval keyword" begin + @testset "maskingval" begin @time missingarray = Raster(grdpath) @test missingval(missingarray) === missing @test eltype(missingarray) === Union{Missing,Float32} - @time missingarray = Raster(grdpath; coalesceval=nothing) + @time missingarray = Raster(grdpath; missingval) @test missingval(missingarray) === -3.4f38 @test eltype(missingarray) === Float32 + @time missingarray = Raster(grdpath; missingval=missingval => NaN32) + @test missingval(missingarray) === NaN32 + @test eltype(missingarray) === Float32 end @testset "open" begin @@ -178,7 +181,7 @@ grdpath = stem * ".gri" tn = tempname() tempgrd = tn * ".grd" tempgri = tn * ".gri" - Afile = mosaic(first, A1, A2; missingval=0.0f0, atol=1e-1, filename=tempgrd, coalesceval=nothing) + Afile = mosaic(first, A1, A2; missingval=0.0f0, atol=1e-1, filename=tempgrd, maskingval=nothing) Amem = mosaic(first, A1, A2; missingval=0.0f0, atol=1e-1) Atest = grdarray[X(1:80), Y(1:60)] Atest[X(1:26), Y(31:60)] .= 0.0f0 @@ -274,7 +277,7 @@ grdpath = stem * ".gri" gdalfilename = tempname() * ".tif" write(gdalfilename, GDALsource(), grdarray[Band(1)]; force = true) @test (@allocations write(gdalfilename, GDALsource(), grdarray[Band(1)]; force = true)) < 1e4 - gdalarray = Raster(gdalfilename; coalesceval=nothing) + gdalarray = Raster(gdalfilename; maskingval=nothing) # @test convert(ProjString, crs(gdalarray)) == convert(ProjString, EPSG(4326)) @test val(dims(gdalarray, X)) ≈ val(dims(grdarray, X)) @test 
val(dims(gdalarray, Y)) ≈ val(dims(grdarray, Y)) @@ -294,7 +297,7 @@ grdpath = stem * ".gri" @test missingval(Raster(filename)) === missing filename = tempname() * ".grd" write(filename, A) - @test missingval(Raster(filename; coalesceval=nothing)) === typemin(Float32) + @test missingval(Raster(filename; maskingval=nothing)) === typemin(Float32) end end diff --git a/test/sources/ncdatasets.jl b/test/sources/ncdatasets.jl index 8190702b0..b505ab396 100644 --- a/test/sources/ncdatasets.jl +++ b/test/sources/ncdatasets.jl @@ -60,13 +60,13 @@ end @testset "scaling and maskign" begin @time cfarray = Raster(ncsingle) @time cfarray = Raster(ncsingle) - @time cf_nomask_array = Raster(ncsingle; coalesceval=nothing) + @time cf_nomask_array = Raster(ncsingle; maskingval=nothing) @time nocfarray = Raster(ncsingle; scaled=false) - @time nocf_nomask_array = Raster(ncsingle; scaled=false, coalesceval=nothing) + @time nocf_nomask_array = Raster(ncsingle; scaled=false, maskingval=nothing) @time raw_array = Raster(ncsingle; raw=true) @time lazycfarray = Raster(ncsingle; lazy=true, scaled=false) @time lazynocfarray = Raster(ncsingle; lazy=true, scaled=false) - @time lazynocf_nomask_array = Raster(ncsingle; lazy=true, scaled=false, coalesceval=nothing) + @time lazynocf_nomask_array = Raster(ncsingle; lazy=true, scaled=false, maskingval=nothing) @test missingval(cfarray) === missing @test missingval(nocfarray) === missing @test missingval(cf_nomask_array) === 1.0f20 @@ -355,7 +355,7 @@ end nccleaned = replace_missing(ncarray[Ti(1)], -9999.0) write(gdalfilename, nccleaned; force=true) @test (@allocations write(gdalfilename, nccleaned; force=true)) < 1e4 - gdalarray = Raster(gdalfilename; coalesceval=nothing) + gdalarray = Raster(gdalfilename; maskingval=nothing) # gdalarray WKT is missing one AUTHORITY # @test_broken crs(gdalarray) == convert(WellKnownText, EPSG(4326)) # But the Proj representation is the same @@ -371,7 +371,7 @@ end nccleaned = replace_missing(ncarray[Ti(1)], -9999.0) 
write("testgrd.gri", nccleaned; force=true) @test (@allocations write("testgrd.gri", nccleaned; force=true)) < 1e4 - grdarray = Raster("testgrd.gri", coalesceval=nothing); + grdarray = Raster("testgrd.gri", maskingval=nothing); @test crs(grdarray) == convert(ProjString, EPSG(4326)) @test bounds(grdarray) == bounds(nccleaned) @test index(grdarray, Y) ≈ reverse(index(nccleaned, Y)) .- 0.5 diff --git a/test/sources/rasterdatasources.jl b/test/sources/rasterdatasources.jl index c95d53f11..6cd00cf99 100644 --- a/test/sources/rasterdatasources.jl +++ b/test/sources/rasterdatasources.jl @@ -54,7 +54,6 @@ end st = RasterStack(CHELSA{BioClim}, (1, 2); lazy=true, missingval=-Int16(9999), - coalesceval=nothing, metadata=Rasters.NoMetadata(), crs=nothing, mappedcrs=EPSG(4326), From b2cb4b2fc33233eca63fc2462c0fccf18e9750df Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Tue, 3 Dec 2024 09:47:17 +0100 Subject: [PATCH 25/38] one missingval --- src/array.jl | 13 ++++++------- src/methods/shared_docstrings.jl | 21 ++++++++------------- src/stack.jl | 5 +++-- 3 files changed, 17 insertions(+), 22 deletions(-) diff --git a/src/array.jl b/src/array.jl index dbfc84c35..1fdd590b2 100644 --- a/src/array.jl +++ b/src/array.jl @@ -316,8 +316,8 @@ function Raster(ds, filename::AbstractString; mod=nokw, raw=false, )::Raster - scaled, missingval = _raw_check(raw, scaled, missingval) _maybe_warn_replace_missing(replace_missing) + scaled, missingval = _raw_check(raw, scaled, missingval) name1 = filekey(ds, name) source = _sourcetrait(filename, source) data_out, dims_out, metadata_out, missingval_out = _open(source, ds; name=name1, group, mod=NoMod()) do var @@ -325,14 +325,13 @@ function Raster(ds, filename::AbstractString; missingval_out = if isnokw(missingval) # Detect missingval and convert it to missing Rasters.missingval(var, metadata_out) => missing - elseif missingval isa Pair && missingval[1] == Rasters.missingval - # Autodetect first missingval - Rasters.missingval(var, 
metadata_out) => missingval[2] - else - # Use whatever the user passed in + elseif missingval isa Pair missingval + elseif missingval == Rastesr.missingval + Rasters.missingval(var, metadata_out) + else + Rasters.missingval(var, metadata_out) => missingval end - @show missingval missingval_out mod = isnokw(mod) ? _mod(eltype(var), metadata_out, missingval_out; scaled, coerce) : mod data_out = if lazy FileArray{typeof(source)}(var, filename; diff --git a/src/methods/shared_docstrings.jl b/src/methods/shared_docstrings.jl index 69940e8f4..57cfd028d 100644 --- a/src/methods/shared_docstrings.jl +++ b/src/methods/shared_docstrings.jl @@ -115,12 +115,6 @@ const GROUP_KEYWORD = """ at any nested depth, i.e `group=:group1 => :group2 => :group3`. """ -const REPLACE_MISSING_KEYWORD = """ -- `replace_missing`: replace `missingval` with `missing`. This is done lazily if `lazy=true`. - Note that currently for NetCDF and GRIB files `replace_missing` is always true. - In future `replace_missing=false` will also work for these data sources. -""" - const CHECKMEMORY_KEYWORD = """ - `checkmemory`: If `true` (the default), check if there is enough memory for the operation. `false` will ignore memory needs. @@ -157,13 +151,14 @@ const COERCE_KEYWORD = """ """ const MISSINGVAL_KEYWORD = """ -- `missingval`: value representing missing data, normally detected from the file. Set manually - when you know the value is not specified or is incorrect. This will *not* change any - values in the raster, it simply assigns which value is treated as missing. - To specify the outer missing value of a file, use a `Pair`: `missingval=innerval => outerval`. - By default `innerval` will be detected, and `outerval` will be `missing`. - If you want the `innerval` detected automatically, but a custom `outerval`, - pass the `Rasters.missingval` function as the first argument, `missingval=missingval => outerval`. 
+- `missingval`: value representing missing data, normally detected from the file and + automatically converted to `missing`. Setting to an alternate value, such as `0` + or `NaN` may be desirable for improved performance. `nothing` specifies no missing value. + Using the same `missingval` the file already has removes the overhead of replacing it. + If the file has an incorrect value, we can manually define the transformation + as a pair e.g. `correct_value => missing`, `correct_value => NaN` or + `correct_value => correct_value` to keep it the same and remove the overhead of changing it. + When `raw=true` is set, `missingval` is not changed from the value specified in the file. """ const NAME_KEYWORD = """ diff --git a/src/stack.jl b/src/stack.jl index 963f2f85d..c16b445af 100644 --- a/src/stack.jl +++ b/src/stack.jl @@ -163,8 +163,9 @@ Load a file path or a `NamedTuple` of paths as a `RasterStack`, or convert argum Has no effect when `NameTuple` is used - the `NamedTuple` keys are the layer names. $GROUP_KEYWORD - `metadata`: A `Dict` or `DimensionalData.Metadata` object. -- `missingval`: a single value for all layers or a `NamedTuple` of - missingval for each layer. `nothing` specifies no missing value. +$MISSINGVAL_KEYWORD + For `RasterStack` a `NamedTuple` can also be passed if layers + should have different `missingval`. $CONSTRUCTOR_CRS_KEYWORD $CONSTRUCTOR_MAPPEDCRS_KEYWORD - `refdims`: `Tuple` of `Dimension` that the stack was sliced from. 
From 821c640c25d6d936e1f7672fcf7e909b509cf64d Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Mon, 9 Dec 2024 01:55:22 +0100 Subject: [PATCH 26/38] fixes --- ext/RastersArchGDALExt/warp.jl | 15 +-- ext/RastersNCDatasetsExt/ncdatasets_source.jl | 1 - src/modifieddiskarray.jl | 25 +---- src/utils.jl | 99 ++++++++++--------- test/sources/gdal.jl | 8 +- test/sources/grd.jl | 6 +- test/sources/ncdatasets.jl | 10 +- 7 files changed, 75 insertions(+), 89 deletions(-) diff --git a/ext/RastersArchGDALExt/warp.jl b/ext/RastersArchGDALExt/warp.jl index 0e555cbab..e3e1d5d42 100644 --- a/ext/RastersArchGDALExt/warp.jl +++ b/ext/RastersArchGDALExt/warp.jl @@ -17,8 +17,7 @@ end function _warp(A::AbstractRaster, flags::Dict; filename=nothing, suffix="", - missingval=nokw, - maskingval=Rasters.missingval(A), + missingval=Rasters.missingval(A), name=Rasters.name(A), kw... ) @@ -32,19 +31,23 @@ function _warp(A::AbstractRaster, flags::Dict; tempfile = isnothing(filename) ? nothing : tempname() * ".tif" warp_kw = isnothing(filename) || filename == "/vsimem/tmp" ? () : (; dest=filename) # We really need a missingval for `warp`, as it may rotate and add missing value - missingval = if RA.isnokw(missingval) + mv1, mv2 = if RA.isnokw(missingval) if RA.missingval(A) isa Union{Missing,Nothing} RA._type_missingval(Missings.nonmissingtype(eltype(A))) else RA.missingval(A) end - else + elseif missingval isa Pair missingval + elseif missingval isa Missing + RA._type_missingval(Missings.nonmissingtype(eltype(A))), missing + else + missingval, missingval end - out = AG.Dataset(A1; filename=tempfile, missingval, kw...) do dataset + out = AG.Dataset(A1; filename=tempfile, missingval=mv1, kw...) do dataset AG.gdalwarp([dataset], flagvect; warp_kw...) 
do warped # Read the raster lazily, dropping Band if there is none in `A` - raster = Raster(warped; lazy=true, dropband=!hasdim(A, Band()), name, maskingval) + raster = Raster(warped; lazy=true, dropband=!hasdim(A, Band()), name, missingval=mv2) # Either read the MEM dataset to an Array, or keep a filename base raster lazy return isnothing(filename) ? read(raster) : raster end diff --git a/ext/RastersNCDatasetsExt/ncdatasets_source.jl b/ext/RastersNCDatasetsExt/ncdatasets_source.jl index f890349ba..d3a483e44 100644 --- a/ext/RastersNCDatasetsExt/ncdatasets_source.jl +++ b/ext/RastersNCDatasetsExt/ncdatasets_source.jl @@ -32,7 +32,6 @@ function Base.write(filename::AbstractString, source::Source, s::AbstractRasterS append=false, force=false, missingval=nokw, - maskingval=nokw, f=identity, kw... ) where {Source<:NCDsource,K,T} diff --git a/src/modifieddiskarray.jl b/src/modifieddiskarray.jl index a449dc0c8..fc577cc9d 100644 --- a/src/modifieddiskarray.jl +++ b/src/modifieddiskarray.jl @@ -10,13 +10,13 @@ NoMod{T}(::NoKW) where T = NoMod{T}(nothing) Base.eltype(::NoMod{T}) where T = T source_eltype(::NoMod{T}) where T = T -struct Mod{T1,T2,Mi,Ma,S,O,F} <: AbstractModifications +struct Mod{T1,T2,Mi,S,O,F} <: AbstractModifications missingval::Mi scale::S offset::O coerce::F function Mod{T}(missingval, scale, offset, coerce) where T - missingval = missingval isa Pair && missingval[1] == missingval[2] ? missingval[1] : missingval + missingval = missingval isa Pair && missingval[1] === missingval[2] ? missingval[1] : missingval if isnokw(coerce) || isnothing(coerce) coerce = convert end @@ -31,7 +31,7 @@ source_eltype(::Mod{<:Any,T2}) where T2 = T2 function _resolve_mod_eltype(::Type{T}, missingval, scale, offset) where T omv = _outer_missingval(missingval) - T1 = isnothing(omv) ? T : promote_type(T, omv) + T1 = isnothing(omv) ? T : promote_type(T, typeof(omv)) T2 = isnothing(scale) ? T1 : promote_type(T1, typeof(scale)) T3 = isnothing(offset) ? 
T2 : promote_type(T2, typeof(offset)) return T3 @@ -153,25 +153,6 @@ Base.@assume_effects :foldable _scaleoffset_inv1(x, scale, ::Nothing) = x / scal Base.@assume_effects :foldable _scaleoffset_inv1(x, ::Nothing, offset) = x - offset Base.@assume_effects :foldable _scaleoffset_inv1(x, ::Nothing, ::Nothing) = x - -function _stack_mods( - eltypes::Vector, metadata::Vector, missingval::Vector; - scaled, coerce -) - map(eltypes, metadata, missingval) do T, md, mv - scale, offset = get_scale(md, scaled) - _mod(T, mv, scale, offset, coerce) - end -end -function _stack_mods( - eltypes::Vector, metadata::Vector, missingval; - scaled::Bool, coerce -) - map(eltypes, metadata) do T, md, mk - scale, offset = get_scale(md, scaled) - _mod(T, missingval, mk, scale, offset, coerce) - end -end function _stack_mods( eltypes::Vector, metadata::Vector, missingval::Vector; scaled::Bool, coerce diff --git a/src/utils.jl b/src/utils.jl index d1a50767f..3d49a3e1c 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,29 +1,30 @@ # File paths, urls and strings -# filter_ext(path, ext::AbstractString) = -# filter(fn -> splitext(fn)[2] == ext, readdir(path; join=true)) -# filter_ext(path, exts::Union{Tuple,AbstractArray}) = -# filter(fn -> splitext(fn)[2] in exts, readdir(path; join=true)) -# filter_ext(path, ext::Nothing) = readdir(path; join=true) - -# _maybe_add_suffix(filename::Nothing, suffix) = nothing -# _maybe_add_suffix(filename::Nothing, suffix::Union{Nothing,NoKW}) = nothing -# _maybe_add_suffix(filename, suffix::Union{Nothing,NoKW}) = filename -# function _maybe_add_suffix(filename, suffix) -# base, ext = splitext(filename) -# if string(suffix) == "" -# filename -# else -# return string(base, "_", suffix, ext) -# end -# end +filter_ext(path, ext::AbstractString) = + filter(fn -> splitext(fn)[2] == ext, readdir(path; join=true)) +filter_ext(path, exts::Union{Tuple,AbstractArray}) = + filter(fn -> splitext(fn)[2] in exts, readdir(path; join=true)) +filter_ext(path, ext::Nothing) = 
readdir(path; join=true) + +_maybe_add_suffix(filename::Nothing, suffix) = nothing +_maybe_add_suffix(filename::Nothing, suffix::Union{Nothing,NoKW}) = nothing +_maybe_add_suffix(filename, suffix::Union{Nothing,NoKW}) = filename +function _maybe_add_suffix(filename, suffix) + base, ext = splitext(filename) + if string(suffix) == "" + filename + else + return string(base, "_", suffix, ext) + end +end + +# Modified from IsURL.jl, many thanks to @zlatanvasovic +const WINDOWSREGEX = r"^[a-zA-Z]:[\\]" +const URLREGEX = r"^[a-zA-Z][a-zA-Z\d+\-.]*:" -# # Modified from IsURL.jl, many thanks to @zlatanvasovic -# const WINDOWSREGEX = r"^[a-zA-Z]:[\\]" -# const URLREGEX = r"^[a-zA-Z][a-zA-Z\d+\-.]*:" +_isurl(str::AbstractString) = !occursin(WINDOWSREGEX, str) && occursin(URLREGEX, str) -# _isurl(str::AbstractString) = !occursin(WINDOWSREGEX, str) && occursin(URLREGEX, str) function _maybe_use_type_missingval(A::AbstractRaster{T}, source::Source, missingval=nokw) where T if ismissing(Rasters.missingval(A)) newmissingval = missingval isa NoKW ? 
_type_missingval(Missings.nonmissingtype(T)) : missingval @@ -35,18 +36,18 @@ function _maybe_use_type_missingval(A::AbstractRaster{T}, source::Source, missin end end -# cleankeys(name) = (_cleankey(name),) -# function cleankeys(keys::Union{NamedTuple,Tuple,AbstractArray}) -# Tuple(map(_cleankey, keys, ntuple(i -> i, length(keys)))) -# end +cleankeys(name) = (_cleankey(name),) +function cleankeys(keys::Union{NamedTuple,Tuple,AbstractArray}) + Tuple(map(_cleankey, keys, ntuple(i -> i, length(keys)))) +end -# function _cleankey(name::Union{Symbol,AbstractString,Name,NoName}, i=1) -# if name in (NoName(), Symbol(""), Name(Symbol(""))) -# Symbol("layer$i") -# else -# Symbol(name) -# end -# end +function _cleankey(name::Union{Symbol,AbstractString,Name,NoName}, i=1) + if name in (NoName(), Symbol(""), Name(Symbol(""))) + Symbol("layer$i") + else + Symbol(name) + end +end # We often need to convert the locus and the lookup in the same step, # as doing it in the wrong order can give errors. @@ -149,18 +150,18 @@ _extent2dims(to::Extents.Extent, size, res::Union{Nothing,NoKW}; kw...) = function _extent2dims(to::Extents.Extent, size::Union{Nothing,NoKW}, res::Tuple; sampling::Tuple, kw... 
) - ranges = map(values(to), res, sampling) do (start, stop_closed), step, s + ranges = map(values(to), res, sampling) do (start, stop_closed), step, samp stop_open = stop_closed + maybe_eps(stop_closed; grow=false) - length = ceil(Int, (stop_open - start) / r) + length = ceil(Int, (stop_open - start) / step) r = if step >= zero(step) range(; start, step, stop=stop_open) else range(; start=stop_open, step, stop=start) end - if s isa Intervals - if locus(s) isa Start + if samp isa Intervals + if locus(samp) isa Start r[1:end-1] - elseif locus(s) isa End + elseif locus(samp) isa End r[2:end] else # Center r .+ abs(step) / 2 @@ -172,14 +173,14 @@ end function _extent2dims(to::Extents.Extent, size::Tuple, res::Union{Nothing,NoKW}; sampling::Tuple, crs, mappedcrs ) - ranges = map(values(to), size, sampling) do (start, stop_closed), length, sa + ranges = map(values(to), size, sampling) do (start, stop_closed), length, samp stop_open = stop_closed + maybe_eps(stop_closed; grow=false) step = (stop_open - start) / length range(; start, step, length) - if sa isa Points - range(; start, stop, length) + if samp isa Points + range(; start, step, length) else - range(; start, stop, length=length+1)[1:end-1] + range(; start, step, length=length+1)[1:end-1] end end return _extent2dims(to, ranges; sampling, crs, mappedcrs) @@ -459,18 +460,20 @@ end function _without_mapped_crs(f, A::AbstractRaster, mappedcrs::GeoFormat) A = setmappedcrs(A, nothing) x = f(A) - if x isa AbstractRaster - x = setmappedcrs(x, mappedcrs) + return if x isa AbstractRaster + setmappedcrs(x, mappedcrs) + else + x end - return x end function _without_mapped_crs(f, st::AbstractRasterStack, mappedcrs::GeoFormat) - st1 = map(A -> setmappedcrs(A, nothing), st) + st1 = maplayers(A -> setmappedcrs(A, nothing), st) x = f(st1) - if x isa AbstractRasterStack - x = map(A -> setmappedcrs(A, mappedcrs(st)), x) + return if x isa AbstractRasterStack + setmappedcrs(x, mappedcrs(st)) + else + x end - return x end diff --git 
a/test/sources/gdal.jl b/test/sources/gdal.jl index 50271eabd..1f9a3898f 100644 --- a/test/sources/gdal.jl +++ b/test/sources/gdal.jl @@ -28,7 +28,7 @@ gdalpath = maybedownload(url) @testset "cf" begin # This file has no scale/offset so cf does nothing @time cfarray = Raster(gdalpath; missingval=0x00) - @time cf_nomask_array = Raster(gdalpath; maskingval=nothing) + @time cf_nomask_array = Raster(gdalpath; missingval=nothing) @time nocfarray = Raster(gdalpath; scaled=false) @time lazycfarray = Raster(gdalpath; lazy=true, missingval=0x00) @time lazynocfarray = Raster(gdalpath; lazy=true, scaled=false) @@ -278,7 +278,7 @@ gdalpath = maybedownload(url) tempfile3 = tempname() * ".tif" Afile = mosaic(first, A1, A2; missingval=0x00, atol=1e-8, filename=tempfile1) Afile2 = mosaic(first, A1, A2; - missingval=0x00, atol=1e-8, filename=tempfile2, maskingval=missing + missingval=0x00, atol=1e-8, filename=tempfile2, missingval=missing ) @test missingval(Afile2) === missing Amem = mosaic(first, A1, A2; missingval=0x00, atol=1e-8) @@ -471,7 +471,7 @@ gdalpath = maybedownload(url) filename = tempname() * ".tif" write(filename, A) @test missingval(Raster(filename)) === missing - @test missingval(Raster(filename; maskingval=nothing)) === typemax(UInt8) + @test missingval(Raster(filename; missingval=nothing)) === typemax(UInt8) rm(filename) end @@ -521,7 +521,7 @@ gdalpath = maybedownload(url) # Handle WorldClim/ucdavis unreliability A = nothing try - A = Raster(WorldClim{Climate}, :tavg; res="10m", month=1, maskingval=nothing) + A = Raster(WorldClim{Climate}, :tavg; res="10m", month=1, missingval=nothing) catch end if !isnothing(A) diff --git a/test/sources/grd.jl b/test/sources/grd.jl index 214b2e6a4..31411e636 100644 --- a/test/sources/grd.jl +++ b/test/sources/grd.jl @@ -29,7 +29,7 @@ grdpath = stem * ".gri" @test parent(eagerarray) isa Array end - @testset "maskingval" begin + @testset "missingval" begin @time missingarray = Raster(grdpath) @test missingval(missingarray) === 
missing @test eltype(missingarray) === Union{Missing,Float32} @@ -181,7 +181,7 @@ grdpath = stem * ".gri" tn = tempname() tempgrd = tn * ".grd" tempgri = tn * ".gri" - Afile = mosaic(first, A1, A2; missingval=0.0f0, atol=1e-1, filename=tempgrd, maskingval=nothing) + Afile = mosaic(first, A1, A2; missingval=0.0f0, atol=1e-1, filename=tempgrd, missingval=nothing) Amem = mosaic(first, A1, A2; missingval=0.0f0, atol=1e-1) Atest = grdarray[X(1:80), Y(1:60)] Atest[X(1:26), Y(31:60)] .= 0.0f0 @@ -299,7 +299,7 @@ grdpath = stem * ".gri" @test missingval(Raster(filename)) === missing filename = tempname() * ".grd" write(filename, A) - @test missingval(Raster(filename; maskingval=nothing)) === typemin(Float32) + @test missingval(Raster(filename; missingval=nothing)) === typemin(Float32) end end diff --git a/test/sources/ncdatasets.jl b/test/sources/ncdatasets.jl index 74c3144a9..5ed152660 100644 --- a/test/sources/ncdatasets.jl +++ b/test/sources/ncdatasets.jl @@ -60,13 +60,13 @@ end @testset "scaling and maskign" begin @time cfarray = Raster(ncsingle) @time cfarray = Raster(ncsingle) - @time cf_nomask_array = Raster(ncsingle; maskingval=nothing) + @time cf_nomask_array = Raster(ncsingle; missingval=nothing) @time nocfarray = Raster(ncsingle; scaled=false) - @time nocf_nomask_array = Raster(ncsingle; scaled=false, maskingval=nothing) + @time nocf_nomask_array = Raster(ncsingle; scaled=false, missingval=nothing) @time raw_array = Raster(ncsingle; raw=true) @time lazycfarray = Raster(ncsingle; lazy=true, scaled=false) @time lazynocfarray = Raster(ncsingle; lazy=true, scaled=false) - @time lazynocf_nomask_array = Raster(ncsingle; lazy=true, scaled=false, maskingval=nothing) + @time lazynocf_nomask_array = Raster(ncsingle; lazy=true, scaled=false, missingval=nothing) @test missingval(cfarray) === missing @test missingval(nocfarray) === missing @test missingval(cf_nomask_array) === 1.0f20 @@ -355,7 +355,7 @@ end nccleaned = replace_missing(ncarray[Ti(1)], -9999.0) 
write(gdalfilename, nccleaned; force=true) @test (@allocations write(gdalfilename, nccleaned; force=true)) < 1e4 - gdalarray = Raster(gdalfilename; maskingval=nothing) + gdalarray = Raster(gdalfilename; missingval=nothing) # gdalarray WKT is missing one AUTHORITY # @test_broken crs(gdalarray) == convert(WellKnownText, EPSG(4326)) # But the Proj representation is the same @@ -371,7 +371,7 @@ end nccleaned = replace_missing(ncarray[Ti(1)], -9999.0) write("testgrd.gri", nccleaned; force=true) @test (@allocations write("testgrd.gri", nccleaned; force=true)) < 1e4 - grdarray = Raster("testgrd.gri", maskingval=nothing); + grdarray = Raster("testgrd.gri", missingval=nothing); @test crs(grdarray) == convert(ProjString, EPSG(4326)) @test bounds(grdarray) == bounds(nccleaned) @test index(grdarray, Y) ≈ reverse(index(nccleaned, Y)) .- 0.5 From fdf4d3005c4bb20495bf3babda9c497a1a3de234 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Tue, 10 Dec 2024 14:59:43 +0100 Subject: [PATCH 27/38] fix mosaic --- src/methods/mosaic.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/methods/mosaic.jl b/src/methods/mosaic.jl index 8eaae3d61..8b79f3a2f 100644 --- a/src/methods/mosaic.jl +++ b/src/methods/mosaic.jl @@ -100,7 +100,7 @@ function _mosaic(f::Function, A1::AbstractRaster, regions; options, force ) do C - mosaic!(f, C, regions; missingval, kw...) + _mosaic!(f, C, regions; missingval, kw...) 
end end function _mosaic(f::Function, ::AbstractRasterStack, regions; From 29cf6c6ad6e648e18cd8fca42895ec6f02d9e73f Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Thu, 26 Dec 2024 22:21:08 +0100 Subject: [PATCH 28/38] tweaks and bugfixes --- ext/RastersArchGDALExt/gdal_source.jl | 18 ++- ext/RastersArchGDALExt/warp.jl | 4 +- ext/RastersNCDatasetsExt/ncdatasets_source.jl | 4 +- src/array.jl | 63 +++++++--- src/create.jl | 87 ++++++++----- src/filearray.jl | 3 - src/methods/burning/array_init.jl | 14 ++- src/methods/mosaic.jl | 2 +- src/methods/rasterize.jl | 2 +- src/modifieddiskarray.jl | 17 ++- src/sources/commondatamodel.jl | 1 + src/sources/grd.jl | 10 ++ src/stack.jl | 54 ++++---- src/utils.jl | 119 ++++++++++-------- test/create.jl | 17 +-- test/methods.jl | 49 ++++++-- test/rasterize.jl | 18 ++- test/sources/gdal.jl | 59 ++++----- test/sources/ncdatasets.jl | 11 +- test/warp.jl | 8 +- 20 files changed, 356 insertions(+), 204 deletions(-) diff --git a/ext/RastersArchGDALExt/gdal_source.jl b/ext/RastersArchGDALExt/gdal_source.jl index 77341de3d..5c385744b 100644 --- a/ext/RastersArchGDALExt/gdal_source.jl +++ b/ext/RastersArchGDALExt/gdal_source.jl @@ -34,8 +34,8 @@ const GDAL_VIRTUAL_FILESYSTEMS = "/vsi" .* ( "sparse", ) -# Array ######################################################################## +# TODO more cases of return values here, like wrapped disk arrays RA.cleanreturn(A::AG.RasterDataset) = Array(A) RA.haslayers(::GDALsource) = false RA._sourcetrait(A::AG.RasterDataset) = GDALsource() @@ -286,7 +286,7 @@ function RA.missingval(rasterds::AG.RasterDataset, args...) end end -# GDAL always returns well known text +# GDAL always returns well known text crs function RA.crs(raster::AG.RasterDataset, args...) 
WellKnownText(GeoFormatTypes.CRS(), string(AG.getproj(raster.ds))) end @@ -344,6 +344,7 @@ function _missingval_from_gdal(T::Type{<:Integer}, x::Integer; verbose=true) end _missingval_from_gdal(T, x) = x +# Make sure driver is sensible function _check_driver(::Nothing, driver) isnokwornothing(driver) || isempty(driver) ? "MEM" : driver end @@ -374,26 +375,36 @@ function _create_with_driver(f, filename, dims::Tuple, T; _block_template=nothing, kw... ) + # Allow but discourage south-up verbose && _maybe_warn_south_up(dims, verbose, "Creating a South-up raster. You may wish to reverse the `Y` dimension to use conventional North-up") options = isnokwornothing(options) ? Dict{String,String}() : options + # Pairs should not get this far + @assert !(missingval isa Pair) + # If missingval is missing, generate one GDAL can write missingval = ismissing(missingval) ? RA._writeable_missing(T; verbose) : missingval + # Make sure dimensions are valid for GDAL _gdal_validate(dims) + # Move x and y locus to start x, y = map(DD.dims(dims, (XDim, YDim))) do d maybeshiftlocus(Start(), RA.nolookup_to_sampled(d)) end + # Band handling - a 2d raster has 1 band newdims = hasdim(dims, Band()) ? (x, y, DD.dims(dims, Band)) : (x, y) nbands = hasdim(dims, Band) ? length(DD.dims(dims, Band())) : 1 + # Driver detection driver = _check_driver(filename, driver) options_vec = _process_options(driver, options; _block_template, chunks, verbose) gdaldriver = driver isa String ? AG.getdriver(driver) : driver + # Keywords and filenames for AG.create create_kw = (; width=length(x), height=length(y), nbands, dtype=T,) filename = isnothing(filename) ? "" : filename + # Not all drivers can be directly created, some need an intermediate step if AG.shortname(gdaldriver) in GDAL_DRIVERS_SUPPORTING_CREATE AG.create(filename; driver=gdaldriver, options=options_vec, create_kw...) 
do dataset _set_dataset_properties!(dataset, newdims, missingval, scale, offset) @@ -416,6 +427,7 @@ function _create_with_driver(f, filename, dims::Tuple, T; end end +# Makie sure gdal can actually write this raster @noinline function _gdal_validate(dims) all(hasdim(dims, (XDim, YDim))) || throw(ArgumentError("`Raster` must have both an `X` and `Y` to be converted to an ArchGDAL `Dataset`")) if length(dims) === 3 @@ -605,7 +617,7 @@ _maybe_restore_from_gdal(A, dims::Tuple) = _maybe_reorder(permutedims(A, dims), _maybe_restore_from_gdal(A, dims::Union{Tuple{<:XDim,<:YDim,<:Band},Tuple{<:XDim,<:YDim}}) = _maybe_reorder(A, dims) -function _maybe_reorder(A, dims) +function _maybe_reorder(A, dims::Tuple) if all(map(l -> l isa AbstractSampled, lookup(dims, (XDim, YDim)))) && all(map(l -> l isa AbstractSampled, lookup(A, (XDim, YDim)))) reorder(A, dims) diff --git a/ext/RastersArchGDALExt/warp.jl b/ext/RastersArchGDALExt/warp.jl index e3e1d5d42..ca1136a13 100644 --- a/ext/RastersArchGDALExt/warp.jl +++ b/ext/RastersArchGDALExt/warp.jl @@ -45,13 +45,15 @@ function _warp(A::AbstractRaster, flags::Dict; missingval, missingval end out = AG.Dataset(A1; filename=tempfile, missingval=mv1, kw...) do dataset - AG.gdalwarp([dataset], flagvect; warp_kw...) do warped + x = AG.gdalwarp([dataset], flagvect; warp_kw...) do warped # Read the raster lazily, dropping Band if there is none in `A` raster = Raster(warped; lazy=true, dropband=!hasdim(A, Band()), name, missingval=mv2) # Either read the MEM dataset to an Array, or keep a filename base raster lazy return isnothing(filename) ? 
read(raster) : raster end + return x end + # And permute the dimensions back to what they were in A out1 = _maybe_restore_from_gdal(out, dims(A)) out2 = _reset_gdalwarp_sampling(out1, A) diff --git a/ext/RastersNCDatasetsExt/ncdatasets_source.jl b/ext/RastersNCDatasetsExt/ncdatasets_source.jl index d3a483e44..e2e844f69 100644 --- a/ext/RastersNCDatasetsExt/ncdatasets_source.jl +++ b/ext/RastersNCDatasetsExt/ncdatasets_source.jl @@ -43,13 +43,11 @@ function Base.write(filename::AbstractString, source::Source, s::AbstractRasterS end ds = NCD.Dataset(filename, mode; attrib=RA._attribdict(metadata(s))) - maskingval = RA._stack_nt(s, isnokw(maskingval) ? Rasters.missingval(s) : maskingval) - missingval = RA._stack_missingvals(s, isnokw(missingval) ? maskingval : missingval) + missingval = RA._stack_nt(s, isnokw(missingval) ? Rasters.missingval(s) : missingval) try map(keys(s)) do k RA._writevar!(ds, source, s[k]; missingval=missingval[k], - maskingval=maskingval[k], kw... ) end diff --git a/src/array.jl b/src/array.jl index 32d356376..48c64cab4 100644 --- a/src/array.jl +++ b/src/array.jl @@ -239,6 +239,7 @@ struct Raster{T,N,D<:Tuple,R<:Tuple,A<:AbstractArray{T,N},Na,Me,Mi<:Union{T,Noth new{T,N,D,R,A,Na,Me,typeof(missingval1)}(data, dims, refdims, name, metadata, missingval1) end end +# Create a Raster from and AbstractArray and dims function Raster(A::AbstractArray{T,N}, dims::Tuple; refdims=(), name=Symbol(""), @@ -252,12 +253,15 @@ function Raster(A::AbstractArray{T,N}, dims::Tuple; A = isnokw(mappedcrs) ? A : setmappedcrs(A, mappedcrs) return A end +# Create a Raster from and AbstractVector and dims, +# reshaping the Vector to match the dimensions function Raster(A::AbstractArray{T,1}, dims::Tuple{<:Dimension,<:Dimension,Vararg}; kw... )::Raster{T,length(dims)} where T Raster(reshape(A, map(length, dims)), dims; kw...) end Raster(A::AbstractArray{<:Any,1}, dim::Dimension; kw...) = Raster(A, (dim,); kw...) 
+# Load a Raster from a table function Raster(table, dims::Tuple; name=nokw, kw... @@ -268,7 +272,9 @@ function Raster(table, dims::Tuple; A = reshape(cols[name], map(length, dims)) return Raster(A, dims; name, kw...) end +# Load a Raster from another AbstractArray with `dims` as keyword Raster(A::AbstractArray; dims, kw...) = Raster(A, dims; kw...)::Raster +# Load a Raster from another AbstractDimArray function Raster(A::AbstractDimArray; data=parent(A), dims=dims(A), @@ -280,11 +286,13 @@ function Raster(A::AbstractDimArray; )::Raster return Raster(data, dims; refdims, name, metadata, missingval, kw...) end +# Load a Raster from a string filename and predefined dimensions function Raster(filename::AbstractString, dims::Tuple{<:Dimension,<:Dimension,Vararg}; kw... )::Raster Raster(filename; dims, kw...) end +# Load a Raster from a string filename function Raster(filename::AbstractString; source=nokw, kw... @@ -294,6 +302,7 @@ function Raster(filename::AbstractString; Raster(ds, filename; source, kw...) 
end::Raster end +# Load a Raster from an opened Dataset function Raster(ds, filename::AbstractString; dims=nokw, refdims=(), @@ -306,50 +315,76 @@ function Raster(ds, filename::AbstractString; source=nokw, replace_missing=nokw, coerce=convert, - scaled=nokw, - write=false, - lazy=false, - dropband=true, - checkmem=CHECKMEM[], + scaled::Union{Bool,NoKW}=nokw, + verbose::Bool=true, + write::Bool=false, + lazy::Bool=false, + dropband::Bool=true, + checkmem::Bool=CHECKMEM[], + raw::Bool=false, mod=nokw, - raw=false, )::Raster _maybe_warn_replace_missing(replace_missing) - scaled, missingval = _raw_check(raw, scaled, missingval) + # `raw` option will ignore `scaled` and `missingval` + scaled, missingval = _raw_check(raw, scaled, missingval, verbose) + # TODO use a clearer name for this name1 = filekey(ds, name) + # Detect the source from filename source = _sourcetrait(filename, source) + # Open the dataset and variable specified by `name`, at `group` level if provided + # At this level we do not apply `mod`. data_out, dims_out, metadata_out, missingval_out = _open(source, ds; name=name1, group, mod=NoMod()) do var metadata_out = isnokw(metadata) ? _metadata(var) : metadata + # Missingval input options missingval_out = if isnokw(missingval) - # Detect missingval and convert it to missing - Rasters.missingval(var, metadata_out) => missing + mv = Rasters.missingval(var, metadata_out) + isnothing(mv) ? 
nothing : mv => missing + elseif isnothing(missingval) + nothing elseif missingval isa Pair + # Pair: inner and outer missing values are manually defined missingval - elseif missingval == Rastesr.missingval - Rasters.missingval(var, metadata_out) + elseif missingval == Rasters.missingval + # `missingval` func: detect missing value and keep it as-is + mv = Rasters.missingval(var, metadata_out) + mv => mv else + # Otherwise: detect missing value and convert it to `missingval` Rasters.missingval(var, metadata_out) => missingval end + # Generate mod for scaling mod = isnokw(mod) ? _mod(eltype(var), metadata_out, missingval_out; scaled, coerce) : mod + # Define or load the data array data_out = if lazy + # Define a lay FileArray FileArray{typeof(source)}(var, filename; name=name1, group, mod, write ) else modvar = _maybe_modify(var, mod) + # Check the data will fit in memory checkmem && _checkobjmem(modvar) - x = Array(modvar) - x isa AbstractArray ? x : fill(x) # Catch an NCDatasets bug + # Move the modified array to memory + @show mod + Array(modvar) end + # Generate dims dims_out = isnokw(dims) ? _dims(var, crs, mappedcrs) : format(dims, data_out) - data_out, dims_out, metadata_out, missingval + # Return the data to the parent function + mv_outer = _outer_missingval(mod) + data_out, dims_out, metadata_out, mv_outer end + # Use name or an empty Symbol name_out = name1 isa Union{NoKW,Nothing} ? 
Symbol("") : Symbol(name1) + # Define the raster raster = Raster(data_out, dims_out, refdims, name_out, metadata_out, missingval_out) + # Maybe drop a single band dimension return _maybe_drop_single_band(raster, dropband, lazy) end filekey(ds, name) = name filekey(filename::String) = Symbol(splitext(basename(filename))[1]) +# Add a `dimconstructor` method so `AbstractProjected` lookups create a Raster +# TODO this should be unwrapped to `DD.lookupconstructor` to avoid future ambiguities DD.dimconstructor(::Tuple{<:Dimension{<:AbstractProjected},Vararg{Dimension}}) = Raster \ No newline at end of file diff --git a/src/create.jl b/src/create.jl index 7edf35bf3..51b99e4c1 100644 --- a/src/create.jl +++ b/src/create.jl @@ -124,24 +124,32 @@ RasterStack("created.nc") └───────────────────────────────────────────────────────────────────────────────────────────┘ ``` """ +# Create with a function that will be called to fill the raster create(f::Function, args...; kw...) = create(args...; kw..., f) +# Create from Raster or RasterStack with no filename create(A::Union{AbstractRaster,AbstractRasterStack}; kw...) = create(nothing, A; kw...) +# Create from type and Raster or RasterStack with no filename create(T::Union{Type,TypeNamedTuple}, A::Union{Tuple,Extents.Extent,AbstractRaster,AbstractRasterStack}; kw...) = create(nothing, T, A; kw...) +# Create from filename and Raster function create(filename::Union{AbstractString,Nothing}, A::AbstractRaster{T}; missingval=missingval(A), # Only take missingval here when types are not specified kw... ) where T create(filename, T, A; missingval, kw...) end +# Create from filename and RasterStack function create(filename::Union{AbstractString,Nothing}, st::AbstractRasterStack; missingval=missingval(st), # Only take missingval here when types are not specified kw... ) create(filename, map(eltype, layers(st)), st; missingval, kw...) end +# Create Raster from filename, type and a Raster, using its +# parent as the parent type so e.g. 
CuArray will propagate create(filename::Union{AbstractString,Nothing}, T::Union{Type,TypeNamedTuple}, A::AbstractRaster; kw...) = create(filename, T, dims(A); parent=parent(A), kw...) +# Create RasterStack from filename, NamedTuple type and dims function create(filename::Union{AbstractString,Nothing}, T::NamedTuple{K1}, st::AbstractRasterStack{K2}; metadata=metadata(st), layerdims=nokw, @@ -156,6 +164,7 @@ function create(filename::Union{AbstractString,Nothing}, T::NamedTuple{K1}, st:: parent=first(parent(st)), metadata, missingval, layerdims, layermetadata, kw... ) end +# Create from filename, type and dims function create(filename::AbstractString, T::Union{Type,NamedTuple}, dims::Tuple; lazy=true, parent=nokw, @@ -168,6 +177,7 @@ function create(filename::AbstractString, T::Union{Type,NamedTuple}, dims::Tuple # This calls `create` in the /sources file for this `source` return create(filename, source, T, dims; lazy, missingval, kw...) end +# Create from filename, type and extent with res or size keywords function create(filename::Union{AbstractString,Nothing}, T::Union{Type,NamedTuple}, extent::Extents.Extent; res=nokw, size=nokw, @@ -184,40 +194,46 @@ function create(filename::Union{AbstractString,Nothing}, T::Union{Type,NamedTupl end return create(filename, T, dims; kw...) end +# Create in-memory Raster from type and dims function create(filename::Nothing, ::Type{T}, dims::Tuple; missingval=nokw, fill=nokw, parent=nokw, verbose=true, + f=identity, # Not used but here for consistency suffix=nokw, force=false, chunks=nokw, driver=nokw, options=nokw, - f=identity, kw... ) where T if verbose - isnokw(chunks) || @warn "`chunks` of `$chunks` found. But `chunks` are not used for in-memory rasters" + isnokw(chunks) || _warn_keyword_not_used("chunks", chunks) + isnokw(driver) || _warn_keyword_not_used("driver", driver) + isnokw(options) || _warn_keyword_not_used("options", options) end - missingval = missingval isa Pair ? 
last(missingval) : missingval - eltype = isnokwornothing(missingval) ? T : promote_type(T, typeof(missingval)) - data = if isnokw(parent) || isnothing(parent) + # Split inner and outer missingval if needed + # For in-memory rasters we just ignore the inner value + mv_inner, mv_outer = missingval isa Pair ? missingval : (missingval, missingval) + # Get the element type from T and outer missingval + eltype = isnokwornothing(mv_outer) ? T : promote_type(T, typeof(mv_outer)) + # Create the array + data = if isnokwornothing(parent) Array{eltype}(undef, dims) else similar(parent, eltype, size(dims)) end # Maybe fill the array - if !(isnokw(fill) || isnothing(fill)) - fill!(data, fill) - end - + isnokwornothing(fill) || fill!(data, fill) + # Wrap as a Raster + rast = Raster(data, dims; missingval=mv_outer, kw...) # Apply `f` before returning - rast = Raster(data, dims; missingval, kw...) f(rast) return rast end +# Create in-memory RasterStack from type and dims function create(filename::Nothing, types::NamedTuple, dims::Tuple; suffix=keys(types), force=false, @@ -246,7 +262,8 @@ function create(filename::Nothing, types::NamedTuple, dims::Tuple; f(st) return st end -function create(filename::AbstractString, source::Source, ::Type{T}, dims::DimTuple; +# Create on-disk Raster from filename, source, type and dims +function create(filename::AbstractString, source::Source, ::Type{T}, dims::Tuple; name=nokw, missingval=nokw, fill=nokw, @@ -262,8 +279,8 @@ function create(filename::AbstractString, source::Source, ::Type{T}, dims::DimTu f=identity, kw... 
) where T + mv_inner, mv_outer = _missingval_pair(missingval) eltype = Missings.nonmissingtype(T) - if isnokw(fill) || isnothing(fill) write = false # Leave fill undefined A = FillArrays.Zeros{eltype}(map(length, dims)) @@ -273,17 +290,19 @@ function create(filename::AbstractString, source::Source, ::Type{T}, dims::DimTu A = FillArrays.Fill{eltype}(fill, map(length, dims)) end # Create layers of zero arrays - rast = Raster(A, dims; name, missingval) + rast = Raster(A, dims; name, missingval=mv_inner) Rasters.write(f, filename, source, rast; - eltype, chunks, metadata, scale, offset, missingval, verbose, force, coerce, write, kw... + eltype, chunks, metadata, scale, offset, missingval=mv_inner, verbose, force, coerce, write, kw... ) do W # write returns a variable, wrap it as a Raster - f(rebuild(rast, W)) + f(rebuild(rast; data=W)) end # Don't pass in `missingval`, read it again from disk in case it changed - return Raster(filename; source, lazy, metadata, dropband, coerce) + r = Raster(filename; source, lazy, metadata, dropband, coerce, missingval=mv_outer) + return r end -function create(filename::AbstractString, source::Source, layertypes::NamedTuple, dims::DimTuple; +# Create on-disk RasterStack from filename, source, type and dims +function create(filename::AbstractString, source::Source, layertypes::NamedTuple, dims::Tuple; lazy=true, verbose=true, force=false, @@ -306,30 +325,34 @@ function create(filename::AbstractString, source::Source, layertypes::NamedTuple else layerdims end + mv_inner, mv_outer = _missingval_pair(missingval) + # Only write value to disk variables if fill is defined + write = !isnokwornothing(fill) + # Make sure fill is per-layer + fill_nt = fill isa NamedTuple ? fill : map(_ -> fill, layertypes) # Define no-allocation layers with FillArrays - # We need a fill value for each layer - fill = fill isa NamedTuple ? 
fill : map(_ -> fill, layertypes) - # We update `write` in the closure below - write = Ref(false) - layers = map(layertypes, layerdims, fill) do T, ld, f + layers = map(layertypes, layerdims, fill_nt) do T, ld, fi lks = lookup(dims, ld) eltype = Missings.nonmissingtype(T) size = map(length, lks) - if isnokwornothing(f) + if isnokwornothing(fi) A = FillArrays.Zeros{eltype}(size) else - write[] = true # Write fill to disk - A = FillArrays.Fill{eltype}(f, size) + A = FillArrays.Fill{eltype}(fi, size) end end # Create layers of zero arrays - stack = RasterStack(layers, dims; layerdims, layermetadata, missingval) - fn = Rasters.write(filename, stack; - chunks, metadata, scale, offset, missingval, verbose, force, coerce, write=write[], kw... + st1 = RasterStack(layers, dims; layerdims, layermetadata, missingval=mv_inner) + fn = Rasters.write(filename, st1; + chunks, metadata, scale, offset, missingval=mv_inner, verbose, force, coerce, write, kw... ) do W - f(rebuild(stack; data=W)) + # write returns a variable, wrap it as a RasterStack + f(rebuild(st1; data=W)) end - # Don't pass in `missingval`, read it again from disk in case it changed - st = RasterStack(fn; source, lazy, metadata, layerdims, dropband, coerce) - return st + st2 = RasterStack(fn; source, lazy, metadata, layerdims, dropband, coerce, missingval=mv_outer) + return st2 end + +_warn_keyword_not_used(label, obj) = @warn "`$label` of `$obj` found. But `chunks` are not used for in-memory rasters" +_missingval_pair(missingval::Pair) = missingval +_missingval_pair(missingval) = missingval => missingval \ No newline at end of file diff --git a/src/filearray.jl b/src/filearray.jl index 64be5098e..a599731d1 100644 --- a/src/filearray.jl +++ b/src/filearray.jl @@ -105,9 +105,6 @@ DA.eachchunk(A::RasterDiskArray) = A.eachchunk DA.readblock!(A::RasterDiskArray, aout, r::AbstractUnitRange...) = aout .= parent(A)[r...] DA.writeblock!(A::RasterDiskArray, v, r::AbstractUnitRange...) = parent(A)[r...] 
.= v -# Already open, doesn't use `name` -_open(f, ::Source, A::RasterDiskArray; name=nokw, group=nokw) = f(A) - struct MissingDiskArray{T,N,V} <: DiskArrays.AbstractDiskArray{T,N} var::V end diff --git a/src/methods/burning/array_init.jl b/src/methods/burning/array_init.jl index f8f1e1fc8..a700a6454 100644 --- a/src/methods/burning/array_init.jl +++ b/src/methods/burning/array_init.jl @@ -18,14 +18,14 @@ function _init_bools(to::Nothing, T::Type, data; # Get the extent of the geometries ext = _extent(data; geometrycolumn) isnothing(ext) && throw(ArgumentError("no recognised dimensions, extent or geometry")) - return _init_bools(ext, T, data; collapse, res, size) + return _init_bools(ext, T, data; collapse, res, size, kw...) end function _init_bools(to::Extents.Extent, T::Type, data; collapse=nokw, size=nokw, res=nokw, sampling=nokw, kw... ) # Convert the extent to dims (there must be `res` or `size` in `kw`) - ext = _extent2dims(to; size, res, sampling, kw...) - _init_bools(to, ext, T, data; collapse, kw...) + dims = _extent2dims(to; size, res, sampling, kw...) + _init_bools(to, dims, T, data; collapse, kw...) end function _init_bools(to, dims::DimTuple, T::Type, data; collapse::Union{Bool,Nothing,NoKW}=nokw, kw... @@ -43,12 +43,16 @@ function _init_bools(to, dims::DimTuple, T::Type, data; end end -function _alloc_bools(to, dims::DimTuple, ::Type{BitArray}; missingval::Bool=false, metadata=NoMetadata(), kw...) +function _alloc_bools(to, dims::DimTuple, ::Type{BitArray}; + missingval::Bool=false, metadata=NoMetadata(), kw... +) # Use a BitArray vals = missingval == false ? falses(size(dims)) : trues(size(dims)) return Raster(vals, dims; missingval, metadata) end -function _alloc_bools(to, dims::DimTuple, ::Type{<:Array{T}}; missingval=false, metadata=NoMetadata(), kw...) where T +function _alloc_bools(to, dims::DimTuple, ::Type{<:Array{T}}; + missingval=false, metadata=NoMetadata(), kw... 
+) where T # Use an Array data = fill!(Raster{T}(undef, dims), missingval) return rebuild(data; missingval, metadata) diff --git a/src/methods/mosaic.jl b/src/methods/mosaic.jl index 8b79f3a2f..d4d39629c 100644 --- a/src/methods/mosaic.jl +++ b/src/methods/mosaic.jl @@ -167,7 +167,7 @@ mosaic!(f::Function, dest::RasterStackOrArray, regions::RasterStackOrArray...; k _mosaic!(f, dest, regions; kw...) function _mosaic!(f::Function, A::AbstractRaster{T}, regions::Union{Tuple,AbstractArray}; - missingval=missingval(A), atol=maybe_eps(T) + missingval=missingval(A), atol=nothing ) where T isnokwornothing(missingval) && throw(ArgumentError("destination array must have a `missingval`")) _without_mapped_crs(A) do A1 diff --git a/src/methods/rasterize.jl b/src/methods/rasterize.jl index 6c12a2eae..a79345820 100644 --- a/src/methods/rasterize.jl +++ b/src/methods/rasterize.jl @@ -479,7 +479,7 @@ function alloc_rasterize(f, r::RasterCreator; if prod(size(r.to)) == 0 throw(ArgumentError("Destination array is is empty, with size $(size(r.to))). 
Rasterization is not possible")) end - A = create(r.filename, fill=missingval, eltype, r.to; name, missingval => nothing, metadata, suffix) do O + A = create(r.filename, fill=missingval, eltype, r.to; name, missingval, metadata, suffix) do O f(O) end return A diff --git a/src/modifieddiskarray.jl b/src/modifieddiskarray.jl index fc577cc9d..f281dd3a4 100644 --- a/src/modifieddiskarray.jl +++ b/src/modifieddiskarray.jl @@ -44,9 +44,9 @@ _inner_missingval(m::Mod) = _inner_missingval(m.missingval) _inner_missingval(mv) = mv _inner_missingval(mv::Pair) = mv[1] -_outer_missingval(m::Mod) = _outer_missingval(m.missingval) -_outer_missingval(mv) = mv +_outer_missingval(m::AbstractModifications) = _outer_missingval(m.missingval) _outer_missingval(mv::Pair) = mv[2] +_outer_missingval(mv) = mv struct ModifiedDiskArray{I,T,N,V,M} <: DiskArrays.AbstractDiskArray{T,N} var::V @@ -153,17 +153,15 @@ Base.@assume_effects :foldable _scaleoffset_inv1(x, scale, ::Nothing) = x / scal Base.@assume_effects :foldable _scaleoffset_inv1(x, ::Nothing, offset) = x - offset Base.@assume_effects :foldable _scaleoffset_inv1(x, ::Nothing, ::Nothing) = x -function _stack_mods( - eltypes::Vector, metadata::Vector, missingval::Vector; +function _stack_mods(eltypes::Vector, metadata::Vector, missingval::AbstractVector; scaled::Bool, coerce ) - map(eltypes, metadata, missingval) do T, md, mv, mk + map(eltypes, metadata, missingval) do T, md, mv scale, offset = get_scale(md, scaled) - _mod(mv, mk, scale, offset, coerce) + _mod(T, mv, scale, offset, coerce) end end -function _stack_mods( - eltypes::Vector, metadata::Vector, missingval; +function _stack_mods(eltypes::Vector, metadata::Vector, missingval::Pair; scaled::Bool, coerce ) map(eltypes, metadata) do T, md @@ -177,7 +175,8 @@ function _mod(::Type{T}, metadata, missingval; scaled::Bool, coerce) where T _mod(T, missingval, scale, offset, coerce) end function _mod(::Type{T}, missingval, scale, offset, coerce) where T - if (isnokwornothing(missingval) 
|| !(missingval isa Pair)) && isnokwornothing(scale) && isnokwornothing(offset) + if (isnokwornothing(missingval) || !(missingval isa Pair && !(isnothing(last(missingval))))) && + isnokwornothing(scale) && isnokwornothing(offset) return NoMod{T}(missingval) else return Mod{T}(missingval, scale, offset, coerce) diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index 30c17cc09..9a4dc8990 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -462,6 +462,7 @@ function _writevar!(ds::AbstractDataset, source::CDMsource, A::AbstractRaster{T, metadata end + @assert !(missingval isa Pair) missingval = isnokw(missingval) ? Rasters.missingval(A) : missingval missingval = if ismissing(missingval) # See if there is a missing value in metadata diff --git a/src/sources/grd.jl b/src/sources/grd.jl index ee1ba4a2d..c653d43dd 100644 --- a/src/sources/grd.jl +++ b/src/sources/grd.jl @@ -41,6 +41,16 @@ filename(grd::GRDdataset) = grd.filename filekey(grd::GRDdataset, name::NoKW) = get(attrib(grd), "layername", Symbol("")) filekey(A::RasterDiskArray{GRDsource}, name) = filekey(A.attrib, name) +# Already open, doesn't use `name` +function _open(f, ::GRDsource, A::RasterDiskArray{GRDsource}; + name=nokw, + group=nokw, + mod=NoMod(), + kw... +) + cleanreturn(f(_maybe_modify(A, mod))) +end + Base.eltype(::GRDdataset{T}) where T = T function Base.size(grd::GRDdataset) ncols = parse(Int, grd.attrib["ncols"]) diff --git a/src/stack.jl b/src/stack.jl index c80009ec5..8fe81df14 100644 --- a/src/stack.jl +++ b/src/stack.jl @@ -314,7 +314,7 @@ function RasterStack(table, dims::Tuple; layers = map(name) do k col = Tables.getcolumn(table, k) reshape(col, map(length, dims)) - end |> NamedTuple{name} + end |> NamedTuple{cleankeys(name)} end return RasterStack(layers, dims; kw...) end @@ -365,7 +365,7 @@ function RasterStack( name=map(filekey, filenames), kw... ) - RasterStack(NamedTuple{cleankeys(Tuple(name))}(filenames); kw...) 
+ RasterStack(NamedTuple{cleankeys(name)}(filenames); kw...) end function RasterStack(filenames::NamedTuple{K,<:Tuple{<:AbstractString,Vararg}}; source=nokw, @@ -377,10 +377,11 @@ function RasterStack(filenames::NamedTuple{K,<:Tuple{<:AbstractString,Vararg}}; replace_missing=nokw, scaled=nokw, raw=false, + verbose=true, kw... ) where K _maybe_warn_replace_missing(replace_missing) - scaled, missingval = _raw_check(raw, scaled, missingval) + scaled, missingval = _raw_check(raw, scaled, missingval, verbose) layermissingval = collect(_stack_nt(filenames, missingval)) fn = collect(filenames) @@ -389,7 +390,7 @@ function RasterStack(filenames::NamedTuple{K,<:Tuple{<:AbstractString,Vararg}}; layers = map(K, fn, layermetadata, layerdims, layermissingval) do name, fn, md, d, mv Raster(fn; source=_sourcetrait(fn, source), - dims=d, name, metadata=md, missingval=mv, scaled, kw... + dims=d, name, metadata=md, missingval=mv, scaled, verbose, kw... ) end return RasterStack(NamedTuple{K}(layers); resize, metadata) @@ -405,11 +406,12 @@ function RasterStack(filename::AbstractString; group::Union{Symbol,AbstractString,NoKW}=nokw, scaled::Union{Bool,NoKW}=nokw, coerce=nokw, - replace_missing=nokw, + verbose::Bool=true, + replace_missing=nokw, # deprecated kw... ) _maybe_warn_replace_missing(replace_missing) - scaled, missingval = _raw_check(raw, scaled, missingval) + scaled, missingval = _raw_check(raw, scaled, missingval, verbose) source = _sourcetrait(filename, source) st = if isdir(filename) && !(source isa Zarrsource) @@ -527,39 +529,49 @@ function _layer_stack(filename; dimdict = _dimdict(ds, crs, mappedcrs) refdims = isnokw(refdims) || isnothing(refdims) ? () : refdims metadata = isnokw(metadata) ? _metadata(ds) : metadata - layerdims = isnokw(layerdims) ? _layerdims(ds; layers, dimdict) : layerdims - dims = _sort_by_layerdims(isnokw(dims) ? _dims(ds, dimdict) : dims, layerdims) - layermetadata1 = if isnokw(layermetadata) + layerdims_vec = isnokw(layerdims) ? 
_layerdims(ds; layers, dimdict) : layerdims + dims = _sort_by_layerdims(isnokw(dims) ? _dims(ds, dimdict) : dims, layerdims_vec) + layermetadata_vec = if isnokw(layermetadata) _layermetadata(ds; layers) else layermetadata isa NamedTuple ? collect(layermetadata) : map(_ -> NoKW(), fn) end name = Tuple(map(Symbol, layers.names)) NT = NamedTuple{name} - missingval1 = if isnokw(missingval) - map(Rasters.missingval, layers.vars, layermetadata1) + layer_mvs = map(Rasters.missingval, layers.vars, layermetadata_vec) + missingval_vec = if isnokw(missingval) + layer_mvs .=> missing elseif missingval isa NamedTuple - keys(missingval1) == name || throw(ArgumentError("`missingval` names $(keys(missingval)) do not match layer names $name")) - collect(missingval) + keys(missingval) == name || throw(ArgumentError("`missingval` names $(keys(missingval)) do not match layer names $name")) + layer_mvs .=> collect(missingval) + elseif missingval === Rasters.missingval + layer_mvs else - missingval - end - eltypes = map(eltype, layers.vars) - mods = _stack_mods(eltypes, layermetadata1, missingval1; scaled, coerce) + layer_mvs .=> (missingval,) # Wrap in case its not iterable + end::Vector + eltype_vec = map(eltype, layers.vars) + mod_vec = _stack_mods(eltype_vec, layermetadata_vec, missingval_vec; scaled, coerce) data = if lazy vars = ntuple(i -> layers.vars[i], length(name)) - mods = ntuple(i -> mods[i], length(name)) + mods = ntuple(i -> mod_vec[i], length(name)) FileStack{typeof(source)}(ds, filename; name, group, mods, vars) else - map(layers.vars, layermetadata1, mods) do var, md, mod + map(layers.vars, layermetadata_vec, mod_vec) do var, md, mod modvar = _maybe_modify(var, mod) checkmem && _checkobjmem(modvar) x = Array(modvar) x isa AbstractArray ? 
x : fill(x) # Catch an NCDatasets bug end |> NT end - missingval = map(_outer_missingval, mods) |> NT - return data, (; dims, refdims, layerdims, metadata, layermetadata=NT(layermetadata1), missingval) + mv_outer = NT(map(_outer_missingval, mod_vec)) + return data, (; + dims, + refdims, + layerdims=NT(layerdims_vec), + metadata, + layermetadata=NT(layermetadata_vec), + missingval=mv_outer, + ) end return RasterStack(data; field_kw..., kw...) end diff --git a/src/utils.jl b/src/utils.jl index 3d49a3e1c..ceb9ef904 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -119,73 +119,85 @@ function _fix_missingval(::Type{T}, missingval::M) where {T,M} end -# EPS - -maybe_eps(dims::DimTuple; kw...) = map(maybe_eps, dims; kw...) -maybe_eps(dim::Dimension; kw...) = maybe_eps(eltype(dim); kw...) -maybe_eps(x; kw...) = maybe_eps(typeof(x); kw...) -maybe_eps(::Type; kw...) = nothing -maybe_eps(T::Type{<:AbstractFloat}; kw...) = _default_eps(T; kw...) - -_default_eps(T::Type{<:Float32}; grow=true) = grow ? 100eps(T) : eps(T) -_default_eps(T::Type{<:Float64}; grow=true) = grow ? 1000eps(T) : eps(T) -_default_eps(T::Type{<:Integer}) = T(1) -_default_eps(::Type) = nothing - # Extents function _extent2dims(to::Extents.Extent; - size=nokw, res=nokw, crs=nokw, mappedcrs=nokw, sampling=nokw, + size=nokw, res=nokw, crs=nokw, mappedcrs=nokw, sampling=nokw, kw... ) sampling = _match_to_extent(to, isnokw(sampling) ? Intervals(Start()) : sampling) - _extent2dims(to, size, res; crs, mappedcrs, sampling) + _extent2dims(to, size, res; crs, mappedcrs, sampling, kw...) end _extent2dims(to::Extents.Extent, size::Union{Nothing,NoKW}, res::Union{Nothing,NoKW}; kw...) = throw(ArgumentError("Pass either `size` or `res` keywords or a `Tuple` of `Dimension`s for `to`.")) _extent2dims(to::Extents.Extent, size, res; kw...) = _size_and_res_error() -_extent2dims(to::Extents.Extent, size::Union{Nothing,NoKW}, res; kw...) = - _extent2dims(to, size, _match_to_extent(to, res); kw...) 
-_extent2dims(to::Extents.Extent, size, res::Union{Nothing,NoKW}; kw...) = - _extent2dims(to, _match_to_extent(to, size), res; kw...) -function _extent2dims(to::Extents.Extent, size::Union{Nothing,NoKW}, res::Tuple; - sampling::Tuple, kw... +function _extent2dims(to::Extents.Extent, size::Union{Nothing,NoKW}, res; + sampling::Tuple, closed=true, kw... ) - ranges = map(values(to), res, sampling) do (start, stop_closed), step, samp - stop_open = stop_closed + maybe_eps(stop_closed; grow=false) - length = ceil(Int, (stop_open - start) / step) - r = if step >= zero(step) - range(; start, step, stop=stop_open) - else - range(; start=stop_open, step, stop=start) - end + res = _match_to_extent(to, res) + ranges = map(values(to), res, sampling) do (start, stop), step, samp + @assert step >= 0 "only positive `res` are supported, got $step" if samp isa Intervals - if locus(samp) isa Start - r[1:end-1] - elseif locus(samp) isa End - r[2:end] - else # Center - r .+ abs(step) / 2 + if locus(samp) isa End + reverse(range(; start=stop+step, step=-step, stop=start+step)) + else + r = range(; start, step, stop) + if locus(samp) isa Start + r + else # Center + r .+ step / 2 + end end + else + range(; start, step, stop) end end return _extent2dims(to, ranges; sampling, kw...) end -function _extent2dims(to::Extents.Extent, size::Tuple, res::Union{Nothing,NoKW}; - sampling::Tuple, crs, mappedcrs +function _extent2dims(to::Extents.Extent, size, res::Union{Nothing,NoKW}; + sampling::Tuple, crs, mappedcrs, closed=true, kw... 
) - ranges = map(values(to), size, sampling) do (start, stop_closed), length, samp - stop_open = stop_closed + maybe_eps(stop_closed; grow=false) - step = (stop_open - start) / length - range(; start, step, length) + size = _match_to_extent(to, size) + ranges = map(values(to), size, sampling) do (start, stop), length, samp + r1 = range(; start, stop, length) if samp isa Points - range(; start, step, length) + r1 else - range(; start, step, length=length+1)[1:end-1] + # We need to buffer extent for a closed interval input so that e.g. + # The raster will actually contain points of the extent, as raster + # pixels are closed/open intervals not closed/closed. + # Its hard to add a very small amount to any fp number and have + # it propagate through to the final extent. But this setup seems to work. + # We use the step or r1 to offset the end point of r, the buffer with 10 float + # steps, which seems to be enough. But it's arbitrary and count be revised. + nfloatsteps = 10 + step = (stop - start) / length + if locus(samp) isa End + start_open = if (closed && start isa AbstractFloat) + prevfloat(start + step, nfloatsteps) + else + start + step + end + reverse(range(; start=stop, stop=start_open, length)) + else + stop_open = if closed && stop isa AbstractFloat + nextfloat(stop - step, nfloatsteps) + else + stop - step + end + r = range(; start, stop=stop_open, length) + if locus(samp) isa Start + r + else # Center + r .+ (step / 2) + end + end end end return _extent2dims(to, ranges; sampling, crs, mappedcrs) end -function _extent2dims(::Extents.Extent{K}, ranges; crs, mappedcrs, sampling::Tuple) where K +function _extent2dims(::Extents.Extent{K}, ranges; + crs, mappedcrs, sampling::Tuple, kw... +) where K crs = isnokw(crs) ? nothing : crs mappedcrs = isnokw(mappedcrs) ? 
nothing : mappedcrs emptydims = map(name2dim, K) @@ -325,16 +337,19 @@ end # Constructor helpers -function _raw_check(raw, scaled, missingval) +function _raw_check(raw, scaled, missingval, verbose) if raw - scaled isa Bool && scaled && @warn "`scaled=true` set to `false` because of `raw=true`" - if missingval isa Pair - @warn "`missingval=$missingval` target value is not used because of `raw=true`" - return false, Rasters.missingval - else - return false, missingval + # Scaled is false if raw is true + scaled isa Bool && scaled && verbose && @warn "`scaled=true` set to `false` because of `raw=true`" + # Only missingval of `nothing` has a meaning with `raw=true`, + # it turns off missingval completely. Other msissingval values are + # ignored and a warning is thrown unless verbose=false + if !isnokwornothing(missingval) + verbose && @warn "`missingval=$missingval` target value is not used because of `raw=true`" end + return false, isnothing(missingval) ? nothing : Rasters.missingval else + # Otherwise scaled is true and missingval is unchanged scaled = isnokw(scaled) ? 
true : scaled return scaled, missingval end @@ -470,7 +485,7 @@ function _without_mapped_crs(f, st::AbstractRasterStack, mappedcrs::GeoFormat) st1 = maplayers(A -> setmappedcrs(A, nothing), st) x = f(st1) return if x isa AbstractRasterStack - setmappedcrs(x, mappedcrs(st)) + setmappedcrs(x, mappedcrs) else x end diff --git a/test/create.jl b/test/create.jl index 931271c6d..b578abc4d 100644 --- a/test/create.jl +++ b/test/create.jl @@ -129,16 +129,17 @@ end ext = ".nc" for ext in (".nc", ".tif", ".grd") - @testset "create $ext" begin - fn = "created$ext" + @testset "create $ext" begin + fn = tempname() * ext created = Rasters.create(fn, UInt8, (X(1:10), Y(1:10)); - missingval=0xff=>nothing, + missingval=0xff, fill=0x01, force=true ) - @test all(Raster(fn; missingval=missingval=>nothing) .=== 0x01) + @test all(Raster(fn) .=== 0x01) @test missingval(created) === 0xff + fn = tempname() * ext if ext == ".grd" created = Rasters.create(fn, Int16, (X(1:10), Y(1:10)); missingval=typemax(Int16), @@ -149,7 +150,7 @@ for ext in (".nc", ".tif", ".grd") nothing end @test all(Raster(fn) .=== Int16(2)) - @test missingval(Raster(fn; missingval=missingval=>nothing)) === typemax(Int16) + @test missingval(Raster(fn; missingval)) === typemax(Int16) else @time created = Rasters.create(fn, Int16, (X(1:10), Y(1:10)); missingval=typemax(Int16), @@ -162,12 +163,11 @@ for ext in (".nc", ".tif", ".grd") end @test all(Raster(fn) .=== 3.0) @test all(Raster(fn; scaled=false) .== Int16(-20)) - @test missingval(Raster(fn; missingval=missingval=>nothing, scaled=false)) === typemax(Int16) + @test missingval(Raster(fn; missingval, scaled=false)) === typemax(Int16) end end end - @testset "create .nc stack" begin created = Rasters.create("created.nc", (a=UInt8, b=Float32), (X(1:10), Y(1:10)); missingval=(a=0xff, b=typemax(Float32)), @@ -184,11 +184,12 @@ end @test missingval(st) == (a=0xff, b=typemax(Float32)) created = Rasters.create("created.nc", (a=UInt8, b=Float32), (X(1:10), Y(1:10)); - 
missingval=(a=0xff, b=typemax(Float32)), + missingval=(a=0xff, b=typemax(Float32)) => missing, fill=(a=0x01, b=1.0f0), layerdims=(a=(X,), b=(X, Y)), force=true, ) + @test missingval(created) === missing @test size(created.a) == (10,) @test size(created.b) == (10, 10) diff --git a/test/methods.jl b/test/methods.jl index 3abb0a5d1..c2e15124f 100644 --- a/test/methods.jl +++ b/test/methods.jl @@ -90,16 +90,51 @@ end @test all(boolmask(se2, alllayers=false) .=== [true true; true false]) @test dims(boolmask(ga)) === dims(ga) x = boolmask(polygon; res=1.0, boundary=:touches) - @test x == trues(X(Projected(-20:1.0:0.0; sampling=Intervals(Start()), crs=nothing)), Y(Projected(10.0:1.0:30.0; sampling=Intervals(Start()), crs=nothing))) + @test x == trues(X(Projected(-20:1.0:0.0; sampling=Intervals(Start()), crs=nothing)), + Y(Projected(10.0:1.0:30.0; sampling=Intervals(Start()), crs=nothing))) @test all(x .!= boolmask(polygon; res=1.0, invert=true, boundary=:touches)) @test parent(x) isa BitMatrix # With a :geometry axis - x = boolmask([polygon, polygon]; collapse=false, res=1.0, boundary=:touches) - @test all(x .!= boolmask([polygon, polygon]; collapse=false, res=1.0, invert=true, boundary=:touches)) - @test eltype(x) == Bool - @test size(x) == (21, 21, 2) - @test sum(x) == 882 - @test parent(x) isa BitArray{3} + x1 = boolmask([polygon, polygon]; collapse=false, res=1.0, boundary=:touches) + x2 = boolmask([polygon, polygon]; collapse=false, res=1.0, boundary=:touches, sampling=Intervals(Center())) + x3 = boolmask([polygon, polygon]; collapse=false, res=1.0, boundary=:touches, sampling=Intervals(End())) + @test extent(x1) == extent(x2) == extent(x3) == Extent(X = (-20.0, 1.0), Y = (10.0, 31.0), geometry = (1, 2)) + x4 = boolmask([polygon, polygon]; collapse=false, size=(21, 21), boundary=:touches) + x5 = boolmask([polygon, polygon]; collapse=false, size=(21, 21), boundary=:touches, sampling=Intervals(Center())) + x6 = boolmask([polygon, polygon]; collapse=false, size=(21, 
21), boundary=:touches, sampling=Intervals(End())) + xs = (x1, x2, x3, x4, x5, x6) + @test all(x1 .!= boolmask([polygon, polygon]; collapse=false, res=1.0, invert=true, boundary=:touches)) + @test sampling(x1, X) isa Intervals{Start} + @test sampling(x2, X) isa Intervals{Center} + @test sampling(x3, X) isa Intervals{End} + @test sampling(x4, X) isa Intervals{Start} + @test sampling(x5, X) isa Intervals{Center} + @test sampling(x6, X) isa Intervals{End} + for x in xs + @test eltype(x1) == Bool + @test size(x) == (21, 21, 2) + @test sum(x) == 882 + @test parent(x) isa BitArray{3} + @test eltype(x) == Bool + end + @testset "size adds nextfloat" begin + s = boolmask([polygon, polygon]; collapse=false, size=(21, 21), boundary=:touches) + bounds(s, X)[1] == -20.0 + bounds(s, X)[2] > 0.0 + bounds(s, Y)[1] == 10.0 + bounds(s, Y)[2] > 30.0 + c = boolmask([polygon, polygon]; collapse=false, size=(21, 21), boundary=:touches, sampling=Intervals(Center())) + bounds(c, X)[1] == -20.0 + bounds(c, X)[2] > 0.0 + bounds(c, Y)[1] == 10.0 + bounds(c, Y)[2] > 30.0 + e = boolmask([polygon, polygon]; collapse=false, size=(21, 21), boundary=:touches, sampling=Intervals(End())) + bounds(e, X)[1] < -20.0 + bounds(e, X)[2] == 0.0 + bounds(e, Y)[1] < 10.0 + bounds(e, Y)[2] == 30.0 + end + x = boolmask([polygon, polygon]; collapse=true, res=1.0, boundary=:touches) @test all(x .!= boolmask([polygon, polygon]; collapse=true, res=1.0, invert=true, boundary=:touches)) @test size(x) == (21, 21) diff --git a/test/rasterize.jl b/test/rasterize.jl index 919583b87..105f37ce6 100644 --- a/test/rasterize.jl +++ b/test/rasterize.jl @@ -277,8 +277,8 @@ end size=(250, 250), fill=UInt8(1), missingval=UInt8(0), ); # using Plots - # heatmap(parent(parent(rasters_raster))) - # heatmap(reverse(gdal_raster[:, :, 1]; dims=2)) + # Plots.heatmap(parent(parent(rasters_raster))) + # Plots.heatmap(reverse(gdal_raster[:, :, 1]; dims=2)) # Same results as GDAL @test sum(gdal_raster) == sum(rasters_raster) @test 
reverse(gdal_raster[:, :, 1]; dims=2) == rasters_raster @@ -289,11 +289,15 @@ end rasters_touches_raster = rasterize(last, shphandle.shapes; size=(250, 250), fill=UInt64(1), missingval=UInt64(0), boundary=:touches ) + # Plots.heatmap(reverse(gdal_touches_raster[:, :, 1]) + # Plots.heatmap(parent(rasters_touches_raster)) + # missingval(rasters_touches_raster) # Not quite the same answer as GDAL @test sum(gdal_touches_raster) == sum(rasters_touches_raster) @test reverse(gdal_touches_raster[:, :, 1], dims=2) == rasters_touches_raster # Test that its knwon to be off by 2: - @test count(reverse(gdal_touches_raster[:, :, 1], dims=2) .== rasters_touches_raster) == length(rasters_touches_raster) + @test count(reverse(gdal_touches_raster[:, :, 1], dims=2) .== rasters_touches_raster) == + length(rasters_touches_raster) # Two pixels differ in the angled line, top right # using Plots # Plots.heatmap(reverse(gdal_touches_raster[:, :, 1], dims=2)) @@ -402,7 +406,9 @@ end @test sum(skipmissing(prod_r)) == (12 * 1 + 8 * 2 + 8 * 3 + 12 * 4) + (4 * 1 * 2 + 4 * 2 * 3 + 4 * 3 * 4) - prod_st = rasterize(prod, polygons; res=5, fill=(a=1:4, b=4:-1:1), missingval=missing, boundary=:center, threaded) + prod_st = rasterize(prod, polygons; + res=5, fill=(a=1:4, b=4:-1:1), missingval=missing, boundary=:center, threaded + ) @test_broken all(prod_st.a .=== rot180(parent(prod_st.b))) @test all(prod_r .=== prod_st.a) @@ -429,8 +435,8 @@ end # The outlines of these plots should exactly mactch, # with three values of 2 on the diagonal # using Plots - # Plots.plot(reduced_raster; clims=(0, 3)) - # Plots.plot!(polygons; opacity=0.3, fillcolor=:black) + # Plots.plot(reduced_raster_sum_touches; clims=(0, 3)) + # Plots.plot(polygons; opacity=0.3, fillcolor=:black) reduced_center = rasterize(sum, polygons; res=5, fill=1, boundary=:center, threaded) reduced_touches = rasterize(sum, polygons; res=5, fill=1, boundary=:touches, threaded) reduced_inside = rasterize(sum, polygons; res=5, fill=1, 
boundary=:inside, threaded) diff --git a/test/sources/gdal.jl b/test/sources/gdal.jl index 1f9a3898f..871d6c321 100644 --- a/test/sources/gdal.jl +++ b/test/sources/gdal.jl @@ -7,7 +7,7 @@ include(joinpath(dirname(pathof(Rasters)), "../test/test_utils.jl")) url = "https://download.osgeo.org/geotiff/samples/gdal_eg/cea.tif" gdalpath = maybedownload(url) -@testset "Raster" begin +#@testset "Raster" begin @test_throws ArgumentError Raster("notafile.tif") @time gdalarray = Raster(gdalpath; name=:test) @@ -16,7 +16,7 @@ gdalpath = maybedownload(url) @testset "lazyness" begin # Eager is the default - @test parent(gdalarray) isa Array + @test parent(gdalarray) isa Array # its a reshaped array now @test parent(lazyarray) isa DiskArrays.AbstractDiskArray @test parent(eagerarray) isa Array @testset "lazy broadcast" begin @@ -27,19 +27,19 @@ gdalpath = maybedownload(url) @testset "cf" begin # This file has no scale/offset so cf does nothing - @time cfarray = Raster(gdalpath; missingval=0x00) + @time cfarray = Raster(gdalpath) @time cf_nomask_array = Raster(gdalpath; missingval=nothing) - @time nocfarray = Raster(gdalpath; scaled=false) - @time lazycfarray = Raster(gdalpath; lazy=true, missingval=0x00) - @time lazynocfarray = Raster(gdalpath; lazy=true, scaled=false) - @test parent(cfarray) isa Base.ReshapedArray{Union{UInt8,Missing},2} - @test parent(cf_nomask_array) isa Array{UInt8,2} - @test parent(nocfarray) isa Array{UInt8,2} + @time rawarray = Raster(gdalpath; raw=true) + @time lazycfarray = Raster(gdalpath; lazy=true) + @time lazyrawarray = Raster(gdalpath; lazy=true, raw=true) + @test parent(cfarray) isa Matrix{UInt8} + @test parent(cf_nomask_array) isa Matrix{UInt8} + @test parent(rawarray) isa Matrix{UInt8} open(lazycfarray) do A - @test parent(A) isa DiskArrays.SubDiskArray{Union{Missing,UInt8}} - @test parent(parent(A)) isa Rasters.ModifiedDiskArray{false,Union{Missing,UInt8}} + @test parent(A) isa DiskArrays.SubDiskArray{UInt8} + @test parent(parent(A)) isa 
ArchGDAL.RasterDataset{UInt8} end - open(lazynocfarray) do A + open(lazyrawarray) do A @test parent(A) isa DiskArrays.SubDiskArray{UInt8} @test parent(parent(A)) isa ArchGDAL.RasterDataset{UInt8} end @@ -133,7 +133,7 @@ gdalpath = maybedownload(url) @testset "other fields" begin # This file has an incorrect missing value - @test missingval(gdalarray) === nothing + @test missingval(gdalarray) === missing @test metadata(gdalarray) isa Metadata{GDALsource,Dict{String,Any}} @test basename(metadata(gdalarray)["filepath"]) == "cea.tif" metadata(gdalarray)["filepath"] @@ -151,7 +151,7 @@ gdalpath = maybedownload(url) @testset "custom keywords" begin customgdalarray = Raster(gdalpath; name=:test, crs=EPSG(1000), mappedcrs=EPSG(4326), refdims=(Ti(),), - write=true, lazy=true, dropband=false, replace_missing=true, + write=true, lazy=true, dropband=false, ) @test name(customgdalarray) == :test @test refdims(customgdalarray) == (Ti(),) @@ -163,12 +163,12 @@ gdalpath = maybedownload(url) @test mappedcrs(dims(customgdalarray, Y)) == EPSG(4326) @test mappedcrs(dims(customgdalarray, X)) == EPSG(4326) @test parent(customgdalarray) isa FileArray - @test eltype(customgdalarray) == UInt8 + @test eltype(customgdalarray) == Union{UInt8,Missing} # Needs to be separate as it overrides crs/mappedcrs dimsgdalarray = Raster(gdalpath; dims=(Z(), X(), Y()), ) - @test dims(dimsgdalarray) isa Tuple{<:Z,X,Y} + @test dims(dimsgdalarray) isa Tuple{Z,X,Y} end @testset "indexing" begin @@ -269,7 +269,7 @@ gdalpath = maybedownload(url) rm(tempfile) end - @testset "mosaic" begin + #@testset "mosaic" begin @time gdalarray = Raster(gdalpath; name=:test) A1 = gdalarray[X(1:300), Y(1:200)] A2 = gdalarray[X(57:500), Y(101:301)] @@ -277,9 +277,7 @@ gdalpath = maybedownload(url) tempfile2 = tempname() * ".tif" tempfile3 = tempname() * ".tif" Afile = mosaic(first, A1, A2; missingval=0x00, atol=1e-8, filename=tempfile1) - Afile2 = mosaic(first, A1, A2; - missingval=0x00, atol=1e-8, filename=tempfile2, 
missingval=missing - ) + Afile2 = mosaic(first, A1, A2; atol=1e-8, filename=tempfile2) @test missingval(Afile2) === missing Amem = mosaic(first, A1, A2; missingval=0x00, atol=1e-8) Atest = gdalarray[X(1:500), Y(1:301)] @@ -295,12 +293,12 @@ gdalpath = maybedownload(url) @testset "conversion to Raster" begin geoA = gdalarray[X(1:50), Y(1:1), Band(1)] @test size(geoA) == (50, 1) - @test eltype(geoA) <: UInt8 + @test eltype(geoA) <: Union{UInt8,Missing} @time geoA isa Raster{UInt8,1} - @test dims(geoA) isa Tuple{<:X,Y} - @test refdims(geoA) isa Tuple{<:Band} + @test dims(geoA) isa Tuple{X,Y} + @test refdims(geoA) isa Tuple{Band} @test metadata(geoA) == metadata(gdalarray) - @test missingval(geoA) === nothing + @test missingval(geoA) === missing @test name(geoA) == :test end @@ -311,7 +309,11 @@ gdalpath = maybedownload(url) filename = tempname() * ".asc" @time write(filename, gdalarray; force=true) saved1 = Raster(filename); - @test all(saved1 .== gdalarray) + @test all( + parent(saved1 .=== gdalarray) + parent(saved1) + parent(gdalarray) + ) # @test typeof(saved1) == typeof(geoA) @test val(dims(saved1, X)) ≈ val(dims(gdalarray, X)) @test val(dims(saved1, Y)) ≈ val(dims(gdalarray, Y)) @@ -585,7 +587,7 @@ gdalpath = maybedownload(url) end -@testset "RasterStack" begin +#@testset "RasterStack" begin @time gdalstack = RasterStack((a=gdalpath, b=gdalpath)) @test length(layers(gdalstack)) == 2 @@ -711,7 +713,7 @@ end base, ext = splitext(filename) filename_b = string(base, "_b", ext) saved = read(Raster(filename_b)) - @test all(saved .== geoA) + @test all(saved .=== geoA) end @testset "write multiple files with custom suffix" begin @@ -740,7 +742,6 @@ end gdalstack2 = RasterStack(filenames; lazy=true) @test DiskArrays.eachchunk(gdalstack2[:b])[1] == (1:128, 1:128) end - end @testset "show" begin @@ -842,7 +843,7 @@ end end end -@testset "series" begin +#@testset "series" begin gdalser = RasterSeries([gdalpath, gdalpath], (Ti(),); mappedcrs=EPSG(4326), name=:test) @test 
read(gdalser[Ti(1)]) == read(Raster(gdalpath; mappedcrs=EPSG(4326), name=:test)) @test read(gdalser[Ti(1)]) == read(Raster(gdalpath; mappedcrs=EPSG(4326), name=:test)) diff --git a/test/sources/ncdatasets.jl b/test/sources/ncdatasets.jl index 5ed152660..3725c9ac6 100644 --- a/test/sources/ncdatasets.jl +++ b/test/sources/ncdatasets.jl @@ -3,7 +3,7 @@ using Rasters, DimensionalData, Test, Statistics, Dates, CFTime, Plots using Rasters.Lookups, Rasters.Dimensions using Rasters.DiskArrays import ArchGDAL, NCDatasets -using Rasters: FileArray, FileStack, NCDsource, crs, bounds, name, trim +using Rasters: FileArray, FileStack, NCDsource, crs, bounds, name, trim, metadata testdir = realpath(joinpath(dirname(pathof(Rasters)), "../test")) include(joinpath(testdir, "test_utils.jl")) @@ -44,7 +44,7 @@ stackkeys = ( ) end -@testset "Raster" begin +#@testset "Raster" begin @time ncarray = Raster(ncsingle); @time lazyarray = Raster(ncsingle; lazy=true) @time eagerarray = Raster(ncsingle; lazy=false) @@ -58,7 +58,8 @@ end @time read(lazyarray); end - @testset "scaling and maskign" begin @time cfarray = Raster(ncsingle) + @testset "scaling and maskign" begin + @time cfarray = Raster(ncsingle) @time cfarray = Raster(ncsingle) @time cf_nomask_array = Raster(ncsingle; missingval=nothing) @time nocfarray = Raster(ncsingle; scaled=false) @@ -69,8 +70,8 @@ end @time lazynocf_nomask_array = Raster(ncsingle; lazy=true, scaled=false, missingval=nothing) @test missingval(cfarray) === missing @test missingval(nocfarray) === missing - @test missingval(cf_nomask_array) === 1.0f20 - @test missingval(nocf_nomask_array) === 1.0f20 + @test missingval(cf_nomask_array) === nothing + @test missingval(nocf_nomask_array) === nothing @test missingval(raw_array) === 1.0f20 @test all(skipmissing(cfarray) .=== skipmissing(nocfarray)) @test parent(cfarray) isa Array{Union{Float32,Missing}} diff --git a/test/warp.jl b/test/warp.jl index df64ccf2b..95a5d4438 100644 --- a/test/warp.jl +++ b/test/warp.jl @@ 
-19,13 +19,13 @@ gdalpath = maybedownload(url) warped = warp(r, Dict(:t_srs => "EPSG:25832")) @test warped isa Raster @test size(warped) == (720, 721) - # the crs is way off, the image is rotated - all four corners should be black + # the crs is rotatedso the image is rotated an all four corners should be black missingval(warped) === nothing @test warped[1, 1] === warped[1, end] === warped[end, 1] === warped[end, end] === 0xff # now compute mean squared error of the back transformation - warped_back = Rasters.trim(warp(warped, Dict(:t_srs => crs_), res=map(step, lookup(r)))) + warped_back = Rasters.trim(warp(warped, Dict(:t_srs => crs_), res=map(step, lookup(r)), missingval=0xff)) # subtracting UInts brings us into hell -> Int # we also need to shrink the range because of some bleed during warp - diff_ = Int.(warped_back[2:end-1, 2:end-1]) .- r - @test sum(x->x^2, diff_) / prod(size(diff_)) < 600 + diff_ = parent(warped_back[2:end-1, 2:end-1]) .- r + @test sum(x -> x^2, diff_) / prod(size(diff_)) < 600 end From d4ea7997770bfc8c9b9844cf379d33e39c627b5c Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Thu, 9 Jan 2025 15:17:29 +0100 Subject: [PATCH 29/38] bugfix everything --- ext/RastersArchGDALExt/gdal_source.jl | 70 +++++++--------- ext/RastersArchGDALExt/warp.jl | 16 +--- ext/RastersNCDatasetsExt/ncdatasets_source.jl | 3 +- src/array.jl | 13 +-- src/create.jl | 2 +- src/methods/mosaic.jl | 16 ++-- src/modifieddiskarray.jl | 29 ++++--- src/sources/commondatamodel.jl | 28 +++---- src/sources/grd.jl | 24 ++---- src/utils.jl | 2 +- test/create.jl | 7 +- test/resample.jl | 21 +++-- test/runtests.jl | 4 +- test/sources/commondatamodel.jl | 2 +- test/sources/gdal.jl | 80 +++++++++---------- test/sources/grd.jl | 28 +++---- test/sources/ncdatasets.jl | 65 +++++++-------- test/sources/zarr.jl | 23 ++++-- test/warp.jl | 15 ++-- 19 files changed, 209 insertions(+), 239 deletions(-) diff --git a/ext/RastersArchGDALExt/gdal_source.jl b/ext/RastersArchGDALExt/gdal_source.jl 
index 5c385744b..99bf0cb84 100644 --- a/ext/RastersArchGDALExt/gdal_source.jl +++ b/ext/RastersArchGDALExt/gdal_source.jl @@ -61,20 +61,13 @@ function Base.write(filename::AbstractString, ::GDALsource, A::AbstractRaster{T} A1 = _maybe_permute_to_gdal(A) # Missing values - missingval = isnokw(missingval) ? RA.missingval(A) : missingval - missingval = if ismissing(missingval) - # See if there is a missing value in metadata - # But only use it if its the right type - RA._writeable_missing(eltype; verbose=true) - else - missingval - end + missingval_pair = RA._write_missingval_pair(A, missingval; eltype, verbose) _create_with_driver(filename, dims(A1), eltype; - missingval, _block_template=A1, scale, offset, verbose, kw... + missingval=missingval_pair[1], _block_template=A1, scale, offset, verbose, kw... ) do dataset if write - mod = RA._writer_mod(eltype; missingval, scale, offset, coerce) + mod = RA._mod(eltype, missingval_pair, scale, offset, coerce) open(A1; write=true) do O R = RA._maybe_modify(AG.RasterDataset(dataset), mod) R .= parent(O) @@ -209,15 +202,15 @@ function RA._dims(raster::AG.RasterDataset, crs=nokw, mappedcrs=nokw) end # TODO make metadata optional, its slow to get -function RA._metadata(raster::AG.RasterDataset, args...) - band = AG.getband(raster.ds, 1) +function RA._metadata(rds::AG.RasterDataset, args...) + band = AG.getband(rds.ds, 1) metadata = RA._metadatadict(GDALsource()) # color = AG.getname(AG.getcolorinterp(band)) scale = AG.getscale(band) offset = AG.getoffset(band) # norvw = AG.noverview(band) units = AG.getunittype(band) - filelist = AG.filelist(raster) + filepath = _getfilepath(rds) # Set metadata if they are not default values if scale != oneunit(scale) metadata["scale"] = scale @@ -228,8 +221,8 @@ function RA._metadata(raster::AG.RasterDataset, args...) 
if units != "" metadata["units"] = units end - if length(filelist) > 0 - metadata["filepath"] = first(filelist) + if !isnothing(filepath) + metadata["filepath"] = filepath end return metadata end @@ -238,41 +231,26 @@ end # Create a Raster from a dataset RA.Raster(ds::AG.Dataset; kw...) = Raster(AG.RasterDataset(ds); kw...) -function RA.Raster(ds::AG.RasterDataset; - crs=crs(ds), - mappedcrs=nokw, - dims=RA._dims(ds, crs, mappedcrs), - refdims=(), - name=nokw, - metadata=RA._metadata(ds), - missingval=RA.missingval(ds) => missing, - lazy=false, - dropband=false, - scaled=true, - coerce=convert, -) - filelist = AG.filelist(ds) - mod = RA._mod(eltype(ds), metadata, missingval; scaled, coerce) - kw = (; refdims, name, metadata, missingval=Rasters._outer_missingval(mod)) - raster = if lazy && length(filelist) > 0 - filename = first(filelist) - Raster(FileArray{GDALsource}(ds, filename; mod), dims; kw...) +function RA.Raster(ds::AG.RasterDataset; lazy=false, kw...) + filepath = if lazy + fp = _getfilepath(ds) + isnothing(fp) ? "/vsimem" : fp else - Raster(Array(RA._maybe_modify(ds, mod)), dims; kw...) + "" end - return RA._maybe_drop_single_band(raster, dropband, lazy) + Raster(ds, filepath; kw...) end RA.missingval(ds::AG.Dataset, args...) = RA.missingval(AG.RasterDataset(ds)) -function RA.missingval(rasterds::AG.RasterDataset, args...) +function RA.missingval(rds::AG.RasterDataset, args...) 
# All bands have the same missingval in GDAL - band = AG.getband(rasterds.ds, 1) + band = AG.getband(rds.ds, 1) # GDAL will set this hasnodataval = Ref(Cint(0)) # Int64 and UInt64 need special casing in GDAL - nodataval = if eltype(rasterds) == Int64 + nodataval = if eltype(rds) == Int64 AG.GDAL.gdalgetrasternodatavalueasint64(band, hasnodataval) - elseif eltype(rasterds) == UInt64 + elseif eltype(rds) == UInt64 AG.GDAL.gdalgetrasternodatavalueasuint64(band, hasnodataval) else AG.GDAL.gdalgetrasternodatavalue(band, hasnodataval) @@ -314,7 +292,7 @@ function AG.RasterDataset(f::Function, A::AbstractRaster; _block_template=A1, missingval, scale, offset, verbose, kw... ) do dataset rds = AG.RasterDataset(dataset) - mod = RA._writer_mod(eltype; missingval=RA.missingval(rds), scale, offset, coerce) + mod = RA._mod(eltype, RA.missingval(rds), scale, offset, coerce) open(A1) do O RA._maybe_modify(rds, mod) .= parent(O) end @@ -675,6 +653,15 @@ RA.affine2geotransform(am) = error(USING_COORDINATETRANSFORMATIONS_MESSAGE) _isaligned(geotransform) = geotransform[GDAL_ROT1] == 0 && geotransform[GDAL_ROT2] == 0 +function _getfilepath(ds) + filelist = AG.filelist(ds) + if length(filelist) == 0 + return nothing + else + return first(filelist) + end +end + # precompilation # function _precompile(::Type{GDALsource}) # ccall(:jl_generating_output, Cint, ()) == 1 || return nothing @@ -697,3 +684,4 @@ _isaligned(geotransform) = geotransform[GDAL_ROT1] == 0 && geotransform[GDAL_ROT # end # _precompile(GRDsource) + diff --git a/ext/RastersArchGDALExt/warp.jl b/ext/RastersArchGDALExt/warp.jl index ca1136a13..d50d88794 100644 --- a/ext/RastersArchGDALExt/warp.jl +++ b/ext/RastersArchGDALExt/warp.jl @@ -30,19 +30,11 @@ function _warp(A::AbstractRaster, flags::Dict; # If it does, we can just open it and use it directly. tempfile = isnothing(filename) ? nothing : tempname() * ".tif" warp_kw = isnothing(filename) || filename == "/vsimem/tmp" ? 
() : (; dest=filename) - # We really need a missingval for `warp`, as it may rotate and add missing value - mv1, mv2 = if RA.isnokw(missingval) - if RA.missingval(A) isa Union{Missing,Nothing} - RA._type_missingval(Missings.nonmissingtype(eltype(A))) - else - RA.missingval(A) - end - elseif missingval isa Pair - missingval - elseif missingval isa Missing - RA._type_missingval(Missings.nonmissingtype(eltype(A))), missing + # We really need a missingval for `warp`, as it may rotate and add missing values + mv1, mv2 = if RA.isnokw(missingval) && isnothing(RA.missingval(A1)) + RA._type_missingval(Missings.nonmissingtype(eltype(A1))) => missing else - missingval, missingval + RA._write_missingval_pair(A1, missingval; verbose=false, eltype=eltype(A1)) end out = AG.Dataset(A1; filename=tempfile, missingval=mv1, kw...) do dataset x = AG.gdalwarp([dataset], flagvect; warp_kw...) do warped diff --git a/ext/RastersNCDatasetsExt/ncdatasets_source.jl b/ext/RastersNCDatasetsExt/ncdatasets_source.jl index e2e844f69..024dc7255 100644 --- a/ext/RastersNCDatasetsExt/ncdatasets_source.jl +++ b/ext/RastersNCDatasetsExt/ncdatasets_source.jl @@ -20,7 +20,8 @@ function Base.write(filename::AbstractString, source::NCDsource, A::AbstractRast "c" end mode = !isfile(filename) || !append ? "c" : "a"; - ds = NCD.Dataset(filename, mode; attrib=RA._attribdict(metadata(A))) + attrib = RA._attribdict(metadata(A)) + ds = NCD.Dataset(filename, mode) try RA._writevar!(ds, source, A; kw...) 
finally diff --git a/src/array.jl b/src/array.jl index 48c64cab4..ab92d8376 100644 --- a/src/array.jl +++ b/src/array.jl @@ -17,7 +17,7 @@ function checkmem!(checkmem::Bool) end const FLATTEN_SELECT = FileArray -const FLATTEN_IGNORE = Union{Dict,Set,Base.MultiplicativeInverses.SignedMultiplicativeInverse} +const FLATTEN_IGNORE = Union{Dict,Set,Base.MultiplicativeInverses.SignedMultiplicativeInverse,Array} """ AbstractRaster <: DimensionalData.AbstractDimArray @@ -239,6 +239,9 @@ struct Raster{T,N,D<:Tuple,R<:Tuple,A<:AbstractArray{T,N},Na,Me,Mi<:Union{T,Noth new{T,N,D,R,A,Na,Me,typeof(missingval1)}(data, dims, refdims, name, metadata, missingval1) end end +Raster(f::Function, args...; kw...) = Raster(args...; f, kw...) +# For ambiguity with dataset methods +Raster(f::Function, s::AbstractString; kw...) = Raster(s; f, kw...) # Create a Raster from and AbstractArray and dims function Raster(A::AbstractArray{T,N}, dims::Tuple; refdims=(), @@ -323,6 +326,7 @@ function Raster(ds, filename::AbstractString; checkmem::Bool=CHECKMEM[], raw::Bool=false, mod=nokw, + f=identity, )::Raster _maybe_warn_replace_missing(replace_missing) # `raw` option will ignore `scaled` and `missingval` @@ -338,13 +342,13 @@ function Raster(ds, filename::AbstractString; # Missingval input options missingval_out = if isnokw(missingval) mv = Rasters.missingval(var, metadata_out) - isnothing(mv) ? nothing : mv => missing + isnothing(mv) ? 
nothing => nothing : mv => missing elseif isnothing(missingval) - nothing + nothing => nothing elseif missingval isa Pair # Pair: inner and outer missing values are manually defined missingval - elseif missingval == Rasters.missingval + elseif missingval === Rasters.missingval # `missingval` func: detect missing value and keep it as-is mv = Rasters.missingval(var, metadata_out) mv => mv @@ -365,7 +369,6 @@ function Raster(ds, filename::AbstractString; # Check the data will fit in memory checkmem && _checkobjmem(modvar) # Move the modified array to memory - @show mod Array(modvar) end # Generate dims diff --git a/src/create.jl b/src/create.jl index 51b99e4c1..25cdba8ae 100644 --- a/src/create.jl +++ b/src/create.jl @@ -292,7 +292,7 @@ function create(filename::AbstractString, source::Source, ::Type{T}, dims::Tuple # Create layers of zero arrays rast = Raster(A, dims; name, missingval=mv_inner) Rasters.write(f, filename, source, rast; - eltype, chunks, metadata, scale, offset, missingval=mv_inner, verbose, force, coerce, write, kw... + eltype, chunks, metadata, scale, offset, missingval, verbose, force, coerce, write, kw... 
) do W # write returns a variable, wrap it as a Raster f(rebuild(rast; data=W)) diff --git a/src/methods/mosaic.jl b/src/methods/mosaic.jl index d4d39629c..2e9a3f5df 100644 --- a/src/methods/mosaic.jl +++ b/src/methods/mosaic.jl @@ -81,21 +81,21 @@ function _mosaic(f::Function, A1::AbstractRaster, regions; else missingval end - if !isnothing(filename) && (ismissing(missingval) || isnokwornothing(missingval)) - missingval = _type_missingval(eltype(A1)) => missing - end - T = if missingval isa Pair - Base.promote_type(typeof(last(missingval)), Base.promote_eltype(regions...)) + missingval_pair = if !isnothing(filename) && (ismissing(missingval) || isnokwornothing(missingval)) + _type_missingval(eltype(A1)) => missing + elseif missingval isa Pair + missingval else - Base.promote_type(typeof(missingval), Base.promote_eltype(regions...)) + missingval => missingval end + T = Base.promote_type(typeof(last(missingval_pair)), Base.promote_eltype(regions...)) dims = _mosaic(Tuple(map(DD.dims, regions))) l1 = first(regions) return create(filename, T, dims; name=name(l1), - fill=missingval, - missingval, + fill=missingval_pair[1], + missingval=missingval_pair, driver, options, force diff --git a/src/modifieddiskarray.jl b/src/modifieddiskarray.jl index f281dd3a4..c2518b95f 100644 --- a/src/modifieddiskarray.jl +++ b/src/modifieddiskarray.jl @@ -190,17 +190,6 @@ end return scale, offset end -function _writer_mod(::Type{T}; missingval, scale, offset, coerce) where T - missingval1 = if missingval isa Pair - reverse(missingval) - elseif isnokw(missingval) - nothing - else - missingval - end - return _mod(T, missingval1, scale, offset, coerce) -end - _mod_eltype(::AbstractArray{T}, ::NoMod) where T = T _mod_eltype(::AbstractArray, m::Mod{T}) where T = T @@ -210,3 +199,21 @@ _mod_inverse_eltype(::AbstractArray{T}, m::Mod) where T = _maybe_modify(var, m::Mod; kw...) = ModifiedDiskArray(var, m; kw...) _maybe_modify(var, ::NoMod; kw...) 
= var + +_write_missingval_pair(A, missingval::Pair; kw...) = missingval +function _write_missingval_pair(A, missingval; verbose=true, eltype, metadata=metadata(A))::Pair + source_mv = Rasters.missingval(A) + if isnothing(mv) + # See if there is a missing value in metadata + source_mv = Rasters.missingval(metadata) + end + disk_mv = if isnothing(source_mv) + nothing + elseif isnokw(missingval) || ismissing(missingval) + _writeable_missing(eltype; verbose) + else + missingval + end + + return disk_mv => source_mv +end \ No newline at end of file diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index 9a4dc8990..5800c0cca 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -400,7 +400,14 @@ function _parse_period(period_str::String) end end -_attribdict(md::Metadata{<:CDMsource}) = Dict{String,Any}(string(k) => v for (k, v) in md) +function _attribdict(md::Metadata{<:CDMsource}) + attrib = Dict{String,Any}() + for (k, v) in md + # v isa Tuple && continue + # attrib[string(k)] = v + end + return attrib +end _attribdict(md) = Dict{String,Any}() # Add axis and standard name attributes to dimension variables @@ -462,16 +469,7 @@ function _writevar!(ds::AbstractDataset, source::CDMsource, A::AbstractRaster{T, metadata end - @assert !(missingval isa Pair) - missingval = isnokw(missingval) ? Rasters.missingval(A) : missingval - missingval = if ismissing(missingval) - # See if there is a missing value in metadata - mv = Rasters.missingval(metadata) - # But only use it if its the right type - mv isa eltype ? 
mv : _writeable_missing(eltype; verbose=true) => missing - else - missingval - end + missingval_pair = _write_missingval_pair(A, missingval; eltype, verbose, metadata) attrib = _attribdict(metadata) # Scale and offset @@ -488,10 +486,10 @@ function _writevar!(ds::AbstractDataset, source::CDMsource, A::AbstractRaster{T, attrib["add_offset"] = offset end - mod = _writer_mod(eltype; missingval, scale, offset, coerce) + mod = _mod(eltype, missingval_pair, scale, offset, coerce) - if !isnothing(mod.missingval) - attrib["_FillValue"] = missingval + if !isnothing(missingval_pair[1]) + attrib["_FillValue"] = missingval_pair[1] end key = if isnokw(name) || string(name) == "" @@ -501,7 +499,7 @@ function _writevar!(ds::AbstractDataset, source::CDMsource, A::AbstractRaster{T, end dimnames = lowercase.(string.(map(Rasters.name, dims(A)))) - var = CDM.defVar(ds, key, eltype, dimnames; attrib=attrib, chunksizes, kw...) + var = CDM.defVar(ds, key, eltype, dimnames; attrib, chunksizes, kw...) if write m = _maybe_modify(var.var, mod) diff --git a/src/sources/grd.jl b/src/sources/grd.jl index c653d43dd..b7f7556ba 100644 --- a/src/sources/grd.jl +++ b/src/sources/grd.jl @@ -192,20 +192,10 @@ function Base.write(filename::String, ::GRDsource, A::AbstractRaster; )) isnokwornothing(scale) && isnokwornothing(offset) || throw(ArgumentError("Cant write scale or offset to .grd files")) chunks isa NoKW || @warn "specifying chunks not supported for .grd files" + # Missing values + missingval_pair = _write_missingval_pair(A, missingval; eltype, verbose) - missingval = isnokw(missingval) ? Rasters.missingval(A) : missingval - missingval = if ismissing(missingval) - # See if there is a missing value in metadata - mv = _grd_mv(eltype, metadata(A); verbose=false) - # Otherwise define one - (isnothing(mv) ? 
_writeable_missing(eltype; verbose) : mv) => missing - elseif missingval isa Pair && first(missingval) == Rasters.missingval - mv = _grd_mv(eltype, metadata(A); verbose=false) - # Otherwise define one - (isnothing(mv) ? _writeable_missing(eltype; verbose) : mv) => missingval[2] - else - missingval - end + # Missing values if hasdim(A, Band) correctedA = permutedims(A, (X, Y, Band)) |> @@ -220,11 +210,11 @@ function Base.write(filename::String, ::GRDsource, A::AbstractRaster; filename = splitext(filename)[1] # Data: write a raw gri file from the array - mod = _writer_mod(eltype; missingval, scale, offset, coerce) + mod = _mod(eltype, missingval_pair, scale, offset, coerce) gri_filename = filename * ".gri" isfile(gri_filename) && rm(gri_filename) _write_gri(gri_filename, Val{source_eltype(mod)}(), mod, parent(correctedA)) - _write_grd(filename, eltype, dims(A), missingval, name(A)) + _write_grd(filename, eltype, dims(A), missingval_pair[1], name(A)) if write _mmapgrd(filename, source_eltype(mod), size(A); write=true) do M @@ -295,7 +285,7 @@ end # Rasters methods function _open(f, ::GRDsource, filename::AbstractString; - mod=RA.NoMod(), + mod=NoMod(), write=false, kw... ) @@ -309,7 +299,7 @@ function _open(f, ::GRDsource, filename::AbstractString; end _open(f, ::GRDsource, attrib::GRDdataset; kw...) = f(attrib) function _open(f, ::GRDsource, A::RasterDiskArray; - mod=RA.NoMod(), + mod=NoMod(), kw... 
) cleanreturn(f(_maybe_modify(A, mod))) diff --git a/src/utils.jl b/src/utils.jl index ceb9ef904..eab8e7bc4 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -135,7 +135,7 @@ function _extent2dims(to::Extents.Extent, size::Union{Nothing,NoKW}, res; ) res = _match_to_extent(to, res) ranges = map(values(to), res, sampling) do (start, stop), step, samp - @assert step >= 0 "only positive `res` are supported, got $step" + @assert step >= zero(step) "only positive `res` are supported, got $step" if samp isa Intervals if locus(samp) isa End reverse(range(; start=stop+step, step=-step, stop=start+step)) diff --git a/test/create.jl b/test/create.jl index b578abc4d..e76579c04 100644 --- a/test/create.jl +++ b/test/create.jl @@ -20,7 +20,8 @@ using Rasters: isdisk, ismem, filename @test missingval(rast) === nothing @test ispoints(rast) - rast = @test_nowarn Rasters.create(Float64, Extents.Extent(X=(0, 10), Y=(0, 5), Ti=(DateTime(2001), DateTime(2002))); + ext = Extents.Extent(X=(0, 10), Y=(0, 5), Ti=(DateTime(2001), DateTime(2002))) + rast = @test_nowarn Rasters.create(Float64, ext; res=(X=0.2, Y=0.1, Ti=Month(1)), crs=EPSG(4326), force=true, @@ -34,7 +35,9 @@ using Rasters: isdisk, ismem, filename end @test all(rast .=== 6.0) @test crs(rast) == EPSG(4326) - @test size(rast) == (50, 50, 12) + # We need closed/open extents to fix this + @test_broken extent(rast) == ext + @test_broken size(rast) == (50, 50, 12) @test Rasters.name(rast) == :testname @test missingval(rast) === missing @test isintervals(rast) diff --git a/test/resample.jl b/test/resample.jl index c24179906..abddd8778 100644 --- a/test/resample.jl +++ b/test/resample.jl @@ -24,7 +24,7 @@ include(joinpath(dirname(pathof(Rasters)), "../test/test_utils.jl")) end end - cea = Raster(raster_path; missingval=0x00, name=:cea, missingval) + cea = Raster(raster_path; missingval=0x00, name=:cea) raster_output = resample(cea; res=output_res, crs=output_crs, method, missingval=0x00) @testset "missingval propagates" begin @@ 
-133,24 +133,23 @@ include(joinpath(dirname(pathof(Rasters)), "../test/test_utils.jl")) @test dims(resampled_3D, Z) == Z(1:2) end - maskingval = Rasters.nokw - for maskingval in (nothing, missing, Rasters.nokw) + mv = Rasters.nokw + for mv in (nothing, missing, Rasters.nokw) # Resample cea.tif using resample - cea = Raster(raster_path; missingval=0x00 => maskingval, name=:cea) - raster_output = resample(cea; res=output_res, crs=output_crs, method, missingval=0x00 => maskingval) - disk_output = resample(cea; res=output_res, crs=output_crs, method, missingval=0x00 => maskingval, filename="resample.tif") + cea = Raster(raster_path; missingval=mv, name=:cea) + raster_output = resample(cea; res=output_res, crs=output_crs, method, missingval=mv) + disk_output = resample(cea; res=output_res, crs=output_crs, method, missingval=mv, filename="resample.tif") - cea_permuted = permutedims(Raster(raster_path; missingval=0x00 => maskingval, name=:cea_permuted), (Y, X)) - permuted_output = resample(cea_permuted, output_res; missingval=0x00 => maskingval, crs=output_crs, method) + cea_permuted = permutedims(Raster(raster_path; missingval=mv, name=:cea_permuted), (Y, X)) + permuted_output = resample(cea_permuted, output_res; missingval=mv, crs=output_crs, method) - AG_output1 = if isnothing(maskingval) + AG_output1 = if isnothing(mv) AG_output else - replace(AG_output, 0x00 => missing) + replace(AG_output, 0xff => missing) end # Compare ArchGDAL, resample and permuted resample @test all(AG_output1 .=== raster_output .=== read(disk_output) .=== permutedims(permuted_output, (X, Y))) - @test all(AG_output1 .=== raster_output .=== read(disk_output) .=== permutedims(permuted_output, (X, Y))) @test abs(step(dims(raster_output, Y))) ≈ abs(step(dims(raster_output, X))) ≈ abs(step(dims(disk_output, X))) ≈ diff --git a/test/runtests.jl b/test/runtests.jl index 55f13eb5e..052ba3137 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -27,7 +27,7 @@ end # CommondataModel sources @time 
@safetestset "commondatamodel" begin include("sources/commondatamodel.jl") end @time @safetestset "ncdatasets" begin include("sources/ncdatasets.jl") end -# @time @safetestset "zarr" begin include("sources/zarr.jl") end # TODO: FIXME +@time @safetestset "zarr" begin include("sources/zarr.jl") end if !Sys.iswindows() # GRIBDatasets doesn't work on Windows for now @time @safetestset "gribdatasets" begin include("sources/gribdatasets.jl") end @@ -40,4 +40,4 @@ if !haskey(ENV, "CI") @time @safetestset "rasterdatasources" begin include("sources/rasterdatasources.jl") end end @time @safetestset "plot recipes" begin include("plotrecipes.jl") end -@time @safetestset "resample" begin include("resample.jl") end +@time @safetestset "resample" begin include("resample.jl") end \ No newline at end of file diff --git a/test/sources/commondatamodel.jl b/test/sources/commondatamodel.jl index 6e2460092..0cda80779 100644 --- a/test/sources/commondatamodel.jl +++ b/test/sources/commondatamodel.jl @@ -15,7 +15,7 @@ import Rasters: ForwardOrdered, ReverseOrdered, Regular # test when reading a file ras = Raster(rand(X(f32_indices), Y(indices_one_third))) tempfile = tempname() * ".nc" - write(tempfile, ras) + write(tempfile, ras; force=true) ras_read = Raster(tempfile) steps = step.(dims(ras_read)) @test steps[1] == 0.05 diff --git a/test/sources/gdal.jl b/test/sources/gdal.jl index 871d6c321..523962aa8 100644 --- a/test/sources/gdal.jl +++ b/test/sources/gdal.jl @@ -7,7 +7,7 @@ include(joinpath(dirname(pathof(Rasters)), "../test/test_utils.jl")) url = "https://download.osgeo.org/geotiff/samples/gdal_eg/cea.tif" gdalpath = maybedownload(url) -#@testset "Raster" begin +@testset "Raster" begin @test_throws ArgumentError Raster("notafile.tif") @time gdalarray = Raster(gdalpath; name=:test) @@ -122,8 +122,8 @@ gdalpath = maybedownload(url) @test ndims(gdalarray) == 2 @test dims(gdalarray) isa Tuple{<:X,<:Y} @test lookup(refdims(gdalarray), Band) isa DimensionalData.Categorical; - # @test 
span(gdalarray, (Y, X)) == - # (Regular(-60.02213698319351), Regular(60.02213698319374)) + @test span(gdalarray, (Y, X)) == + (Regular(-60.02213698319351), Regular(60.02213698319374)) @test sampling(gdalarray, (Y, X)) == (Intervals(Start()), Intervals(Start())) # Bounds calculated in python using rasterio @@ -133,7 +133,7 @@ gdalpath = maybedownload(url) @testset "other fields" begin # This file has an incorrect missing value - @test missingval(gdalarray) === missing + @test missingval(gdalarray) === nothing @test metadata(gdalarray) isa Metadata{GDALsource,Dict{String,Any}} @test basename(metadata(gdalarray)["filepath"]) == "cea.tif" metadata(gdalarray)["filepath"] @@ -163,7 +163,7 @@ gdalpath = maybedownload(url) @test mappedcrs(dims(customgdalarray, Y)) == EPSG(4326) @test mappedcrs(dims(customgdalarray, X)) == EPSG(4326) @test parent(customgdalarray) isa FileArray - @test eltype(customgdalarray) == Union{UInt8,Missing} + @test eltype(customgdalarray) == UInt8 # Needs to be separate as it overrides crs/mappedcrs dimsgdalarray = Raster(gdalpath; dims=(Z(), X(), Y()), @@ -187,7 +187,7 @@ gdalpath = maybedownload(url) @test gdalarray[Y(4.224e6..4.226e6), Band(1)] isa Raster end - @testset "methods" begin + @testset "methods" begin @testset "mean" begin @test all(mean(gdalarray; dims=Y) .=== mean(parent(gdalarray); dims=2)) end @@ -269,23 +269,23 @@ gdalpath = maybedownload(url) rm(tempfile) end - #@testset "mosaic" begin + @testset "mosaic" begin @time gdalarray = Raster(gdalpath; name=:test) A1 = gdalarray[X(1:300), Y(1:200)] A2 = gdalarray[X(57:500), Y(101:301)] tempfile1 = tempname() * ".tif" tempfile2 = tempname() * ".tif" tempfile3 = tempname() * ".tif" - Afile = mosaic(first, A1, A2; missingval=0x00, atol=1e-8, filename=tempfile1) + Afile = mosaic(first, A1, A2; missingval=0xff, atol=1e-8, filename=tempfile1) Afile2 = mosaic(first, A1, A2; atol=1e-8, filename=tempfile2) + collect(Afile) + collect(Afile2) @test missingval(Afile2) === missing - Amem = 
mosaic(first, A1, A2; missingval=0x00, atol=1e-8) + Amem = mosaic(first, A1, A2; missingval=0xff, atol=1e-8) Atest = gdalarray[X(1:500), Y(1:301)] - Atest[X(1:56), Y(201:301)] .= 0x00 - Atest[X(301:500), Y(1:100)] .= 0x00 - @test all(Atest .=== Amem .=== Afile .== replace_missing(Afile2, 0x00)) - Afile3 = mosaic(first, A1, A2; atol=1e-8, filename=tempfile3) - @test missingval(Afile3) === 0xff + Atest[X(1:56), Y(201:301)] .= 0xff + Atest[X(301:500), Y(1:100)] .= 0xff + @test all(Atest .=== Amem .=== Afile .=== replace_missing(Afile2, 0xff)) end end # methods @@ -293,12 +293,12 @@ gdalpath = maybedownload(url) @testset "conversion to Raster" begin geoA = gdalarray[X(1:50), Y(1:1), Band(1)] @test size(geoA) == (50, 1) - @test eltype(geoA) <: Union{UInt8,Missing} + @test eltype(geoA) <: UInt8 @time geoA isa Raster{UInt8,1} @test dims(geoA) isa Tuple{X,Y} @test refdims(geoA) isa Tuple{Band} @test metadata(geoA) == metadata(gdalarray) - @test missingval(geoA) === missing + @test missingval(geoA) === nothing @test name(geoA) == :test end @@ -309,11 +309,7 @@ gdalpath = maybedownload(url) filename = tempname() * ".asc" @time write(filename, gdalarray; force=true) saved1 = Raster(filename); - @test all( - parent(saved1 .=== gdalarray) - parent(saved1) - parent(gdalarray) - ) + @test all(parent(saved1 .== gdalarray)) # @test typeof(saved1) == typeof(geoA) @test val(dims(saved1, X)) ≈ val(dims(gdalarray, X)) @test val(dims(saved1, Y)) ≈ val(dims(gdalarray, Y)) @@ -421,15 +417,15 @@ gdalpath = maybedownload(url) end @testset "to grd" begin - write("testgrd.gri", gdalarray; force=true) - @test (@allocations write("testgrd.gri", gdalarray; force=true)) < 1e4 - grdarray = Raster("testgrd.gri") + fn = joinpath(tempdir(), tempname() * ".gri") + write(fn, gdalarray; force=true) + @test (@allocations write(fn, gdalarray; force=true)) < 1e4 + grdarray = Raster(fn) @test crs(grdarray) == convert(ProjString, crs(gdalarray)) @test all(map((a, b) -> all(a .≈ b), bounds(grdarray), 
bounds(gdalarray))) @test index(grdarray, Y) ≈ index(gdalarray, Y) @test val(dims(grdarray, X)) ≈ val(dims(gdalarray, X)) @test grdarray == gdalarray - rm("testgrd.gri") end @testset "from Raster" begin @@ -473,7 +469,7 @@ gdalpath = maybedownload(url) filename = tempname() * ".tif" write(filename, A) @test missingval(Raster(filename)) === missing - @test missingval(Raster(filename; missingval=nothing)) === typemax(UInt8) + @test missingval(Raster(filename; missingval)) === typemax(UInt8) rm(filename) end @@ -587,7 +583,7 @@ gdalpath = maybedownload(url) end -#@testset "RasterStack" begin +@testset "RasterStack" begin @time gdalstack = RasterStack((a=gdalpath, b=gdalpath)) @test length(layers(gdalstack)) == 2 @@ -712,7 +708,7 @@ end write(filename, gdalstack; force=true) base, ext = splitext(filename) filename_b = string(base, "_b", ext) - saved = read(Raster(filename_b)) + saved = Raster(filename_b) @test all(saved .=== geoA) end @@ -722,7 +718,7 @@ end base, ext = splitext(filename) filename_b = string(base, "_second", ext) saved = read(Raster(filename_b)) - @test all(saved .== geoA) + @test all(saved .=== geoA) end @testset "write netcdf" begin @@ -843,7 +839,7 @@ end end end -#@testset "series" begin +@testset "series" begin gdalser = RasterSeries([gdalpath, gdalpath], (Ti(),); mappedcrs=EPSG(4326), name=:test) @test read(gdalser[Ti(1)]) == read(Raster(gdalpath; mappedcrs=EPSG(4326), name=:test)) @test read(gdalser[Ti(1)]) == read(Raster(gdalpath; mappedcrs=EPSG(4326), name=:test)) @@ -897,20 +893,19 @@ end @testset "detect dimension from file name" begin tifser = RasterSeries([gdalpath, gdalpath], Ti([DateTime(2001), DateTime(2002)])) - mkpath("tifseries") - write("tifseries/test.tif", tifser; force=true) - @test isfile("tifseries/test_2001-01-01T00:00:00.tif") - @test isfile("tifseries/test_2002-01-01T00:00:00.tif") - ser1 = RasterSeries("tifseries", Ti(DateTime)) - ser2 = RasterSeries("tifseries", Ti(DateTime); lazy=true) - ser3 = 
RasterSeries("tifseries/test.tif", Ti(DateTime)) - ser4 = RasterSeries("tifseries", Ti(DateTime; order=ForwardOrdered()); ext=".tif") - ser5 = RasterSeries("tifseries/test", Ti(DateTime); ext=".tif") + path = mkpath(joinpath(tempdir(), tempname(), "tifseries")) + write(joinpath(path, "test.tif"), tifser; force=true) + @test isfile(joinpath(path, "test_2001-01-01T00:00:00.tif")) + @test isfile(joinpath(path, "test_2002-01-01T00:00:00.tif")) + ser1 = RasterSeries(path, Ti(DateTime)) + ser2 = RasterSeries(path, Ti(DateTime); lazy=true) + ser3 = RasterSeries(joinpath(path, "test.tif"), Ti(DateTime)) + ser4 = RasterSeries(path, Ti(DateTime; order=ForwardOrdered()); ext=".tif") + ser5 = RasterSeries(joinpath(path, "test"), Ti(DateTime); ext=".tif") @test dims(ser1) == dims(ser2) == dims(ser3) == dims(ser3) == dims(ser5) == dims(tifser) - @test_throws ErrorException RasterSeries("tifseries", Ti(Int)) - ser6 = RasterSeries("tifseries/test", Ti(DateTime; sampling=Intervals(Center())); ext=".tif") + @test_throws ErrorException RasterSeries(path, Ti(Int)) + ser6 = RasterSeries(joinpath(path, "test"), Ti(DateTime; sampling=Intervals(Center())); ext=".tif") @test sampling(ser6) == (Intervals(Center()),) - rm("tifseries"; recursive=true) end @testset "methods" begin @@ -973,6 +968,7 @@ end @test crs(gdalarray) == wkt @test crs(gdalarray[Y(1)]) == wkt end + end diff --git a/test/sources/grd.jl b/test/sources/grd.jl index 31411e636..940f8aa1c 100644 --- a/test/sources/grd.jl +++ b/test/sources/grd.jl @@ -141,7 +141,7 @@ grdpath = stem * ".gri" end @testset "mask and mask! 
to disk" begin - msk = read(replace_missing(grdarray, missing)) + msk = replace_missing(grdarray, missing) msk[X(1:73), Y([1, 5, 77])] .= missingval(msk) @test !any(grdarray[X(1:73)] .=== missingval(msk)) masked = mask(grdarray; with=msk) @@ -156,8 +156,6 @@ grdpath = stem * ".gri" mask!(A; with=msk, missingval=missingval(A)) end @test all(Raster(tempgri)[X(1:73), Y([1, 5, 77])] .=== missingval(grdarray)) - rm(tempgrd) - rm(tempgri) end @testset "classify! to disk" begin @@ -181,7 +179,7 @@ grdpath = stem * ".gri" tn = tempname() tempgrd = tn * ".grd" tempgri = tn * ".gri" - Afile = mosaic(first, A1, A2; missingval=0.0f0, atol=1e-1, filename=tempgrd, missingval=nothing) + Afile = mosaic(first, A1, A2; missingval=0.0f0, atol=1e-1, filename=tempgrd, force=true) Amem = mosaic(first, A1, A2; missingval=0.0f0, atol=1e-1) Atest = grdarray[X(1:80), Y(1:60)] Atest[X(1:26), Y(31:60)] .= 0.0f0 @@ -191,15 +189,6 @@ grdpath = stem * ".gri" read(Atest .- Afile) end - @testset "rasterize" begin - # A = read(grdarray) - # R = rasterize(A; to=A) - # @test all(A .=== R .== grdarray) - # B = rebuild(read(grdarray) .= 0x00; missingval=0x00) - # rasterize!(B, read(grdarray)) - # @test all(B .=== grdarray |> collect) - end - end @testset "selectors" begin @@ -228,8 +217,8 @@ grdpath = stem * ".gri" # 1 band is added again on save @test size(saved) == size(grdarray[Band(1)]) @test parent(saved) == parent(grdarray[Band(1)]) - write(filename2, grdarray; force=true) - @test (@allocations write(filename2, grdarray; force=true, verbose=false)) < 3e3 + write(filename2, grdarray; force=true, verbose=false) + # @test_broken (@allocations write(filename2, grdarray; force=true, verbose=false)) < 3e3 end @testset "3d with subset" begin @@ -256,16 +245,16 @@ grdpath = stem * ".gri" @test saved isa typeof(geoA) @test parent(saved) == parent(geoA) write(filename, GRDsource(), geoA; force = true) - @test (@allocations write(filename, GRDsource(), geoA; force = true)) < 3e3 + # @test_broken 
(@allocations write(filename, GRDsource(), geoA; force = true)) < 3e3 end @testset "to netcdf" begin filename2 = tempname() * ".nc" span(grdarray[Band(1)]) - write(filename2, grdarray[Band(1)]; force = true) + write(filename2, grdarray[Band(1)]; force=true) saved = Raster(filename2; crs=crs(grdarray)) @test size(saved) == size(grdarray[Band(1)]) - @test all(parent(replace_missing(saved, missingval(grdarray))) .≈ parent(grdarray[Band(1)])) + @test all(parent(saved) .≈ parent(grdarray[Band(1)])) @test index(saved, X) ≈ index(grdarray, X) .+ 0.5 @test index(saved, Y) ≈ index(grdarray, Y) .+ 0.5 @test bounds(saved, Y) == bounds(grdarray, Y) @@ -299,7 +288,8 @@ grdpath = stem * ".gri" @test missingval(Raster(filename)) === missing filename = tempname() * ".grd" write(filename, A) - @test missingval(Raster(filename; missingval=nothing)) === typemin(Float32) + @test missingval(Raster(filename; missingval=nothing)) === nothing + @test missingval(Raster(filename)) === missing end end diff --git a/test/sources/ncdatasets.jl b/test/sources/ncdatasets.jl index 3725c9ac6..88f747643 100644 --- a/test/sources/ncdatasets.jl +++ b/test/sources/ncdatasets.jl @@ -44,7 +44,7 @@ stackkeys = ( ) end -#@testset "Raster" begin +@testset "Raster" begin @time ncarray = Raster(ncsingle); @time lazyarray = Raster(ncsingle; lazy=true) @time eagerarray = Raster(ncsingle; lazy=false) @@ -58,7 +58,7 @@ end @time read(lazyarray); end - @testset "scaling and maskign" begin + @testset "scaling and maskin" begin @time cfarray = Raster(ncsingle) @time cfarray = Raster(ncsingle) @time cf_nomask_array = Raster(ncsingle; missingval=nothing) @@ -115,7 +115,7 @@ end @testset "handle empty variables" begin st = RasterStack((empty=view(ncarray, 1, 1, 1), full=ncarray)) empty_test = tempname() * ".nc" - write(empty_test, st) + write(empty_test, st; force=true) rast = Raster(empty_test) st = RasterStack(empty_test) @@ -272,23 +272,23 @@ end end @testset "write" begin - @testset "to netcdf" begin - filename = 
tempname() * ".nc" - write(filename, ncarray; force=true) - @test (@allocations write(filename, ncarray; force=true)) < 1e4 + @testset "to netcdf" begin + fn = tempname() * ".nc" + write(fn, ncarray; force=true); + @test (@allocations write(fn, ncarray; force=true)) < 1e4 @testset "CF attributes" begin - @test NCDatasets.Dataset(filename)[:x].attrib["axis"] == "X" - @test NCDatasets.Dataset(filename)[:x].attrib["bounds"] == "x_bnds" + @test NCDatasets.Dataset(fn)[:x].attrib["axis"] == "X" + @test NCDatasets.Dataset(fn)[:x].attrib["bounds"] == "x_bnds" # TODO better units and standard name handling end - saved = Raster(filename) + saved = Raster(fn) @test size(saved) == size(ncarray) @test refdims(saved) == refdims(ncarray) @test missingval(saved) === missingval(ncarray) @test map(metadata.(dims(saved)), metadata.(dims(Raster))) do s, g all(s .== g) end |> all - @test metadata(saved) == metadata(ncarray) + @test_broken metadata(saved) == metadata(ncarray) # Dimension names are renamed so metadata is different @test_broken all( metadata(dims(saved)) == metadata.(dims(ncarray))) @test Rasters.name(saved) == Rasters.name(ncarray) @@ -297,11 +297,11 @@ end @test all(typeof.(span.(dims(saved))) .== typeof.(span.(dims(ncarray)))) @test all(val.(span.(dims(saved))) .== val.(span.(dims(ncarray)))) @test all(sampling.(dims(saved)) .== sampling.(dims(ncarray))) - @test typeof(dims(saved)) <: typeof(dims(ncarray)) + @test_broken typeof(dims(saved)) <: typeof(dims(ncarray)) @test index(saved, 3) == index(ncarray, 3) @test all(val.(dims(saved)) .== val.(dims(ncarray))) @test all(parent(saved) .=== parent(ncarray)) - @test saved isa typeof(ncarray) + @test_broken saved isa typeof(ncarray) # TODO test crs @testset "chunks" begin @@ -347,8 +347,9 @@ end end @testset "non allowed values" begin - @test_throws ArgumentError write(filename, convert.(Union{Missing,Float16}, ncarray); force=true) + @test_throws ArgumentError write(fn, convert.(Union{Missing,Float16}, ncarray); 
force=true) end + end @testset "to gdal" begin @@ -370,26 +371,26 @@ end @testset "to grd" begin nccleaned = replace_missing(ncarray[Ti(1)], -9999.0) - write("testgrd.gri", nccleaned; force=true) - @test (@allocations write("testgrd.gri", nccleaned; force=true)) < 1e4 - grdarray = Raster("testgrd.gri", missingval=nothing); + fn = tempname() * ".gri" + write(fn, nccleaned; force=true) + @test_broken (@allocations write(fn, nccleaned; force=true)) < 1e4 + grdarray = Raster(fn, missingval=nothing); @test crs(grdarray) == convert(ProjString, EPSG(4326)) @test bounds(grdarray) == bounds(nccleaned) @test index(grdarray, Y) ≈ reverse(index(nccleaned, Y)) .- 0.5 @test index(grdarray, X) ≈ index(nccleaned, X) .- 1.0 @test parent(reverse(grdarray; dims=Y)) ≈ parent(nccleaned) - rm("testgrd.gri") - rm("testgrd.grd") end @testset "write points" begin + filename = tempname() * ".nc" lon, lat = X(25:1:30), Y(25:1:30) ti = Ti(DateTime(2001):Month(1):DateTime(2002)) ras = Raster(rand(lon, lat, ti)) - write("point_rast.nc", ras; force=true) - saved = Raster("point_rast.nc") + write(filename, ras; force=true) + saved = Raster(filename) @test sampling(saved) == (Points(), Points(), Points()) - @test @allocations(write("point_rast.nc", ras; force=true)) < 10e3 + @test @allocations(write(filename, ras; force=true)) < 10e3 end end @@ -466,7 +467,7 @@ end @testset "load ncstack" begin @test ncstack isa RasterStack - @test isnothing(missingval(ncstack)) + @test ismissing(missingval(ncstack)) @test dims(ncstack[:abso4]) == dims(ncstack, (X, Y, Ti)) @test refdims(ncstack) == () # Loads child as a regular Raster @@ -532,13 +533,13 @@ end @test parent(st) isa NamedTuple @test first(parent(st)) isa Array length(dims(st[:aclcac])) - filename = tempname() * ".nc" - write(filename, st; force=true) - @test (@allocations write(filename, st; force=true)) < 1e6 # writing a rasterseries/stack has no force keyword - saved = RasterStack(RasterStack(filename)) + fn = tempname() * ".nc" + write(fn, st; 
force=true) + @test (@allocations write(fn, st; force=true)) < 1e6 # writing a rasterseries/stack has no force keyword + saved = RasterStack(RasterStack(fn)) @test keys(saved) == keys(st) - @test metadata(saved)["advection"] == "Lin & Rood" - @test metadata(saved) == metadata(st) == metadata(ncstack) + @test_broken metadata(saved)["advection"] == "Lin & Rood" + @test_broken metadata(saved) == metadata(st) == metadata(ncstack) @test all(first(DimensionalData.layers(saved)) .== first(DimensionalData.layers(st))) end @@ -577,10 +578,10 @@ end rm("test_2.nc") end -if !haskey(ENV, "CI") +h5path = joinpath(testdir, "data/SMAP_L4_SM_gph_20160101T223000_Vv4011_001.h5") +if !haskey(ENV, "CI") && isfile(h5path) @testset "HDF5 with Groups" begin - path = joinpath(testdir, "data/SMAP_L4_SM_gph_20160101T223000_Vv4011_001.h5") - stack = RasterStack(path; group="Geophysical_Data") + stack = RasterStack(h5path; group="Geophysical_Data") lazy_stack = RasterStack(path; group="Geophysical_Data", lazy=true) rast = Raster(path; name=:surface_temp, group="Geophysical_Data") lazy_rast = Raster(path; name=:surface_temp, group="Geophysical_Data", lazy=true) diff --git a/test/sources/zarr.jl b/test/sources/zarr.jl index 9a3da8cb2..c823136d8 100644 --- a/test/sources/zarr.jl +++ b/test/sources/zarr.jl @@ -1,4 +1,8 @@ using Rasters +using DimensionalData +using DimensionalData.Lookups +using DimensionalData.Dimensions +using Dates using ZarrDatasets using ZarrDatasets.Zarr using Rasters: FileArray, FileStack, Zarrsource, crs, bounds, name, trim @@ -18,14 +22,15 @@ eagerarray = Raster(path; lazy=false, name="air_temperature_2m") @test parent(lazyarray) isa FileArray @test parent(eagerarray) isa Array end + @testset "read" begin @time A = read(lazyarray); @test A isa Raster @test parent(A) isa Array A2 = copy(A) .= 0 - @time read!(ncarray, A2); + @time read!(zraster, A2); A3 = copy(A) .= 0 - @time read!(ncsingle, A3) + @time read!(zraster, A3) @test all(A .=== A2) @test all(A .=== A3) end @@ 
-36,24 +41,27 @@ end @test index(zraster,X) == collect(-178.75:2.5:178.75) # TODO the spatial bounds are strange, because the data is point data # We should find a dataset that has actual intervals + @test bounds(zraster) == ( (-178.75, 178.75), - (-88.75, 88,75), + (-88.75, 88.75), (DateTime("1979-01-09T00:00:00"), DateTime("2021-12-27T00:00:00")), ) end + @testset "dimensions" begin @test ndims(zraster) == 3 @test length.(dims(zraster)) == (144, 72, 989) @test dims(zraster) isa Tuple{<:X,<:Y,<:Ti} @test refdims(zraster) == () - @test val.(span(ncarray)) == (2.5, 2.5, (nothing, nothing)) - @test typeof(lookup(ncarray)) <: Tuple{<:Mapped,<:Mapped,<:Sampled} + @test val.(span(zraster)) == (2.5, 2.5, (nothing, nothing)) + @test typeof(lookup(zraster)) <: Tuple{<:Mapped,<:Mapped,<:Sampled} end + @testset "other fields" begin @test ismissing(missingval(zraster)) - @test metadata(r)["original_name"] == "t2m" - @test metadata(zraster) isa Metadata{<:Rasters.CDMsource, Dict{String, Any}} + @test metadata(zraster)["original_name"] == "t2m" + @test metadata(zraster) isa Rasters.Metadata{<:Rasters.CDMsource, Dict{String, Any}} @test name(zraster) == :air_temperature_2m end @@ -120,7 +128,6 @@ end vals[1, 1] = 1.0 vals[end, end] = 0.0 - ra = Raster(path) @test extrema(ra) == (180.0, 182.0) # test scale and offset diff --git a/test/warp.jl b/test/warp.jl index 95a5d4438..d8d79bd99 100644 --- a/test/warp.jl +++ b/test/warp.jl @@ -9,23 +9,18 @@ gdalpath = maybedownload(url) # test that warp actually does *something* r = Raster(gdalpath) crs_ = crs(r).val - warped = warp(r, Dict(:t_srs => "EPSG:25832"); missingval=nothing) - @test warped isa Raster - @test size(warped) == (720, 721) - # the crs is way off, the image is rotated - all four corners should be black - missingval(warped) === nothing - @test warped[1, 1] === warped[1, end] === warped[end, 1] === warped[end, end] === 0x00 - - warped = warp(r, Dict(:t_srs => "EPSG:25832")) + warped = warp(r, Dict(:t_srs => "EPSG:25832"); 
missingval=0xff) @test warped isa Raster @test size(warped) == (720, 721) # the crs is rotatedso the image is rotated an all four corners should be black missingval(warped) === nothing - @test warped[1, 1] === warped[1, end] === warped[end, 1] === warped[end, end] === 0xff + @test warped[1, 1] === warped[1, end] === warped[end, 1] === warped[end, end] === 0xff == missingval(warped) # now compute mean squared error of the back transformation - warped_back = Rasters.trim(warp(warped, Dict(:t_srs => crs_), res=map(step, lookup(r)), missingval=0xff)) + res = map(step, lookup(r)) + warped_back = Rasters.trim(warp(warped, Dict(:t_srs => crs_); res, missingval=0xff)) # subtracting UInts brings us into hell -> Int # we also need to shrink the range because of some bleed during warp diff_ = parent(warped_back[2:end-1, 2:end-1]) .- r + @test sum(x -> x^2, diff_) / prod(size(diff_)) < 600 end From f3e98ac8c366225d1557272955a894c5c2cb4535 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Thu, 9 Jan 2025 17:03:13 +0100 Subject: [PATCH 30/38] fix aqua --- src/array.jl | 3 --- src/utils.jl | 2 +- test/warp.jl | 1 - 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/src/array.jl b/src/array.jl index 42c2eaab0..4e75fb72e 100644 --- a/src/array.jl +++ b/src/array.jl @@ -239,9 +239,6 @@ struct Raster{T,N,D<:Tuple,R<:Tuple,A<:AbstractArray{T,N},Na,Me,Mi<:Union{T,Noth new{T,N,D,R,A,Na,Me,typeof(missingval1)}(data, dims, refdims, name, metadata, missingval1) end end -Raster(f::Function, args...; kw...) = Raster(args...; f, kw...) -# For ambiguity with dataset methods -Raster(f::Function, s::AbstractString; kw...) = Raster(s; f, kw...) 
# Create a Raster from and AbstractArray and dims function Raster(A::AbstractArray{T,N}, dims::Tuple; refdims=(), diff --git a/src/utils.jl b/src/utils.jl index 6cafda878..69ccd0fbb 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -97,7 +97,7 @@ _type_missingval(::Type{T}) where T = _type_missingval1(Missings.nonmissingtype( _type_missingval1(::Type{T}) where T<:Number = typemin(T) _type_missingval1(::Type{T}) where T<:Unsigned = typemax(T) -_type_missingval1(::Type{<:AbstractString}) where T = T("") +_type_missingval1(::Type{T}) where T<:AbstractString = T("") _fix_missingval(::Type, ::Union{NoKW,Nothing}) = nothing _fix_missingval(::AbstractArray, ::Nothing) = nothing diff --git a/test/warp.jl b/test/warp.jl index b23c13a6a..7e8f2e73a 100644 --- a/test/warp.jl +++ b/test/warp.jl @@ -14,7 +14,6 @@ gdalpath = maybedownload(url) @test size(warped) == (720, 721) # the crs is rotated so the image is rotated an all four corners should be missing missingval(warped) === 0xff - parent(warped) @test warped[1, 1] === warped[1, end] === warped[end, 1] === warped[end, end] === 0xff == missingval(warped) # now compute mean squared error of the back transformation res = map(step, lookup(r)) From 0efbcb2b8747b4a0f49e93ccec758f2a8fcf9e43 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sat, 11 Jan 2025 14:43:20 +0100 Subject: [PATCH 31/38] fix netcdf metadata --- ext/RastersNCDatasetsExt/ncdatasets_source.jl | 10 ++------- src/modifieddiskarray.jl | 15 ++++++++----- src/sources/commondatamodel.jl | 4 ++-- test/sources/ncdatasets.jl | 22 +++++++++---------- 4 files changed, 24 insertions(+), 27 deletions(-) diff --git a/ext/RastersNCDatasetsExt/ncdatasets_source.jl b/ext/RastersNCDatasetsExt/ncdatasets_source.jl index 024dc7255..6296bb86f 100644 --- a/ext/RastersNCDatasetsExt/ncdatasets_source.jl +++ b/ext/RastersNCDatasetsExt/ncdatasets_source.jl @@ -19,9 +19,7 @@ function Base.write(filename::AbstractString, source::NCDsource, A::AbstractRast RA.check_can_write(filename, 
force) "c" end - mode = !isfile(filename) || !append ? "c" : "a"; - attrib = RA._attribdict(metadata(A)) - ds = NCD.Dataset(filename, mode) + ds = NCD.Dataset(filename, mode; attrib=RA._attribdict(metadata(A))) try RA._writevar!(ds, source, A; kw...) finally @@ -43,14 +41,10 @@ function Base.write(filename::AbstractString, source::Source, s::AbstractRasterS "c" end ds = NCD.Dataset(filename, mode; attrib=RA._attribdict(metadata(s))) - missingval = RA._stack_nt(s, isnokw(missingval) ? Rasters.missingval(s) : missingval) try map(keys(s)) do k - RA._writevar!(ds, source, s[k]; - missingval=missingval[k], - kw... - ) + RA._writevar!(ds, source, s[k]; missingval=missingval[k], kw...) end f(RA.OpenStack{Source,K,T}(ds)) finally diff --git a/src/modifieddiskarray.jl b/src/modifieddiskarray.jl index d4f167dbe..7e86e2f4e 100644 --- a/src/modifieddiskarray.jl +++ b/src/modifieddiskarray.jl @@ -203,17 +203,20 @@ _maybe_modify(var, ::NoMod; kw...) = var _write_missingval_pair(A, missingval::Pair; kw...) 
= missingval function _write_missingval_pair(A, missingval; verbose=true, eltype, metadata=metadata(A))::Pair source_mv = Rasters.missingval(A) - if isnothing(mv) - # See if there is a missing value in metadata - source_mv = Rasters.missingval(metadata) - end - disk_mv = if isnothing(source_mv) + disk_mv = if isnothing(source_mv) || isnothing(missingval) nothing elseif isnokw(missingval) || ismissing(missingval) - _writeable_missing(eltype; verbose) + # See if there is a missing value in metadata + md_mv = Rasters.missingval(metadata) + if isnothing(md_mv) + _writeable_missing(eltype; verbose) + else + md_mv + end else missingval end + @show source_mv disk_mv return disk_mv => source_mv end diff --git a/src/sources/commondatamodel.jl b/src/sources/commondatamodel.jl index 5800c0cca..88f288a97 100644 --- a/src/sources/commondatamodel.jl +++ b/src/sources/commondatamodel.jl @@ -403,8 +403,8 @@ end function _attribdict(md::Metadata{<:CDMsource}) attrib = Dict{String,Any}() for (k, v) in md - # v isa Tuple && continue - # attrib[string(k)] = v + v isa Tuple && continue + attrib[string(k)] = v end return attrib end diff --git a/test/sources/ncdatasets.jl b/test/sources/ncdatasets.jl index 88f747643..c233c4a46 100644 --- a/test/sources/ncdatasets.jl +++ b/test/sources/ncdatasets.jl @@ -58,7 +58,7 @@ end @time read(lazyarray); end - @testset "scaling and maskin" begin + @testset "scaling and masking" begin @time cfarray = Raster(ncsingle) @time cfarray = Raster(ncsingle) @time cf_nomask_array = Raster(ncsingle; missingval=nothing) @@ -288,20 +288,20 @@ end @test map(metadata.(dims(saved)), metadata.(dims(Raster))) do s, g all(s .== g) end |> all - @test_broken metadata(saved) == metadata(ncarray) - # Dimension names are renamed so metadata is different - @test_broken all( metadata(dims(saved)) == metadata.(dims(ncarray))) + @test metadata(saved) == metadata(ncarray) + # Bounds variable names are renamed so metadata is different + @test_broken all(metadata(dims(saved))[1] 
== metadata(dims(ncarray))[1]) @test Rasters.name(saved) == Rasters.name(ncarray) @test all(lookup.(dims(saved)) .== lookup.(dims(ncarray))) @test all(order.(dims(saved)) .== order.(dims(ncarray))) @test all(typeof.(span.(dims(saved))) .== typeof.(span.(dims(ncarray)))) @test all(val.(span.(dims(saved))) .== val.(span.(dims(ncarray)))) @test all(sampling.(dims(saved)) .== sampling.(dims(ncarray))) - @test_broken typeof(dims(saved)) <: typeof(dims(ncarray)) + @test typeof(dims(saved)) <: typeof(dims(ncarray)) @test index(saved, 3) == index(ncarray, 3) @test all(val.(dims(saved)) .== val.(dims(ncarray))) @test all(parent(saved) .=== parent(ncarray)) - @test_broken saved isa typeof(ncarray) + @test saved isa typeof(ncarray) # TODO test crs @testset "chunks" begin @@ -373,7 +373,7 @@ end nccleaned = replace_missing(ncarray[Ti(1)], -9999.0) fn = tempname() * ".gri" write(fn, nccleaned; force=true) - @test_broken (@allocations write(fn, nccleaned; force=true)) < 1e4 + @test (@allocations write(fn, nccleaned; force=true)) < 1e4 grdarray = Raster(fn, missingval=nothing); @test crs(grdarray) == convert(ProjString, EPSG(4326)) @test bounds(grdarray) == bounds(nccleaned) @@ -420,7 +420,7 @@ end end -@testset "Single file stack" begin +# @testset "Single file stack" begin @time ncstack = RasterStack(ncmulti) @testset "lazyness" begin @@ -538,10 +538,10 @@ end @test (@allocations write(fn, st; force=true)) < 1e6 # writing a rasterseries/stack has no force keyword saved = RasterStack(RasterStack(fn)) @test keys(saved) == keys(st) - @test_broken metadata(saved)["advection"] == "Lin & Rood" - @test_broken metadata(saved) == metadata(st) == metadata(ncstack) + @test metadata(saved)["advection"] == "Lin & Rood" + @test metadata(saved) == metadata(st) == metadata(ncstack) @test all(first(DimensionalData.layers(saved)) .== first(DimensionalData.layers(st))) - end + nend @testset "show" begin ncstack = view(RasterStack(ncmulti), X(7:99), Y(3:90)); From 
1ead1ba7ff9def90e1df8af0cd4c8a1036e3249d Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 12 Jan 2025 14:32:22 +0100 Subject: [PATCH 32/38] docs tweaks --- src/methods/shared_docstrings.jl | 22 ++++++++++++---------- src/modifieddiskarray.jl | 1 - 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/methods/shared_docstrings.jl b/src/methods/shared_docstrings.jl index 57cfd028d..bb512e971 100644 --- a/src/methods/shared_docstrings.jl +++ b/src/methods/shared_docstrings.jl @@ -154,13 +154,21 @@ const MISSINGVAL_KEYWORD = """ - `missingval`: value representing missing data, normally detected from the file and automatically converted to `missing`. Setting to an alternate value, such as `0` or `NaN` may be desirable for improved perfomance. `nothing` specifies no missing value. - Using the same `missingval` the file already has removes the overhead of replacing it. + Using the same `missingval` the file already has removes the overhead of replacing it, + this can be done by passing the `missingval` function as `missingval`. If the file has an incorrect value, we can manually define the transformation - as a pair e.g. `correct_value => missing`, `correct_value => NaN` or - `correct_value => correct_value` to keep it the same and remove the overhead of changing it. + as a pair like `correct_value => missing` or `correct_value => NaN`. + `correct_value => correct_value` will keep remove the overhead of changing it. When `raw=true` is set, `missingval` is not changed from the value specified in the file. """ +const WRITE_MISSINGVAL_KEYWORD = """ +- `missingval`: set the missing value (i.e. FillValue / nodataval) of the written raster, + as Julia's `missing` cannot be stored. If not passed in, an appropriate `missingval` + will be detected from the objects `missingval`, its `metadata`, or a default will be + chosen base on the array element type(s). 
+""" + const NAME_KEYWORD = """ - `name`: a `Symbol` name for a Raster, which will also retrieve the a named layer if `Raster` is used on a multi-layered file like a NetCDF. @@ -188,10 +196,4 @@ const CHUNKS_KEYWORD = """ size of `1`. `true` can be used to mean: use the original chunk size of the lazy `Raster` being written or X and Y of 256 by 256. `false` means don't use chunks at all. -""" - -const WRITE_MISSINGVAL_KEYWORD = """ -- `missingval`: set the missing value (i.e. FillValue / nodataval) of the written raster, - as Julias `missing` cannot be stored. If not passed in, `missingval` will be detected - from metadata or a default will be chosen. -""" +""" \ No newline at end of file diff --git a/src/modifieddiskarray.jl b/src/modifieddiskarray.jl index 7e86e2f4e..9260c478c 100644 --- a/src/modifieddiskarray.jl +++ b/src/modifieddiskarray.jl @@ -216,7 +216,6 @@ function _write_missingval_pair(A, missingval; verbose=true, eltype, metadata=me else missingval end - @show source_mv disk_mv return disk_mv => source_mv end From 1f6d614a65e36619ae91685ed513648b2fd83608 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 12 Jan 2025 15:20:55 +0100 Subject: [PATCH 33/38] typo --- test/sources/ncdatasets.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/sources/ncdatasets.jl b/test/sources/ncdatasets.jl index c233c4a46..fc4f18841 100644 --- a/test/sources/ncdatasets.jl +++ b/test/sources/ncdatasets.jl @@ -541,7 +541,7 @@ end @test metadata(saved)["advection"] == "Lin & Rood" @test metadata(saved) == metadata(st) == metadata(ncstack) @test all(first(DimensionalData.layers(saved)) .== first(DimensionalData.layers(st))) - nend + end @testset "show" begin ncstack = view(RasterStack(ncmulti), X(7:99), Y(3:90)); From 1a9c2cd8f2750c37576402bcd0672b7529a970ec Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 12 Jan 2025 16:04:26 +0100 Subject: [PATCH 34/38] bugfix and tweak docs --- src/methods/shared_docstrings.jl | 58 
+++++++++++++++----------------- test/sources/ncdatasets.jl | 2 +- 2 files changed, 29 insertions(+), 31 deletions(-) diff --git a/src/methods/shared_docstrings.jl b/src/methods/shared_docstrings.jl index bb512e971..88bb5437d 100644 --- a/src/methods/shared_docstrings.jl +++ b/src/methods/shared_docstrings.jl @@ -1,5 +1,17 @@ # Share common docstrings here to keep things consistent +const NAME_KEYWORD = """ +- `name`: a `Symbol` name for a Raster, which will also retrieve the + a named layer if `Raster` is used on a multi-layered file like a NetCDF. +""" +const METADATA_KEYWORD = """ +- `metadata`: `Dict` or `Metadata` object for the array, or `NoMetadata()`. +""" +const REFDIMS_KEYWORD = """ +- `refdims`: `Tuple of` position `Dimension`s the array was sliced from, defaulting to `()`. + Usually not needed. +""" + const TO_KEYWORD = """ - `to`: a `Raster`, `RasterStack`, `Tuple` of `Dimension` or `Extents.Extent`. If no `to` object is provided the extent will be calculated from the geometries, @@ -116,7 +128,7 @@ const GROUP_KEYWORD = """ """ const CHECKMEMORY_KEYWORD = """ -- `checkmemory`: If `true` (the default), check if there is enough memory for the operation. +- `checkmemory`: if `true` (the default), check if there is enough memory for the operation. `false` will ignore memory needs. """ @@ -129,25 +141,30 @@ const OFFSET_KEYWORD = """ """ const RAW_KEYWORD = """ -- `raw`: Turn of all scaling and masking and load the raw values from disk. +- `raw`: turn of all scaling and masking and load the raw values from disk. `false` by default. If `true`, `scaled` will be set to `false` and `missingval` will to the existing missing value in the file. A warning will be printed if `scaled` or `missingval` are manually set to another value. """ const SCALED_KEYWORD = """ -- `scaled`: apply scale and offset as `x * scale + offset`. `true` by default. +- `scaled`: apply scale and offset as `x * scale + offset` where + `scale` and/or `offset` are found in file metadata. 
`true` by default. This is common where data has been convert to e.g. UInt8 to save disk space. - To ignore `scale` and `offset` metadata, use `scaled=false`. If `scale` and - Note: `offset` are `1.0` and `0.0` they will be ignored and the original type will - be used even when `scaled=true`. This is because these values may be fallback - defaults and we do not want to convert every `Real` array to larger `Float64` values. + To ignore `scale` and `offset` metadata, use `scaled=false`. + Note 1: If `scale` and `offset` are `1.0` and `0.0` they will be ignored and the + original type will be used even when `scaled=true`. This is because these values + may be fallback defaults and we do not want to convert every `Real` array to larger + `Float64` values. + Note 2: `raw=true` will ignore `scaled` and `missingval` and return + the raw values. """ const COERCE_KEYWORD = """ - `coerce`: where `scale` and/or `offset` are present during `setindex!` to disk, - coerce values to the disk type. `convert` is the default, but `round`, `trunc` or - or `ceil` may be needed where the values are not exact. + coerce values to the element type used on dist. `convert` is the default, + but `round`, `trunc` or or `ceil` or other functions with `f(::Type{T}, x)` + signature may be needed where the values are not exact. """ const MISSINGVAL_KEYWORD = """ @@ -159,7 +176,8 @@ const MISSINGVAL_KEYWORD = """ If the file has an incorrect value, we can manually define the transformation as a pair like `correct_value => missing` or `correct_value => NaN`. `correct_value => correct_value` will keep remove the overhead of changing it. - When `raw=true` is set, `missingval` is not changed from the value specified in the file. + Note: When `raw=true` is set, `missingval` is not changed from the value specified + in the file. """ const WRITE_MISSINGVAL_KEYWORD = """ @@ -169,26 +187,6 @@ const WRITE_MISSINGVAL_KEYWORD = """ chosen base on the array element type(s). 
""" -const NAME_KEYWORD = """ -- `name`: a `Symbol` name for a Raster, which will also retrieve the - a named layer if `Raster` is used on a multi-layered file like a NetCDF. -""" - -const METADATA_KEYWORD = """ -- `metadata`: `Dict` or `Metadata` object for the array, or `NoMetadata()`. -""" - -const REFDIMS_KEYWORD = """ -- `refdims`: `Tuple of` position `Dimension`s the array was sliced from, defaulting to `()`. - Usually not needed. -""" - -const GROUP_KEYWORD = """ -- `group`: the group in the dataset where `name` can be found. Only needed for nested datasets. - A `String` or `Symbol` will select a single group. Pairs can also used to access groups - at any nested depth, i.e `group=:group1 => :group2 => :group3`. -""" - const CHUNKS_KEYWORD = """ - `chunks`: a `NTuple{N,Int}` specifying the chunk size for each dimension. To specify only specific dimensions, a Tuple of `Dimension` wrapping `Int` diff --git a/test/sources/ncdatasets.jl b/test/sources/ncdatasets.jl index fc4f18841..736746452 100644 --- a/test/sources/ncdatasets.jl +++ b/test/sources/ncdatasets.jl @@ -420,7 +420,7 @@ end end -# @testset "Single file stack" begin +@testset "Single file stack" begin @time ncstack = RasterStack(ncmulti) @testset "lazyness" begin From a60a8096f0e86378d26197023ebd85ef2efc3c23 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 12 Jan 2025 17:00:46 +0100 Subject: [PATCH 35/38] fix extension doctests --- ext/RastersArchGDALExt/gdal_source.jl | 16 ++++++++-------- ext/RastersArchGDALExt/warp.jl | 15 +++++++-------- src/extensions.jl | 6 +++--- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/ext/RastersArchGDALExt/gdal_source.jl b/ext/RastersArchGDALExt/gdal_source.jl index 99bf0cb84..1b187816b 100644 --- a/ext/RastersArchGDALExt/gdal_source.jl +++ b/ext/RastersArchGDALExt/gdal_source.jl @@ -287,16 +287,16 @@ function AG.RasterDataset(f::Function, A::AbstractRaster; missingval=Rasters.missingval(A), kw... 
) - A1 = _maybe_permute_to_gdal(A) - return _create_with_driver(filename, dims(A1), eltype; - _block_template=A1, missingval, scale, offset, verbose, kw... - ) do dataset - rds = AG.RasterDataset(dataset) - mod = RA._mod(eltype, RA.missingval(rds), scale, offset, coerce) - open(A1) do O + return open(_maybe_permute_to_gdal(A)) do O + _create_with_driver(filename, dims(A), eltype; + _block_template=A, missingval, scale, offset, verbose, kw... + ) do dataset + rds = AG.RasterDataset(dataset) + mv = RA.missingval(rds) => RA.missingval(O) + mod = RA._mod(eltype, mv, scale, offset, coerce) RA._maybe_modify(rds, mod) .= parent(O) + f(rds) end - f(rds) end end diff --git a/ext/RastersArchGDALExt/warp.jl b/ext/RastersArchGDALExt/warp.jl index 0921ea9b3..15b6b52ac 100644 --- a/ext/RastersArchGDALExt/warp.jl +++ b/ext/RastersArchGDALExt/warp.jl @@ -30,17 +30,16 @@ function _warp(A::AbstractRaster, flags::Dict; # If it does, we can just open it and use it directly. tempfile = isnothing(filename) ? nothing : tempname() * ".tif" warp_kw = isnothing(filename) || filename == "/vsimem/tmp" ? () : (; dest=filename) - mv = if missingval isa Pair - missingval[1] - else - missingval isa eltype(A) ? missingval : Rasters._type_missingval(eltype(A)) - end + mv_pair = RA._write_missingval_pair(A1, missingval; + verbose=false, eltype=eltype(A1), metadata=metadata(A) + ) # We really need a missingval for `warp`, as it may rotate and add missing values - out = AG.Dataset(A1; filename=tempfile, missingval=mv, kw...) do dataset + out = AG.Dataset(A1; filename=tempfile, missingval=mv_pair[1], kw...) do dataset AG.gdalwarp([dataset], flagvect; warp_kw...) 
do warped - mv1, mv2 = RA._read_missingval_pair(warped, NoMetadata(), missingval) # Read the raster lazily, dropping Band if there is none in `A` - raster = Raster(warped; lazy=true, dropband=!hasdim(A, Band()), name, missingval=mv1 => mv2) + raster = Raster(warped; + lazy=true, dropband=!hasdim(A, Band()), name, missingval=mv_pair + ) # Either read the MEM dataset to an Array, or keep a filename base raster lazy return isnothing(filename) ? read(raster) : raster end diff --git a/src/extensions.jl b/src/extensions.jl index c373f456a..1436862f8 100644 --- a/src/extensions.jl +++ b/src/extensions.jl @@ -128,7 +128,7 @@ This simply resamples the array with the `:tr` (output file resolution) and `:r` flags, giving us a pixelated version: ```jldoctest -using Rasters, RasterDataSources, Plots +using Rasters, ArchGDAL, RasterDataSources, Plots A = Raster(WorldClim{Climate}, :prec; month=1) a = plot(A) @@ -181,7 +181,7 @@ where each value in the raster encodes the area of the cell (in meters by defaul ## Example ```julia -using Rasters, ArchGDAL, Rasters.Lookups +using Rasters, Proj, Rasters.Lookups xdim = X(Projected(90.0:10.0:120; sampling=Intervals(Start()), crs=EPSG(4326))) ydim = Y(Projected(0.0:10.0:50; sampling=Intervals(Start()), crs=EPSG(4326))) myraster = rand(xdim, ydim) @@ -264,7 +264,7 @@ using Rasters, Rasters.Lookups, Proj, StatsBase xdim = X(Projected(90.0:10.0:120; sampling=Intervals(Start()), crs=EPSG(4326))) ydim = Y(Projected(0.0:10.0:50; sampling=Intervals(Start()), crs=EPSG(4326))) myraster = rand(xdim, ydim) -Rasters.sample(myraster, 5; weights = cellarea(myraster)) +Rasters.sample(myraster, 5; weights=cellarea(myraster)) # output From 55f4f167f30fbc04071b84fb0b5c731fc80b8cf0 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 12 Jan 2025 19:15:38 +0100 Subject: [PATCH 36/38] fix warp missingval --- ext/RastersArchGDALExt/warp.jl | 2 +- src/extensions.jl | 1 + src/modifieddiskarray.jl | 11 ++++++++-- test/warp.jl | 37 
++++++++++++++++++++++------------ 4 files changed, 35 insertions(+), 16 deletions(-) diff --git a/ext/RastersArchGDALExt/warp.jl b/ext/RastersArchGDALExt/warp.jl index 15b6b52ac..944a0766d 100644 --- a/ext/RastersArchGDALExt/warp.jl +++ b/ext/RastersArchGDALExt/warp.jl @@ -31,7 +31,7 @@ function _warp(A::AbstractRaster, flags::Dict; tempfile = isnothing(filename) ? nothing : tempname() * ".tif" warp_kw = isnothing(filename) || filename == "/vsimem/tmp" ? () : (; dest=filename) mv_pair = RA._write_missingval_pair(A1, missingval; - verbose=false, eltype=eltype(A1), metadata=metadata(A) + verbose=false, eltype=eltype(A1), metadata=metadata(A), required=true ) # We really need a missingval for `warp`, as it may rotate and add missing values out = AG.Dataset(A1; filename=tempfile, missingval=mv_pair[1], kw...) do dataset diff --git a/src/extensions.jl b/src/extensions.jl index 1436862f8..61d90a8e8 100644 --- a/src/extensions.jl +++ b/src/extensions.jl @@ -114,6 +114,7 @@ Run `using ArchGDAL` to make this method available. # Keywords +$MISSINGVAL_KEYWORD $FILENAME_KEYWORD $SUFFIX_KEYWORD - `missingval`: the missing value to use during warping, will default to diff --git a/src/modifieddiskarray.jl b/src/modifieddiskarray.jl index 9260c478c..93bddd210 100644 --- a/src/modifieddiskarray.jl +++ b/src/modifieddiskarray.jl @@ -201,10 +201,17 @@ _maybe_modify(var, m::Mod; kw...) = ModifiedDiskArray(var, m; kw...) _maybe_modify(var, ::NoMod; kw...) = var _write_missingval_pair(A, missingval::Pair; kw...) = missingval -function _write_missingval_pair(A, missingval; verbose=true, eltype, metadata=metadata(A))::Pair +function _write_missingval_pair(A, missingval; + verbose=true, eltype, metadata=metadata(A), required=false +)::Pair source_mv = Rasters.missingval(A) disk_mv = if isnothing(source_mv) || isnothing(missingval) - nothing + if required + source_mv = isnothing(source_mv) ? 
missing : source_mv + _writeable_missing(eltype; verbose) + else + nothing + end elseif isnokw(missingval) || ismissing(missingval) # See if there is a missing value in metadata md_mv = Rasters.missingval(metadata) diff --git a/test/warp.jl b/test/warp.jl index 7e8f2e73a..a2e09dd27 100644 --- a/test/warp.jl +++ b/test/warp.jl @@ -9,18 +9,29 @@ gdalpath = maybedownload(url) # test that warp actually does *something* r = Raster(gdalpath) crs_ = crs(r).val - warped = warp(r, Dict(:t_srs => "EPSG:25832"); missingval=0xff) - @test warped isa Raster - @test size(warped) == (720, 721) - # the crs is rotated so the image is rotated an all four corners should be missing - missingval(warped) === 0xff - @test warped[1, 1] === warped[1, end] === warped[end, 1] === warped[end, end] === 0xff == missingval(warped) - # now compute mean squared error of the back transformation - res = map(step, lookup(r)) - warped_back = Rasters.trim(warp(warped, Dict(:t_srs => crs_); res, missingval=0xff)) - # subtracting UInts brings us into hell -> Int - # we also need to shrink the range because of some bleed during warp - diff_ = parent(warped_back[2:end-1, 2:end-1]) .- r + @testset "default missing" begin + warped = warp(r, Dict(:t_srs => "EPSG:25832")) + @test warped isa Raster + @test size(warped) == (720, 721) + # the crs is rotated so the image is rotated an all four corners should be missing + missingval(warped) === missing + @test warped[1, 1] === warped[1, end] === warped[end, 1] === warped[end, end] === missing === missingval(warped) + end - @test sum(x -> x^2, diff_) / prod(size(diff_)) < 600 + @testset "custom missing" begin + warped = warp(r, Dict(:t_srs => "EPSG:25832"); missingval=0xff => 0xff) + @test warped isa Raster + @test size(warped) == (720, 721) + # the crs is rotated so the image is rotated an all four corners should be missing + missingval(warped) === 0xff + @test warped[1, 1] === warped[1, end] === warped[end, 1] === warped[end, end] === 0xff === missingval(warped) + 
# now compute mean squared error of the back transformation + res = map(step, lookup(r)) + warped_back = Rasters.trim(warp(warped, Dict(:t_srs => crs_); res, missingval=0xff)) + # subtracting UInts brings us into hell -> Int + # we also need to shrink the range because of some bleed during warp + diff_ = parent(warped_back[2:end-1, 2:end-1]) .- r + + @test sum(x -> x^2, diff_) / prod(size(diff_)) < 600 + end end From 87bae93c39888db40eb37a8e8f99d2664fae0de6 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Sun, 12 Jan 2025 23:32:41 +0100 Subject: [PATCH 37/38] bugfix missingval tests --- ext/RastersArchGDALExt/warp.jl | 2 +- .../RastersRasterDataSourcesExt.jl | 7 +--- src/utils.jl | 4 +-- test/resample.jl | 29 +++++++++------ test/sources/gdal.jl | 35 +++++++++++++------ test/sources/grd.jl | 14 +++++--- test/sources/ncdatasets.jl | 1 + test/sources/rasterdatasources.jl | 31 +++++++--------- 8 files changed, 70 insertions(+), 53 deletions(-) diff --git a/ext/RastersArchGDALExt/warp.jl b/ext/RastersArchGDALExt/warp.jl index 944a0766d..e8368e776 100644 --- a/ext/RastersArchGDALExt/warp.jl +++ b/ext/RastersArchGDALExt/warp.jl @@ -11,7 +11,7 @@ function warp(A::AbstractRaster, flags::Dict; filename=nothing, kw...) end end function warp(st::AbstractRasterStack, flags::Dict; filename=nothing, suffix=keys(st), kw...) - RA.mapargs((A, s) -> warp(A, flags; filename, suffix=s), st, suffix; kw...) 
+ RA.mapargs((A, s) -> warp(A, flags; filename, suffix=s, kw...), st, suffix) end function _warp(A::AbstractRaster, flags::Dict; diff --git a/ext/RastersRasterDataSourcesExt/RastersRasterDataSourcesExt.jl b/ext/RastersRasterDataSourcesExt/RastersRasterDataSourcesExt.jl index 126916606..20beabb45 100644 --- a/ext/RastersRasterDataSourcesExt/RastersRasterDataSourcesExt.jl +++ b/ext/RastersRasterDataSourcesExt/RastersRasterDataSourcesExt.jl @@ -1,12 +1,7 @@ module RastersRasterDataSourcesExt -@static if isdefined(Base, :get_extension) # julia < 1.9 - using Rasters, RasterDataSources -else - using ..Rasters, ..RasterDataSources -end +using Rasters, RasterDataSources -# using RasterDataSources: RasterDataSource using Rasters.Lookups using Rasters.Dimensions diff --git a/src/utils.jl b/src/utils.jl index 69ccd0fbb..41c11f609 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -459,9 +459,9 @@ _unwrap(::Val{X}) where X = X _unwrap(x) = x # Map filename suffix over a stack -function mapargs(f, st::AbstractRasterStack, args...) +function mapargs(f, st::AbstractRasterStack, args...; kw...) layers = map(values(st), args...) do A, mappedargs... - f(A, mappedargs...) + f(A, mappedargs...; kw...) 
end return DD.rebuild_from_arrays(st, Tuple(layers)) end diff --git a/test/resample.jl b/test/resample.jl index abddd8778..d845f4b16 100644 --- a/test/resample.jl +++ b/test/resample.jl @@ -113,7 +113,7 @@ include(joinpath(dirname(pathof(Rasters)), "../test/test_utils.jl")) @testset "resample eltype propagates" begin r = Raster(rand(UInt8, X(1:10), Y(1:10))) r1 = resample(r; to=r) - @test eltype(r1) == UInt8 + @test eltype(r1) == Union{UInt8,Missing} end @testset "dimensions matcha after resampling with only `to`" begin @@ -133,23 +133,32 @@ include(joinpath(dirname(pathof(Rasters)), "../test/test_utils.jl")) @test dims(resampled_3D, Z) == Z(1:2) end - mv = Rasters.nokw - for mv in (nothing, missing, Rasters.nokw) + mv = 0xff + for mv in (0xff, missing, Rasters.nokw) # Resample cea.tif using resample cea = Raster(raster_path; missingval=mv, name=:cea) - raster_output = resample(cea; res=output_res, crs=output_crs, method, missingval=mv) - disk_output = resample(cea; res=output_res, crs=output_crs, method, missingval=mv, filename="resample.tif") - - cea_permuted = permutedims(Raster(raster_path; missingval=mv, name=:cea_permuted), (Y, X)) - permuted_output = resample(cea_permuted, output_res; missingval=mv, crs=output_crs, method) + raster_output = resample(cea; + res=output_res, crs=output_crs, method, missingval=mv + ) + disk_output = resample(cea; + res=output_res, crs=output_crs, method, missingval=mv, filename="resample.tif" + ) + cea_permuted = permutedims(Raster(raster_path; + missingval=mv, name=:cea_permuted), (Y, X) + ) + permuted_output = resample(cea_permuted, output_res; + missingval=mv, crs=output_crs, method + ) - AG_output1 = if isnothing(mv) + AG_output1 = if mv === 0xff AG_output else replace(AG_output, 0xff => missing) end # Compare ArchGDAL, resample and permuted resample - @test all(AG_output1 .=== raster_output .=== read(disk_output) .=== permutedims(permuted_output, (X, Y))) + @test all(AG_output1 .=== parent(raster_output) .=== + 
read(disk_output) .=== + permutedims(permuted_output, (X, Y))) @test abs(step(dims(raster_output, Y))) ≈ abs(step(dims(raster_output, X))) ≈ abs(step(dims(disk_output, X))) ≈ diff --git a/test/sources/gdal.jl b/test/sources/gdal.jl index 523962aa8..9d7731f08 100644 --- a/test/sources/gdal.jl +++ b/test/sources/gdal.jl @@ -419,6 +419,7 @@ gdalpath = maybedownload(url) @testset "to grd" begin fn = joinpath(tempdir(), tempname() * ".gri") write(fn, gdalarray; force=true) + fn = joinpath(tempdir(), tempname() * ".gri") @test (@allocations write(fn, gdalarray; force=true)) < 1e4 grdarray = Raster(fn) @test crs(grdarray) == convert(ProjString, crs(gdalarray)) @@ -519,7 +520,7 @@ gdalpath = maybedownload(url) # Handle WorldClim/ucdavis unreliability A = nothing try - A = Raster(WorldClim{Climate}, :tavg; res="10m", month=1, missingval=nothing) + A = Raster(WorldClim{Climate}, :tavg; res="10m", month=1, missingval) catch end if !isnothing(A) @@ -772,21 +773,35 @@ end end ## Resample cea.tif using resample - raster_output = resample(gdalarray, output_res; crs=output_crs, method=resample_method) - disk_output = resample(gdalarray, output_res; crs=output_crs, method=resample_method, filename="resample.tif") - stack_output = resample(gdalstack, output_res; crs=output_crs, method=resample_method) - written_stack_output = resample(gdalstack, output_res; crs=output_crs, method=resample_method, filename="resample.tif") - series_output = resample(gdalser, output_res; crs=output_crs, method=resample_method) - + raster_output = resample(gdalarray, output_res; + crs=output_crs, method=resample_method, missingval=0xff=>0xff + ) + disk_output = resample(gdalarray, output_res; + crs=output_crs, method=resample_method, filename="resample.tif", missingval=0xff=>0xff + ) + stack_output = resample(gdalstack, output_res; + crs=output_crs, method=resample_method, missingval=0xff=>0xff + ) + written_stack_output = resample(gdalstack, output_res; + crs=output_crs, method=resample_method, 
filename="resample.tif", missingval=0xff=>0xff + ) + series_output = resample(gdalser, output_res; + crs=output_crs, method=resample_method, missingval=0xff=>0xff + ) extradim_raster = cat(gdalarray, gdalarray, gdalarray; dims=Z) - extradim_output = resample(extradim_raster, output_res; crs=output_crs, method=resample_method) + extradim_output = resample(extradim_raster, output_res; + crs=output_crs, method=resample_method, missingval=0xff=>0xff + ) permuted_raster = permutedims(gdalarray, (Y, X)) - permuted_output = resample(permuted_raster, output_res; crs=output_crs, method=resample_method) + permuted_output = resample(permuted_raster, output_res; + crs=output_crs, method=resample_method, missingval=0xff=>0xff + ) # Compare ArchGDAL, resample and permuted resample @test AG_output == - raster_output == disk_output == + raster_output == + disk_output == stack_output[:a] == written_stack_output[:a] == series_output[1] == diff --git a/test/sources/grd.jl b/test/sources/grd.jl index 940f8aa1c..ca3c72425 100644 --- a/test/sources/grd.jl +++ b/test/sources/grd.jl @@ -4,13 +4,13 @@ using DiskArrays import NCDatasets, ArchGDAL using Rasters: FileArray, GRDsource, GDALsource, metadata, trim -testpath = joinpath(dirname(pathof(Rasters)), "../test/") +testpath = joinpath(dirname(pathof(Rasters)), "..", "test") include(joinpath(testpath, "test_utils.jl")) const DD = DimensionalData maybedownload("https://raw.githubusercontent.com/rspatial/raster/master/inst/external/rlogo.grd", "rlogo.grd") maybedownload("https://github.com/rspatial/raster/raw/master/inst/external/rlogo.gri", "rlogo.gri") -stem = joinpath(testpath, "data/rlogo") +stem = joinpath(testpath, "data", "rlogo") @test isfile(stem * ".grd") @test isfile(stem * ".gri") grdpath = stem * ".gri" @@ -217,8 +217,10 @@ grdpath = stem * ".gri" # 1 band is added again on save @test size(saved) == size(grdarray[Band(1)]) @test parent(saved) == parent(grdarray[Band(1)]) + filename2 = tempname() * ".gri" write(filename2, 
grdarray; force=true, verbose=false) - # @test_broken (@allocations write(filename2, grdarray; force=true, verbose=false)) < 3e3 + filename2 = tempname() * ".gri" + @test (@allocations write(filename2, grdarray; force=true, verbose=false)) < 3e3 end @testset "3d with subset" begin @@ -244,8 +246,10 @@ grdpath = stem * ".gri" @test all(parent(saved) .=== parent(geoA)) @test saved isa typeof(geoA) @test parent(saved) == parent(geoA) + filename = tempname() * ".grd" write(filename, GRDsource(), geoA; force = true) - # @test_broken (@allocations write(filename, GRDsource(), geoA; force = true)) < 3e3 + filename = tempname() * ".grd" + @test (@allocations write(filename, GRDsource(), geoA; force = true)) < 3e3 end @testset "to netcdf" begin @@ -458,7 +462,7 @@ end end @testset "Grd series" begin - grdpath2 = stem * "2" * ".gri" + grdpath2 = joinpath(tempdir(), tempname() * ".gri") write(grdpath2, 2 .* Raster(grdpath); force=true) Raster(grdpath) .* 2 == Raster(grdpath2) eager_grdseries = RasterSeries([grdpath, grdpath2], (Ti,); mappedcrs=EPSG(4326)) diff --git a/test/sources/ncdatasets.jl b/test/sources/ncdatasets.jl index 736746452..4c49ad607 100644 --- a/test/sources/ncdatasets.jl +++ b/test/sources/ncdatasets.jl @@ -373,6 +373,7 @@ end nccleaned = replace_missing(ncarray[Ti(1)], -9999.0) fn = tempname() * ".gri" write(fn, nccleaned; force=true) + fn = tempname() * ".gri" @test (@allocations write(fn, nccleaned; force=true)) < 1e4 grdarray = Raster(fn, missingval=nothing); @test crs(grdarray) == convert(ProjString, EPSG(4326)) diff --git a/test/sources/rasterdatasources.jl b/test/sources/rasterdatasources.jl index 5c3049f32..f1b9afdab 100644 --- a/test/sources/rasterdatasources.jl +++ b/test/sources/rasterdatasources.jl @@ -2,13 +2,14 @@ using Rasters, RasterDataSources, Test, Dates, ArchGDAL, NCDatasets, Proj # Too big to test on CI # if !haskey(ENV, "CI") -# @testset "load WorldClim Weather" begin -# # Weather time-series -# dates = (Date(2001), Date(2002)) -# ser = 
RasterSeries(WorldClim{Weather}, (:prec,); date=dates) -# ser[Date(2001, 1)][:prec] -# A = Raster(WorldClim{Weather}, :prec; date=DateTime(2001, 05), mappedcrs=EPSG(4326)) -# end + # @testset "load WorldClim Weather" begin + # # Weather time-series + # ser = RasterSeries(WorldClim{Weather}, (:prec,); + # date=(Date(2001), Date(2002)), missingval=NaN32 + # ) + # @test all(ser[At(Date(2001, 1))].prec .=== + # Raster(WorldClim{Weather}, :prec; date=DateTime(2001), missingval=NaN32)) + # end # end @testset "load WorldClim Climate" begin @@ -37,7 +38,7 @@ end st = RasterStack(WorldClim{Future{BioClim, CMIP6, GFDL_ESM4, SSP370}}, (1, 2); date = Date(2050), res = "10m", lazy=true, - missingval=Inf, + missingval=Inf32, crs=nothing, mappedcrs=EPSG(4326), ) @@ -64,16 +65,9 @@ end @test st isa RasterStack{(:bio1, :bio2),@NamedTuple{bio1::UInt16, bio2::UInt16}} @test st.bio2 isa Raster{UInt16,2} - # Allow forcing keywords - st = RasterStack(CHELSA{BioClim}, (1, 2); - lazy=true, - missingval= Int16(9999), - metadata=Rasters.NoMetadata(), - crs=nothing, - mappedcrs=EPSG(4326), - ) - @test missingval(st) === -9999.0 - @test missingval(st.bio1) == -9999.0 + st = RasterStack(CHELSA{BioClim}, (1, 2); lazy=true) + + @test missingval(st) === missingval(st.bio1) === nothing @test metadata(st) == Rasters.NoMetadata() end @@ -106,7 +100,6 @@ end @test crs(st[:s0_pct]) == EPSG(4326) dates = DateTime(2019, 10, 19), DateTime(2021, 11, 20) s = RasterSeries(ALWB{Values,Day}, (:s0_pct, :ss_pct); date=dates, lazy=true) - s[1] @test A isa Raster @test st isa RasterStack @test s isa RasterSeries From 1132988b1f3f1bbab1486c160fa508f58ea960c5 Mon Sep 17 00:00:00 2001 From: Rafael Schouten Date: Mon, 13 Jan 2025 00:05:59 +0100 Subject: [PATCH 38/38] remove ext --- .../GeometryOpsDimensionalDataExt.jl | 20 ------------------- 1 file changed, 20 deletions(-) delete mode 100644 ext/GeometryOpsDimensionalDataExt/GeometryOpsDimensionalDataExt.jl diff --git 
a/ext/GeometryOpsDimensionalDataExt/GeometryOpsDimensionalDataExt.jl b/ext/GeometryOpsDimensionalDataExt/GeometryOpsDimensionalDataExt.jl deleted file mode 100644 index e0132a5df..000000000 --- a/ext/GeometryOpsDimensionalDataExt/GeometryOpsDimensionalDataExt.jl +++ /dev/null @@ -1,20 +0,0 @@ -module GeometryOpsDimensionalDataExt - -import DimensionalData as DD -import GeometryOps as GO -import GeoInterface as GI - -function GO.polygonize(A::DD.AbstractDimArray; dims=(DD.X(), DD.Y()), crs=GI.crs(A), kw...) - lookups = DD.lookup(A, dims) - bounds_vecs = if DD.isintervals(lookups) - map(DD.intervalbounds, lookups) - else - @warn "`polygonsize` is not possible for `Points` sampling, as polygons cover space by definition. Treating as `Intervals`, but this may not be appropriate" - map(lookups) do l - Dd.intervalbounds(DD.set(l, DD.Intervals())) - end - end - GO.polygonize(bounds_vecs..., DD.AbstractDimArray; crs, kw...) -end - -end