Refactoring. (#59)

* Clean up chain element code and tests. * Clean up filtrations and cubical * Clean up ripserer tests * Clean up prime field * Clean up reductionmatrix * Small fix in recipes * Handle interval postprocessing, test custom filtrations * bump version, document postprocess_interval
mtsch · Jun 20, 2020 · c6e4eeb · c6e4eeb · mtsch · Jun 20, 2020
1 parent 09ca41e
commit c6e4eeb
Show file tree

Hide file tree

Showing 19 changed files with 673 additions and 576 deletions.
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "Ripserer"
 uuid = "aa79e827-bd0b-42a8-9f10-2b302677a641"
 authors = ["mtsch <[email protected]>"]
-version = "0.10.0"
+version = "0.11.0"
 
 [deps]
 Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"

diff --git a/docs/src/api/filtrations.md b/docs/src/api/filtrations.md
@@ -32,6 +32,10 @@ Ripserer.threshold(::Ripserer.AbstractFiltration)
 Ripserer.simplex_type
 ```
 
+```@docs
+Ripserer.postprocess_interval
+```
+
 ```@docs
 Ripserer.AbstractRipsFiltration
 ```

diff --git a/src/abstractfiltration.jl b/src/abstractfiltration.jl
@@ -1,23 +1,23 @@
 """
-    AbstractFiltration{T, S<:AbstractSimplex}
+    AbstractFiltration
 
 A filtration is used to find the edges in filtration and to determine diameters of
 simplices.
 
-`T` is the distance type, accessible by `dist_type` and `S` is the simplex type.
-
 # Interface
 
 * [`n_vertices(::AbstractFiltration)`](@ref)
 * [`edges(::AbstractFiltration)`](@ref)
 * [`diam(::AbstractFiltration, vertices)`](@ref)
 * [`diam(::AbstractFiltration, ::AbstractSimplex, ::Any, ::Any)`](@ref) - only used when
   [`simplex_type`](@ref) is an [`IndexedSimplex`](@ref).
-* [`birth(::AbstractFiltration, v)`](@ref) - optional, defaults to returning `zero(T)`.
-* [`threshold(::AbstractFiltration)`](@ref) - optional, defaults to returning `missing`.
 * [`simplex_type(::AbstractFiltration, dim)`](@ref)
+* [`birth(::AbstractFiltration, v)`](@ref) - optional, defaults to returning `zero(T)`.
+* [`threshold(::AbstractFiltration)`](@ref) - optional, defaults to returning `Inf`.
+* [`postprocess_interval(::AbstractFiltration, ::Any)`](@ref) - optional
+  postprocessing function that is applied to each interval in resulting persistence diagram.
 """
-abstract type AbstractFiltration{T, S<:AbstractSimplex} end
+abstract type AbstractFiltration end
 
 function Base.show(io::IO, flt::AbstractFiltration)
     print(io, typeof(flt), "(n_vertices=$(n_vertices(flt)))")
@@ -32,7 +32,6 @@ simplex_type(::AbstractFiltration, dim)
 
 vertex_type(flt::AbstractFiltration) = simplex_type(flt, 0)
 edge_type(flt::AbstractFiltration) = simplex_type(flt, 1)
-dist_type(::AbstractFiltration{T}) where T = T
 
 """
     n_vertices(filtration::AbstractFiltration)
@@ -72,12 +71,20 @@ diam(::AbstractFiltration, ::AbstractSimplex, ::Any, ::Any)
 
 Get the birth time of vertex `v` in filtration. Defaults to 0.
 """
-birth(::AbstractFiltration{T}, _) where T = zero(T)
+birth(::AbstractFiltration, _) = false # false is a strong zero.
 
 """
     threshold(::AbstractFiltration)
 
 Get the threshold of filtration. This is the maximum diameter a simplex in the filtration
-can have. Used only for placing the infinity line in plotting.
+can have. Used only for placing the infinity line in plotting. Defaults to `missing`.
+"""
+threshold(::AbstractFiltration) = Inf
+
+"""
+    postprocess_diagram(::AbstractFiltration, interval)
+
+This function is called on each resulting persistence interval. The default implementation
+does nothing.
 """
-threshold(::AbstractFiltration) = missing
+postprocess_interval(::AbstractFiltration, interval) = interval
diff --git a/src/chainelement.jl b/src/chainelement.jl
@@ -7,12 +7,13 @@ more efficient way to store a simplex and coefficient than as two fields.
 An `AbstractChainElement{S, F}` behaves like a `S` when doing comparisons and like a `F`
 when doing arithmetic.
 
-Coefficient values must be in a field and must support `+`, `-`, `*`, `/`, `inv`, `zero`,
+Coefficient values must be in a field - they must support `+`, `-`, `*`, `/`, `inv`, `zero`,
 `one` and `iszero`.
 
 # Interface
 
-* `AbstractChainElement{S, F}(sx::S)` - default to `coefficient` equal to `sign(sx)`.
+* `AbstractChainElement{S, F}(sx::S)` - default to element with `coefficient` equal to
+  `sign(sx)`.
 * `AbstractChainElement{S, F}(::S, coefficient)` - convert `coefficient` to `F` if necessary.
 * `simplex(::ChainElement)`
 * `coefficient(::ChainElement)`
@@ -26,23 +27,23 @@ for op in (:+, :-)
     end
 end
 for op in (:oneunit, :zero, :-, :+)
-    @eval (Base.$op)(ce::C) where C<:AbstractChainElement =
-        C(simplex(ce), $op(coefficient(ce)))
+    @eval (Base.$op)(elem::C) where C<:AbstractChainElement =
+        C(simplex(elem), $op(coefficient(elem)))
 end
-function Base.:*(ce::C, x::F) where {F<:Number, C<:AbstractChainElement{<:Any, F}}
-    return C(simplex(ce), coefficient(ce) * x)
+function Base.:*(elem::C, x::F) where {F<:Number, C<:AbstractChainElement{<:Any, F}}
+    return C(simplex(elem), coefficient(elem) * x)
 end
-function Base.:*(x::F, ce::C) where {F<:Number, C<:AbstractChainElement{<:Any, F}}
-    return C(simplex(ce), x * coefficient(ce))
+function Base.:*(x::F, elem::C) where {F<:Number, C<:AbstractChainElement{<:Any, F}}
+    return C(simplex(elem), x * coefficient(elem))
 end
-function Base.:/(ce::C, x::F) where {F<:Number, C<:AbstractChainElement{<:Any, F}}
-    return C(simplex(ce), coefficient(ce) / x)
+function Base.:/(elem::C, x::F) where {F<:Number, C<:AbstractChainElement{<:Any, F}}
+    return C(simplex(elem), coefficient(elem) / x)
 end
 Base.one(::Type{<:AbstractChainElement{<:Any, F}}) where F = one(F)
 Base.one(::AbstractChainElement{<:Any, F}) where F = one(F)
-Base.iszero(ce::AbstractChainElement) = iszero(coefficient(ce))
+Base.iszero(elem::AbstractChainElement) = iszero(coefficient(elem))
 
-Base.hash(ce::AbstractChainElement, u::UInt64) = hash(simplex(ce), u)
+Base.hash(elem::AbstractChainElement, u::UInt64) = hash(simplex(elem), u)
 function Base.:(==)(ce1::AbstractChainElement, ce2::AbstractChainElement)
     return simplex(ce1) == simplex(ce2)
 end
@@ -51,25 +52,41 @@ function Base.isless(ce1::AbstractChainElement, ce2::AbstractChainElement)
 end
 
 # Make chain elements useful when they come out as representatives.
-diam(ce::AbstractChainElement) = diam(simplex(ce))
-Base.sign(ce::AbstractChainElement) = sign(coefficient(ce))
-vertices(ce::AbstractChainElement) = vertices(simplex(ce))
-
-function Base.show(io::IO, ce::AbstractChainElement)
-    print(io, simplex(ce), " => ", coefficient(ce))
+diam(elem::AbstractChainElement) = diam(simplex(elem))
+index(elem::AbstractChainElement) = index(simplex(elem))
+Base.sign(elem::AbstractChainElement) = sign(coefficient(elem))
+vertices(elem::AbstractChainElement) = vertices(simplex(elem))
+
+# Make chain elements kinda similar to `Pair`s
+function Base.getindex(elem::AbstractChainElement, i)
+    if i == 1
+        return simplex(elem)
+    elseif i == 2
+        return coefficient(elem)
+    else
+        throw(BoundsError(elem, i))
+    end
 end
 
-function Base.convert(::Type{C}, simplex::S) where {S, C<:AbstractChainElement{S}}
-    C(simplex)
+Base.eltype(::Type{<:AbstractChainElement{S, F}}) where {S, F} = Union{S, F}
+Base.iterate(elem::AbstractChainElement, i=1) = i > 2 ? nothing : (elem[i], i + 1)
+Base.firstindex(::AbstractChainElement) = 1
+Base.lastindex(::AbstractChainElement) = 2
+Base.length(::AbstractChainElement) = 2
+
+function Base.show(io::IO, elem::AbstractChainElement)
+    print(io, simplex(elem), " => ", coefficient(elem))
 end
 
+Base.convert(::Type{C}, simplex::S) where {S, C<:AbstractChainElement{S}} = C(simplex)
+
 """
     chain_element_type(simplex, coefficient)
 
 Get the type of `AbstractChainElement` that is to be used for packing `simplex` and
 `coefficient`.
 """
-chain_element_type(simplex::S, coefficient::F) where {S, F} = chain_element_type(S, F)
+chain_element_type(::S, ::F) where {S, F} = chain_element_type(S, F)
 
 """
     ChainElement{S<:AbstractSimplex, F} <: AbstractChainElement{S, F}
@@ -85,16 +102,19 @@ struct ChainElement{S<:AbstractSimplex, F} <: AbstractChainElement{S, F}
     end
 end
 
-# Interface specification.
-simplex(ce::ChainElement) = ce.simplex
-coefficient(ce::ChainElement) = ce.coefficient
+simplex(elem::ChainElement) = elem.simplex
+coefficient(elem::ChainElement) = elem.coefficient
 chain_element_type(::Type{S}, ::Type{F}) where {S, F} = ChainElement{S, F}
 
-# TODO: assert F is less than say 8 bits?
-# TODO: check that nothing overflows - maybe in `ripserer`.
+"""
+    PackedElement{S<:IndexedSimplex, F<:Mod} <: AbstractChainElement{S, F}
+
+Like `ChainElement` for `Simplex` and `Mod`. Packs the coefficient value into top `M` bits
+of index.
+"""
 struct PackedElement{S<:IndexedSimplex, F<:Mod, M, U, T} <: AbstractChainElement{S, F}
-    index_coef ::U
-    diam       ::T
+    index_coef::U
+    diam::T
 
     function PackedElement{S, F, M, U, T}(
         simplex::S, coefficient=one(F)
@@ -110,22 +130,22 @@ end
 
 """
     n_bits(M)
+
 Get numer of bits needed to represent number mod `M`.
 """
-@pure n_bits(M::Int) = floor(Int, log2(M-1)) + 1
+@pure n_bits(M::Int) = floor(Int, log2(M - 1)) + 1
 
 function simplex(pe::PackedElement{S, <:Any, M, U}) where {S, M, U}
     mask = typemax(U) << n_bits(M) >> n_bits(M)
     return S(pe.index_coef & mask, pe.diam)
 end
-function coefficient(ce::PackedElement{<:Any, F, M, U}) where {F, U, M}
-    return F(ce.index_coef >> (sizeof(U) * 8 - n_bits(M)), check_mod=false)
+function coefficient(elem::PackedElement{<:Any, F, M, U}) where {F, U, M}
+    return F(elem.index_coef >> (sizeof(U) * 8 - n_bits(M)), false)
 end
 
 function chain_element_type(
     ::Type{S}, ::Type{F}
 ) where {M, I, T, S<:Simplex{<:Any, T, I}, F<:Mod{M}}
-
     if n_bits(M) ≤ 8
         return PackedElement{S, F, M, unsigned(I), T}
     else

diff --git a/src/cubical.jl b/src/cubical.jl
@@ -36,56 +36,57 @@ end
     end
 end
 
-function Base.show(io::IO, ::MIME"text/plain", csx::Cubelet{D, T, I}) where {D, T, I}
-    print(io, D, "-dim Cubelet", (index(csx), diam(csx)))
-    print(io, ":\n  $(sign(csx) == 1 ? '+' : '-')$(vertices(csx))")
+function Base.show(io::IO, ::MIME"text/plain", cube::Cubelet{D, T, I}) where {D, T, I}
+    print(io, D, "-dim Cubelet", (index(cube), diam(cube)))
+    print(io, ":\n  $(sign(cube) == 1 ? '+' : '-')$(vertices(cube))")
 end
 
-function Base.show(io::IO, csx::Cubelet{D, M}) where {D, M}
-    print(io, "Cubelet{$D}($(sign(csx) == 1 ? '+' : '-')$(vertices(csx)), $(diam(csx)))")
+function Base.show(io::IO, cube::Cubelet{D, M}) where {D, M}
+    print(io, "Cubelet{$D}($(sign(cube) == 1 ? '+' : '-')$(vertices(cube)), $(diam(cube)))")
 end
 
-index(csx::Cubelet) = csx.index
-diam(csx::Cubelet) = csx.diam
+index(cube::Cubelet) = cube.index
+diam(cube::Cubelet) = cube.diam
 coface_type(::Type{Cubelet{D, T, I}}) where {D, T, I} = Cubelet{D + 1, T, I}
 face_type(::Type{Cubelet{D, T, I}}) where {D, T, I} = Cubelet{D - 1, T, I}
 
 # @generated used because inference doesn't work well for 2^D
-@generated function vertices(csx::Cubelet{D, <:Any, I}) where {D, I}
-    return :(vertices(index(csx), Val($(2^D))))
+@generated function vertices(cube::Cubelet{D, <:Any, I}) where {D, I}
+    return :(vertices(index(cube), Val($(2^D))))
 end
 
-Base.lastindex(sx::Cubelet{D}) where D = 2^D
-Base.size(sx::Cubelet{D}) where D = (2^D,)
-
+Base.lastindex(::Cubelet{D}) where D = 2^D
+Base.size(::Cubelet{D}) where D = (2^D,)
 
 """
-    Cubical{T, N} <: AbstractFiltration{T, <:Cubelet{0, T}}
+    Cubical{I<:Signed, T} <: AbstractFiltration
 
-`Cubical` is used to compute sublevel persistent homology on `N`-dimensional
-images, which are of type `AbstractArray{T, N}`.
+`Cubical` is used to compute sublevel persistent homology on `N`-dimensional images, which
+are of type `AbstractArray{T, N}`. `I` is the integer type used to represent cubelet
+indices. Set it to a large value for larger images.
 
 # Constructor
 
-    Cubical(::AbstractArray{T, N})
+* `Cubical{I}(::AbstractArray{T, N})`: `I` defaults to `Int64`.
 """
-struct Cubical{I, T, N, A<:AbstractArray{T, N}} <: AbstractFiltration{T, Cubelet}
+struct Cubical{I<:Signed, T, A<:AbstractArray{T}} <: AbstractFiltration
     data::A
     threshold::T
 end
 
 function Cubical(data)
     return Cubical{Int}(data)
 end
-function Cubical{I}(data::AbstractArray{T, N}) where {I, T, N}
-    return Cubical{I, T, N, typeof(data)}(data, maximum(data))
+function Cubical{I}(data::AbstractArray{T}) where {I, T}
+    return Cubical{I, T, typeof(data)}(data, maximum(data))
 end
 
 n_vertices(cf::Cubical) = length(cf.data)
 threshold(cf::Cubical) = cf.threshold
 birth(cf::Cubical, i) = cf.data[i]
 simplex_type(::Cubical{I, T}, dim) where {I, T} = Cubelet{dim, T, I}
 
+dim(cf::Cubical) = length(size(cf.data))
 Base.CartesianIndices(cf::Cubical) = CartesianIndices(cf.data)
 Base.LinearIndices(cf::Cubical) = LinearIndices(cf.data)
 
@@ -101,13 +102,13 @@ function diam(cf::Cubical{<:Any, T}, vertices) where {T}
     return res
 end
 
-function edges(cf::Cubical{I, <:Any, N}) where {I, N}
+function edges(cf::Cubical{I, <:Any}) where {I}
     E = edge_type(cf)
     result = E[]
     for u_lin in eachindex(cf.data)
         u_car = CartesianIndices(cf)[u_lin]
-        for dim in 1:N
-            v_car = u_car + CartesianIndex{N}(ntuple(i -> i == dim ? 1 : 0, N))
+        for d in 1:dim(cf)
+            v_car = u_car + CartesianIndex{dim(cf)}(ntuple(i -> i == d ? 1 : 0, dim(cf)))
             if v_car in CartesianIndices(cf)
                 v_lin = LinearIndices(cf)[v_car]
                 push!(result, E(
@@ -120,18 +121,19 @@ function edges(cf::Cubical{I, <:Any, N}) where {I, N}
 end
 
 # coboundary ============================================================================= #
-struct CubeletCoboundary{A, N, I, C<:Cubelet, F<:Cubical{I, <:Any, N}, K}
+struct CubeletCoboundary{A, I, N, C<:Cubelet, F<:Cubical{I}, K}
     filtration::F
     cubelet::C
     vertices::SVector{K, CartesianIndex{N}}
 end
 
 function CubeletCoboundary{A}(
     filtration::F, cubelet::C
-) where {A, N, I, D, F<:Cubical{I, <:Any, N}, C<:Cubelet{D, <:Any, I}}
-    K = 2^D
+) where {A, I, D, F<:Cubical{I}, C<:Cubelet{D, <:Any, I}}
+    K = length(cubelet)
+    N = dim(filtration)
     vxs = map(v -> CartesianIndices(filtration)[v], vertices(cubelet))
-    return CubeletCoboundary{A, N, I, C, F, K}(filtration, cubelet, vxs)
+    return CubeletCoboundary{A, I, N, C, F, K}(filtration, cubelet, vxs)
 end
 
 function coboundary(filtration::Cubical, cubelet::Cubelet)
@@ -150,8 +152,8 @@ function all_equal_in_dim(dim, vertices)
 end
 
 function Base.iterate(
-    cc::CubeletCoboundary{A, N, I, C}, (dim, dir)=(one(I), one(I))
-) where {A, N, I, C}
+    cc::CubeletCoboundary{A, I, N, C}, (dim, dir)=(one(I), one(I))
+) where {A, I, N, C}
     # If not all indices in a given dimension are equal, we can't create a coface by
     # expanding in that direction.
     while true
@@ -179,17 +181,17 @@ function Base.iterate(
     end
 end
 
-struct CubeletBoundary{D, I, C<:Cubelet, N, F<:Cubical{I, <:Any, N}, K}
+struct CubeletBoundary{D, I, C<:Cubelet, N, F<:Cubical{I}, K}
     filtration::F
     cubelet::C
     vertices::SVector{K, CartesianIndex{N}}
 end
 
 function CubeletBoundary(
     filtration::F, cubelet::C
-) where {N, D, I, F<:Cubical{I, <:Any, N}, C<:Cubelet{D}}
-
-    K = 2^D
+) where {D, I, F<:Cubical{I}, C<:Cubelet{D}}
+    K = length(cubelet)
+    N = dim(filtration)
     vxs = map(v -> CartesianIndices(filtration)[v], vertices(cubelet))
     return CubeletBoundary{D, I, C, N, F, K}(filtration, cubelet, vxs)
 end