Merge pull request #69 from biaslab/develop-unitvector
Optimization of ReactiveMP calculations
bvdmitri authored Jan 11, 2022
2 parents ca8ceef + 0fd9e6f commit 24e55a0
Showing 86 changed files with 3,359 additions and 2,002 deletions.
3,042 changes: 1,542 additions & 1,500 deletions demo/Autoregressive model.ipynb

Large diffs are not rendered by default.

7 changes: 4 additions & 3 deletions src/ReactiveMP.jl
@@ -29,11 +29,12 @@ include("pipeline.jl")

include("actors/score.jl")

include("algebra/helpers.jl")
include("algebra/cholinv.jl")
include("algebra/cholesky.jl")
include("algebra/companion_matrix.jl")
include("algebra/permutation_matrix.jl")
include("algebra/correction.jl")
include("algebra/helpers.jl")
include("algebra/permutation_matrix.jl")
include("algebra/standard_basis_vector.jl")

include("approximations.jl")
include("approximations/gausshermite.jl")
111 changes: 111 additions & 0 deletions src/algebra/cholesky.jl
@@ -0,0 +1,111 @@
export cholinv, cholsqrt

using LinearAlgebra
using PositiveFactorizations

import LinearAlgebra: BlasInt

cholinv(x) = inv(fastcholesky(x))
cholinv(x::Diagonal) = Diagonal(inv.(diag(x)))
cholinv(x::Real) = inv(x)

function cholinv(x::AbstractMatrix{T}) where { T <: LinearAlgebra.BlasFloat }
    y = fastcholesky(x)
    LinearAlgebra.inv!(y)
    return y.factors
end

cholsqrt(x) = Matrix(fastcholesky(x).L)
cholsqrt(x::Diagonal) = Diagonal(sqrt.(diag(x)))
cholsqrt(x::Real) = sqrt(x)

chollogdet(x) = logdet(fastcholesky(x))
chollogdet(x::Diagonal) = logdet(x)
chollogdet(x::Real) = logdet(x)

function cholinv_logdet(x)
    # calculate the Cholesky decomposition
    y = fastcholesky(x)

    # return inverse and log-determinant
    return inv(y), logdet(y)
end

function cholinv_logdet(x::AbstractMatrix{T}) where { T <: LinearAlgebra.BlasFloat }
    # calculate the Cholesky decomposition
    y = fastcholesky(x)

    # calculate the log-determinant of the Cholesky decomposition
    ly = logdet(y)

    # calculate the in-place inverse of x and store it in y.factors
    LinearAlgebra.inv!(y)

    # return inverse and log-determinant
    return y.factors, ly
end
cholinv_logdet(x::Diagonal) = Diagonal(inv.(diag(x))), mapreduce(z -> log(z), +, diag(x))
cholinv_logdet(x::Real) = inv(x), log(abs(x))

function fastcholesky(mat::AbstractMatrix)
    A = copy(mat)
    C = fastcholesky!(A)
    if !isposdef(C)
        # the fast factorization failed, so fall back to a regularized
        # decomposition from PositiveFactorizations.jl
        return cholesky(PositiveFactorizations.Positive, Hermitian(mat))
    else
        return C
    end
end

function fastcholesky!(A::AbstractMatrix)
    n = LinearAlgebra.checksquare(A)
    @inbounds for col in 1:n
        # subtract the contributions of the previously processed columns
        @inbounds @simd for idx in 1:col-1
            A[col, col] -= A[col, idx]^2
        end
        # a non-positive pivot means the matrix is not positive-definite;
        # signal failure through the info field of the Cholesky object
        if A[col, col] <= 0
            return Cholesky(A, 'L', convert(BlasInt, -1))
        end
        A[col, col] = sqrt(A[col, col])

        @inbounds for row in col+1:n
            @inbounds @simd for idx in 1:col-1
                A[row, col] -= A[row, idx] * A[col, idx]
            end
            A[row, col] /= A[col, col]
        end
    end
    return Cholesky(A, 'L', convert(BlasInt, 0))
end

function fastcholesky!(A::AbstractMatrix{T}) where { T <: LinearAlgebra.BlasFloat }
    # blocked version (https://arxiv.org/pdf/1812.02056.pdf)

    # step (block) size
    s = 250

    n = LinearAlgebra.checksquare(A)
    z = 1
    @inbounds for c in 1:n

        # once a full block of `s` columns has been factorized, fold its
        # contribution into the trailing submatrix with a single BLAS call
        if c == z + s
            BLAS.gemm!('N', 'T', -one(T), view(A, c:n, z:c-1), view(A, c:n, z:c-1), one(T), view(A, c:n, c:n)) # replace with syrk! once the Julia bug has been fixed
            z = c
        end

        @inbounds for k in z:c-1
            A[c, c] -= A[c, k]^2
        end
        if A[c, c] <= 0
            return Cholesky(A, 'L', convert(BlasInt, -1))
        end
        A[c, c] = sqrt(A[c, c])
        @inbounds for i in c+1:n
            @inbounds for k in z:c-1
                A[i, c] -= A[i, k] * A[c, k]
            end
            A[i, c] /= A[c, c]
        end
    end

    return Cholesky(A, 'L', convert(BlasInt, 0))
end
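
As a side note, here is a minimal usage sketch for the helpers in this new file (hypothetical, not part of the diff; assumes `cholinv`, `cholsqrt`, `chollogdet`, `cholinv_logdet`, and `fastcholesky` are in scope). On a positive-definite matrix they should agree with the generic LinearAlgebra routines:

using LinearAlgebra

A = [4.0 1.0; 1.0 3.0]                    # symmetric positive-definite

C = fastcholesky(A)                       # same lower factor as cholesky(A)
@assert C.L ≈ cholesky(A).L

@assert cholinv(A) ≈ inv(A)               # inverse via Cholesky
@assert cholsqrt(A) * cholsqrt(A)' ≈ A    # lower-triangular square root
@assert chollogdet(A) ≈ logdet(A)         # log-determinant

Ainv, ld = cholinv_logdet(A)              # one decomposition, two results
@assert Ainv ≈ inv(A) && ld ≈ logdet(A)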
14 changes: 0 additions & 14 deletions src/algebra/cholinv.jl

This file was deleted.

85 changes: 76 additions & 9 deletions src/algebra/helpers.jl
@@ -2,9 +2,9 @@ export diageye

using StatsFuns: logistic
using StatsFuns: softmax, softmax!
using LoopVectorization

import LinearAlgebra
import LoopVectorization: @turbo

diageye(::Type{T}, n::Int) where { T <: Real } = Matrix{T}(I, n, n)
diageye(n::Int) = diageye(Float64, n)
@@ -16,6 +16,21 @@ end
const sigmoid = logistic

dtanh(x) = 1 - tanh(x)^2

"""
mirrorlog(x)
Returns `log(1 - x)`.
"""
mirrorlog(x) = log(1 - x)

"""
xtlog(x)
Returns `x * log(x)`.
"""
xtlog(x) = x * log(x)
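
A few illustrative values for the two new helpers (editorial sketch, assuming only the definitions above):

mirrorlog(0.25)   # log(1 - 0.25) = log(0.75) ≈ -0.2877
xtlog(2.0)        # 2.0 * log(2.0) ≈ 1.3863
xtlog(1.0)        # 1.0 * log(1.0) = 0.0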

"""
negate_inplace!(A)
@@ -26,8 +41,8 @@ See also: [`mul_inplace!`](@ref)
function negate_inplace! end

negate_inplace!(A::AbstractArray) = -A
negate_inplace!(A::Array) = map!(-, A, A)
negate_inplace!(A::Real) = -A
negate_inplace!(A::Array) = map!(-, A, A)

"""
mul_inplace!(alpha, A)
@@ -39,8 +54,8 @@ See also: [`negate_inplace!`](@ref)
function mul_inplace! end

mul_inplace!(alpha, A::AbstractArray) = alpha * A
mul_inplace!(alpha, A::Array) = lmul!(alpha, A)
mul_inplace!(alpha, A::Real) = alpha * A
mul_inplace!(alpha::T, A::Array{T}) where { T <: Real } = lmul!(alpha, A)
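
A sketch of the intended in-place semantics, based only on the methods shown above: `Array` inputs are mutated and returned, while the generic and `Real` fallbacks return fresh values.

A = [1.0 2.0; 3.0 4.0]
B = negate_inplace!(A)        # Array method: map!(-, A, A) mutates A itself
@assert B === A && A[1, 1] == -1.0

C = [1.0 2.0; 3.0 4.0]
D = mul_inplace!(2.0, C)      # matching eltype: lmul! mutates C in place
@assert D === C && C[2, 2] == 8.0

@assert mul_inplace!(2.0, 3.0) == 6.0   # Real fallback: plain multiplication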

"""
rank1update(A, x)
@@ -56,19 +71,31 @@ rank1update(A::AbstractMatrix, x::AbstractVector, y::AbstractVector) = rank1upda
rank1update(A::Real, x::Real) = rank1update(A, x, x)
rank1update(A::Real, x::Real, y::Real) = A + x * y

function rank1update(::Type{ T }, ::Type{ T }, ::Type{T}, A::Matrix, x::Vector, y::Vector) where { T <: AbstractFloat }
function rank1update(::Type{ T }, ::Type{ T }, ::Type{T}, A::Matrix, x::Vector, y::Vector) where { T <: LinearAlgebra.BlasFloat }
return LinearAlgebra.BLAS.ger!(one(T), x, y, copy(A))
end

function rank1update(::Type{ T1 }, ::Type{ T2 }, ::Type{ T3 }, A::AbstractMatrix, x::AbstractVector, y::AbstractVector) where { T1 <: Real, T2 <: Real , T3 <: Real }
return A + x * y'
function rank1update(::Type{ T1 }, ::Type{ T2 }, ::Type{ T3 }, A::AbstractMatrix, x::AbstractVector, y::AbstractVector) where { T1 <: Real, T2 <: Real, T3 <: Real }
    T = promote_type(T1, T2, T3)
    B = Matrix{T}(undef, size(A))
    return rank1update!(B, A, x, y)
end

function rank1update!(B::AbstractMatrix, A::AbstractMatrix, x::AbstractVector, y::AbstractVector)
    sz = size(A)
    @inbounds for k2 in 1:sz[2]
        # hoist y[k2] out of the inner loop over the column
        yk2 = y[k2]
        @inbounds for k1 in 1:sz[1]
            B[k1, k2] = A[k1, k2] + x[k1] * yk2
        end
    end
    return B
end
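
For illustration (hypothetical call, mirroring the methods above), both the BLAS.ger! path and the generic element-wise path compute A + x * y':

A = [1.0 0.0; 0.0 1.0]
x = [1.0, 2.0]
y = [3.0, 4.0]

@assert rank1update(A, x, y) ≈ A + x * y'
@assert rank1update(A, x, y) ≈ [4.0 4.0; 6.0 9.0]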

"""
mul_trace(A, B)
Computes tr(A * B) wihtout allocating A * B.
Computes tr(A * B) without allocating A * B.
"""
function mul_trace end

@@ -78,8 +105,48 @@ function mul_trace(A::AbstractMatrix, B::AbstractMatrix)
sA, sB = size(A), size(B)
@assert (sA === sB) && (length(sA) === 2) && (first(sA) === last(sA))
result = zero(promote_type(eltype(A), eltype(B)))
@turbo for i in 1:first(sA), j in 1:first(sA)
@inbounds result += A[i, j] * B[j, i]
n = first(sA)
@turbo for i in 1:n
for j in 1:n
result += A[i, j] * B[j, i]
end
end
return result
end
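
A quick sanity check for the trace identity (editorial sketch, not part of the diff): mul_trace(A, B) must match the allocating tr(A * B).

using LinearAlgebra: tr

A = [1.0 2.0; 3.0 4.0]
B = [5.0 6.0; 7.0 8.0]

# tr(A * B) = Σᵢⱼ A[i, j] * B[j, i], computed without forming A * B
@assert mul_trace(A, B) ≈ tr(A * B)   # 1*5 + 2*7 + 3*6 + 4*8 = 69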


"""
v_a_vT(v, a)
Computes v*a*v^T with a single allocation.
"""
function v_a_vT(v::AbstractVector, a::Real)
T = promote_type(eltype(v), typeof(a))
result = zeros(T, length(v), length(v))
mul!(result, v, v', a, one(T))
return result
end

function v_a_vT(v, a)
    result = v * v'
    result *= a
    return result
end

"""
v_a_vT(v1, a, v2)
Computes v1*a*v2^T with a single allocation.
"""
function v_a_vT(v1::AbstractVector, a::Real, v2::AbstractVector)
T = promote_type(eltype(v1), typeof(a), eltype(v2))
result = zeros(T, length(v1), length(v2))
mul!(result, v1, v2', a, one(T))
return result
end

function v_a_vT(v1, a, v2)
    result = v1 * v2'
    result *= a
    return result
end
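
Finally, an illustrative check of the v_a_vT variants (not part of the diff): the specialised mul!-based methods and the generic fallbacks should agree.

v1 = [1.0, 2.0]
v2 = [3.0, 4.0]
a  = 2.0

@assert v_a_vT(v1, a)     ≈ a * (v1 * v1')   # [2.0 4.0; 4.0 8.0]
@assert v_a_vT(v1, a, v2) ≈ a * (v1 * v2')   # [6.0 8.0; 12.0 16.0]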