Merge pull request #3 from mfherbst/dev-diis

mfherbst · web-flow · commit f2f4db13f1c3 · 2018-08-24T10:45:27.000+02:00
improve structure of cDIIS.jl
diff --git a/src/algorithms/cDIIS.jl b/src/algorithms/cDIIS.jl
@@ -5,109 +5,146 @@
 """
     Container type for the state of one spin type for cDIIS
 """
-mutable struct cDIISstate
-    fock::CircularBuffer
+mutable struct DiisState
+    iterate::CircularBuffer
     error::CircularBuffer
+
+    # errorOverlaps is a Circular Buffer containing already calculated rows of
+    # the next iterate matrix. Each row is also stored as a Circular Buffer.
     errorOverlaps::CircularBuffer
+    n_diis_size::Int
+
+    function DiisState(n_diis_size::Int)
+        new(CircularBuffer{AbstractArray}(n_diis_size),
+            CircularBuffer{AbstractArray}(n_diis_size),
+            CircularBuffer{AbstractArray}(n_diis_size),
+            n_diis_size
+        )
+    end
 end
 
+
 """
     cDIIS
 """
 mutable struct cDIIS <: Accelerator
-    n_diis_size::Int
-    state::Tuple{cDIISstate, cDIISstate}
+    state::Tuple{DiisState, DiisState}
+    sync_spins::Bool
     conditioning_threshold::Float64
     coefficient_threshold::Float64
 
-    function cDIIS(problem::ScfProblem; n_diis_size = 5, conditioning_threshold = 1e-14, coefficient_threshold = 1e-6, kwargs...)
-        stateα = cDIISstate(CircularBuffer{AbstractArray}(n_diis_size),
-            CircularBuffer{AbstractArray}(n_diis_size),
-            CircularBuffer{AbstractArray}(sum(1:n_diis_size -1))
-        )
-        stateβ = deepcopy(stateα)
-        new(n_diis_size, (stateα, stateβ), conditioning_threshold, coefficient_threshold)
+    function cDIIS(problem::ScfProblem; n_diis_size = 5, sync_spins = true, conditioning_threshold = 1e-14, coefficient_threshold = 1e-6, kwargs...)
+        stateα = DiisState(n_diis_size)
+        stateβ = DiisState(n_diis_size)
+        new((stateα, stateβ), sync_spins, conditioning_threshold, coefficient_threshold)
     end
 end
 
 """
     Helper function.
-    pushes current fock and error matrices to states of both spin types
+    pushes current iterate and error matrices to states of both spin types
 """
-function push_iterate_to_state!(iterate::ScfIterState, states::Tuple)
-    for i in 1:size(iterate.fock, 3)
-        pushfirst!(states[i].fock,  view(iterate.fock, :,:,i))
-        pushfirst!(states[i].error, view(iterate.error_pulay, :,:,i))
-    end
+function push_iterate!(state::DiisState, iterate::AbstractArray, error::Union{AbstractArray,Nothing} = nothing)
+    pushfirst!(state.iterate,  iterate)
+
+    # Push difference to previous iterate if no error given
+    pushfirst!(state.error,
+               error != nothing ? error : iterate - state.iterate[1])
+end
+
+"""
+    Helper functions to get views for specific spins
+"""
+function spin(obj::AbstractArray, dim::Int)
+    view(obj, ntuple(x -> Colon(), ndims(obj) - 1)..., dim)
+end
+
+function spincount(obj::AbstractArray)
+    size(obj, ndims(obj))
+end
+
+function spinloop(obj::Union{AbstractArray, Accelerator})
+    typeof(obj) == Accelerator ?
+        (1:spincount(obj.state.iterate)) :
+        (1:spincount(obj))
 end
 
 """
     Computes next iterate using cDIIS
 """
-function compute_next_iterate(acc::cDIIS, iterate::ScfIterState)
+function compute_next_iterate(acc::cDIIS, iterstate::ScfIterState)
+    # Push iterate and error to state
+    map(σ -> push_iterate!(acc.state[σ], spin(iterstate.fock, σ), spin(iterstate.error_pulay, σ)), spinloop(iterstate.fock))
+
     # Check if the number of known fock and error matrices is equal for both
     # spins before doing anything
-    history_size = acc.state[1].fock.length
-    for i in 1:size(iterate.fock, 3)
-        @assert acc.state[i].fock.length == history_size
-        @assert acc.state[i].error.length == history_size
+    history_size = acc.state[1].iterate.length
+    for σ in spinloop(iterstate.fock)
+        @assert acc.state[σ].iterate.length == history_size
+        @assert acc.state[σ].error.length == history_size
     end
 
-    # Store the current fock and error matrices first
-    push_iterate_to_state!(iterate, acc.state)
+    # To save memory we store only new_iterate once and pass views of it to the
+    # computation routines that write directly into the view.
+    new_iterate = zeros(size(iterstate.fock))
 
-    # Calculate the new fock matrix for each spin type separately
-    # and write them in the result matrix directly to save memory
-    new_iterate_fock = zeros(size(iterate.fock))
-    for i in 1:size(iterate.fock, 3)
-        compute_next_iterate_fock!(acc.state[i], acc.n_diis_size, view(new_iterate_fock, :,:,i), acc.conditioning_threshold, acc.coefficient_threshold)
-    end
+    # Defining anonymous functions with given arguments improves readability later on.
+    matrix(σ) = diis_build_matrix(acc.state[σ])
+    coefficients(A) = diis_solve_coefficients(A, acc.conditioning_threshold)
+    compute(c, σ) = compute_next_iterate_matrix!(acc.state[σ], c, spin(new_iterate, σ), acc.coefficient_threshold)
 
-    return update_iterate_matrix(iterate, new_iterate_fock)
+    # If sync_spins is enabled, we need to calculate the coefficients using the
+    # merged matrix. This also means we need to remove the same number of
+    # matrices from both histrories.
+    if acc.sync_spins & (spincount(iterstate.fock) == 2)
+        A = merge_matrices(matrix(1), matrix(2))
+        c, matrixpurgecount = coefficients(A)
+        map(σ -> compute(c, σ), spinloop(iterstate.fock))
+        map(σ -> purge_matrix_from_state(acc.state[σ], matrixpurgecount), spinloop(iterstate.fock))
+    else
+        # If we are calculating the spins separately, each spin has its own coefficients.
+        for σ in spinloop(iterstate.fock)
+            c, matrixpurgecount = coefficients(matrix(σ))
+            compute(c, σ)
+            purge_matrix_from_state(acc.state[σ], matrixpurgecount)
+        end
+    end
+    return update_iterate_matrix(iterstate, new_iterate)
 end
 
 """
-    Computes a new matrix to be used as Fock Matrix in next iteration
-    The function writes the result into the passed argument 'fock'
+    When synchronizing spins the resulting DIIS matrices have to be added
+    together but the constraint must be kept as is.
 """
-function compute_next_iterate_fock!(state::cDIISstate, n_diis_size::Int, fock::SubArray, conditioning_threshold::Float64, coefficient_threshold::Float64)
-    @assert n_diis_size > 0
-    @assert state.fock.length > 0
-
-    # The Fock Matrix in the next Iteration is a linear combination of
-    # previous Fock Matrices.
-    #
-    # The linear system has dimension m <= n_diis_size, since in the
-    # beginning of the iteration we do not have the full number of
-    # previous Fock Matrices yet.
-
-    m = min(n_diis_size, state.fock.length)
-
-    # Build the linear system we need to solve in order to obtain
-    # the neccessary coefficients c_i.
-
-    A = diis_build_matrix(n_diis_size, m, state)
-    rhs = diis_build_rhs(n_diis_size, m, state)
-    (c, bad_condition) = diis_solve_coefficients(A, rhs, conditioning_threshold)
+function merge_matrices(A1::AbstractArray, A2::AbstractArray)
+    view(A1, 1:size(A1, 1) - 1, 1:size(A1, 2) - 1) .+ view(A2, 1:size(A2, 1) - 1, 1:size(A2, 2) - 1)
+    return A1
+end
 
-    # In some cases the matrix so badly conditioned, that we cannot produce a
-    # sane new iterate. In this case we need to reuse the newest fock matrix
-    if bad_condition
-        fock .= state.fock[1]
-        return
+function purge_matrix_from_state(state::DiisState, count::Int)
+    for i in 1:2*count
+        pop!(state.iterate)
+        pop!(state.error)
+        pop!(state.errorOverlaps)
     end
+end
 
+"""
+    Computes a new matrix to be used as Fock Matrix in next iteration
+    The function writes the result into the passed argument 'fock'
+"""
+function compute_next_iterate_matrix!(state::DiisState, c::AbstractArray, iterate::SubArray, coefficient_threshold::Float64)
     # add very small coefficients to the largest one but always use the most
-    # recent fock matrix regardless of the coefficient value
+    # recent iterate matrix regardless of the coefficient value
     mask = map(x -> norm(x) > coefficient_threshold, c)
     mask[1] = true
     c[argmax(c)] += sum(c[ .! mask])
 
     # Construct new Fock Matrix using obtained coefficients
-    # and write it to the given fock matrix. We assume, that
-    # fock is a matrix of zeros.
+    # and write it to the given iterate matrix. We assume, that
+    # iterate is a matrix of zeros.
     for i in eachindex(c)[mask]
-        fock .+= c[i] * state.fock[i]
+        iterate .+= c[i] * state.iterate[i]
     end
 end
 
@@ -118,83 +155,115 @@ end
     Returns the vector c and and a boolean value representing if the matrix A
     is so badly conditioned, that the previous fock matrix should be used.
 """
-function diis_solve_coefficients(A, rhs, threshold)
-    println(cond(A))
+function diis_solve_coefficients(A::AbstractArray, threshold::Float64)
+    # Right hand side of the equation
+    rhs = diis_build_rhs(size(A, 1))
+
     # calculate the eigenvalues of A and select sufficiently large eigenvalues
     λ, U = eigen(A)
     mask = map(x -> norm(x) > threshold, λ)
 
-    # if all eigenvalues are under the threshold, return and indicate, that the
-    # previous fock matrix should be used without modifications.
+    if !all(mask)
+        println("   Removing ", count(.! mask), " of ", length(mask), " eigenvalues from DIIS linear system.")
+    end
+
+    # if all eigenvalues are under the threshold, we cannot calculate sane
+    # coefficients. The current fock matrix should be used without
+    # modifications.
     if all( .! mask)
         println("All eigenvalues are under the threshold! Skipping iterate modification…")
-        return (nothing, true)
+        c = zeros(size(A, 1) - 1)
+        c[1] = 1
+        return c
     end
 
     # Obtain the solution of the linear system A * c = rhs using the inverse
     # matrix of A constructed using the above decomposition
     c = U[:,mask] * Diagonal(1 ./ λ[mask]) * U[:,mask]' * rhs
 
-    # Warning: Note that c has size (n_diis_size + 1) since
-    #          the last element is the lagrange multiplier
-    #          corresponding to the constraint
-    #          \sum_{i=1}^n_diis_size c_i = 1
-    #          We need to remove this element!
-
-    return (c[1:length(c) - 1], false)
+    # Note that c has size (n_diis_size + 1) since the last element is the
+    # lagrange multiplier corresponding to the constraint
+    # \sum_{i=1}^n_diis_size c_i = 1 We need to remove this element!
+    return c[1:length(c) - 1], count(.! mask)
 end
 
 """
     Linear System Matrix for the cDIIM accelerator.
     This is a hermitian matrix containing error overlaps B
     and ones in the form
     
-    B  1
-    1† 0
+    A = B  1
+        1† 0
 """
-function diis_build_matrix(n_diis_size::Int, m::Int, state::cDIISstate)
-    A = zeros(m +1,m +1)
+function diis_build_matrix(state::DiisState)
+    @assert state.n_diis_size > 0
+    @assert state.iterate.length > 0
+
+    # The Fock Matrix in the next Iteration is a linear combination of
+    # previous Fock Matrices.
+    #
+    # The linear system has dimension m <= state.n_diis_size, since in the
+    # beginning of the iteration we do not have the full number of
+    # previous Fock Matrices yet.
 
-    # Since the Matrix A is Hermitian, we only have to calculate
-    # the upper triagonal and can use the Julia function 'Hermitian'
-    # to fill the lower triagonal of the matrix. This also allowes
-    # Julia to use optimized algorithems for Hermitian matrices.
+    m = min(state.n_diis_size, length(state.iterate))
 
-    # We can reuse most of the matrix B from the last iteration,
-    # since the values do not change and only have to calculate
-    # the first lign of B.
-    # We accomplish this using a circular buffer of size n_diis_size,
-    # which holds circular buffers of size n_diis_size to store already
-    # calculated elements.
+    A = zeros(m +1,m +1)
+
+    # Since the Matrix A is symmetric, we only have to calculate
+    # the upper triagonal and can use the Julia object 'Symmetric'
+    # to fill the lower triagonal of the matrix. This also allows
+    # Julia to use optimized algorithems for symmetric matrices.
+    #
+    # The matrix A is filled in the following form:
+    #
+    # B[1,1] B[1,2] B[1,3] … B[1,m] 1
+    # 0      B[2,2] B[2,3] … B[2,m] 1
+    # 0      0      B[3,3] … B[3,m] 1
+    # ⋮      ⋮      ⋮      … ⋮      ⋮
+    # 0      0      0      … B[m,m] 1
+    # 0      0      0      … 0      0
+    #
+    # Note, that the values A[1,1] … A[1,m-1] will become the values
+    # of A[2,2] … A[2,m] in the next iteration. This means, that we effectively
+    # only have to calculate the first row and can reuse all consecutive ones.
+    #
+    # We would like to store the rows in such a way, that the storage variable
+    # can be mapped to a row immediately. Since the values are shifted to the
+    # right in each iteration and the last element is discarded it makes sense
+    # to use a Circular Buffer to hold each row.
+    #
+    # Since the last row is also discarded after the new row is calculated we
+    # use a Circular Buffer again to store the rows.
 
     # Fill the first row with newly calculated values and cache them
     # in a newly created Circular Buffer
-    newValues = CircularBuffer{Any}(n_diis_size)
-    for j in 1:m
-        A[1,j] = tr(state.error[1]' * state.error[j])
-        push!(newValues, A[1,j])
-    end
-    # Since we want to use this buffer as the 2nd row of A in the next
-    # iteration we need the following layout of the buffer
-    #   0 a1 a2 … am
-    # so we need to push a 0 at the beginning
-    pushfirst!(newValues, 0)
-
-    # The last element of each row has to be 1, see above for a
-    # detailed explanation.
+    newValues = CircularBuffer{Any}(state.n_diis_size)
+    map(j -> push!(newValues,
+                   tr(state.error[1]' * state.error[j])), 1:m)
+    fill!(newValues, 0)
+
+    # Push newly calculated row to the row buffer.
+    pushfirst!(state.errorOverlaps, newValues)
+
+    # The last element of each row of A has to be 1. After calling Symmetric(A)
+    # the copy of these 1s in the bottom row of A defines the constraint
+    #   sum(c) == 1.
     A[1, m + 1] = 1
 
-    # Now fill the rest of the lines using cached values,
+    # Now fill all rows with cached values,
     # push a '0' on each buffer to prepare it for the next iteration
     # and set the last element of each row to 1.
-    for i in 2:m
-        A[i,1:m] = state.errorOverlaps[i-1][1:m]
-        pushfirst!(state.errorOverlaps[i-1], 0)
+    for i in 1:m
+        A[i,1:m] = state.errorOverlaps[i][1:m]
         A[i, m + 1] = 1
-    end
 
-    # Push newly calculated row to the row buffer.
-    pushfirst!(state.errorOverlaps, newValues)
+        # Since we want to use this buffer as the 2nd row of A in the next
+        # iteration we need the following layout of the buffer
+        #   0 A[1,1] A[1,2] … A[1,m-1]
+        # so we need to push a 0 to the beginning
+        pushfirst!(state.errorOverlaps[i], 0)
+    end
 
     return Symmetric(A)
 end
@@ -204,8 +273,8 @@ end
     This is a vector of size m+1 containing only ones
     except for the last element which is zero.
 """
-function diis_build_rhs(n_diis_size::Int, m::Int, state::cDIISstate)
-    rhs = zeros(m + 1)
-    rhs[m + 1] = 1
+function diis_build_rhs(vectorsize::Int)
+    rhs = zeros(vectorsize)
+    rhs[end] = 1
     return rhs
 end