@@ -167,7 +167,6 @@ function BivariateGramMatrix(μ::BlockedVector{T, <: PaddedVector{T}}, X::XT, Y:
167167 end
168168 for n in 3 : n
169169 for m in n: min (N- n+ 1 , b+ n)
170- # println("m = $m, n = $n")
171170 recurse! (W, X, Y, m, n)
172171 end
173172 symmetrize_block! (view (W, Block (n, n)))
@@ -622,3 +621,316 @@ function _chebyshev_y(::Type{T}, n::Integer) where T
622621
623622 return Y
624623end
624+
625+
626+ # # Fast Cholesky algorithm using the displacement equations: XᵀW - WX = Gx J Gxᵀ and YᵀW - WY = Gy J Gyᵀ
627+
628+ function cholesky (W:: BivariateChebyshevGramMatrix{T} ) where T
629+ n = blocksize (W, 1 )
630+ N = n* (n+ 1 )÷ 2
631+ X = _chebyshev_x (T, n)
632+ Y = _chebyshev_y (T, n)
633+ @assert blockbandwidths (X) == blockbandwidths (Y) == (1 , 1 )
634+ @assert subblockbandwidths (X) == (0 , 0 )
635+ @assert subblockbandwidths (Y) == (1 , 1 )
636+ Xt = BandedBlockBandedMatrix (X' )
637+ Yt = BandedBlockBandedMatrix (Y' )
638+ Gx = Matrix (compute_skew_generators (Val (1 ), W))
639+ Gy = Matrix (compute_skew_generators (Val (2 ), W))
640+ L = BlockedMatrix {T} (I, 1 : n, 1 : n)
641+ c = zeros (T, N, n)
642+ ĉ = zeros (T, N, n)
643+ vc = view (c, 1 : N, 1 : 1 )
644+ vc .= W[1 : N, 1 : 1 ]
645+ ĉ_xd = zeros (T, N, n)
646+ ĉ_yd = zeros (T, N, n)
647+ l = zeros (T, N, n)
648+ linvd = zeros (T, N, n)
649+ d = zeros (T, n, n)
650+ Xrow1 = zeros (T, N, n)
651+ Yrow1 = zeros (T, N, n)
652+ vx = zeros (T, N, n)
653+ vy = zeros (T, N, n)
654+ @inbounds for k in 1 : n- 1
655+ K = k* (k- 1 )÷ 2
656+ # d = cholesky(Symmetric(c[Block(1, 1)], :L)).L
657+ # L[Block(k, k)] .= d
658+ vd = view (d, 1 : k, 1 : k)
659+ vd .= view (c, 1 : k, 1 : k)
660+ cholesky! (Symmetric (vd, :L ))
661+ # l = BlockedMatrix(Matrix(c[Block.(2:n-k+1), Block(1)])/d', k+1:n, [k])
662+ # L[Block.(k+1:n), Block(k)] .= l
663+ vl = view (l, k+ 1 : N- K, 1 : k)
664+ vl .= view (c, k+ 1 : N- K, 1 : k)
665+ rdiv! (vl, LowerTriangular (vd)' )
666+ view (L, Block (k, k)) .= vd
667+ # view(L, Block.(k+1:n), Block(k)) .= vl
668+ J = 1
669+ for j in k+ 1 : n
670+ view (L, Block (j, k)) .= view (vl, J: J- 1 + j, 1 : k)
671+ J += j
672+ end
673+ # vx = Gx[:, 1:n]*Gx[1:k, n+1:2n]' - Gx[:, n+1:2n]*Gx[1:k, 1:n]'
674+ # vy = Gy[:, 1:n]*Gy[1:k, n+1:2n]' - Gy[:, n+1:2n]*Gy[1:k, 1:n]'
675+ compute_v! (vx, Gx, n, k)
676+ compute_v! (vy, Gy, n, k)
677+ # ĉ_xd = X[Block.(k:n), Block.(k:n)]'c + Xrow1*c[1:k, 1:k]-c*Xrow1[1:k, 1:k]'-vx
678+ # ĉ_yd = Y[Block.(k:n), Block.(k:n)]'c + Yrow1*c[1:k, 1:k]-c*Yrow1[1:k, 1:k]'-vy
679+ compute_ĉd! (ĉ_xd, Xt, Xrow1, vx, c, n, k)
680+ compute_ĉd! (ĉ_yd, Yt, Yrow1, vy, c, n, k)
681+ # ĉ = BlockedMatrix([Matrix(ĉ_xd) / Diagonal(X[Block(2, 1)]) Matrix(ĉ_yd)[:, end]/Y[BlockIndex((2, 1), (k+1, k))]], k:n, [k+1])
682+ vĉ1 = view (ĉ, 1 : N- K, 1 : k)
683+ vĉ1 .= view (ĉ_xd, 1 : N- K, 1 : k)./ view (X. data, Block (3 , k))
684+ # vĉ1 .= view(ĉ_xd, 1:N-K, 1:k)
685+ # rdiv!(vĉ1, Diagonal(X[Block(k+1, k)]))
686+ vĉ2 = view (ĉ, 1 : N- K, k+ 1 )
687+ vĉ2 .= view (ĉ_yd, 1 : N- K, k)./ viewblock (Y, Block (k+ 1 , k))[k+ 1 , k]
688+ vĉ = view (ĉ, k+ 1 : N- K, 1 : k+ 1 )
689+ # c = ĉ[Block.(2:n-k+1), :] - l * (d \ c[Block(2, 1)]')
690+ # linvd = vl/LowerTriangular(vd)
691+ vlinvd = view (linvd, 1 : N- K- k, 1 : k)
692+ vlinvd .= vl
693+ rdiv! (vlinvd, LowerTriangular (vd))
694+ vc = view (c, 1 : N- K- k, 1 : k+ 1 )
695+ # vc .= vĉ .- vlinvd*vc[k+1:2k+1, 1:k]'
696+ vd = view (d, 1 : k+ 1 , 1 : k)
697+ vd .= view (c, k+ 1 : 2 k+ 1 , 1 : k)
698+ vc .= vĉ
699+ mul! (vc, vlinvd, vd' , - 1 , 1 )
700+ # c[Block(1, 1)] .= Symmetric(c[Block(1, 1)], :L)
701+ vc[1 : k+ 1 , 1 : k+ 1 ] .= Symmetric (view (c, 1 : k+ 1 , 1 : k+ 1 ), :L )
702+ # Xrow1 = -linvd*Matrix(X[Block(k+1, k)]')
703+ # Yrow1 = -linvd*Matrix(Y[Block(k+1, k)]')
704+ mul! (view (Xrow1, 1 : N- K- k, 1 : k+ 1 ), vlinvd, viewblock (X, Block (k+ 1 , k))' , - 1 , 0 )
705+ mul! (view (Yrow1, 1 : N- K- k, 1 : k+ 1 ), vlinvd, viewblock (Y, Block (k+ 1 , k))' , - 1 , 0 )
706+ # Gx = Gx[k+1:end, :] - linvd * Gx[1:k, :]
707+ # Gy = Gy[k+1:end, :] - linvd * Gy[1:k, :]
708+ update_generators! (Gx, linvd, n, k)
709+ update_generators! (Gy, linvd, n, k)
710+ end
711+ L[Block (n, n)] = cholesky (Symmetric (c[1 : n, 1 : n], :L )). L
712+ return Cholesky (L, ' L' , 0 )
713+ end
714+
715+ function compute_v! (v, G, n, k)
716+ N = n* (n+ 1 )÷ 2
717+ K = k* (k- 1 )÷ 2
718+ vv = view (v, 1 : N- K, 1 : k)
719+ vG1 = view (G, K+ 1 : N, 1 : n)
720+ vG2 = view (G, K+ 1 : k+ K, n+ 1 : 2 n)
721+ vG3 = view (G, K+ 1 : N, n+ 1 : 2 n)
722+ vG4 = view (G, K+ 1 : k+ K, 1 : n)
723+ mul! (vv, vG1, vG2' )
724+ mul! (vv, vG3, vG4' , - 1 , 1 )
725+ end
726+
727+ function update_generators! (G, linvd, n, k)
728+ N = n* (n+ 1 )÷ 2
729+ K = k* (k+ 1 )÷ 2
730+ vG1 = view (G, K+ 1 : N, 1 : 2 n)
731+ vG2 = view (G, K- k+ 1 : K, 1 : 2 n)
732+ vlinvd = view (linvd, 1 : N- K, 1 : k)
733+ mul! (vG1, vlinvd, vG2, - 1 , 1 )
734+ end
735+
736+ # ĉd = Z[Block.(k:n), Block.(k:n)]'c + Zrow1*c[1:k, 1:k]-c*Zrow1[1:k, 1:k]'-v
737+ function compute_ĉd! (ĉd, Zt, Zrow1, v, c, n, k)
738+ N = n* (n+ 1 )÷ 2
739+ K = k* (k- 1 )÷ 2
740+ jacobi_mul! (ĉd, Zt, c, n, k)
741+ vĉd = view (ĉd, 1 : N- K, 1 : k)
742+ mul! (vĉd, view (Zrow1, 1 : N- K, 1 : k), view (c, 1 : k, 1 : k), 1 , 1 )
743+ mul! (vĉd, view (c, 1 : N- K, 1 : k), view (Zrow1, 1 : k, 1 : k)' , - 1 , 1 )
744+ vĉd .- = view (v, 1 : N- K, 1 : k)
745+ end
746+
747+ # y[:, 1:k] = Z[Block.(k:n), Block.(k:n)]*x[:, 1:k]
748+ function jacobi_mul! (y, Z, x, n, k)
749+ @assert 1 ≤ k ≤ n
750+ if subblockbandwidths (Z) == (0 , 0 )
751+ return jacobi_mul00! (y, Z, x, n, k)
752+ elseif subblockbandwidths (Z) == (1 , 1 )
753+ return jacobi_mul11! (y, Z, x, n, k)
754+ else
755+ return jacobi_mul_default! (y, Z, x, n, k)
756+ end
757+ end
758+
759+ function jacobi_mul00! (y, Z, x, n, k)
760+ @inbounds for col in 1 : k
761+ Jshift = 0
762+ if n == 1
763+ ZBj = view (Z. data, Block (2 , k))
764+ for i in 1 : size (ZBj, 2 )
765+ y[i+ Jshift, col] = ZBj[1 , i]* x[i+ Jshift, col]
766+ end
767+ else
768+ if k == n
769+ ZBj = view (Z. data, Block (2 , k))
770+ for i in 1 : size (ZBj, 2 )
771+ y[i+ Jshift, col] = ZBj[1 , i]* x[i+ Jshift, col]
772+ end
773+ else
774+ ZBj = view (Z. data, Block (2 , k))
775+ ZBjp1 = view (Z. data, Block (1 , k+ 1 ))
776+ for i in 1 : size (ZBj, 2 )
777+ y[i+ Jshift, col] = ZBj[1 , i]* x[i+ Jshift, col] + ZBjp1[1 , i]* x[i+ size (ZBj, 2 )+ Jshift, col]
778+ end
779+ Jshift += size (ZBj, 2 )
780+ for j in k+ 1 : n- 1
781+ ZBjm1 = view (Z. data, Block (3 , j- 1 ))
782+ ZBj = view (Z. data, Block (2 , j))
783+ ZBjp1 = view (Z. data, Block (1 , j+ 1 ))
784+ for i in 1 : size (ZBj, 2 )- 1
785+ y[i+ Jshift, col] = ZBjm1[1 , i]* x[i+ Jshift- size (ZBjm1, 2 ), col] + ZBj[1 , i]* x[i+ Jshift, col] + ZBjp1[1 , i]* x[i+ size (ZBj, 2 )+ Jshift, col]
786+ end
787+ i = size (ZBj, 2 )
788+ y[i+ Jshift, col] = ZBj[1 , i]* x[i+ Jshift, col] + ZBjp1[1 , i]* x[i+ size (ZBj, 2 )+ Jshift, col]
789+ Jshift += size (ZBj, 2 )
790+ end
791+ ZBjm1 = view (Z. data, Block (3 , n- 1 ))
792+ ZBj = view (Z. data, Block (2 , n))
793+ for i in 1 : size (ZBj, 2 )- 1
794+ y[i+ Jshift, col] = ZBjm1[1 , i]* x[i+ Jshift- size (ZBjm1, 2 ), col] + ZBj[1 , i]* x[i+ Jshift, col]
795+ end
796+ i = size (ZBj, 2 )
797+ y[i+ Jshift, col] = ZBj[1 , i]* x[i+ Jshift, col]
798+ end
799+ end
800+ end
801+ return y
802+ end
803+
804+ function jacobi_mul11! (y, Z, x, n, k)
805+ @inbounds for col in 1 : k
806+ Jshift = 0
807+ if n == 1
808+ ZBj = view (Z. data, Block (2 , k))
809+ for i in 1 : size (ZBj, 2 )
810+ y[i+ Jshift, col] = ZBj[2 , i]* x[i+ Jshift, col]
811+ end
812+ else
813+ if k == n
814+ ZBj = view (Z. data, Block (2 , k))
815+ for i in 1 : size (ZBj, 2 )
816+ y[i+ Jshift, col] = ZBj[2 , i]* x[i+ Jshift, col]
817+ end
818+ for i in 2 : size (ZBj, 2 )
819+ y[i- 1 + Jshift, col] += ZBj[1 , i]* x[i+ Jshift, col]
820+ end
821+ for i in 1 : size (ZBj, 2 )- 1
822+ y[i+ 1 + Jshift, col] += ZBj[3 , i]* x[i+ Jshift, col]
823+ end
824+ else
825+ ZBj = view (Z. data, Block (2 , k))
826+ for i in 1 : size (ZBj, 2 )
827+ y[i+ Jshift, col] = ZBj[2 , i]* x[i+ Jshift, col]
828+ end
829+ for i in 2 : size (ZBj, 2 )
830+ y[i- 1 + Jshift, col] += ZBj[1 , i]* x[i+ Jshift, col]
831+ end
832+ for i in 1 : size (ZBj, 2 )- 1
833+ y[i+ 1 + Jshift, col] += ZBj[3 , i]* x[i+ Jshift, col]
834+ end
835+
836+ ZBjp1 = view (Z. data, Block (1 , k+ 1 ))
837+ for i in 2 : size (ZBj, 2 )+ 1
838+ y[i- 1 + Jshift, col] += ZBjp1[1 , i]* x[i+ size (ZBj, 2 )+ Jshift, col]
839+ end
840+ for i in 1 : size (ZBj, 2 )
841+ y[i+ Jshift, col] += ZBjp1[2 , i]* x[i+ size (ZBj, 2 )+ Jshift, col]
842+ end
843+ for i in 1 : size (ZBj, 2 )- 1
844+ y[i+ 1 + Jshift, col] += ZBjp1[3 , i]* x[i+ size (ZBj, 2 )+ Jshift, col]
845+ end
846+
847+ Jshift += size (ZBj, 2 )
848+ for j in k+ 1 : n- 1
849+ ZBj = view (Z. data, Block (2 , j))
850+ for i in 1 : size (ZBj, 2 )
851+ y[i+ Jshift, col] = ZBj[2 , i]* x[i+ Jshift, col]
852+ end
853+ for i in 2 : size (ZBj, 2 )
854+ y[i- 1 + Jshift, col] += ZBj[1 , i]* x[i+ Jshift, col]
855+ end
856+ for i in 1 : size (ZBj, 2 )- 1
857+ y[i+ 1 + Jshift, col] += ZBj[3 , i]* x[i+ Jshift, col]
858+ end
859+
860+ ZBjm1 = view (Z. data, Block (3 , j- 1 ))
861+ for i in 2 : size (ZBjm1, 2 )
862+ y[i- 1 + Jshift, col] += ZBjm1[1 , i]* x[i- size (ZBjm1, 2 )+ Jshift, col]
863+ end
864+ for i in 1 : size (ZBjm1, 2 )
865+ y[i+ Jshift, col] += ZBjm1[2 , i]* x[i- size (ZBjm1, 2 )+ Jshift, col]
866+ end
867+ for i in 1 : size (ZBjm1, 2 )
868+ y[i+ 1 + Jshift, col] += ZBjm1[3 , i]* x[i- size (ZBjm1, 2 )+ Jshift, col]
869+ end
870+
871+ ZBjp1 = view (Z. data, Block (1 , j+ 1 ))
872+ for i in 2 : size (ZBj, 2 )+ 1
873+ y[i- 1 + Jshift, col] += ZBjp1[1 , i]* x[i+ size (ZBj, 2 )+ Jshift, col]
874+ end
875+ for i in 1 : size (ZBj, 2 )
876+ y[i+ Jshift, col] += ZBjp1[2 , i]* x[i+ size (ZBj, 2 )+ Jshift, col]
877+ end
878+ for i in 1 : size (ZBj, 2 )- 1
879+ y[i+ 1 + Jshift, col] += ZBjp1[3 , i]* x[i+ size (ZBj, 2 )+ Jshift, col]
880+ end
881+
882+ Jshift += size (ZBj, 2 )
883+ end
884+ ZBj = view (Z. data, Block (2 , n))
885+ for i in 1 : size (ZBj, 2 )
886+ y[i+ Jshift, col] = ZBj[2 , i]* x[i+ Jshift, col]
887+ end
888+ for i in 2 : size (ZBj, 2 )
889+ y[i- 1 + Jshift, col] += ZBj[1 , i]* x[i+ Jshift, col]
890+ end
891+ for i in 1 : size (ZBj, 2 )- 1
892+ y[i+ 1 + Jshift, col] += ZBj[3 , i]* x[i+ Jshift, col]
893+ end
894+
895+ ZBjm1 = view (Z. data, Block (3 , n- 1 ))
896+ for i in 2 : size (ZBjm1, 2 )
897+ y[i- 1 + Jshift, col] += ZBjm1[1 , i]* x[i- size (ZBjm1, 2 )+ Jshift, col]
898+ end
899+ for i in 1 : size (ZBjm1, 2 )
900+ y[i+ Jshift, col] += ZBjm1[2 , i]* x[i- size (ZBjm1, 2 )+ Jshift, col]
901+ end
902+ for i in 1 : size (ZBjm1, 2 )
903+ y[i+ 1 + Jshift, col] += ZBjm1[3 , i]* x[i- size (ZBjm1, 2 )+ Jshift, col]
904+ end
905+ end
906+ end
907+ end
908+ return y
909+ end
910+
911+ function jacobi_mul_default! (y, Z, x, n, k)
912+ if n == 1
913+ vy = view (y, 1 : k, 1 : k)
914+ mul! (vy, Z[Block (k, k)], view (x, 1 : k, 1 : k))
915+ else
916+ vy = view (y, 1 : k, 1 : k)
917+ Jstart = 1
918+ Jstop = k
919+ mul! (vy, Z[Block (k, k)], view (x, Jstart: Jstop, 1 : k))
920+ mul! (vy, Z[Block (k, k+ 1 )], view (x, (Jstop+ 1 ): (Jstop+ k+ 1 ), 1 : k), 1 , 1 )
921+ for j in k+ 1 : n- 1
922+ vy = view (y, (Jstop+ 1 ): (Jstop+ j), 1 : k)
923+ mul! (vy, Z[Block (j, j- 1 )], view (x, Jstart: Jstop, 1 : k))
924+ Jstart = Jstop+ 1
925+ Jstop = Jstop+ j
926+ mul! (vy, Z[Block (j, j)], view (x, Jstart: Jstop, 1 : k), 1 , 1 )
927+ mul! (vy, Z[Block (j, j+ 1 )], view (x, (Jstop+ 1 ): (Jstop+ j+ 1 ), 1 : k), 1 , 1 )
928+ end
929+ vy = view (y, (Jstop+ 1 ): (Jstop+ n), 1 : k)
930+ mul! (vy, Z[Block (n, n- 1 )], view (x, Jstart: Jstop, 1 : k))
931+ Jstart = Jstop+ 1
932+ Jstop = Jstop+ n
933+ mul! (vy, Z[Block (n, n)], view (x, Jstart: Jstop, 1 : k), 1 , 1 )
934+ end
935+ return y
936+ end
0 commit comments