15 changes: 1 addition & 14 deletions ext/TensorKitCUDAExt/cutensormap.jl
@@ -101,18 +101,6 @@ function TensorKit.scalar(t::CuTensorMap{T, S, 0, 0}) where {T, S}
return isempty(inds) ? zero(scalartype(t)) : @allowscalar @inbounds t.data[only(inds)]
end

function Base.convert(
TT::Type{CuTensorMap{T, S, N₁, N₂}},
t::AbstractTensorMap{<:Any, S, N₁, N₂}
) where {T, S, N₁, N₂}
if typeof(t) === TT
return t
else
tnew = TT(undef, space(t))
return copy!(tnew, t)
end
end

function LinearAlgebra.isposdef(t::CuTensorMap)
domain(t) == codomain(t) ||
throw(SpaceMismatch("`isposdef` requires domain and codomain to be the same"))
@@ -138,10 +126,9 @@ function Base.promote_rule(
return CuTensorMap{T, S, N₁, N₂}
end

TensorKit.promote_storage_rule(::Type{CuArray{T, N}}, ::Type{<:CuArray{T, N}}) where {T, N} =
TensorKit.promote_storage_rule(::Type{<:CuArray{T, N}}, ::Type{<:CuArray{T, N}}) where {T, N} =
CuArray{T, N, CUDA.default_memory}


# CuTensorMap exponentation:
function TensorKit.exp!(t::CuTensorMap)
domain(t) == codomain(t) ||
22 changes: 20 additions & 2 deletions ext/TensorKitCUDAExt/truncation.jl
@@ -10,7 +10,7 @@ function MatrixAlgebraKit.findtruncated(
fill!(v, dim(c))
end

perm = sortperm(parent(values); strategy.by, strategy.rev)
perm = isempty(parent(values)) ? Int64[] : sortperm(parent(values); strategy.by, strategy.rev)
cumulative_dim = cumsum(Base.permute!(parent(dims), perm))

result = similar(values, Bool)
@@ -36,14 +36,32 @@
end
end

perm = sortperm(parent(values); by = abs, rev = false)
perm = isempty(parent(values)) ? Int64[] : sortperm(parent(values); by = abs, rev = false)
cumulative_err = cumsum(Base.permute!(parent(ϵᵖ), perm))

result = similar(values, Bool)
parent(result)[perm] .= cumulative_err .> ϵᵖmax
return result
end

function MatrixAlgebraKit.findtruncated_svd(values::CuSectorVector, strategy::S) where {S <: MatrixAlgebraKit.TruncationStrategy}
# returning a CuSectorVector wrecks things in truncate_{co}domain
# because of scalar indexing
return CUDA.CUDACore.Adapt.adapt(Vector{Int}, MatrixAlgebraKit.findtruncated(values, strategy))
Member:
I don't think we are guaranteed to always return a <:AbstractVector{Int}; sometimes this can also be a <:AbstractVector{Bool}. Do you think we could put a hook into the truncate_(co)domain functions directly and then just specialize there?

Member Author:
Hm, could we instead split this into two lines and do

raw_ind = MatrixAlgebraKit.findtruncated(values, strategy)
return adapt(Vector{eltype(raw_ind)}, raw_ind)

For whatever reason, adapt(Vector, ) wasn't working :(
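
Spelled out, that two-line version would read roughly as follows (a sketch only, assuming eltype(raw_ind) gives the underlying index element type here, Int for index vectors or Bool for masks):

# hypothetical variant of the method above, keeping whatever element type findtruncated returns
function MatrixAlgebraKit.findtruncated_svd(values::CuSectorVector, strategy::MatrixAlgebraKit.TruncationStrategy)
    raw_ind = MatrixAlgebraKit.findtruncated(values, strategy)
    # move the result to host memory to avoid scalar indexing in truncate_{co}domain
    return CUDA.CUDACore.Adapt.adapt(Vector{eltype(raw_ind)}, raw_ind)
end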

end

function MatrixAlgebraKit.findtruncated_svd(values::CuSectorVector, strategy::MatrixAlgebraKit.TruncationByOrder)
# returning a CuSectorVector wrecks things in truncate_{co}domain
# because of scalar indexing
return CUDA.CUDACore.Adapt.adapt(Vector{Int}, MatrixAlgebraKit.findtruncated(values, strategy))
end

function MatrixAlgebraKit.findtruncated_svd(values::CuSectorVector, strategy::MatrixAlgebraKit.TruncationByValue)
atol = TensorKit.Factorizations.rtol_to_atol(values, strategy.p, strategy.atol, strategy.rtol)
strategy′ = trunctol(; atol, strategy.by, strategy.keep_below)
return SectorDict(c => CUDA.CUDACore.Adapt.adapt(Vector{Int}, MatrixAlgebraKit.findtruncated_svd(d, strategy′)) for (c, d) in pairs(values))
end

# Needed until MatrixAlgebraKit patch hits...
function MatrixAlgebraKit._ind_intersect(A::CuVector{Bool}, B::CuVector{Int})
result = fill!(similar(A), false)
9 changes: 4 additions & 5 deletions src/tensors/abstracttensor.jl
@@ -53,9 +53,7 @@ storagetype(t) = storagetype(typeof(t))
function storagetype(::Type{T}) where {T <: AbstractTensorMap}
if T isa Union
# attempt to be slightly more specific by promoting unions
Ma = storagetype(T.a)
Mb = storagetype(T.b)
return promote_storagetype(Ma, Mb)
return promote_storagetype(T.a, T.b)
else
# fallback definition by using scalartype
return similarstoragetype(scalartype(T))
@@ -103,8 +101,9 @@ similarstoragetype(X::Type, ::Type{T}) where {T <: Number} =

# implement on tensors
similarstoragetype(::Type{TT}) where {TT <: AbstractTensorMap} = similarstoragetype(storagetype(TT))
similarstoragetype(::Type{TT}, ::Type{T}) where {TT <: AbstractTensorMap, T <: Number} =
similarstoragetype(storagetype(TT), T)
function similarstoragetype(::Type{TT}, ::Type{T}) where {TT <: AbstractTensorMap, T <: Number}
return similarstoragetype(storagetype(TT), T)
end
kshyatt marked this conversation as resolved.

# implement on arrays
similarstoragetype(::Type{A}) where {A <: DenseVector{<:Number}} = A
2 changes: 0 additions & 2 deletions src/tensors/adjoint.jl
@@ -50,8 +50,6 @@ Base.@propagate_inbounds function subblock(t::AdjointTensorMap, (f₁, f₂)::Tu
return permutedims(conj(data), (domainind(tp)..., codomainind(tp)...))
end

to_cpu(t::AdjointTensorMap) = adjoint(to_cpu(adjoint(t)))

# Show
#------
function Base.showarg(io::IO, t::AdjointTensorMap, toplevel::Bool)
9 changes: 3 additions & 6 deletions src/tensors/tensor.jl
@@ -21,7 +21,6 @@ struct TensorMap{T, S <: IndexSpace, N₁, N₂, A <: DenseVector{T}} <: Abstrac
end
return TensorMap{T, S, N₁, N₂, A}(data, space)
end

# constructors from data
function TensorMap{T, S, N₁, N₂, A}(
data::A, space::TensorMapSpace{S, N₁, N₂}
@@ -34,6 +33,9 @@ struct TensorMap{T, S <: IndexSpace, N₁, N₂, A <: DenseVector{T}} <: Abstrac
return new{T, S, N₁, N₂, A}(data, space)
end
end
# constructors from another TensorMap -- no-op
TensorMap{T, S, N₁, N₂, A}(t::TensorMap{T, S, N₁, N₂, A}) where {T, S <: IndexSpace, N₁, N₂, A <: DenseVector{T}} = t
kshyatt marked this conversation as resolved.
Member:
Suggested change
TensorMap{T, S, N₁, N₂, A}(t::TensorMap{T, S, N₁, N₂, A}) where {T, S <: IndexSpace, N₁, N₂, A <: DenseVector{T}} = t

I would actually be in favor of only having the second implementation, since there is a subtle semantic difference that I would like to retain:

In general (although this is very poorly documented), the difference between a constructor and a converter is that a converter is meant to be as lossless as possible and is allowed to take ownership of all parts of the original data. On the other hand, a constructor is typically meant to make an independent copy. You can see this for regular Arrays in the example below:

Where it becomes a bit more vague is, of course, that you also want a constructor that takes the different fields and builds up an object from them without creating an independent copy (e.g. the inner constructors of a type). This is also where the Julia docs are completely underspecified, so it is something we should try and figure out ourselves.

I would say that in this case, though, creating an independent copy is probably the safest solution, and it is probably easier to reason about generic code if calling TensorMap{...}(t::TensorMap) is guaranteed to give you something that does not share data with t, but I am of course open to other opinions :)

julia> a = rand(3)
3-element Vector{Float64}:
 0.3155466339261911
 0.6265705423537413
 0.6035244268238111

julia> b = Array(a)
3-element Vector{Float64}:
 0.3155466339261911
 0.6265705423537413
 0.6035244268238111

julia> b[1] = 1
1

julia> a
3-element Vector{Float64}:
 0.3155466339261911
 0.6265705423537413
 0.6035244268238111

julia> c = convert(Array, a)
3-element Vector{Float64}:
 0.3155466339261911
 0.6265705423537413
 0.6035244268238111

julia> c[1] = 1
1

julia> a
3-element Vector{Float64}:
 1.0
 0.6265705423537413
 0.6035244268238111

Member Author:
I guess this and the adapt thing are the main outstanding points, right?

TensorMap{T, S, N₁, N₂, A}(t::TensorMap{T, S, N₁, N₂}) where {T, S <: IndexSpace, N₁, N₂, A <: DenseVector{T}} = TensorMap(A(t.data), space(t))
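
Under that convention, a minimal sketch of how the constructor/converter pair could be split (illustrative only; the constructor line repeats the one proposed above for contrast, and the convert method is hypothetical here):

# constructor: meant to produce an independent copy of the data
TensorMap{T, S, N₁, N₂, A}(t::TensorMap{T, S, N₁, N₂}) where {T, S <: IndexSpace, N₁, N₂, A <: DenseVector{T}} =
    TensorMap(A(t.data), space(t))
# converter: may return the input itself (shared data) when no conversion is needed
Base.convert(::Type{TensorMap{T, S, N₁, N₂, A}}, t::TensorMap{T, S, N₁, N₂, A}) where {T, S <: IndexSpace, N₁, N₂, A <: DenseVector{T}} = t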

"""
Tensor{T, S, N, A<:DenseVector{T}} = TensorMap{T, S, N, 0, A}
@@ -407,11 +409,6 @@ for randf in (:rand, :randn, :randexp, :randisometry)
end
end

# Moving arbitrary TensorMaps to CPU
#-----------------------------
to_cpu(t::TensorMapWithStorage{T, Vector{T}}) where {T} = t # no op
to_cpu(t::TensorMap) = convert(TensorMapWithStorage{scalartype(t), similarstoragetype(scalartype(t))}, t)

# Efficient copy constructors
#-----------------------------
Base.copy(t::TensorMap) = typeof(t)(copy(t.data), t.space)
72 changes: 36 additions & 36 deletions test/amd/tensors.jl
@@ -97,7 +97,7 @@ for V in spacelist
for T in (Int, Float32, ComplexF64)
t = @constinferred AMDGPU.rand(T, W)
d = convert(Dict, t)
@test TensorKit.to_cpu(t) == convert(TensorMap, d)
@test adapt(Array, t) == convert(TensorMap, d)
end
end
symmetricbraiding && @timedtestset "Basic linear algebra" begin
@@ -189,10 +189,10 @@
t = AMDGPU.rand(T, W)
t2 = @constinferred AMDGPU.rand!(similar(t))
α = rand(T)
@test norm(t, 2) ≈ norm(TensorKit.to_cpu(t), 2)
@test dot(t2, t) ≈ dot(TensorKit.to_cpu(t2), TensorKit.to_cpu(t))
@test TensorKit.to_cpu(α * t) ≈ α * TensorKit.to_cpu(t)
@test TensorKit.to_cpu(t + t) ≈ 2 * TensorKit.to_cpu(t)
@test norm(t, 2) ≈ norm(adapt(Array, t), 2)
@test dot(t2, t) ≈ dot(adapt(Array, t2), adapt(Array, t))
@test adapt(Array, α * t) ≈ α * adapt(Array, t)
@test adapt(Array, t + t) ≈ 2 * adapt(Array, t)
end
end
@timedtestset "Real and imaginary parts" begin
@@ -202,17 +202,17 @@

tr = @constinferred real(t)
@test scalartype(tr) <: Real
@test real(TensorKit.to_cpu(t)) == TensorKit.to_cpu(tr)
@test real(adapt(Array, t)) == adapt(Array, tr)
@test storagetype(tr) == ROCVector{real(T), AMDGPU.Mem.HIPBuffer}

ti = @constinferred imag(t)
@test scalartype(ti) <: Real
@test imag(TensorKit.to_cpu(t)) == TensorKit.to_cpu(ti)
@test imag(adapt(Array, t)) == adapt(Array, ti)
@test storagetype(ti) == ROCVector{real(T), AMDGPU.Mem.HIPBuffer}

tc = @inferred complex(t)
@test scalartype(tc) <: Complex
@test complex(TensorKit.to_cpu(t)) == TensorKit.to_cpu(tc)
@test complex(adapt(Array, t)) == adapt(Array, tc)
@test storagetype(tc) == ROCVector{complex(T), AMDGPU.Mem.HIPBuffer}

tc2 = @inferred complex(tr, ti)
@@ -275,13 +275,13 @@
p1 = ntuple(n -> p[n], k)
p2 = ntuple(n -> p[k + n], 5 - k)
dt2 = AMDGPU.@allowscalar permute(t, (p1, p2))
ht2 = permute(TensorKit.to_cpu(t), (p1, p2))
@test ht2 == TensorKit.to_cpu(dt2)
ht2 = permute(adapt(Array, t), (p1, p2))
@test ht2 == adapt(Array, dt2)
end

dt3 = AMDGPU.@allowscalar repartition(t, k)
ht3 = repartition(TensorKit.to_cpu(t), k)
@test ht3 == TensorKit.to_cpu(dt3)
ht3 = repartition(adapt(Array, t), k)
@test ht3 == adapt(Array, dt3)
end
end
symmetricbraiding && @timedtestset "Full trace: test self-consistency" begin
@@ -339,10 +339,10 @@
@tensor dHrA12[a, s1, s2, c] := drhoL[a, a'] * conj(dA1[a', t1, b]) *
dA2[b, t2, c'] * drhoR[c', c] *
dH[s1, s2, t1, t2]
@tensor hHrA12[a, s1, s2, c] := TensorKit.to_cpu(drhoL)[a, a'] * conj(TensorKit.to_cpu(dA1)[a', t1, b]) *
TensorKit.to_cpu(dA2)[b, t2, c'] * TensorKit.to_cpu(drhoR)[c', c] *
TensorKit.to_cpu(dH)[s1, s2, t1, t2]
@test TensorKit.to_cpu(dHrA12) ≈ hHrA12
@tensor hHrA12[a, s1, s2, c] := adapt(Array, drhoL)[a, a'] * conj(adapt(Array, dA1)[a', t1, b]) *
adapt(Array, dA2)[b, t2, c'] * adapt(Array, drhoR)[c', c] *
adapt(Array, dH)[s1, s2, t1, t2]
@test adapt(Array, dHrA12) ≈ hHrA12
end
end=# # doesn't yet work because of AdjointTensor
BraidingStyle(I) isa HasBraiding && @timedtestset "Index flipping: test flipping inverse" begin
@@ -422,31 +422,31 @@
t1 = AMDGPU.rand(T, W1, W1)
t2 = AMDGPU.rand(T, W2, W2)
t = AMDGPU.rand(T, W1, W2)
ht1 = TensorKit.to_cpu(t1)
ht2 = TensorKit.to_cpu(t2)
ht = TensorKit.to_cpu(t)
@test TensorKit.to_cpu(t1 * t) ≈ ht1 * ht
@test TensorKit.to_cpu(t1' * t) ≈ ht1' * ht
@test TensorKit.to_cpu(t2 * t') ≈ ht2 * ht'
@test TensorKit.to_cpu(t2' * t') ≈ ht2' * ht'
ht1 = adapt(Array, t1)
ht2 = adapt(Array, t2)
ht = adapt(Array, t)
@test adapt(Array, t1 * t) ≈ ht1 * ht
@test adapt(Array, t1' * t) ≈ ht1' * ht
@test adapt(Array, t2 * t') ≈ ht2 * ht'
@test adapt(Array, t2' * t') ≈ ht2' * ht'

#=AMDGPU.@allowscalar begin
@test TensorKit.to_cpu(inv(t1)) ≈ inv(ht1)
@test TensorKit.to_cpu(pinv(t)) ≈ pinv(ht)
@test adapt(Array, inv(t1)) ≈ inv(ht1)
@test adapt(Array, pinv(t)) ≈ pinv(ht)

if T == Float32 || T == ComplexF32
continue
end

@test TensorKit.to_cpu(t1 \ t) ≈ ht1 \ ht
@test TensorKit.to_cpu(t1' \ t) ≈ ht1' \ ht
@test TensorKit.to_cpu(t2 \ t') ≈ ht2 \ ht'
@test TensorKit.to_cpu(t2' \ t') ≈ ht2' \ ht'
@test adapt(Array, t1 \ t) ≈ ht1 \ ht
@test adapt(Array, t1' \ t) ≈ ht1' \ ht
@test adapt(Array, t2 \ t') ≈ ht2 \ ht'
@test adapt(Array, t2' \ t') ≈ ht2' \ ht'

@test TensorKit.to_cpu(t2 / t) ≈ ht2 / ht
@test TensorKit.to_cpu(t2' / t) ≈ ht2' / ht
@test TensorKit.to_cpu(t1 / t') ≈ ht1 / ht'
@test TensorKit.to_cpu(t1' / t') ≈ ht1' / ht'
@test adapt(Array, t2 / t) ≈ ht2 / ht
@test adapt(Array, t2' / t) ≈ ht2' / ht
@test adapt(Array, t1 / t') ≈ ht1 / ht'
@test adapt(Array, t1' / t') ≈ ht1' / ht'
end=#
end
end
@@ -456,11 +456,11 @@
#=t = project_hermitian!(AMDGPU.randn(T, W, W))
s = dim(W)
@test (@constinferred sqrt(t))^2 ≈ t
@test TensorKit.to_cpu(sqrt(t)) ≈ sqrt(TensorKit.to_cpu(t))
@test adapt(Array, sqrt(t)) ≈ sqrt(adapt(Array, t))
expt = @constinferred exp(t)
@test TensorKit.to_cpu(expt) ≈ exp(TensorKit.to_cpu(t))
@test adapt(Array, expt) ≈ exp(adapt(Array, t))
@test exp(@constinferred log(project_hermitian!(expt))) ≈ expt
@test TensorKit.to_cpu(log(project_hermitian!(expt))) ≈ log(TensorKit.to_cpu(expt))
@test adapt(Array, log(project_hermitian!(expt))) ≈ log(adapt(Array, expt))

@test (@constinferred cos(t))^2 + (@constinferred sin(t))^2 ≈
id(storagetype(t), W)