Skip to content

Commit 76a7382

Browse files
authored
Purge CUDA 11 (#2870)
1 parent e734d7a commit 76a7382

File tree

21 files changed

+330
-735
lines changed

21 files changed

+330
-735
lines changed

lib/cublas/CUBLAS.jl

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,9 +43,7 @@ function math_mode!(handle, mode)
4343
flags = 0
4444

4545
# https://github.com/facebookresearch/faiss/issues/1385
46-
if version() > v"11"
47-
flags = CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION
48-
end
46+
flags = CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION
4947

5048
flags |= if mode == CUDA.PEDANTIC_MATH
5149
# prevent use of tensor cores

lib/cudadrv/context.jl

Lines changed: 3 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -214,12 +214,7 @@ Lower the refcount of a context, possibly freeing up all resources associated wi
214214
does not respect any users of the context, and might make other objects unusable.
215215
"""
216216
function unsafe_release!(pctx::CuPrimaryContext)
217-
if driver_version() >= v"11"
218-
cuDevicePrimaryCtxRelease_v2(pctx.dev)
219-
else
220-
cuDevicePrimaryCtxRelease(pctx.dev)
221-
end
222-
217+
cuDevicePrimaryCtxRelease_v2(pctx.dev)
223218
return
224219
end
225220

@@ -231,12 +226,7 @@ in the current process. Note that this forcibly invalidates all contexts derived
231226
primary context, and as a result outstanding resources might become invalid.
232227
"""
233228
function unsafe_reset!(pctx::CuPrimaryContext)
234-
if driver_version() >= v"11"
235-
cuDevicePrimaryCtxReset_v2(pctx.dev)
236-
else
237-
cuDevicePrimaryCtxReset(pctx.dev)
238-
end
239-
229+
cuDevicePrimaryCtxReset_v2(pctx.dev)
240230
return
241231
end
242232

@@ -267,11 +257,7 @@ flags(pctx::CuPrimaryContext) = state(pctx)[1]
267257
Set the flags of a primary context.
268258
"""
269259
function setflags!(pctx::CuPrimaryContext, flags)
270-
if driver_version() >= v"11"
271-
cuDevicePrimaryCtxSetFlags_v2(pctx.dev, flags)
272-
else
273-
cuDevicePrimaryCtxSetFlags(pctx.dev, flags)
274-
end
260+
cuDevicePrimaryCtxSetFlags_v2(pctx.dev, flags)
275261
end
276262

277263

lib/cudadrv/devices.jl

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,6 @@ corresponding to the device ID as known to CUDA.
8080
deviceid(dev::CuDevice) = Int(convert(CUdevice, dev))
8181

8282
function uuid(dev::CuDevice)
83-
driver_version() < v"11.4" && return parent_uuid(dev)
84-
8583
# returns the MIG UUID if this is a compute instance
8684
uuid_ref = Ref{CUuuid}()
8785
cuDeviceGetUuid_v2(uuid_ref, dev)
@@ -186,9 +184,7 @@ function capability(dev::CuDevice)
186184
attribute(dev, DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR))
187185
end
188186

189-
memory_pools_supported(dev::CuDevice) =
190-
CUDA.driver_version() >= v"11.2" &&
191-
attribute(dev, DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED) == 1
187+
memory_pools_supported(dev::CuDevice) = attribute(dev, DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED) == 1
192188
@deprecate has_stream_ordered(dev::CuDevice) memory_pools_supported(dev)
193189

194190
unified_addressing(dev::CuDevice) =

lib/cudadrv/graph.jl

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -90,25 +90,7 @@ mutable struct CuGraphExec
9090
global function instantiate(graph::CuGraph, flags=0)
9191
handle_ref = Ref{CUgraphExec}()
9292

93-
if driver_version() >= v"12.0"
94-
cuGraphInstantiateWithFlags(handle_ref, graph, flags)
95-
else
96-
flags == 0 || error("Flags are not supported on CUDA < 12.0")
97-
98-
error_node = Ref{CUgraphNode}()
99-
buflen = 256
100-
buf = Vector{UInt8}(undef, buflen)
101-
102-
GC.@preserve buf begin
103-
if driver_version() >= v"11.0"
104-
cuGraphInstantiate_v2(handle_ref, graph, error_node, pointer(buf), buflen)
105-
else
106-
cuGraphInstantiate(handle_ref, graph, error_node, pointer(buf), buflen)
107-
end
108-
diag = String(buf)
109-
# TODO: how to use these?
110-
end
111-
end
93+
cuGraphInstantiateWithFlags(handle_ref, graph, flags)
11294

11395
ctx = current_context()
11496
obj = new(handle_ref[], graph, ctx)

lib/cudadrv/memory.jl

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -565,16 +565,8 @@ function unsafe_copy3d!(dst::Union{Ptr{T},CuPtr{T},CuArrayPtr{T}}, dstTyp::Type{
565565
srcPos = CuDim3(srcPos)
566566
dstPos = CuDim3(dstPos)
567567

568-
# JuliaGPU/CUDA.jl#863: cuMemcpy3DAsync calculates wrong offset
569-
# when using the stream-ordered memory allocator
570-
# NOTE: we apply the workaround unconditionally, since we want to keep this call cheap.
571-
if v"11.2" <= driver_version() <= v"11.3" #&& pools[device()].stream_ordered
572-
srcOffset = (srcPos.x-1)*aligned_sizeof(T) + srcPitch*((srcPos.y-1) + srcHeight*(srcPos.z-1))
573-
dstOffset = (dstPos.x-1)*aligned_sizeof(T) + dstPitch*((dstPos.y-1) + dstHeight*(dstPos.z-1))
574-
else
575-
srcOffset = 0
576-
dstOffset = 0
577-
end
568+
srcOffset = 0
569+
dstOffset = 0
578570

579571
srcMemoryType, srcHost, srcDevice, srcArray = if srcTyp == HostMemory
580572
CU_MEMORYTYPE_HOST,

lib/cupti/wrappers.jl

Lines changed: 4 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -282,46 +282,16 @@ function process(f, cfg::ActivityConfig)
282282
cuda_version = CUDA.runtime_version()
283283
## kernel activities
284284
activity_types[CUPTI_ACTIVITY_KIND_KERNEL] =
285-
if cuda_version >= v"12.0"
286-
CUpti_ActivityKernel9
287-
elseif cuda_version >= v"11.8"
288-
CUpti_ActivityKernel8
289-
elseif cuda_version >= v"11.6"
290-
CUpti_ActivityKernel7
291-
elseif cuda_version >= v"11.2"
292-
CUpti_ActivityKernel6
293-
elseif cuda_version >= v"11.1"
294-
CUpti_ActivityKernel5
295-
else # v"11.0"
296-
CUpti_ActivityKernel4
297-
end
285+
CUpti_ActivityKernel9
298286
activity_types[CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL] =
299287
activity_types[CUPTI_ACTIVITY_KIND_KERNEL]
300288
## memcpy activities
301289
activity_types[CUPTI_ACTIVITY_KIND_MEMCPY] =
302-
if cuda_version >= v"11.6"
303-
CUpti_ActivityMemcpy5
304-
elseif cuda_version >= v"11.1"
305-
CUpti_ActivityMemcpy4
306-
else # v"11.0"
307-
CUpti_ActivityMemcpy3
308-
end
290+
CUpti_ActivityMemcpy5
309291
activity_types[CUPTI_ACTIVITY_KIND_MEMSET] =
310-
if cuda_version >= v"11.6"
311-
CUpti_ActivityMemset4
312-
elseif cuda_version >= v"11.1"
313-
CUpti_ActivityMemset3
314-
else # v"11.0"
315-
CUpti_ActivityMemset2
316-
end
292+
CUpti_ActivityMemset4
317293
activity_types[CUPTI_ACTIVITY_KIND_MEMORY2] =
318-
if cuda_version >= v"11.6"
319-
CUpti_ActivityMemory3
320-
elseif cuda_version >= v"11.2"
321-
CUpti_ActivityMemory2
322-
else # v"9.0"
323-
CUpti_ActivityMemory
324-
end
294+
CUpti_ActivityMemory3
325295

326296
# extract typed activity records
327297
for (ctx_handle, stream_id, buf_ptr, sz, valid_sz) in cfg.results

lib/cusparse/generic.jl

Lines changed: 7 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -158,12 +158,7 @@ function mv!(transa::SparseChar, alpha::Number, A::Union{CuSparseMatrixCSC{TA},C
158158
# Support transa = 'C' for real matrices
159159
transa = T <: Real && transa == 'C' ? 'T' : transa
160160

161-
if CUSPARSE.version() < v"12.0" && isa(A, CuSparseMatrixCSC) && transa == 'C' && TA <: Complex
162-
throw(ArgumentError("Matrix-vector multiplication with the adjoint of a complex CSC matrix" *
163-
" is not supported by the current CUDA version. Use a CSR or COO matrix instead."))
164-
end
165-
166-
if CUSPARSE.version() < v"12.0" && isa(A, CuSparseMatrixCSC)
161+
if isa(A, CuSparseMatrixCSC)
167162
# cusparseSpMV completely supports CSC matrices with CUSPARSE.version() ≥ v"12.0".
168163
# We use Aᵀ to model them as CSR matrices for older versions of CUSPARSE.
169164
descA = CuSparseMatrixDescriptor(A, index, transposed=true)
@@ -186,9 +181,9 @@ function mv!(transa::SparseChar, alpha::Number, A::Union{CuSparseMatrixCSC{TA},C
186181
# operations with 16-bit numbers always imply mixed-precision computation
187182
# TODO: we should better model the supported combinations here,
188183
# and error if using an unsupported one (like with gemmEx!)
189-
compute_type = if version() >= v"11.4" && T == Float16
184+
compute_type = if T == Float16
190185
Float32
191-
elseif version() >= v"11.7.2" && T == ComplexF16
186+
elseif T == ComplexF16
192187
ComplexF32
193188
else
194189
T
@@ -216,21 +211,8 @@ function mm!(transa::SparseChar, transb::SparseChar, alpha::Number, A::CuSparseM
216211
transa = T <: Real && transa == 'C' ? 'T' : transa
217212
transb = T <: Real && transb == 'C' ? 'T' : transb
218213

219-
if CUSPARSE.version() < v"12.0" && isa(A, CuSparseMatrixCSC) && transa == 'C' && T <: Complex
220-
throw(ArgumentError("Matrix-matrix multiplication with the adjoint of a complex CSC matrix" *
221-
" is not supported by the current CUDA version. Use a CSR or COO matrix instead."))
222-
end
223-
224-
if CUSPARSE.version() < v"12.0" && isa(A, CuSparseMatrixCSC)
225-
# cusparseSpMM completely supports CSC matrices with CUSPARSE.version() ≥ v"12.0".
226-
# We use Aᵀ to model them as CSR matrices for older versions of CUSPARSE.
227-
descA = CuSparseMatrixDescriptor(A, index, transposed=true)
228-
k,m = size(A)
229-
transa = transa == 'N' ? 'T' : 'N'
230-
else
231-
descA = CuSparseMatrixDescriptor(A, index)
232-
m,k = size(A)
233-
end
214+
descA = CuSparseMatrixDescriptor(A, index)
215+
m,k = size(A)
234216
n = size(C)[2]
235217

236218
if transa == 'N' && transb == 'N'
@@ -288,10 +270,6 @@ end
288270
function bmm!(transa::SparseChar, transb::SparseChar, alpha::Number, A::CuSparseArrayCSR{T,Ti,3},
289271
B::DenseCuArray{T,3}, beta::Number, C::DenseCuArray{T,3}, index::SparseChar, algo::cusparseSpMMAlg_t=CUSPARSE_SPMM_ALG_DEFAULT) where {T,Ti}
290272

291-
if CUSPARSE.version() < v"11.7.2"
292-
throw(ErrorException("Batched dense-matrix times batched sparse-matrix (bmm!) requires a CUSPARSE version ≥ 11.7.2 (yours: $(CUSPARSE.version()))."))
293-
end
294-
295273
# Support transa = 'C' and `transb = 'C' for real matrices
296274
transa = T <: Real && transa == 'C' ? 'T' : transa
297275
transb = T <: Real && transb == 'C' ? 'T' : transb
@@ -341,7 +319,7 @@ function bmm!(transa::SparseChar, transb::SparseChar, alpha::Number, A::CuSparse
341319
end
342320
with_workspace(bufferSize) do buffer
343321
# We should find a way to reuse the buffer (issue #1362)
344-
if !(A isa CuSparseMatrixCOO) && (CUSPARSE.version() ≥ v"11.7.2")
322+
if !(A isa CuSparseMatrixCOO)
345323
cusparseSpMM_preprocess(
346324
handle(), transa, transb, Ref{T}(alpha), descA, descB, Ref{T}(beta),
347325
descC, T, algo, buffer)
@@ -357,8 +335,6 @@ function mm!(transa::SparseChar, transb::SparseChar, alpha::Number, A::DenseCuMa
357335
B::Union{CuSparseMatrixCSC{T},CuSparseMatrixCSR{T},CuSparseMatrixCOO{T}}, beta::Number,
358336
C::DenseCuMatrix{T}, index::SparseChar, algo::cusparseSpMMAlg_t=CUSPARSE_SPMM_ALG_DEFAULT) where {T}
359337

360-
CUSPARSE.version() < v"11.7.4" && throw(ErrorException("This operation is not supported by the current CUDA version."))
361-
362338
# Support transa = 'C' and `transb = 'C' for real matrices
363339
transa = T <: Real && transa == 'C' ? 'T' : transa
364340
transb = T <: Real && transb == 'C' ? 'T' : transb
@@ -369,11 +345,6 @@ function mm!(transa::SparseChar, transb::SparseChar, alpha::Number, A::DenseCuMa
369345
# Cc = α * Ac * Bᴴ + β * Cc → α * B̅ * Ar + β * Cr
370346
# where B is a sparse matrix, Ac and Cc indicate column-major layout, while Ar and Cr refer to row-major layout.
371347

372-
if CUSPARSE.version() < v"12.0" && isa(B, CuSparseMatrixCSR) && transb == 'C' && T <: Complex
373-
throw(ArgumentError("Matrix-matrix multiplication with the adjoint of a complex CSR matrix" *
374-
" is not supported by the current CUDA version. Use a CSC or COO matrix instead."))
375-
end
376-
377348
m,k = size(A)
378349
n = size(C)[2]
379350

@@ -402,7 +373,7 @@ function mm!(transa::SparseChar, transb::SparseChar, alpha::Number, A::DenseCuMa
402373
end
403374
with_workspace(bufferSize) do buffer
404375
# We should find a way to reuse the buffer (issue #1362)
405-
if !(B isa CuSparseMatrixCOO) && (CUSPARSE.version() ≥ v"11.7.2")
376+
if !(B isa CuSparseMatrixCOO)
406377
cusparseSpMM_preprocess(
407378
handle(), transb, transa, Ref{T}(alpha), descB, descA, Ref{T}(beta),
408379
descC, T, algo, buffer)
@@ -824,7 +795,6 @@ end
824795
function sddmm!(transa::SparseChar, transb::SparseChar, alpha::Number, A::DenseCuMatrix{T}, B::DenseCuMatrix{T},
825796
beta::Number, C::Union{CuSparseMatrixCSR{T},CuSparseMatrixBSR{T}}, index::SparseChar, algo::cusparseSDDMMAlg_t=CUSPARSE_SDDMM_ALG_DEFAULT) where {T}
826797

827-
CUSPARSE.version() < v"11.4.1" && throw(ErrorException("This operation is not supported by the current CUDA version."))
828798
(C isa CuSparseMatrixBSR) && (CUSPARSE.version() < v"12.1.0") && throw(ErrorException("This operation is not supported by the current CUDA version."))
829799

830800
# Support transa = 'C' and `transb = 'C' for real matrices

src/compiler/reflection.jl

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,9 @@ function code_sass(io::IO, job::CompilerJob; raw::Bool=false)
4545
end
4646

4747
# NVIDIA bug #3964667: CUPTI in CUDA 11.7+ broken for sm_35 devices
48-
if runtime_version() >= v"11.7" && capability(device()) <= v"3.7"
48+
if capability(device()) <= v"3.7"
4949
@error """SASS code generation is not supported on this device.
50-
Please downgrade to CUDA 11.6 or lower, or use a more recent device."""
50+
Please use a more recent device."""
5151
return
5252
end
5353

@@ -82,9 +82,9 @@ end
8282

8383
function code_sass(f::Base.Callable, io::IO=stdout; raw::Bool=false)
8484
# NVIDIA bug #3964667: CUPTI in CUDA 11.7+ broken for sm_35 devices
85-
if runtime_version() >= v"11.7" && capability(device()) <= v"3.7"
85+
if capability(device()) <= v"3.7"
8686
@error """SASS code generation is not supported on this device.
87-
Please downgrade to CUDA 11.6 or lower, or use a more recent device."""
87+
Please use a more recent device."""
8888
return
8989
end
9090

src/initialization.jl

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -66,17 +66,12 @@ function __init__()
6666
return
6767
end
6868

69-
if !(v"11" <= driver < v"14-")
70-
@error "This version of CUDA.jl only supports NVIDIA drivers for CUDA 11.x, 12.x or 13.x (yours is for CUDA $driver)"
69+
if !(v"12" <= driver < v"14-")
70+
@error "This version of CUDA.jl only supports NVIDIA drivers for CUDA 12.x or 13.x (yours is for CUDA $driver)"
7171
_initialization_error[] = "CUDA driver unsupported"
7272
return
7373
end
7474

75-
if driver < v"11.3"
76-
@warn """The NVIDIA driver on this system only supports up to CUDA $driver.
77-
For performance reasons, it is recommended to upgrade to a driver that supports CUDA 11.3 or higher."""
78-
end
79-
8075
# check that we have a runtime
8176
if !CUDA_Runtime.is_available()
8277
# try to find out why
@@ -135,8 +130,8 @@ function __init__()
135130
end
136131

137132
# ensure the loaded runtime is supported
138-
if runtime < v"10.2"
139-
@error "This version of CUDA.jl only supports CUDA 11 or higher (your toolkit provides CUDA $runtime)"
133+
if runtime < v"12.0"
134+
@error "This version of CUDA.jl only supports CUDA 12 or higher (your toolkit provides CUDA $runtime)"
140135
end
141136
if runtime.major > driver.major
142137
@warn """You are using CUDA $runtime with a driver that only supports up to $(driver.major).x.

src/profile.jl

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,6 @@ slowest 25%, while entries colored in red are among the slowest 5% of all operat
2323
2424
!!! compat "Julia 1.9" This functionality is only available on Julia 1.9 and later.
2525
26-
!!! compat "CUDA 11.2" Older versions of CUDA, before 11.2, contain bugs that may prevent
27-
the `CUDA.@profile` macro to work. It is recommended to use a newer runtime.
28-
2926
## External profilers (`external=true`, when an external profiler is detected)
3027
3128
For more advanced profiling, it is possible to use an external profiling tool, such as
@@ -495,7 +492,7 @@ function capture(cfg)
495492
size=record.bytes); cols=:union)
496493

497494
# memory allocations
498-
elseif record.kind == CUPTI.CUPTI_ACTIVITY_KIND_MEMORY2 && cuda_version >= v"11.2"
495+
elseif record.kind == CUPTI.CUPTI_ACTIVITY_KIND_MEMORY2
499496
# XXX: we'd prefer to postpone processing (i.e. calling format_bytes),
500497
# but cannot realistically add a column for every API call
501498

0 commit comments

Comments
 (0)