From 8b39009e4e025bb1a43649b46ae9ec540b231663 Mon Sep 17 00:00:00 2001 From: Ludovic Raess Date: Sat, 9 May 2026 16:17:46 +0200 Subject: [PATCH 1/8] Fix deprecation warning --- src/compiler/codegen.jl | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/compiler/codegen.jl b/src/compiler/codegen.jl index a9263a0b7..82ee52221 100644 --- a/src/compiler/codegen.jl +++ b/src/compiler/codegen.jl @@ -31,17 +31,19 @@ GPUCompiler.kernel_state_type(@nospecialize(::HIPCompilerJob)) = AMDGPU.KernelSt function GPUCompiler.link_libraries!( @nospecialize(job::HIPCompilerJob), mod::LLVM.Module, - undefined_fns::Vector{String}, ) invoke(GPUCompiler.link_libraries!, - Tuple{CompilerJob{GCNCompilerTarget}, typeof(mod), typeof(undefined_fns)}, - job, mod, undefined_fns) + Tuple{CompilerJob{GCNCompilerTarget}, typeof(mod)}, + job, mod) # Detect global hostcalls here, before optimizations & cleanup occur. _global_hostcalls[hash(job)] = find_global_hostcalls(mod) # Link only if there are undefined functions. # Everything else was loaded in `finish_module!` stage. + undefined_fns = map(LLVM.name, filter( + f -> isdeclaration(f) && !LLVM.isintrinsic(f), + collect(LLVM.functions(mod)))) link_device_libs!( job.config.target, mod, undefined_fns; wavefrontsize64=job.config.params.wavefrontsize64, @@ -58,7 +60,9 @@ function GPUCompiler.finish_module!( # Link libraries early to include options libraries in the runtime. # Otherwise we get wave64 specific instructions on wave32 hardware # which results in ICE. - undefined_fns = GPUCompiler.decls(mod) + undefined_fns = filter( + f -> isdeclaration(f) && !LLVM.isintrinsic(f), + collect(LLVM.functions(mod))) if !isempty(undefined_fns) link_device_libs!( job.config.target, mod, LLVM.name.(undefined_fns); From 35f039171c332c0f52cac9e4493a529e1f5f3dfe Mon Sep 17 00:00:00 2001 From: Ludovic Raess Date: Sat, 9 May 2026 21:36:02 +0200 Subject: [PATCH 2/8] Format --- src/compiler/codegen.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/compiler/codegen.jl b/src/compiler/codegen.jl index 82ee52221..59ead5eec 100644 --- a/src/compiler/codegen.jl +++ b/src/compiler/codegen.jl @@ -33,8 +33,7 @@ function GPUCompiler.link_libraries!( @nospecialize(job::HIPCompilerJob), mod::LLVM.Module, ) invoke(GPUCompiler.link_libraries!, - Tuple{CompilerJob{GCNCompilerTarget}, typeof(mod)}, - job, mod) + Tuple{CompilerJob{GCNCompilerTarget}, typeof(mod)}, job, mod) # Detect global hostcalls here, before optimizations & cleanup occur. _global_hostcalls[hash(job)] = find_global_hostcalls(mod) From e127c43872f96ad99a846a871870e0dc2875db96 Mon Sep 17 00:00:00 2001 From: Ludovic Raess Date: Mon, 11 May 2026 10:47:34 +0200 Subject: [PATCH 3/8] Rely on LLVM --- src/compiler/codegen.jl | 23 ++--------------------- src/compiler/device_libs.jl | 37 +++++++------------------------------ 2 files changed, 9 insertions(+), 51 deletions(-) diff --git a/src/compiler/codegen.jl b/src/compiler/codegen.jl index 59ead5eec..25c48330c 100644 --- a/src/compiler/codegen.jl +++ b/src/compiler/codegen.jl @@ -38,15 +38,9 @@ function GPUCompiler.link_libraries!( # Detect global hostcalls here, before optimizations & cleanup occur. _global_hostcalls[hash(job)] = find_global_hostcalls(mod) - # Link only if there are undefined functions. - # Everything else was loaded in `finish_module!` stage. - undefined_fns = map(LLVM.name, filter( - f -> isdeclaration(f) && !LLVM.isintrinsic(f), - collect(LLVM.functions(mod)))) link_device_libs!( - job.config.target, mod, undefined_fns; - wavefrontsize64=job.config.params.wavefrontsize64, - only_undefined=true) + job.config.target, mod; + wavefrontsize64=job.config.params.wavefrontsize64) end function GPUCompiler.finish_module!( @@ -56,19 +50,6 @@ function GPUCompiler.finish_module!( Tuple{CompilerJob{GCNCompilerTarget}, typeof(mod), typeof(entry)}, job, mod, entry) - # Link libraries early to include options libraries in the runtime. - # Otherwise we get wave64 specific instructions on wave32 hardware - # which results in ICE. - undefined_fns = filter( - f -> isdeclaration(f) && !LLVM.isintrinsic(f), - collect(LLVM.functions(mod))) - if !isempty(undefined_fns) - link_device_libs!( - job.config.target, mod, LLVM.name.(undefined_fns); - wavefrontsize64=job.config.params.wavefrontsize64, - only_undefined=false) - end - # Set kernel target cpu and features. if LLVM.callconv(entry) == LLVM.API.LLVMAMDGPUKERNELCallConv target_cpu_attr = StringAttribute("target-cpu", job.config.target.dev_isa) diff --git a/src/compiler/device_libs.jl b/src/compiler/device_libs.jl index 98deab72d..819cfc414 100644 --- a/src/compiler/device_libs.jl +++ b/src/compiler/device_libs.jl @@ -16,20 +16,18 @@ mutable struct DevLib name::String path::String data::Vector{UInt8} - fn_names::Set{String} - DevLib(name::String, path::String) = new(name, path, read(path), Set{String}()) - DevLib(name::String, ::Nothing) = new(name, "", UInt8[], Set{String}()) + DevLib(name::String, path::String) = new(name, path, read(path)) + DevLib(name::String, ::Nothing) = new(name, "", UInt8[]) end const DEVICE_LIBS::Dict{String, DevLib} = Dict{String, DevLib}() function link_device_libs!( - target::GCNCompilerTarget, mod::LLVM.Module, undefined_fns::Vector{String}; - wavefrontsize64::Bool, only_undefined::Bool, + target::GCNCompilerTarget, mod::LLVM.Module; + wavefrontsize64::Bool, ) isnothing(libdevice_libs) && return - isempty(undefined_fns) && return # 1. Load other libraries. lib_names = ("hc", "hip", "irif", "ockl", "opencl", "ocml") @@ -37,9 +35,8 @@ function link_device_libs!( devlib = get!(DEVICE_LIBS, lib_name) do DevLib(lib_name, locate_lib(lib_name)) end - load_and_link!(devlib, mod, undefined_fns) + load_and_link!(devlib, mod) end - only_undefined && return # 2. Load OCLC library. devlib = get!(DEVICE_LIBS, "oclc") do @@ -72,28 +69,15 @@ function link_device_libs!( end end -function load_and_link!( - devlib::DevLib, mod::LLVM.Module, undefined_fns::Vector{String} = String[], -) +function load_and_link!(devlib::DevLib, mod::LLVM.Module) isempty(devlib.path) && return - fill_fn_names = isempty(devlib.fn_names) - do_linking = false - - if !fill_fn_names && !isempty(undefined_fns) - for undef_fn in undefined_fns - undef_fn ∈ devlib.fn_names && (do_linking = true; break) - end - do_linking || return - end - lib = parse(LLVM.Module, devlib.data) inline_attr = EnumAttribute("alwaysinline") noinline_attr = EnumAttribute("noinline") for f in LLVM.functions(lib) fn_name = LLVM.name(f) - fill_fn_names && push!(devlib.fn_names, fn_name) # FIXME: We should be able to inline this, that we can't means # we are inserting calls to it late. @@ -110,16 +94,9 @@ function load_and_link!( inline && push!(attrs, inline_attr) end - if !do_linking && !isempty(undefined_fns) - for undef_fn in undefined_fns - undef_fn ∈ devlib.fn_names && (do_linking = true; break) - end - do_linking || return - end - # override triple and datalayout to avoid warnings triple!(lib, triple(mod)) datalayout!(lib, datalayout(mod)) - LLVM.link!(mod, lib) + LLVM.link!(mod, lib; only_needed=true) return end From ae7694d4300cdf8544671dd1f09df53be72a15e0 Mon Sep 17 00:00:00 2001 From: Ludovic Raess Date: Mon, 11 May 2026 10:53:06 +0200 Subject: [PATCH 4/8] format --- src/compiler/codegen.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/codegen.jl b/src/compiler/codegen.jl index 25c48330c..f0f0915b2 100644 --- a/src/compiler/codegen.jl +++ b/src/compiler/codegen.jl @@ -33,7 +33,7 @@ function GPUCompiler.link_libraries!( @nospecialize(job::HIPCompilerJob), mod::LLVM.Module, ) invoke(GPUCompiler.link_libraries!, - Tuple{CompilerJob{GCNCompilerTarget}, typeof(mod)}, job, mod) + Tuple{CompilerJob{GCNCompilerTarget},typeof(mod)}, job, mod) # Detect global hostcalls here, before optimizations & cleanup occur. _global_hostcalls[hash(job)] = find_global_hostcalls(mod) From 76b43c871f0d820d5774548ef7c908ed2bc68312 Mon Sep 17 00:00:00 2001 From: Ludovic Raess Date: Mon, 11 May 2026 15:23:53 +0200 Subject: [PATCH 5/8] Add lazy --- src/compiler/device_libs.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/device_libs.jl b/src/compiler/device_libs.jl index 819cfc414..f9d09f8c6 100644 --- a/src/compiler/device_libs.jl +++ b/src/compiler/device_libs.jl @@ -72,7 +72,7 @@ end function load_and_link!(devlib::DevLib, mod::LLVM.Module) isempty(devlib.path) && return - lib = parse(LLVM.Module, devlib.data) + lib = parse(LLVM.Module, devlib.data; lazy=true) inline_attr = EnumAttribute("alwaysinline") noinline_attr = EnumAttribute("noinline") From 7979923fdb3efd40d1f184a6d7cd152936f2e82c Mon Sep 17 00:00:00 2001 From: Ludovic Raess Date: Mon, 11 May 2026 15:24:23 +0200 Subject: [PATCH 6/8] Format --- src/compiler/codegen.jl | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/compiler/codegen.jl b/src/compiler/codegen.jl index f0f0915b2..fc996a80c 100644 --- a/src/compiler/codegen.jl +++ b/src/compiler/codegen.jl @@ -29,9 +29,7 @@ GPUCompiler.method_table(@nospecialize(::HIPCompilerJob)) = AMDGPU.method_table GPUCompiler.kernel_state_type(@nospecialize(::HIPCompilerJob)) = AMDGPU.KernelState -function GPUCompiler.link_libraries!( - @nospecialize(job::HIPCompilerJob), mod::LLVM.Module, -) +function GPUCompiler.link_libraries!(@nospecialize(job::HIPCompilerJob), mod::LLVM.Module) invoke(GPUCompiler.link_libraries!, Tuple{CompilerJob{GCNCompilerTarget},typeof(mod)}, job, mod) From 9491459440bcec474cf06c8b0d59d778eaf0c4c0 Mon Sep 17 00:00:00 2001 From: Ludovic Raess Date: Mon, 11 May 2026 16:43:08 +0200 Subject: [PATCH 7/8] Revert lazy --- src/compiler/device_libs.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/compiler/device_libs.jl b/src/compiler/device_libs.jl index f9d09f8c6..4f01667d8 100644 --- a/src/compiler/device_libs.jl +++ b/src/compiler/device_libs.jl @@ -72,7 +72,7 @@ end function load_and_link!(devlib::DevLib, mod::LLVM.Module) isempty(devlib.path) && return - lib = parse(LLVM.Module, devlib.data; lazy=true) + lib = parse(LLVM.Module, devlib.data) inline_attr = EnumAttribute("alwaysinline") noinline_attr = EnumAttribute("noinline") @@ -97,6 +97,6 @@ function load_and_link!(devlib::DevLib, mod::LLVM.Module) # override triple and datalayout to avoid warnings triple!(lib, triple(mod)) datalayout!(lib, datalayout(mod)) - LLVM.link!(mod, lib; only_needed=true) + LLVM.link!(mod, lib) return end From 8b4e9a666cdf6f75e55f398a122c86375c1cbf66 Mon Sep 17 00:00:00 2001 From: Ludovic Raess Date: Mon, 11 May 2026 17:12:13 +0200 Subject: [PATCH 8/8] Fixup --- src/compiler/codegen.jl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/compiler/codegen.jl b/src/compiler/codegen.jl index fc996a80c..fce8fca44 100644 --- a/src/compiler/codegen.jl +++ b/src/compiler/codegen.jl @@ -48,6 +48,13 @@ function GPUCompiler.finish_module!( Tuple{CompilerJob{GCNCompilerTarget}, typeof(mod), typeof(entry)}, job, mod, entry) + # Re-link device libs to resolve references introduced by the GPUCompiler + # runtime (e.g. boxing → malloc → hostcall → __ockl_hsa_signal*) which are + # added after link_libraries! has already run. + link_device_libs!( + job.config.target, mod; + wavefrontsize64=job.config.params.wavefrontsize64) + # Set kernel target cpu and features. if LLVM.callconv(entry) == LLVM.API.LLVMAMDGPUKERNELCallConv target_cpu_attr = StringAttribute("target-cpu", job.config.target.dev_isa)