diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index ca796ebec286e..8955eeab736b2 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1125,7 +1125,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline( // configure the pipeline. OptimizationLevel Level = mapToLevel(CodeGenOpts); - if (LangOpts.SYCLIsDevice) + if (LangOpts.SYCLIsDevice) { PB.registerPipelineStartEPCallback([&](ModulePassManager &MPM, OptimizationLevel Level) { MPM.addPass(SYCLVirtualFunctionsAnalysisPass()); @@ -1139,17 +1139,23 @@ void EmitAssemblyHelper::RunOptimizationPipeline( /*FP64ConvEmu=*/CodeGenOpts.FP64ConvEmu, /*ExcludeAspects=*/{"fp64"})); MPM.addPass(SYCLPropagateJointMatrixUsagePass()); - // Lowers static/dynamic local memory builtin calls. - MPM.addPass(SYCLLowerWGLocalMemoryPass()); // Compile-time properties pass must create standard metadata as early // as possible to make them available for other passes. MPM.addPass(CompileTimePropertiesPass()); }); - else if (LangOpts.SYCLIsHost && !LangOpts.SYCLESIMDBuildHostCode) + PB.registerOptimizerEarlyEPCallback( + [](ModulePassManager &MPM, OptimizationLevel, ThinOrFullLTOPhase) { + // Allocate static local memory in SYCL kernel scope for each + // allocation call. This pass must run after AlwaysInline pass due + // to current implementation restriction. + MPM.addPass(SYCLLowerWGLocalMemoryPass()); + }); + } else if (LangOpts.SYCLIsHost && !LangOpts.SYCLESIMDBuildHostCode) { PB.registerPipelineStartEPCallback( [&](ModulePassManager &MPM, OptimizationLevel Level) { MPM.addPass(ESIMDRemoveHostCodePass()); }); + } // Add the InferAddressSpaces and SYCLOptimizeBarriers passes for all // the SPIR[V] targets diff --git a/clang/test/CodeGenSYCL/group-local-memory.cpp b/clang/test/CodeGenSYCL/group-local-memory.cpp new file mode 100644 index 0000000000000..02610e33760ab --- /dev/null +++ b/clang/test/CodeGenSYCL/group-local-memory.cpp @@ -0,0 +1,29 @@ +// Check that SYCLLowerWGLocalMemory pass is added to the SYCL device +// compilation pipeline with the inliner pass (new Pass Manager). + +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -emit-llvm -O2 \ +// RUN: -mdebug-pass Structure %s -o /dev/null 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHECK-INL,CHECK + +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -emit-llvm -O0 \ +// RUN: -mdebug-pass Structure %s -o /dev/null 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ALWINL,CHECK + +// Check that AlwaysInliner pass is always run for compilation of SYCL device +// target code, even if all optimizations are disabled. + +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -emit-llvm -fno-sycl-early-optimizations \ +// RUN: -mdebug-pass Structure %s -o /dev/null 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ALWINL,CHECK + +// CHECK-INL: Running pass: ModuleInlinerWrapperPass on [module] +// CHECK-ALWINL: Running pass: AlwaysInlinerPass on [module] +// CHECK: Running pass: SYCLLowerWGLocalMemoryPass on [module] + +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -emit-llvm -disable-llvm-passes \ +// RUN: -mdebug-pass Structure %s -o /dev/null 2>&1 \ +// RUN: | FileCheck %s --check-prefixes=CHECK-NO-PASSES-ALWINL,CHECK-NO-PASSES,CHECK-NO-PASSES-INL + +// CHECK-NO-PASSES-INL-NOT: Running pass: ModuleInlinerWrapperPass on [module] +// CHECK-NO-PASSES-ALWINL-NOT: Running pass: AlwaysInlinerPass on [module] +// CHECK-NO-PASSES-NOT: Running pass: SYCLLowerWGLocalMemoryPass on [module] diff --git a/clang/test/CodeGenSYCL/kernel-early-optimization-pipeline.cpp b/clang/test/CodeGenSYCL/kernel-early-optimization-pipeline.cpp index 1af3368350bed..a7d514dbca725 100644 --- a/clang/test/CodeGenSYCL/kernel-early-optimization-pipeline.cpp +++ b/clang/test/CodeGenSYCL/kernel-early-optimization-pipeline.cpp @@ -8,11 +8,11 @@ // CHECK: SYCLConditionalCallOnDevicePass // CHECK: SYCLPropagateAspectsUsagePass // CHECK: SYCLPropagateJointMatrixUsagePass -// CHECK: SYCLLowerWGLocalMemoryPass // CHECK: CompileTimePropertiesPass // CHECK: InferFunctionAttrsPass // CHECK: AlwaysInlinerPass // CHECK: ModuleInlinerWrapperPass +// CHECK: SYCLLowerWGLocalMemoryPass // CHECK: SYCLOptimizeBarriersPass // CHECK: ConstantMergePass // CHECK: SYCLMutatePrintfAddrspacePass diff --git a/llvm/lib/SYCLLowerIR/LowerWGLocalMemory.cpp b/llvm/lib/SYCLLowerIR/LowerWGLocalMemory.cpp index b35d5e6dbf71f..2f3d31ef4c325 100644 --- a/llvm/lib/SYCLLowerIR/LowerWGLocalMemory.cpp +++ b/llvm/lib/SYCLLowerIR/LowerWGLocalMemory.cpp @@ -9,14 +9,11 @@ //===----------------------------------------------------------------------===// #include "llvm/SYCLLowerIR/LowerWGLocalMemory.h" -#include "llvm/ADT/DenseSet.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/Pass.h" -#include "llvm/SYCLLowerIR/SYCLUtils.h" #include "llvm/TargetParser/Triple.h" -#include "llvm/Transforms/Utils/Cloning.h" using namespace llvm; @@ -91,44 +88,6 @@ ModulePass *llvm::createSYCLLowerWGLocalMemoryLegacyPass() { return new SYCLLowerWGLocalMemoryLegacy(); } -// In sycl header __sycl_allocateLocalMemory builtin call is wrapped in -// group_local_memory/group_local_memory_for_overwrite functions, which must be -// inlined first before each __sycl_allocateLocalMemory call can be lowered to a -// distinct global variable. Inlining them here so that this pass doesn't have -// implicit dependency on AlwaysInlinerPass. -// -// syclcompat::local_mem, which represents a distinct allocation, calls -// group_local_memory_for_overwrite. So local_mem should be inlined as well. -static bool inlineGroupLocalMemoryFunc(Module &M) { - Function *ALMFunc = M.getFunction(SYCL_ALLOCLOCALMEM_CALL); - if (!ALMFunc || ALMFunc->use_empty()) - return false; - - SmallVector WorkList{ALMFunc}; - DenseSet Visited; - while (!WorkList.empty()) { - auto *F = WorkList.pop_back_val(); - for (auto *U : make_early_inc_range(F->users())) { - auto *CI = cast(U); - auto *Caller = CI->getFunction(); - // Frontend propagates sycl-forceinline attribute to SYCL_EXTERNAL - // function which directly calls group_local_memory_for_overwrite. - // Don't inline the SYCL_EXTERNAL function. - if (Caller->hasFnAttribute("sycl-forceinline") && - !sycl::utils::isSYCLExternalFunction(Caller) && - Visited.insert(Caller).second) - WorkList.push_back(Caller); - if (F != ALMFunc) { - InlineFunctionInfo IFI; - [[maybe_unused]] auto Result = InlineFunction(*CI, IFI); - assert(Result.isSuccess() && "inlining failed"); - } - } - } - - return !Visited.empty(); -} - // TODO: It should be checked that __sycl_allocateLocalMemory (or its source // form - group_local_memory) does not occur: // - in a function (other than user lambda/functor) @@ -392,8 +351,7 @@ static bool dynamicWGLocalMemory(Module &M) { PreservedAnalyses SYCLLowerWGLocalMemoryPass::run(Module &M, ModuleAnalysisManager &) { - bool Changed = inlineGroupLocalMemoryFunc(M); - Changed |= allocaWGLocalMemory(M); + bool Changed = allocaWGLocalMemory(M); Changed |= dynamicWGLocalMemory(M); return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); } diff --git a/llvm/test/SYCLLowerIR/group_local_memory_inline.ll b/llvm/test/SYCLLowerIR/group_local_memory_inline.ll deleted file mode 100644 index 29f9c2390b115..0000000000000 --- a/llvm/test/SYCLLowerIR/group_local_memory_inline.ll +++ /dev/null @@ -1,66 +0,0 @@ -; RUN: opt < %s -passes=sycllowerwglocalmemory -S | FileCheck %s - -; Check group_local_memory_for_overwrite and group_local_memory functions are inlined. -; Check __sycl_allocateLocalMemory calls are lowered to four separate allocations. - -target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" -target triple = "spir64-unknown-unknown" - -%"class.sycl::_V1::multi_ptr" = type { ptr addrspace(3) } -%"class.sycl::_V1::group" = type { %"class.sycl::_V1::range", %"class.sycl::_V1::range", %"class.sycl::_V1::range", %"class.sycl::_V1::id" } -%"class.sycl::_V1::range" = type { %"class.sycl::_V1::detail::array" } -%"class.sycl::_V1::detail::array" = type { [1 x i64] } -%"class.sycl::_V1::id" = type { %"class.sycl::_V1::detail::array" } - -; CHECK: @WGLocalMem{{.*}} = internal addrspace(3) global [4 x i8] poison, align 4 -; CHECK: @WGLocalMem{{.*}} = internal addrspace(3) global [4 x i8] poison, align 4 -; CHECK: @WGLocalMem{{.*}} = internal addrspace(3) global [4 x i8] poison, align 4 -; CHECK: @WGLocalMem{{.*}} = internal addrspace(3) global [4 x i8] poison, align 4 - -; Function Attrs: alwaysinline -define internal spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlNS0_7nd_itemILi1EEEE_clES5_() #0 { -entry: -; CHECK-LABEL: define internal spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlNS0_7nd_itemILi1EEEE_clES5_( -; CHECK: store ptr addrspace(3) @WGLocalMem{{.*}}, ptr addrspace(4) %AllocatedMem{{.*}}, align 8 -; CHECK: store ptr addrspace(3) @WGLocalMem{{.*}}, ptr addrspace(4) %AllocatedMem{{.*}}, align 8 -; CHECK: store ptr addrspace(3) @WGLocalMem{{.*}}, ptr addrspace(4) %AllocatedMem{{.*}}, align 8 -; CHECK: store ptr addrspace(3) @WGLocalMem{{.*}}, ptr addrspace(4) %AllocatedMem{{.*}}, align 8 - - %Ptr = alloca %"class.sycl::_V1::multi_ptr", align 8 - %agg = alloca %"class.sycl::_V1::group", align 8 - %Ptr.ascast = addrspacecast ptr %Ptr to ptr addrspace(4) - call spir_func void @_ZN4sycl3_V13ext6oneapi32group_local_memory_for_overwriteIiNS0_5groupILi1EEEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS7_LNS0_6access13address_spaceE3ELNSA_9decoratedE2EEEE4typeES8_(ptr addrspace(4) sret(%"class.sycl::_V1::multi_ptr") align 8 %Ptr.ascast, ptr byval(%"class.sycl::_V1::group") align 8 %agg) - call spir_func void @_ZN4sycl3_V13ext6oneapi32group_local_memory_for_overwriteIiNS0_5groupILi1EEEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS7_LNS0_6access13address_spaceE3ELNSA_9decoratedE2EEEE4typeES8_(ptr addrspace(4) sret(%"class.sycl::_V1::multi_ptr") align 8 %Ptr.ascast, ptr byval(%"class.sycl::_V1::group") align 8 %agg) - call spir_func void @_ZN4sycl3_V13ext6oneapi18group_local_memoryIiNS0_5groupILi1EEEJEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS7_LNS0_6access13address_spaceE3ELNSA_9decoratedE2EEEE4typeES8_DpOT1_(ptr addrspace(4) sret(%"class.sycl::_V1::multi_ptr") align 8 %Ptr.ascast, ptr byval(%"class.sycl::_V1::group") align 8 %agg) - call spir_func void @_ZN4sycl3_V13ext6oneapi18group_local_memoryIiNS0_5groupILi1EEEJEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS7_LNS0_6access13address_spaceE3ELNSA_9decoratedE2EEEE4typeES8_DpOT1_(ptr addrspace(4) sret(%"class.sycl::_V1::multi_ptr") align 8 %Ptr.ascast, ptr byval(%"class.sycl::_V1::group") align 8 %agg) - ret void -} - -; Function Attrs: alwaysinline -define spir_func void @_ZN4sycl3_V13ext6oneapi32group_local_memory_for_overwriteIiNS0_5groupILi1EEEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS7_LNS0_6access13address_spaceE3ELNSA_9decoratedE2EEEE4typeES8_(ptr addrspace(4) sret(%"class.sycl::_V1::multi_ptr") align 8 %agg.result, ptr byval(%"class.sycl::_V1::group") align 8 %g) #1 { -entry: -; CHECK-LABEL: define {{.*}} @_ZN4sycl3_V13ext6oneapi32group_local_memory_for_overwriteIiNS0_5groupILi1EEEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS7_LNS0_6access13address_spaceE3ELNSA_9decoratedE2EEEE4typeES8_( - - %AllocatedMem = alloca ptr addrspace(3), align 8 - %AllocatedMem.ascast = addrspacecast ptr %AllocatedMem to ptr addrspace(4) - %call = call spir_func ptr addrspace(3) @__sycl_allocateLocalMemory(i64 4, i64 4) - store ptr addrspace(3) %call, ptr addrspace(4) %AllocatedMem.ascast, align 8 - ret void -} - -; Function Attrs: alwaysinline -define spir_func void @_ZN4sycl3_V13ext6oneapi18group_local_memoryIiNS0_5groupILi1EEEJEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS7_LNS0_6access13address_spaceE3ELNSA_9decoratedE2EEEE4typeES8_DpOT1_(ptr addrspace(4) sret(%"class.sycl::_V1::multi_ptr") align 8 %agg.result, ptr byval(%"class.sycl::_V1::group") align 8 %g) #1 { -entry: -; CHECK-LABEL: define {{.*}} @_ZN4sycl3_V13ext6oneapi18group_local_memoryIiNS0_5groupILi1EEEJEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS7_LNS0_6access13address_spaceE3ELNSA_9decoratedE2EEEE4typeES8_DpOT1_ - - %AllocatedMem = alloca ptr addrspace(3), align 8 - %AllocatedMem.ascast = addrspacecast ptr %AllocatedMem to ptr addrspace(4) - %call = call spir_func ptr addrspace(3) @__sycl_allocateLocalMemory(i64 4, i64 4) - store ptr addrspace(3) %call, ptr addrspace(4) %AllocatedMem.ascast, align 8 - ret void -} - -declare spir_func ptr addrspace(3) @__sycl_allocateLocalMemory(i64 noundef, i64 noundef) - -attributes #0 = { alwaysinline } -attributes #1 = { "sycl-forceinline"="true" } diff --git a/llvm/test/SYCLLowerIR/group_local_memory_sycl_device_user.ll b/llvm/test/SYCLLowerIR/group_local_memory_sycl_device_user.ll deleted file mode 100644 index b2355199e63c2..0000000000000 --- a/llvm/test/SYCLLowerIR/group_local_memory_sycl_device_user.ll +++ /dev/null @@ -1,49 +0,0 @@ -; RUN: opt < %s -passes=sycllowerwglocalmemory -S | FileCheck %s - -; `foo` is a SYCL_EXTERNAL function that directly calls `group_local_memory_for_overwrite`. -; Frontend propagates `sycl-forceinline` attribute from `group_local_memory_for_overwrite` to `foo`. -; This test checks that `foo` is not inlined. - -target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" -target triple = "spir64-unknown-unknown" - -%"class.sycl::_V1::multi_ptr" = type { ptr addrspace(3) } -%"class.sycl::_V1::group" = type { %"class.sycl::_V1::range", %"class.sycl::_V1::range", %"class.sycl::_V1::range", %"class.sycl::_V1::id" } -%"class.sycl::_V1::range" = type { %"class.sycl::_V1::detail::array" } -%"class.sycl::_V1::detail::array" = type { [3 x i64] } -%"class.sycl::_V1::id" = type { %"class.sycl::_V1::detail::array" } - -; CHECK: @WGLocalMem = internal addrspace(3) global [0 x i8] poison, align 1 - -define weak_odr dso_local spir_func void @_Z3fooPPi(ptr addrspace(4) noundef %a) #0 { -entry: -; CHECK-LABEL: define {{.*}} @_Z3fooPPi( -; CHECK: store ptr addrspace(3) @WGLocalMem, - - call spir_func void @_ZN4sycl3_V13ext6oneapi32group_local_memory_for_overwriteIiNS0_5groupILi3EEEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS7_LNS0_6access13address_spaceE3ELNSA_9decoratedE2EEEE4typeES8_(ptr addrspace(4) null, ptr null) - ret void -} - -define linkonce_odr dso_local spir_func void @_ZN4sycl3_V13ext6oneapi32group_local_memory_for_overwriteIiNS0_5groupILi3EEEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS7_LNS0_6access13address_spaceE3ELNSA_9decoratedE2EEEE4typeES8_(ptr addrspace(4) sret(%"class.sycl::_V1::multi_ptr") align 8 %result, ptr noundef byval(%"class.sycl::_V1::group") align 8 %g) #1 { -entry: -; CHECK-LABEL: define {{.*}} @_ZN4sycl3_V13ext6oneapi32group_local_memory_for_overwriteIiNS0_5groupILi3EEEEENSt9enable_ifIXaasr3stdE27is_trivially_destructible_vIT_Esr4sycl6detail8is_groupIT0_EE5valueENS0_9multi_ptrIS7_LNS0_6access13address_spaceE3ELNSA_9decoratedE2EEEE4typeES8_( - - %AllocatedMem.ascast = addrspacecast ptr %g to ptr addrspace(4) - %call = call spir_func ptr addrspace(3) @__sycl_allocateLocalMemory(i64 0, i64 1) - store ptr addrspace(3) %call, ptr addrspace(4) %AllocatedMem.ascast, align 8 - ret void -} - -declare spir_func ptr addrspace(3) @__sycl_allocateLocalMemory(i64, i64) - -define internal spir_func void @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlNS0_7nd_itemILi1EEEE_clES5_() { -entry: -; CHECK-LABEL: define {{.*}} @_ZZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_ENKUlNS0_7nd_itemILi1EEEE_clES5_( -; CHECK: call spir_func void @_Z3fooPPi( - - call spir_func void @_Z3fooPPi(ptr addrspace(4) null) - ret void -} - -attributes #0 = { "sycl-forceinline"="true" "sycl-module-id"="group_local_memory_template.cpp" } -attributes #1 = { "sycl-forceinline"="true" } diff --git a/sycl/include/sycl/ext/oneapi/group_local_memory.hpp b/sycl/include/sycl/ext/oneapi/group_local_memory.hpp index 8b0b39c20fd39..6e65b9acffe8e 100644 --- a/sycl/include/sycl/ext/oneapi/group_local_memory.hpp +++ b/sycl/include/sycl/ext/oneapi/group_local_memory.hpp @@ -21,9 +21,6 @@ namespace sycl { inline namespace _V1 { namespace ext::oneapi { template -#ifdef __SYCL_DEVICE_ONLY__ -[[__sycl_detail__::add_ir_attributes_function("sycl-forceinline", true)]] -#endif std::enable_if_t< std::is_trivially_destructible_v && sycl::detail::is_group::value, multi_ptr> @@ -47,9 +44,6 @@ std::enable_if_t< } template -#ifdef __SYCL_DEVICE_ONLY__ -[[__sycl_detail__::add_ir_attributes_function("sycl-forceinline", true)]] -#endif std::enable_if_t< std::is_trivially_destructible_v && sycl::detail::is_group::value, multi_ptr>