From 0605577d51e83b2b565a433bced5e7681050acad Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Thu, 18 Dec 2025 13:10:20 +0100 Subject: [PATCH 1/2] [NVPTX] fix ptr escapes and byval params in kernel args --- llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp | 43 +++++++++++++++++------- 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp index e2bbe57c0085c..da5ac6775885f 100644 --- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp @@ -475,13 +475,18 @@ struct ArgUseChecker : PtrUseVisitor { } void visitStoreInst(StoreInst &SI) { - // Storing the pointer escapes it. - if (U->get() == SI.getValueOperand()) - return PI.setEscapedAndAborted(&SI); - // Writes to the pointer are UB w/ __grid_constant__, but do not force a - // copy. - if (!IsGridConstant) - return PI.setAborted(&SI); + // Storing the pointer value (as opposed to storing through it) escapes it. + // For grid_constant params, this is allowed - we can pass the generic + // pointer. For non-grid-constant, this requires a copy. + if (U->get() == SI.getValueOperand()) { + if (IsGridConstant) + return PI.setEscaped(&SI); + else + return PI.setEscapedAndAborted(&SI); + } + // Writes through the pointer to param space are UB w/ __grid_constant__, + // and param space is read-only on CUDA, so we need to force a copy. + return PI.setAborted(&SI); } void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) { @@ -529,10 +534,22 @@ void copyByValParam(Function &F, Argument &Arg) { // the use of the byval parameter with this alloca instruction. AllocA->setAlignment( Arg.getParamAlign().value_or(DL.getPrefTypeAlign(StructType))); - Arg.replaceAllUsesWith(AllocA); + // Must create ArgInParam before replacing uses of Arg. + // createNVVMInternalAddrspaceWrap needs to use Arg as an operand. CallInst *ArgInParam = createNVVMInternalAddrspaceWrap(IRB, Arg); + // Replace all uses of Arg with AllocA, except the use in ArgInParam. + // Note: we can't use replaceAllUsesWith because it would replace ArgInParam's + // operand too, creating a circular dependency. + SmallVector UsesToReplace; + for (Use &U : Arg.uses()) { + if (U.getUser() != ArgInParam) + UsesToReplace.push_back(&U); + } + for (Use *U : UsesToReplace) + U->set(AllocA); + // Be sure to propagate alignment to this load; LLVM doesn't know that NVPTX // addrspacecast preserves alignment. Since params are constant, this load // is definitely not volatile. @@ -578,10 +595,12 @@ static void handleByValParam(const NVPTXTargetMachine &TM, Argument *Arg) { // We can't access byval arg directly and need a pointer. on sm_70+ we have // ability to take a pointer to the argument without making a local copy. - // However, we're still not allowed to write to it. If the user specified - // `__grid_constant__` for the argument, we'll consider escaped pointer as - // read-only. - if (IsGridConstant || (HasCvtaParam && ArgUseIsReadOnly)) { + // However, param space is read-only, so we can only use this optimization + // if the argument is not written to. + // Grid constant params can escape (pointer passed to functions) but cannot + // have stores. Non-grid-constant params must be fully read-only. + if ((IsGridConstant && !PI.isAborted()) || + (HasCvtaParam && ArgUseIsReadOnly)) { LLVM_DEBUG(dbgs() << "Using non-copy pointer to " << *Arg << "\n"); // Replace all argument pointer uses (which might include a device function // call) with a cast to the generic address space using cvta.param From f8011dc525a32bb0dbd123c88b13863c94ea79c8 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Thu, 18 Dec 2025 13:10:54 +0100 Subject: [PATCH 2/2] [SYCL] remove XFAIL --- .../structs_with_special_types_as_kernel_paramters.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/sycl/test-e2e/FreeFunctionKernels/structs_with_special_types_as_kernel_paramters.cpp b/sycl/test-e2e/FreeFunctionKernels/structs_with_special_types_as_kernel_paramters.cpp index 11ba73350e21a..93f703fc3229c 100644 --- a/sycl/test-e2e/FreeFunctionKernels/structs_with_special_types_as_kernel_paramters.cpp +++ b/sycl/test-e2e/FreeFunctionKernels/structs_with_special_types_as_kernel_paramters.cpp @@ -1,9 +1,6 @@ // RUN: %{build} -o %t.out // RUN: %{run} %t.out -// XFAIL: target-nvidia -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/20908 - // This test verifies whether struct that contains either sycl::local_accesor or // sycl::accessor can be used with free function kernels extension.