Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 31 additions & 12 deletions llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -475,13 +475,18 @@ struct ArgUseChecker : PtrUseVisitor<ArgUseChecker> {
}

void visitStoreInst(StoreInst &SI) {
// Storing the pointer escapes it.
if (U->get() == SI.getValueOperand())
return PI.setEscapedAndAborted(&SI);
// Writes to the pointer are UB w/ __grid_constant__, but do not force a
// copy.
if (!IsGridConstant)
return PI.setAborted(&SI);
// Storing the pointer value (as opposed to storing through it) escapes it.
// For grid_constant params, this is allowed - we can pass the generic
// pointer. For non-grid-constant, this requires a copy.
if (U->get() == SI.getValueOperand()) {
if (IsGridConstant)
return PI.setEscaped(&SI);
else
return PI.setEscapedAndAborted(&SI);
}
// Writes through the pointer to param space are UB w/ __grid_constant__,
// and param space is read-only on CUDA, so we need to force a copy.
return PI.setAborted(&SI);
}

void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
Expand Down Expand Up @@ -529,10 +534,22 @@ void copyByValParam(Function &F, Argument &Arg) {
// the use of the byval parameter with this alloca instruction.
AllocA->setAlignment(
Arg.getParamAlign().value_or(DL.getPrefTypeAlign(StructType)));
Arg.replaceAllUsesWith(AllocA);

// Must create ArgInParam before replacing uses of Arg.
// createNVVMInternalAddrspaceWrap needs to use Arg as an operand.
CallInst *ArgInParam = createNVVMInternalAddrspaceWrap(IRB, Arg);

// Replace all uses of Arg with AllocA, except the use in ArgInParam.
// Note: we can't use replaceAllUsesWith because it would replace ArgInParam's
// operand too, creating a circular dependency.
SmallVector<Use *, 8> UsesToReplace;
for (Use &U : Arg.uses()) {
if (U.getUser() != ArgInParam)
UsesToReplace.push_back(&U);
}
for (Use *U : UsesToReplace)
U->set(AllocA);

// Be sure to propagate alignment to this load; LLVM doesn't know that NVPTX
// addrspacecast preserves alignment. Since params are constant, this load
// is definitely not volatile.
Expand Down Expand Up @@ -578,10 +595,12 @@ static void handleByValParam(const NVPTXTargetMachine &TM, Argument *Arg) {

// We can't access byval arg directly and need a pointer. on sm_70+ we have
// ability to take a pointer to the argument without making a local copy.
// However, we're still not allowed to write to it. If the user specified
// `__grid_constant__` for the argument, we'll consider escaped pointer as
// read-only.
if (IsGridConstant || (HasCvtaParam && ArgUseIsReadOnly)) {
// However, param space is read-only, so we can only use this optimization
// if the argument is not written to.
// Grid constant params can escape (pointer passed to functions) but cannot
// have stores. Non-grid-constant params must be fully read-only.
if ((IsGridConstant && !PI.isAborted()) ||
(HasCvtaParam && ArgUseIsReadOnly)) {
LLVM_DEBUG(dbgs() << "Using non-copy pointer to " << *Arg << "\n");
// Replace all argument pointer uses (which might include a device function
// call) with a cast to the generic address space using cvta.param
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
// RUN: %{build} -o %t.out
// RUN: %{run} %t.out

// XFAIL: target-nvidia
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/20908

// This test verifies whether struct that contains either sycl::local_accesor or
// sycl::accessor can be used with free function kernels extension.

Expand Down
Loading