Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions crates/cuda_builder/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -715,6 +715,12 @@ fn invoke_rustc(builder: &CudaBuilder) -> Result<PathBuf, CudaBuilderError> {
rustflags.push(format!("--emit={string}"));
}

if builder.debug == DebugInfo::None {
// Default dev builds: strip debuginfo to avoid libnvvm crashes with unoptimized IR.
// TODO: drop this once newer libnvvm toolchains are stable with debuginfo in opt=0 builds.
rustflags.push("-Cdebuginfo=0".into());
}

let mut llvm_args = vec![NvvmOption::Arch(builder.arch).to_string()];

if !builder.nvvm_opts {
Expand Down
4 changes: 2 additions & 2 deletions crates/cuda_std/src/warp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -296,9 +296,9 @@ unsafe fn match_any_32(mask: u32, value: u32) -> u32 {
unsafe fn match_any_64(mask: u32, value: u64) -> u32 {
extern "C" {
#[link_name = "llvm.nvvm.match.any.sync.i64"]
fn __nvvm_warp_match_any_64(mask: u32, value: u64) -> u32;
fn __nvvm_warp_match_any_64(mask: u32, value: u64) -> u64;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks wrong. https://docs.nvidia.com/cuda/nvvm-ir-spec/index.html has this:

declare i32 @llvm.nvvm.match.any.sync.i32(i32 %membermask, i32 %value)
declare i32 @llvm.nvvm.match.any.sync.i64(i32 %membermask, i64 %value)
declare {i32, i1} @llvm.nvvm.match.all.sync.i32(i32 %membermask, i32 %value)
declare {i32, i1} @llvm.nvvm.match.all.sync.i64(i32 %membermask, i64 %value)

Not sure about the signed/unsigned mismatches, but the return value is definitely 32 bits.

Aside: The match_all_{32,64} functions below don't have link_name attributes the way the match_any_{32,64} functions do. Not sure if this is valid. I suspect these functions aren't tested at all!

Anyway, I think this change should be reverted.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, hm. Ok, will revert.

}
__nvvm_warp_match_any_64(mask, value)
__nvvm_warp_match_any_64(mask, value) as u32
}

#[gpu_only]
Expand Down
7 changes: 7 additions & 0 deletions crates/rustc_codegen_nvvm/src/override_fns.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use crate::context::CodegenCx;
use crate::llvm;
use rustc_codegen_ssa::mono_item::MonoItemExt;
use rustc_codegen_ssa::traits::{BaseTypeCodegenMethods, BuilderMethods};
use rustc_hir::def::DefKind;
use rustc_hir::def_id::LOCAL_CRATE;
use rustc_middle::mir::mono::{Linkage, MonoItem, MonoItemData, Visibility};
use rustc_middle::ty::layout::FnAbiOf;
Expand Down Expand Up @@ -43,6 +44,12 @@ fn should_override<'tcx>(func: Instance<'tcx>, cx: &CodegenCx<'_, 'tcx>) -> bool
return false;
}

// Only try to override top-level/assoc functions; closures/anon fns cause ICE via item_name.
match cx.tcx.def_kind(func.def_id()) {
DefKind::Fn | DefKind::AssocFn => {}
_ => return false,
}

let sym = cx.tcx.item_name(func.def_id());
let name = sym.as_str();

Expand Down
6 changes: 6 additions & 0 deletions crates/rustc_codegen_nvvm/src/ty.rs
Original file line number Diff line number Diff line change
Expand Up @@ -228,10 +228,16 @@ impl<'ll, 'tcx> BaseTypeCodegenMethods for CodegenCx<'ll, 'tcx> {

fn float_width(&self, ty: &'ll Type) -> usize {
match self.type_kind(ty) {
TypeKind::Half => 16,
TypeKind::Float => 32,
TypeKind::Double => 64,
TypeKind::X86_FP80 => 80,
TypeKind::FP128 | TypeKind::PPC_FP128 => 128,
TypeKind::BFloat => 16,
TypeKind::Vector | TypeKind::ScalableVector => {
// Recurse on element type for vector floats
self.float_width(self.element_type(ty))
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are all of Half/BFloat/Vector/ScalableVector needed to fix the issue? I see that rustc_codegen_llvm only has Half. Seems wise to only add code that's necessary for the fix (and thus has some level of testing).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we need at least BFloat as well, but I'll double-check.

_ => bug!("llvm_float_width called on a non-float type"),
}
}
Expand Down
Loading