Rust-GPU · LegNeato · Nov 26, 2025 · nnethercote · Nov 30, 2025 · LegNeato
diff --git a/crates/cuda_builder/src/lib.rs b/crates/cuda_builder/src/lib.rs
@@ -715,6 +715,12 @@ fn invoke_rustc(builder: &CudaBuilder) -> Result<PathBuf, CudaBuilderError> {
         rustflags.push(format!("--emit={string}"));
     }
 
+    if builder.debug == DebugInfo::None {
+        // Default dev builds: strip debuginfo to avoid libnvvm crashes with unoptimized IR.
+        // TODO: drop this once newer libnvvm toolchains are stable with debuginfo at opt-level=0.
+        rustflags.push("-Cdebuginfo=0".into());
+    }
+
     let mut llvm_args = vec![NvvmOption::Arch(builder.arch).to_string()];
 
     if !builder.nvvm_opts {

diff --git a/crates/cuda_std/src/warp.rs b/crates/cuda_std/src/warp.rs
@@ -296,9 +296,9 @@ unsafe fn match_any_32(mask: u32, value: u32) -> u32 {
 unsafe fn match_any_64(mask: u32, value: u64) -> u32 {
     extern "C" {
         #[link_name = "llvm.nvvm.match.any.sync.i64"]
-        fn __nvvm_warp_match_any_64(mask: u32, value: u64) -> u32;
+        fn __nvvm_warp_match_any_64(mask: u32, value: u64) -> u64;
     }
-    __nvvm_warp_match_any_64(mask, value)
+    __nvvm_warp_match_any_64(mask, value) as u32
 }
 
 #[gpu_only]

diff --git a/crates/rustc_codegen_nvvm/src/override_fns.rs b/crates/rustc_codegen_nvvm/src/override_fns.rs
@@ -8,6 +8,7 @@ use crate::context::CodegenCx;
 use crate::llvm;
 use rustc_codegen_ssa::mono_item::MonoItemExt;
 use rustc_codegen_ssa::traits::{BaseTypeCodegenMethods, BuilderMethods};
+use rustc_hir::def::DefKind;
 use rustc_hir::def_id::LOCAL_CRATE;
 use rustc_middle::mir::mono::{Linkage, MonoItem, MonoItemData, Visibility};
 use rustc_middle::ty::layout::FnAbiOf;
@@ -43,6 +44,12 @@ fn should_override<'tcx>(func: Instance<'tcx>, cx: &CodegenCx<'_, 'tcx>) -> bool
         return false;
     }
 
+    // Only override free/associated functions: `item_name` ICEs on closures and anonymous fns.
+    match cx.tcx.def_kind(func.def_id()) {
+        DefKind::Fn | DefKind::AssocFn => {}
+        _ => return false,
+    }
+
     let sym = cx.tcx.item_name(func.def_id());
     let name = sym.as_str();
 

diff --git a/crates/rustc_codegen_nvvm/src/ty.rs b/crates/rustc_codegen_nvvm/src/ty.rs
@@ -228,10 +228,16 @@ impl<'ll, 'tcx> BaseTypeCodegenMethods for CodegenCx<'ll, 'tcx> {
 
     fn float_width(&self, ty: &'ll Type) -> usize {
         match self.type_kind(ty) {
+            TypeKind::Half => 16,
             TypeKind::Float => 32,
             TypeKind::Double => 64,
             TypeKind::X86_FP80 => 80,
             TypeKind::FP128 | TypeKind::PPC_FP128 => 128,
+            TypeKind::BFloat => 16,
+            TypeKind::Vector | TypeKind::ScalableVector => {
+                // Recurse on element type for vector floats
+                self.float_width(self.element_type(ty))
+            }
             _ => bug!("llvm_float_width called on a non-float type"),
         }
     }