Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
abb86d6
Avoid alloca for fully static sizes
Sa4dUs Feb 7, 2026
30d7ed4
library/test: always enable unstable features for miri
cuviper Mar 3, 2026
fe71b66
Fix comment on `is_horizontal_whitespace`
traviscross Mar 4, 2026
d863850
Streamline cache-related query functions.
nnethercote Mar 3, 2026
d08f97d
Remove `QueryVTable::construct_dep_node`.
nnethercote Mar 3, 2026
20768d8
fix(thir): Include NoneWithError in enum struct tail assertion
TKanX Mar 4, 2026
391a755
enable `PassMode::Indirect { on_stack: true }` tail call arguments
folkertdev Mar 3, 2026
cbc711e
rustc_llvm: add missing `-` to flag-comparison logic
durin42 Mar 4, 2026
253670b
Update dispatch2
eggyal Mar 5, 2026
fb488de
Rollup merge of #153361 - folkertdev:tail-call-indirect-on-stack-true…
JonathanBrouwer Mar 5, 2026
2feb90a
Rollup merge of #153369 - cuviper:unstable-libtest, r=Kobzol
JonathanBrouwer Mar 5, 2026
7595e5b
Rollup merge of #152283 - Sa4dUs:offload-handle-alloca, r=ZuseZ4
JonathanBrouwer Mar 5, 2026
62d1a71
Rollup merge of #153323 - nnethercote:rm-impl-QueryVTable, r=Zalathar
JonathanBrouwer Mar 5, 2026
4675a29
Rollup merge of #153385 - traviscross:TC/fix-frontmatter-whitespace-c…
JonathanBrouwer Mar 5, 2026
fed3c49
Rollup merge of #153394 - TKanX:bugfix/153390-ice-enum-struct-syntax-…
JonathanBrouwer Mar 5, 2026
b8a919a
Rollup merge of #153419 - durin42:missing-dashes, r=cuviper
JonathanBrouwer Mar 5, 2026
005fc16
Rollup merge of #153423 - eggyal:update-dispatch, r=madsmtm
JonathanBrouwer Mar 5, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1217,9 +1217,9 @@ dependencies = [

[[package]]
name = "dispatch2"
version = "0.3.0"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89a09f22a6c6069a18470eb92d2298acf25463f14256d24778e1230d789a2aec"
checksum = "1e0e367e4e7da84520dedcac1901e4da967309406d1e51017ae1abfb97adbd38"
dependencies = [
"bitflags",
"block2",
Expand Down
64 changes: 51 additions & 13 deletions compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@ use std::ffi::CString;
use bitflags::Flags;
use llvm::Linkage::*;
use rustc_abi::Align;
use rustc_codegen_ssa::MemFlags;
use rustc_codegen_ssa::common::TypeKind;
use rustc_codegen_ssa::mir::operand::{OperandRef, OperandValue};
use rustc_codegen_ssa::traits::{BaseTypeCodegenMethods, BuilderMethods};
use rustc_middle::bug;
use rustc_middle::ty::offload_meta::{MappingFlags, OffloadMetadata};
use rustc_middle::ty::offload_meta::{MappingFlags, OffloadMetadata, OffloadSize};

use crate::builder::Builder;
use crate::common::CodegenCx;
Expand Down Expand Up @@ -450,7 +451,15 @@ pub(crate) fn gen_define_handling<'ll>(
// FIXME(offload): add `OMP_MAP_TARGET_PARAM = 0x20` only if necessary
let transfer_kernel = vec![MappingFlags::TARGET_PARAM.bits(); transfer_to.len()];

let offload_sizes = add_priv_unnamed_arr(&cx, &format!(".offload_sizes.{symbol}"), &sizes);
let actual_sizes = sizes
.iter()
.map(|s| match s {
OffloadSize::Static(sz) => *sz,
OffloadSize::Dynamic => 0,
})
.collect::<Vec<_>>();
let offload_sizes =
add_priv_unnamed_arr(&cx, &format!(".offload_sizes.{symbol}"), &actual_sizes);
let memtransfer_begin =
add_priv_unnamed_arr(&cx, &format!(".offload_maptypes.{symbol}.begin"), &transfer_to);
let memtransfer_kernel =
Expand Down Expand Up @@ -499,9 +508,6 @@ pub(crate) fn gen_define_handling<'ll>(
region_id,
};

// FIXME(Sa4dUs): use this global for constant offload sizes
cx.add_compiler_used_global(result.offload_sizes);

cx.offload_kernel_cache.borrow_mut().insert(symbol, result);

result
Expand Down Expand Up @@ -535,6 +541,15 @@ pub(crate) fn scalar_width<'ll>(cx: &'ll SimpleCx<'_>, ty: &'ll Type) -> u64 {
}
}

/// Computes the runtime (dynamic) byte size of an offload kernel argument
/// whose size is not known statically (`OffloadSize::Dynamic`).
///
/// Currently a stub: every parameter is intentionally unused (`_`-prefixed)
/// and the function unconditionally ICEs via `bug!`, because dynamic-size
/// payloads are not supported yet. Callers must only reach this path for
/// arguments whose metadata reports a dynamic size.
fn get_runtime_size<'ll, 'tcx>(
    _cx: &CodegenCx<'ll, 'tcx>,
    _val: &'ll Value,
    _meta: &OffloadMetadata,
) -> &'ll Value {
    // FIXME(Sa4dUs): handle dynamic-size data (e.g. slices)
    bug!("offload does not support dynamic sizes yet");
}

// For each kernel *call*, we now use some of our previous declared globals to move data to and from
// the gpu. For now, we only handle the data transfer part of it.
// If two consecutive kernels use the same memory, we still move it to the host and back to the gpu.
Expand Down Expand Up @@ -564,15 +579,17 @@ pub(crate) fn gen_call_handling<'ll, 'tcx>(
) {
let cx = builder.cx;
let OffloadKernelGlobals {
offload_sizes,
memtransfer_begin,
memtransfer_kernel,
memtransfer_end,
region_id,
..
} = offload_data;
let OffloadKernelDims { num_workgroups, threads_per_block, workgroup_dims, thread_dims } =
offload_dims;

let has_dynamic = metadata.iter().any(|m| matches!(m.payload_size, OffloadSize::Dynamic));

let tgt_decl = offload_globals.launcher_fn;
let tgt_target_kernel_ty = offload_globals.launcher_ty;

Expand All @@ -596,7 +613,24 @@ pub(crate) fn gen_call_handling<'ll, 'tcx>(
let a2 = builder.direct_alloca(ty, Align::EIGHT, ".offload_ptrs");
// These represent the sizes in bytes, e.g. the entry for `&[f64; 16]` will be 8*16.
let ty2 = cx.type_array(cx.type_i64(), num_args);
let a4 = builder.direct_alloca(ty2, Align::EIGHT, ".offload_sizes");

let a4 = if has_dynamic {
let alloc = builder.direct_alloca(ty2, Align::EIGHT, ".offload_sizes");

builder.memcpy(
alloc,
Align::EIGHT,
offload_sizes,
Align::EIGHT,
cx.get_const_i64(8 * args.len() as u64),
MemFlags::empty(),
None,
);

alloc
} else {
offload_sizes
};

//%kernel_args = alloca %struct.__tgt_kernel_arguments, align 8
let a5 = builder.direct_alloca(tgt_kernel_decl, Align::EIGHT, "kernel_args");
Expand Down Expand Up @@ -648,9 +682,12 @@ pub(crate) fn gen_call_handling<'ll, 'tcx>(
builder.store(vals[i as usize], gep1, Align::EIGHT);
let gep2 = builder.inbounds_gep(ty, a2, &[i32_0, idx]);
builder.store(geps[i as usize], gep2, Align::EIGHT);
let gep3 = builder.inbounds_gep(ty2, a4, &[i32_0, idx]);
// FIXME(offload): write an offload frontend and handle arbitrary types.
builder.store(cx.get_const_i64(metadata[i as usize].payload_size), gep3, Align::EIGHT);

if matches!(metadata[i as usize].payload_size, OffloadSize::Dynamic) {
let gep3 = builder.inbounds_gep(ty2, a4, &[i32_0, idx]);
let size_val = get_runtime_size(cx, args[i as usize], &metadata[i as usize]);
builder.store(size_val, gep3, Align::EIGHT);
}
}

// For now we have a very simplistic indexing scheme into our
Expand All @@ -662,13 +699,14 @@ pub(crate) fn gen_call_handling<'ll, 'tcx>(
a1: &'ll Value,
a2: &'ll Value,
a4: &'ll Value,
is_dynamic: bool,
) -> [&'ll Value; 3] {
let cx = builder.cx;
let i32_0 = cx.get_const_i32(0);

let gep1 = builder.inbounds_gep(ty, a1, &[i32_0, i32_0]);
let gep2 = builder.inbounds_gep(ty, a2, &[i32_0, i32_0]);
let gep3 = builder.inbounds_gep(ty2, a4, &[i32_0, i32_0]);
let gep3 = if is_dynamic { builder.inbounds_gep(ty2, a4, &[i32_0, i32_0]) } else { a4 };
[gep1, gep2, gep3]
}

Expand All @@ -692,7 +730,7 @@ pub(crate) fn gen_call_handling<'ll, 'tcx>(

// Step 2)
let s_ident_t = offload_globals.ident_t_global;
let geps = get_geps(builder, ty, ty2, a1, a2, a4);
let geps = get_geps(builder, ty, ty2, a1, a2, a4, has_dynamic);
generate_mapper_call(
builder,
geps,
Expand Down Expand Up @@ -725,7 +763,7 @@ pub(crate) fn gen_call_handling<'ll, 'tcx>(
// %41 = call i32 @__tgt_target_kernel(ptr @1, i64 -1, i32 2097152, i32 256, ptr @.kernel_1.region_id, ptr %kernel_args)

// Step 4)
let geps = get_geps(builder, ty, ty2, a1, a2, a4);
let geps = get_geps(builder, ty, ty2, a1, a2, a4, has_dynamic);
generate_mapper_call(
builder,
geps,
Expand Down
74 changes: 28 additions & 46 deletions compiler/rustc_codegen_ssa/src/mir/block.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1256,55 +1256,37 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
}
}
CallKind::Tail => {
match fn_abi.args[i].mode {
PassMode::Indirect { on_stack: false, .. } => {
let Some(tmp) = tail_call_temporaries[i].take() else {
span_bug!(
fn_span,
"missing temporary for indirect tail call argument #{i}"
)
};

let local = self.mir.args_iter().nth(i).unwrap();

match &self.locals[local] {
LocalRef::Place(arg) => {
bx.typed_place_copy(arg.val, tmp.val, fn_abi.args[i].layout);
op.val = Ref(arg.val);
}
LocalRef::Operand(arg) => {
let Ref(place_value) = arg.val else {
bug!("only `Ref` should use `PassMode::Indirect`");
};
bx.typed_place_copy(
place_value,
tmp.val,
fn_abi.args[i].layout,
);
op.val = arg.val;
}
LocalRef::UnsizedPlace(_) => {
span_bug!(fn_span, "unsized types are not supported")
}
LocalRef::PendingOperand => {
span_bug!(fn_span, "argument local should not be pending")
}
};

bx.lifetime_end(tmp.val.llval, tmp.layout.size);
}
PassMode::Indirect { on_stack: true, .. } => {
// FIXME: some LLVM backends (notably x86) do not correctly pass byval
// arguments to tail calls (as of LLVM 21). See also:
//
// - https://github.com/rust-lang/rust/pull/144232#discussion_r2218543841
// - https://github.com/rust-lang/rust/issues/144855
if let PassMode::Indirect { on_stack: false, .. } = fn_abi.args[i].mode {
let Some(tmp) = tail_call_temporaries[i].take() else {
span_bug!(
fn_span,
"arguments using PassMode::Indirect {{ on_stack: true, .. }} are currently not supported for tail calls"
"missing temporary for indirect tail call argument #{i}"
)
}
_ => (),
};

let local = self.mir.args_iter().nth(i).unwrap();

match &self.locals[local] {
LocalRef::Place(arg) => {
bx.typed_place_copy(arg.val, tmp.val, fn_abi.args[i].layout);
op.val = Ref(arg.val);
}
LocalRef::Operand(arg) => {
let Ref(place_value) = arg.val else {
bug!("only `Ref` should use `PassMode::Indirect`");
};
bx.typed_place_copy(place_value, tmp.val, fn_abi.args[i].layout);
op.val = arg.val;
}
LocalRef::UnsizedPlace(_) => {
span_bug!(fn_span, "unsized types are not supported")
}
LocalRef::PendingOperand => {
span_bug!(fn_span, "argument local should not be pending")
}
};

bx.lifetime_end(tmp.val.llval, tmp.layout.size);
}
}
}
Expand Down
3 changes: 2 additions & 1 deletion compiler/rustc_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,8 @@ pub fn is_whitespace(c: char) -> bool {

/// True if `c` is considered horizontal whitespace according to Rust language definition.
pub fn is_horizontal_whitespace(c: char) -> bool {
// This is Pattern_White_Space.
// This is the horizontal space subset of `Pattern_White_Space` as
// categorized by UAX #31, Section 4.1.
//
// Note that this set is stable (ie, it doesn't change with different
// Unicode versions), so it's ok to just hard-code the values.
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_llvm/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -476,15 +476,15 @@ fn main() {
// C++ runtime library
if !target.contains("msvc") {
if let Some(s) = llvm_static_stdcpp {
assert!(cxxflags_iter.all(|flag| flag != "stdlib=libc++"));
assert!(cxxflags_iter.all(|flag| flag != "-stdlib=libc++"));
let path = PathBuf::from(s);
println!("cargo:rustc-link-search=native={}", path.parent().unwrap().display());
if target.contains("windows") {
println!("cargo:rustc-link-lib=static:-bundle={stdcppname}");
} else {
println!("cargo:rustc-link-lib=static={stdcppname}");
}
} else if cxxflags_iter.any(|flag| flag == "stdlib=libc++") {
} else if cxxflags_iter.any(|flag| flag == "-stdlib=libc++") {
println!("cargo:rustc-link-lib=c++");
} else {
println!("cargo:rustc-link-lib={stdcppname}");
Expand Down
64 changes: 10 additions & 54 deletions compiler/rustc_middle/src/query/plumbing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use rustc_macros::HashStable;
use rustc_span::{ErrorGuaranteed, Span};
pub use sealed::IntoQueryParam;

use crate::dep_graph::{DepKind, DepNode, DepNodeIndex, SerializedDepNodeIndex};
use crate::dep_graph::{DepKind, DepNodeIndex, SerializedDepNodeIndex};
use crate::ich::StableHashingContext;
use crate::queries::{ExternProviders, Providers, QueryArenas, QueryVTables};
use crate::query::on_disk_cache::OnDiskCache;
Expand Down Expand Up @@ -113,7 +113,6 @@ pub struct QueryVTable<'tcx, C: QueryCache> {
pub cycle_error_handling: CycleErrorHandling,
pub state: QueryState<'tcx, C::Key>,
pub cache: C,
pub will_cache_on_disk_for_key_fn: Option<fn(tcx: TyCtxt<'tcx>, key: &C::Key) -> bool>,

/// Function pointer that calls `tcx.$query(key)` for this query and
/// discards the returned value.
Expand All @@ -129,17 +128,17 @@ pub struct QueryVTable<'tcx, C: QueryCache> {
/// This should be the only code that calls the provider function.
pub invoke_provider_fn: fn(tcx: TyCtxt<'tcx>, key: C::Key) -> C::Value,

pub try_load_from_disk_fn: Option<
fn(
tcx: TyCtxt<'tcx>,
key: &C::Key,
prev_index: SerializedDepNodeIndex,
index: DepNodeIndex,
) -> Option<C::Value>,
>,
pub will_cache_on_disk_for_key_fn: fn(tcx: TyCtxt<'tcx>, key: &C::Key) -> bool,

pub try_load_from_disk_fn: fn(
tcx: TyCtxt<'tcx>,
key: &C::Key,
prev_index: SerializedDepNodeIndex,
index: DepNodeIndex,
) -> Option<C::Value>,

pub is_loadable_from_disk_fn:
Option<fn(tcx: TyCtxt<'tcx>, key: &C::Key, index: SerializedDepNodeIndex) -> bool>,
fn(tcx: TyCtxt<'tcx>, key: &C::Key, index: SerializedDepNodeIndex) -> bool,

/// Function pointer that hashes this query's result values.
///
Expand Down Expand Up @@ -180,49 +179,6 @@ impl<'tcx, C: QueryCache> fmt::Debug for QueryVTable<'tcx, C> {
}
}

impl<'tcx, C: QueryCache> QueryVTable<'tcx, C> {
    /// Whether this query's result for `key` will be written to the on-disk
    /// cache. Returns `false` for queries with no `will_cache_on_disk_for_key_fn`
    /// (i.e. queries that never cache to disk).
    #[inline(always)]
    pub fn will_cache_on_disk_for_key(&self, tcx: TyCtxt<'tcx>, key: &C::Key) -> bool {
        self.will_cache_on_disk_for_key_fn.map_or(false, |f| f(tcx, key))
    }

    /// Attempts to load a previously cached result for `key` from the on-disk
    /// cache. Returns `None` both when the query never caches to disk and when
    /// the load itself yields nothing.
    #[inline(always)]
    pub fn try_load_from_disk(
        &self,
        tcx: TyCtxt<'tcx>,
        key: &C::Key,
        prev_index: SerializedDepNodeIndex,
        index: DepNodeIndex,
    ) -> Option<C::Value> {
        // `?` will return None immediately for queries that never cache to disk.
        self.try_load_from_disk_fn?(tcx, key, prev_index, index)
    }

    /// Whether a cached result for `key` at `index` can be loaded from disk.
    /// Returns `false` when the query has no `is_loadable_from_disk_fn`.
    #[inline]
    pub fn is_loadable_from_disk(
        &self,
        tcx: TyCtxt<'tcx>,
        key: &C::Key,
        index: SerializedDepNodeIndex,
    ) -> bool {
        self.is_loadable_from_disk_fn.map_or(false, |f| f(tcx, key, index))
    }

    /// Synthesize an error value to let compilation continue after a cycle.
    pub fn value_from_cycle_error(
        &self,
        tcx: TyCtxt<'tcx>,
        cycle_error: CycleError,
        guar: ErrorGuaranteed,
    ) -> C::Value {
        (self.value_from_cycle_error)(tcx, cycle_error, guar)
    }

    /// Builds the `DepNode` identifying this query invocation for `key`,
    /// using the vtable's `dep_kind`.
    pub fn construct_dep_node(&self, tcx: TyCtxt<'tcx>, key: &C::Key) -> DepNode {
        DepNode::construct(tcx, self.dep_kind, key)
    }
}

pub struct QuerySystem<'tcx> {
pub arenas: WorkerLocal<QueryArenas<'tcx>>,
pub query_vtables: QueryVTables<'tcx>,
Expand Down
Loading
Loading