Skip to content

Commit cad558f

Browse files
Cupnfishtomcur
andauthored
Fix non-deterministic GPU stroke artifacts (vello#1314) (#1323)
## Summary Fix non-deterministic rendering artifacts when GPU-stroking certain rounded rectangles ([vello#1314](#1314)). The issue could manifest as severe frame-to-frame corruption even when rendering the exact same scene. This PR keeps the change minimal (stroker join logic in WGSL plus the CPU mirror) and includes a standalone repro program in the PR description for reviewers to copy/paste and run locally. ## Reproduction (standalone, not committed) This PR intentionally does **not** add a new `examples/` crate. Instead, below is a standalone `repro_1314` program. Reviewers can copy it into a scratch Cargo project and run it against a local checkout of this branch. ### How to run From your Vello workspace root (the directory that contains `vello/`): 1. Create a scratch project directory (anywhere; example below uses `/tmp`). 2. Paste the following `Cargo.toml` and `src/main.rs`. 3. Run: ```sh cargo run --manifest-path /tmp/repro_1314/Cargo.toml -- --iters 250 ``` On affected revisions this tends to fail very quickly (often within the first couple frames). With this patch it should remain stable and print `ok: 250 frames matched`. ### `/tmp/repro_1314/Cargo.toml` ```toml [package] name = "repro_1314" version = "0.0.0" edition = "2024" publish = false [dependencies] # Set this path to your local checkout of Vello's `vello/` crate. # If you create the scratch project in /tmp as shown above and you run from the Vello repo root, # an absolute path is easiest (edit to match your machine). vello = { path = "/ABS/PATH/TO/your/vello/repo/vello" } anyhow = "1.0" futures-intrusive = "0.5" pollster = "0.4" ``` ### `/tmp/repro_1314/src/main.rs` ```rust use std::num::NonZeroUsize; use anyhow::{Context, Result, anyhow}; use vello::kurbo::{Affine, Join, Rect, Stroke}; use vello::peniko::color::palette; use vello::util::{RenderContext, block_on_wgpu}; use vello::wgpu::{ self, BufferDescriptor, BufferUsages, CommandEncoderDescriptor, Extent3d, TexelCopyBufferInfo, TextureDescriptor, TextureFormat, TextureUsages, }; use vello::{AaConfig, AaSupport, RenderParams, Renderer, RendererOptions, Scene}; const DEFAULT_WIDTH: u32 = 512; const DEFAULT_HEIGHT: u32 = 512; const DEFAULT_ITERS: usize = 250; const DEFAULT_SHAPES: usize = 96; fn parse_flag_value(args: &[String], flag: &str) -> Option<String> { args.iter() .position(|arg| arg == flag) .and_then(|ix| args.get(ix + 1)) .cloned() } fn parse_usize(args: &[String], flag: &str, default: usize) -> Result<usize> { match parse_flag_value(args, flag) { Some(v) => v .parse::<usize>() .with_context(|| format!("failed to parse `{flag}` value `{v}`")), None => Ok(default), } } fn parse_u32(args: &[String], flag: &str, default: u32) -> Result<u32> { match parse_flag_value(args, flag) { Some(v) => v .parse::<u32>() .with_context(|| format!("failed to parse `{flag}` value `{v}`")), None => Ok(default), } } fn fnv1a_64(bytes: &[u8]) -> u64 { let mut hash = 0xcbf29ce484222325_u64; for &b in bytes { hash ^= u64::from(b); hash = hash.wrapping_mul(0x100000001b3_u64); } hash } fn build_scene(scene: &mut Scene, shapes: usize) { // Exact geometry from the issue report: let base_rect = Rect::from_origin_size((2.0, 0.0), (8.0, 327.765_467_171_717_15)).to_rounded_rect(3.91); let base_stroke = Stroke { width: 1.0, join: Join::Miter, ..Default::default() }; // Add many similar copies to make failures appear quickly while always including the exact // geometry above. let variants = [(8.0, 3.91), (8.0, 3.999), (8.0, 4.0), (7.99, 3.995)]; let cols = 12_usize; let spacing_x = 16.0_f64; let spacing_y = 12.0_f64; for ix in 0..shapes { let col = (ix % cols) as f64; let row = (ix / cols) as f64; let (w, r) = variants[ix % variants.len()]; let rect = Rect::from_origin_size((2.0, 0.0), (w, 327.765_467_171_717_15)).to_rounded_rect(r); let transform = Affine::translate((16.0 + col * spacing_x, 16.0 + row * spacing_y)); scene.stroke(&base_stroke, transform, palette::css::WHITE, None, &rect); } scene.stroke( &base_stroke, Affine::IDENTITY, palette::css::WHITE, None, &base_rect, ); } fn main() -> Result<()> { let args: Vec<String> = std::env::args().collect(); let iters = parse_usize(&args, "--iters", DEFAULT_ITERS)?; let shapes = parse_usize(&args, "--shapes", DEFAULT_SHAPES)?; let width = parse_u32(&args, "--width", DEFAULT_WIDTH)?; let height = parse_u32(&args, "--height", DEFAULT_HEIGHT)?; let mut context = RenderContext::new(); let device_id = pollster::block_on(context.device(None)) .ok_or_else(|| anyhow!("no compatible device found"))?; let device_handle = &mut context.devices[device_id]; let device = &device_handle.device; let queue = &device_handle.queue; let mut renderer = Renderer::new( device, RendererOptions { use_cpu: false, antialiasing_support: AaSupport::area_only(), num_init_threads: NonZeroUsize::new(1), ..Default::default() }, ) .map_err(|err| anyhow!("Renderer::new failed: {err}"))?; let size = Extent3d { width, height, depth_or_array_layers: 1, }; let target = device.create_texture(&TextureDescriptor { label: Some("repro_1314.target"), size, mip_level_count: 1, sample_count: 1, dimension: wgpu::TextureDimension::D2, format: TextureFormat::Rgba8Unorm, usage: TextureUsages::STORAGE_BINDING | TextureUsages::COPY_SRC, view_formats: &[], }); let view = target.create_view(&wgpu::TextureViewDescriptor::default()); let padded_byte_width = (width * 4).next_multiple_of(256); let buffer_size = padded_byte_width as u64 * height as u64; let buffer = device.create_buffer(&BufferDescriptor { label: Some("repro_1314.readback"), size: buffer_size, usage: BufferUsages::MAP_READ | BufferUsages::COPY_DST, mapped_at_creation: false, }); let params = RenderParams { base_color: palette::css::BLACK, width, height, antialiasing_method: AaConfig::Area, }; let mut baseline: Option<Vec<u8>> = None; let mut scene = Scene::new(); for ix in 0..iters { scene.reset(); build_scene(&mut scene, shapes); renderer .render_to_texture(device, queue, &scene, &view, &params) .map_err(|err| anyhow!("render_to_texture failed: {err}"))?; let mut encoder = device.create_command_encoder(&CommandEncoderDescriptor { label: Some("repro_1314.copy_out"), }); encoder.copy_texture_to_buffer( target.as_image_copy(), TexelCopyBufferInfo { buffer: &buffer, layout: wgpu::TexelCopyBufferLayout { offset: 0, bytes_per_row: Some(padded_byte_width), rows_per_image: None, }, }, size, ); queue.submit([encoder.finish()]); let buf_slice = buffer.slice(..); let (sender, receiver) = futures_intrusive::channel::shared::oneshot_channel(); buf_slice.map_async(wgpu::MapMode::Read, move |v| { sender.send(v).expect("receiver dropped"); }); if let Some(recv_result) = block_on_wgpu(device, receiver.receive()) { recv_result?; } else { return Err(anyhow!("mapping channel was closed")); } let mapped = buf_slice.get_mapped_range(); let mut frame = Vec::<u8>::with_capacity((width * height * 4) as usize); for row in 0..height { let start = (row * padded_byte_width) as usize; let end = start + (width * 4) as usize; frame.extend_from_slice(&mapped[start..end]); } drop(mapped); buffer.unmap(); if let Some(expected) = &baseline { if &frame != expected { let expected_hash = fnv1a_64(expected); let got_hash = fnv1a_64(&frame); eprintln!( "non-deterministic output at iter {ix}: expected={expected_hash:#x} \ got={got_hash:#x}" ); return Err(anyhow!("detected non-deterministic rendering (vello#1314)")); } } else { baseline = Some(frame); } } println!("ok: {iters} frames matched"); Ok(()) } ``` ### Options - `--iters N` number of frames to compare (default: 250) - `--width N` render target width (default: 512) - `--height N` render target height (default: 512) - `--shapes N` number of copies of the triggering geometry (default: 96) ## Root cause In `vello_shaders/shader/flatten.wgsl`, the GPU stroker's miter-join path uses a miter-limit predicate to decide whether to emit a miter or fall back to bevel. For tangents `a = tan_prev` and `b = tan_next`: - `d = dot(a, b)` - `cr = cross(a, b)` - `hypot = length(vec2(cr, d)) = |a| * |b|` (since `cr^2 + d^2 = |a|^2 |b|^2`) Given the two tangents arranged tail-to-tail, the miter length ratio is `1 / |cos(θ/2)|`, where θ is the angle between tangents. After rearranging, the miter-limit predicate can be expressed in terms of `hypot + d`. The bug is that we only guarded on `cr != 0`. When `cr` is extremely small but non-zero (near-collinear tangents), the miter-point intersection math divides by `cr`, which can produce extremely large geometry. That pathological geometry can destabilize later pipeline stages and show up as frame-to-frame corruption even when the scene is unchanged. ## Fix Keep the miter-limit predicate as-is, but treat sufficiently small `cr` as effectively zero (using `TANGENT_THRESH^2`) and skip the miter computation in that case (fall back to bevel), avoiding pathological miter points. On my machine this stabilizes the repro for 1000 iterations (`ok: 1000 frames matched`). ## Files changed - `vello_shaders/shader/flatten.wgsl`: robustify miter join near-collinearity - `vello_shaders/src/cpu/flatten.rs`: keep CPU shader mirror in sync ## Related - [vello#1314](#1314) - [xilem#1512](linebender/xilem#1512) --------- Co-authored-by: Tom Churchman <thomas@churchman.nl>
1 parent 4ea47b4 commit cad558f

File tree

3 files changed

+20
-2
lines changed

3 files changed

+20
-2
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ This release has an [MSRV][] of 1.88.
2222
### Fixed
2323

2424
- Bitmap emoji displayed at an incorrect position when scaled. ([#1273][] by [@ArthurCose][])
25+
- Miter joins for path segments with near-parallel endpoint tangents no longer cause rendering artifacts. ([#1323][] by [@Cupnfish][] and [@tomcur][])
2526

2627
## [0.6.0][] - 2025-10-03
2728

@@ -270,6 +271,7 @@ This release has an [MSRV][] of 1.75.
270271
[@ArthurCose]: https://github.com/ArthurCose
271272
[@armansito]: https://github.com/armansito
272273
[@cfagot]: https://github.com/cfagot
274+
[@Cupnfish]: https://github.com/Cupnfish
273275
[@DasLixou]: https://github.com/DasLixou
274276
[@dfrg]: https://github.com/drfg
275277
[@DJMcNab]: https://github.com/DJMcNab
@@ -380,6 +382,7 @@ This release has an [MSRV][] of 1.75.
380382
[#1229]: https://github.com/linebender/vello/pull/1229
381383
[#1273]: https://github.com/linebender/vello/pull/1273
382384
[#1280]: https://github.com/linebender/vello/pull/1280
385+
[#1323]: https://github.com/linebender/vello/pull/1323
383386

384387
<!-- Note that this still comparing against 0.5.0, because 0.5.1 is a cherry-picked patch -->
385388
[Unreleased]: https://github.com/linebender/vello/compare/v0.5.0...HEAD

vello_shaders/shader/flatten.wgsl

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -569,7 +569,20 @@ fn draw_join(
569569
let miter_limit = unpack2x16float(style_flags & STYLE_MITER_LIMIT_MASK)[0];
570570

571571
var line_ix: u32;
572-
if 2. * hypot < (hypot + d) * miter_limit * miter_limit && cr != 0. {
572+
// Given the two tangents `tan_prev` and `tan_next` arranged tail-to-tail, the
573+
// miter length ratio is `1 / |cos(theta/2)|`, where `theta` is the angle
574+
// between the tangents.
575+
//
576+
// `hypot` is `|tan_prev| * |tan_next|` (since cr^2 + d^2 = |a|^2 |b|^2) and
577+
// `hypot + d` is `2 * |tan_prev| * |tan_next| * cos^2(theta/2)`. After
578+
// rearranging, the following tests whether `1/|cos(theta/2)| < miter_limit`.
579+
//
580+
// Also avoid the miter computation when `cr` is very small; the intersection
581+
// math divides by `cr` and becomes numerically unstable for near-collinear
582+
// tangents.
583+
if 2. * hypot < (hypot + d) * miter_limit * miter_limit
584+
&& abs(cr) > TANGENT_THRESH * TANGENT_THRESH
585+
{
573586
let is_backside = cr > 0.;
574587
let fp_last = select(front0, back1, is_backside);
575588
let fp_this = select(front1, back0, is_backside);

vello_shaders/src/cpu/flatten.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -457,7 +457,9 @@ fn draw_join(
457457
let hypot = cr.hypot(d);
458458
let miter_limit = f16_to_f32((style_flags & Style::MITER_LIMIT_MASK) as u16);
459459

460-
if 2. * hypot < (hypot + d) * miter_limit * miter_limit && cr != 0. {
460+
if 2. * hypot < (hypot + d) * miter_limit * miter_limit
461+
&& cr.abs() > TANGENT_THRESH.powi(2)
462+
{
461463
let is_backside = cr > 0.;
462464
let fp_last = if is_backside { back1 } else { front0 };
463465
let fp_this = if is_backside { back0 } else { front1 };

0 commit comments

Comments
 (0)