Commit cad558f
Fix non-deterministic GPU stroke artifacts (vello#1314) (#1323)
## Summary
Fix non-deterministic rendering artifacts when GPU-stroking certain
rounded rectangles
([vello#1314](#1314)).
The issue could manifest as severe frame-to-frame corruption even when
rendering
the exact same scene. This PR keeps the change minimal (stroker join
logic in
WGSL plus the CPU mirror) and includes a standalone repro program in the
PR
description for reviewers to copy/paste and run locally.
## Reproduction (standalone, not committed)
This PR intentionally does **not** add a new `examples/` crate. Instead,
below is a standalone `repro_1314` program. Reviewers can copy it into a
scratch Cargo project and run it against a local checkout of this
branch.
### How to run
From your Vello workspace root (the directory that contains `vello/`):
1. Create a scratch project directory (anywhere; example below uses
`/tmp`).
2. Paste the following `Cargo.toml` and `src/main.rs`.
3. Run:
```sh
cargo run --manifest-path /tmp/repro_1314/Cargo.toml -- --iters 250
```
On affected revisions this tends to fail very quickly (often within the
first couple frames). With this patch it should remain stable and print
`ok: 250 frames matched`.
### `/tmp/repro_1314/Cargo.toml`
```toml
[package]
name = "repro_1314"
version = "0.0.0"
edition = "2024"
publish = false
[dependencies]
# Set this path to your local checkout of Vello's `vello/` crate.
# If you create the scratch project in /tmp as shown above and you run from the Vello repo root,
# an absolute path is easiest (edit to match your machine).
vello = { path = "/ABS/PATH/TO/your/vello/repo/vello" }
anyhow = "1.0"
futures-intrusive = "0.5"
pollster = "0.4"
```
### `/tmp/repro_1314/src/main.rs`
```rust
use std::num::NonZeroUsize;
use anyhow::{Context, Result, anyhow};
use vello::kurbo::{Affine, Join, Rect, Stroke};
use vello::peniko::color::palette;
use vello::util::{RenderContext, block_on_wgpu};
use vello::wgpu::{
self, BufferDescriptor, BufferUsages, CommandEncoderDescriptor, Extent3d, TexelCopyBufferInfo,
TextureDescriptor, TextureFormat, TextureUsages,
};
use vello::{AaConfig, AaSupport, RenderParams, Renderer, RendererOptions, Scene};
const DEFAULT_WIDTH: u32 = 512;
const DEFAULT_HEIGHT: u32 = 512;
const DEFAULT_ITERS: usize = 250;
const DEFAULT_SHAPES: usize = 96;
fn parse_flag_value(args: &[String], flag: &str) -> Option<String> {
args.iter()
.position(|arg| arg == flag)
.and_then(|ix| args.get(ix + 1))
.cloned()
}
fn parse_usize(args: &[String], flag: &str, default: usize) -> Result<usize> {
match parse_flag_value(args, flag) {
Some(v) => v
.parse::<usize>()
.with_context(|| format!("failed to parse `{flag}` value `{v}`")),
None => Ok(default),
}
}
fn parse_u32(args: &[String], flag: &str, default: u32) -> Result<u32> {
match parse_flag_value(args, flag) {
Some(v) => v
.parse::<u32>()
.with_context(|| format!("failed to parse `{flag}` value `{v}`")),
None => Ok(default),
}
}
fn fnv1a_64(bytes: &[u8]) -> u64 {
let mut hash = 0xcbf29ce484222325_u64;
for &b in bytes {
hash ^= u64::from(b);
hash = hash.wrapping_mul(0x100000001b3_u64);
}
hash
}
fn build_scene(scene: &mut Scene, shapes: usize) {
// Exact geometry from the issue report:
let base_rect =
Rect::from_origin_size((2.0, 0.0), (8.0, 327.765_467_171_717_15)).to_rounded_rect(3.91);
let base_stroke = Stroke {
width: 1.0,
join: Join::Miter,
..Default::default()
};
// Add many similar copies to make failures appear quickly while always including the exact
// geometry above.
let variants = [(8.0, 3.91), (8.0, 3.999), (8.0, 4.0), (7.99, 3.995)];
let cols = 12_usize;
let spacing_x = 16.0_f64;
let spacing_y = 12.0_f64;
for ix in 0..shapes {
let col = (ix % cols) as f64;
let row = (ix / cols) as f64;
let (w, r) = variants[ix % variants.len()];
let rect =
Rect::from_origin_size((2.0, 0.0), (w, 327.765_467_171_717_15)).to_rounded_rect(r);
let transform = Affine::translate((16.0 + col * spacing_x, 16.0 + row * spacing_y));
scene.stroke(&base_stroke, transform, palette::css::WHITE, None, &rect);
}
scene.stroke(
&base_stroke,
Affine::IDENTITY,
palette::css::WHITE,
None,
&base_rect,
);
}
fn main() -> Result<()> {
let args: Vec<String> = std::env::args().collect();
let iters = parse_usize(&args, "--iters", DEFAULT_ITERS)?;
let shapes = parse_usize(&args, "--shapes", DEFAULT_SHAPES)?;
let width = parse_u32(&args, "--width", DEFAULT_WIDTH)?;
let height = parse_u32(&args, "--height", DEFAULT_HEIGHT)?;
let mut context = RenderContext::new();
let device_id = pollster::block_on(context.device(None))
.ok_or_else(|| anyhow!("no compatible device found"))?;
let device_handle = &mut context.devices[device_id];
let device = &device_handle.device;
let queue = &device_handle.queue;
let mut renderer = Renderer::new(
device,
RendererOptions {
use_cpu: false,
antialiasing_support: AaSupport::area_only(),
num_init_threads: NonZeroUsize::new(1),
..Default::default()
},
)
.map_err(|err| anyhow!("Renderer::new failed: {err}"))?;
let size = Extent3d {
width,
height,
depth_or_array_layers: 1,
};
let target = device.create_texture(&TextureDescriptor {
label: Some("repro_1314.target"),
size,
mip_level_count: 1,
sample_count: 1,
dimension: wgpu::TextureDimension::D2,
format: TextureFormat::Rgba8Unorm,
usage: TextureUsages::STORAGE_BINDING | TextureUsages::COPY_SRC,
view_formats: &[],
});
let view = target.create_view(&wgpu::TextureViewDescriptor::default());
let padded_byte_width = (width * 4).next_multiple_of(256);
let buffer_size = padded_byte_width as u64 * height as u64;
let buffer = device.create_buffer(&BufferDescriptor {
label: Some("repro_1314.readback"),
size: buffer_size,
usage: BufferUsages::MAP_READ | BufferUsages::COPY_DST,
mapped_at_creation: false,
});
let params = RenderParams {
base_color: palette::css::BLACK,
width,
height,
antialiasing_method: AaConfig::Area,
};
let mut baseline: Option<Vec<u8>> = None;
let mut scene = Scene::new();
for ix in 0..iters {
scene.reset();
build_scene(&mut scene, shapes);
renderer
.render_to_texture(device, queue, &scene, &view, ¶ms)
.map_err(|err| anyhow!("render_to_texture failed: {err}"))?;
let mut encoder = device.create_command_encoder(&CommandEncoderDescriptor {
label: Some("repro_1314.copy_out"),
});
encoder.copy_texture_to_buffer(
target.as_image_copy(),
TexelCopyBufferInfo {
buffer: &buffer,
layout: wgpu::TexelCopyBufferLayout {
offset: 0,
bytes_per_row: Some(padded_byte_width),
rows_per_image: None,
},
},
size,
);
queue.submit([encoder.finish()]);
let buf_slice = buffer.slice(..);
let (sender, receiver) = futures_intrusive::channel::shared::oneshot_channel();
buf_slice.map_async(wgpu::MapMode::Read, move |v| {
sender.send(v).expect("receiver dropped");
});
if let Some(recv_result) = block_on_wgpu(device, receiver.receive()) {
recv_result?;
} else {
return Err(anyhow!("mapping channel was closed"));
}
let mapped = buf_slice.get_mapped_range();
let mut frame = Vec::<u8>::with_capacity((width * height * 4) as usize);
for row in 0..height {
let start = (row * padded_byte_width) as usize;
let end = start + (width * 4) as usize;
frame.extend_from_slice(&mapped[start..end]);
}
drop(mapped);
buffer.unmap();
if let Some(expected) = &baseline {
if &frame != expected {
let expected_hash = fnv1a_64(expected);
let got_hash = fnv1a_64(&frame);
eprintln!(
"non-deterministic output at iter {ix}: expected={expected_hash:#x} \
got={got_hash:#x}"
);
return Err(anyhow!("detected non-deterministic rendering (vello#1314)"));
}
} else {
baseline = Some(frame);
}
}
println!("ok: {iters} frames matched");
Ok(())
}
```
### Options
- `--iters N` number of frames to compare (default: 250)
- `--width N` render target width (default: 512)
- `--height N` render target height (default: 512)
- `--shapes N` number of copies of the triggering geometry (default: 96)
## Root cause
In `vello_shaders/shader/flatten.wgsl`, the GPU stroker's miter-join
path uses a miter-limit predicate to decide whether to emit a miter or
fall back to bevel.
For tangents `a = tan_prev` and `b = tan_next`:
- `d = dot(a, b)`
- `cr = cross(a, b)`
- `hypot = length(vec2(cr, d)) = |a| * |b|` (since `cr^2 + d^2 = |a|^2
|b|^2`)
Given the two tangents arranged tail-to-tail, the miter length ratio is
`1 / |cos(θ/2)|`, where θ is the angle between tangents. After
rearranging, the
miter-limit predicate can be expressed in terms of `hypot + d`.
The bug is that we only guarded on `cr != 0`. When `cr` is extremely
small but
non-zero (near-collinear tangents), the miter-point intersection math
divides by
`cr`, which can produce extremely large geometry. That pathological
geometry can
destabilize later pipeline stages and show up as frame-to-frame
corruption even
when the scene is unchanged.
## Fix
Keep the miter-limit predicate as-is, but treat sufficiently small `cr`
as
effectively zero (using `TANGENT_THRESH^2`) and skip the miter
computation in
that case (fall back to bevel), avoiding pathological miter points.
On my machine this stabilizes the repro for 1000 iterations (`ok: 1000
frames
matched`).
## Files changed
- `vello_shaders/shader/flatten.wgsl`: robustify miter join
near-collinearity
- `vello_shaders/src/cpu/flatten.rs`: keep CPU shader mirror in sync
## Related
- [vello#1314](#1314)
- [xilem#1512](linebender/xilem#1512)
---------
Co-authored-by: Tom Churchman <thomas@churchman.nl>1 parent 4ea47b4 commit cad558f
File tree
3 files changed
+20
-2
lines changed- vello_shaders
- shader
- src/cpu
3 files changed
+20
-2
lines changed| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
22 | 22 | | |
23 | 23 | | |
24 | 24 | | |
| 25 | + | |
25 | 26 | | |
26 | 27 | | |
27 | 28 | | |
| |||
270 | 271 | | |
271 | 272 | | |
272 | 273 | | |
| 274 | + | |
273 | 275 | | |
274 | 276 | | |
275 | 277 | | |
| |||
380 | 382 | | |
381 | 383 | | |
382 | 384 | | |
| 385 | + | |
383 | 386 | | |
384 | 387 | | |
385 | 388 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
569 | 569 | | |
570 | 570 | | |
571 | 571 | | |
572 | | - | |
| 572 | + | |
| 573 | + | |
| 574 | + | |
| 575 | + | |
| 576 | + | |
| 577 | + | |
| 578 | + | |
| 579 | + | |
| 580 | + | |
| 581 | + | |
| 582 | + | |
| 583 | + | |
| 584 | + | |
| 585 | + | |
573 | 586 | | |
574 | 587 | | |
575 | 588 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
457 | 457 | | |
458 | 458 | | |
459 | 459 | | |
460 | | - | |
| 460 | + | |
| 461 | + | |
| 462 | + | |
461 | 463 | | |
462 | 464 | | |
463 | 465 | | |
| |||
0 commit comments