From e5d793f97cd52f0c4e4fdb0620fde50e92a3c5ed Mon Sep 17 00:00:00 2001 From: Oliver Hofkens <23633993+OliverHofkens@users.noreply.github.com> Date: Sat, 7 Mar 2026 16:37:39 +0100 Subject: [PATCH 1/6] perf: flatten freq map and improve minmax impl through benchmarks --- rust/chaosymmetry/Cargo.lock | 244 +++++++++++++++++++++- rust/chaosymmetry/Cargo.toml | 9 +- rust/chaosymmetry/benches/scan_min_max.rs | 135 ++++++++++++ rust/chaosymmetry/src/chaos.rs | 25 ++- rust/chaosymmetry/src/color/scale.rs | 77 +++---- rust/chaosymmetry/src/main.rs | 9 +- 6 files changed, 429 insertions(+), 70 deletions(-) create mode 100644 rust/chaosymmetry/benches/scan_min_max.rs diff --git a/rust/chaosymmetry/Cargo.lock b/rust/chaosymmetry/Cargo.lock index e17f74c..9fadcd7 100644 --- a/rust/chaosymmetry/Cargo.lock +++ b/rust/chaosymmetry/Cargo.lock @@ -94,6 +94,12 @@ dependencies = [ "libc", ] +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + [[package]] name = "anstream" version = "0.6.18" @@ -274,6 +280,12 @@ dependencies = [ "wayland-client", ] +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cc" version = "1.2.9" @@ -315,8 +327,9 @@ version = "0.1.0" dependencies = [ "chrono", "clap", + "criterion", "env_logger", - "itertools", + "itertools 0.14.0", "log", "num", "pixels", @@ -342,6 +355,33 @@ dependencies = [ "windows-link", ] +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + [[package]] name = "clap" version = "4.5.28" @@ -497,12 +537,73 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools 0.10.5", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools 0.10.5", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + [[package]] name = "cursor-icon" version = "1.1.0" @@ -765,6 +866,17 @@ dependencies = [ "bitflags 2.7.0", ] +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "zerocopy 0.8.27", +] + [[package]] name = "hashbrown" version = "0.14.5" @@ -808,6 +920,12 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "hexf-parse" version = "0.2.1" @@ -862,12 +980,32 @@ dependencies = [ "rustversion", ] +[[package]] +name = "is-terminal" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" +dependencies = [ + "hermit-abi 0.5.2", + "libc", + "windows-sys 0.59.0", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itertools" version = "0.14.0" @@ -877,6 +1015,12 @@ dependencies = [ "either", ] +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + [[package]] name = "jni" version = "0.21.1" @@ -1424,6 +1568,12 @@ version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +[[package]] +name = "oorandom" +version = "11.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" + [[package]] name = "orbclient" version = "0.3.48" @@ -1523,6 +1673,34 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2" +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + "plotters-backend", +] + [[package]] name = "png" version = "0.17.16" @@ -1544,7 +1722,7 @@ checksum = "a604568c3202727d1507653cb121dbd627a58684eb09a820fd746bee38b4442f" dependencies = [ "cfg-if", "concurrent-queue", - "hermit-abi", + "hermit-abi 0.4.0", "pin-project-lite", "rustix", "tracing", @@ -1622,7 +1800,7 @@ checksum = "3779b94aeb87e8bd4e834cee3650289ee9e0d5677f976ecdb6d219e5f4f6cd94" dependencies = [ "rand_chacha", "rand_core", - "zerocopy 0.8.14", + "zerocopy 0.8.27", ] [[package]] @@ -1642,7 +1820,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b08f3c9802962f7e1b25113931d94f43ed9725bebc59db9d0c3e9a23b67e15ff" dependencies = [ "getrandom 0.3.1", - "zerocopy 0.8.14", + "zerocopy 0.8.27", ] [[package]] @@ -1657,6 +1835,26 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20675572f6f24e9e76ef639bc5552774ed45f1c30e2951e1e99c59888861c539" +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redox_syscall" version = "0.4.1" @@ -1735,6 +1933,12 @@ version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f7c45b9784283f1b2e7fb61b42047c2fd678ef0960d4f6f1eba131594cc369d4" +[[package]] +name = "ryu" +version = "1.0.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" + [[package]] name = "safe_arch" version = "0.7.4" @@ -1798,6 +2002,18 @@ dependencies = [ "syn 2.0.96", ] +[[package]] +name = "serde_json" +version = "1.0.143" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + [[package]] name = "serde_spanned" version = "0.6.8" @@ -1980,6 +2196,16 @@ dependencies = [ "strict-num", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "toml" version = "0.8.20" @@ -2861,11 +3087,11 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.14" +version = "0.8.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a367f292d93d4eab890745e75a778da40909cab4d6ff8173693812f79c4a2468" +checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" dependencies = [ - "zerocopy-derive 0.8.14", + "zerocopy-derive 0.8.27", ] [[package]] @@ -2881,9 +3107,9 @@ dependencies = [ [[package]] name = "zerocopy-derive" -version = "0.8.14" +version = "0.8.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3931cb58c62c13adec22e38686b559c86a30565e16ad6e8510a337cedc611e1" +checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" dependencies = [ "proc-macro2", "quote", diff --git a/rust/chaosymmetry/Cargo.toml b/rust/chaosymmetry/Cargo.toml index 2c1d2c4..0ed4a6c 100644 --- a/rust/chaosymmetry/Cargo.toml +++ b/rust/chaosymmetry/Cargo.toml @@ -3,11 +3,18 @@ name = "chaosymmetry" version = "0.1.0" edition = "2021" +[dev-dependencies] +criterion = { version = "0.5", features = ["html_reports"] } +itertools = "0.14.0" + +[[bench]] +name = "scan_min_max" +harness = false + [dependencies] chrono = "0.4.40" clap = { version = "4.5.28", features = ["derive"] } env_logger = "0.11.6" -itertools = "0.14.0" log = "0.4.22" num = "0.4.3" pixels = "0.15.0" diff --git a/rust/chaosymmetry/benches/scan_min_max.rs b/rust/chaosymmetry/benches/scan_min_max.rs new file mode 100644 index 0000000..03b3f25 --- /dev/null +++ b/rust/chaosymmetry/benches/scan_min_max.rs @@ -0,0 +1,135 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use itertools::Itertools; +use itertools::MinMaxResult::{MinMax, NoElements, OneElement}; + +// --------------------------------------------------------------------------- +// Shared input generation +// --------------------------------------------------------------------------- + +/// Builds a realistic freq map: ~50% of cells are zero (unvisited), the rest +/// are random u64s in a plausible hit-count range. +fn make_freq_map(width: usize, height: usize) -> Vec { + use std::collections::hash_map::DefaultHasher; + use std::hash::{Hash, Hasher}; + + let n = width * height; + let mut v = Vec::with_capacity(n); + for i in 0..n { + // Cheap deterministic "random" without pulling in rand as a dev-dep. + let mut h = DefaultHasher::new(); + i.hash(&mut h); + let r = h.finish(); + // 50% is zero + if r % 10 < 5 { + v.push(0); + } else { + v.push((r % 1000) + 1); + } + } + v +} + +// --------------------------------------------------------------------------- +// Candidate implementations +// --------------------------------------------------------------------------- + +fn minmax_itertools(freqs: &[u64]) -> (u64, u64) { + match freqs.iter().filter(|v| **v > 0).minmax() { + NoElements => (0, 0), + OneElement(x) => (*x, *x), + MinMax(x, y) => (*x, *y), + } +} + +fn minmax_manual_loop(freqs: &[u64]) -> (u64, u64) { + let mut min = u64::MAX; + let mut max = 0u64; + for &v in freqs { + if v > 0 { + if v < min { + min = v; + } + if v > max { + max = v; + } + } + } + if max == 0 { + (0, 0) + } else { + (min, max) + } +} + +fn minmax_two_pass(freqs: &[u64]) -> (u64, u64) { + let min = freqs.iter().filter(|&&v| v > 0).min().copied().unwrap_or(0); + let max = freqs.iter().copied().max().unwrap_or(0); + (min, max) +} + +fn minmax_fold(freqs: &[u64]) -> (u64, u64) { + let (min, max) = freqs + .iter() + .filter(|&&v| v > 0) + .fold((u64::MAX, 0u64), |(mn, mx), &v| (mn.min(v), mx.max(v))); + if max == 0 { + (0, 0) + } else { + (min, max) + } +} + +fn minmax_chunked(freqs: &[u64]) -> (u64, u64) { + const CHUNK: usize = 1024; + let (min, max) = freqs + .chunks(CHUNK) + .fold((u64::MAX, 0u64), |(mn, mx), chunk| { + let (cmn, cmx) = chunk + .iter() + .filter(|&&v| v > 0) + .fold((u64::MAX, 0u64), |(a, b), &v| (a.min(v), b.max(v))); + (mn.min(cmn), mx.max(cmx)) + }); + if max == 0 { + (0, 0) + } else { + (min, max) + } +} + +// --------------------------------------------------------------------------- +// Benchmarks +// --------------------------------------------------------------------------- + +fn bench_scan_min_max(c: &mut Criterion) { + // Use real simulation dimensions. + const W: usize = 10_000; + const H: usize = 10_000; + + let freqs = make_freq_map(W, H); + + let mut g = c.benchmark_group("scan_min_max"); + + g.bench_function("itertools_minmax", |b| { + b.iter(|| minmax_itertools(black_box(&freqs))) + }); + + g.bench_function("manual_loop", |b| { + b.iter(|| minmax_manual_loop(black_box(&freqs))) + }); + + g.bench_function("two_pass", |b| { + b.iter(|| minmax_two_pass(black_box(&freqs))) + }); + + g.bench_function("fold", |b| b.iter(|| minmax_fold(black_box(&freqs)))); + + g.bench_function("chunked_fold", |b| { + b.iter(|| minmax_chunked(black_box(&freqs))) + }); + + g.finish(); +} + +criterion_group!(benches, bench_scan_min_max); +criterion_main!(benches); diff --git a/rust/chaosymmetry/src/chaos.rs b/rust/chaosymmetry/src/chaos.rs index 5b54d96..d78eea8 100644 --- a/rust/chaosymmetry/src/chaos.rs +++ b/rust/chaosymmetry/src/chaos.rs @@ -6,7 +6,7 @@ use crate::color::palette::Palette; use crate::color::scale::ColorScale; use crate::figures::Figure; -type FreqMap = Vec>; +type FreqMap = Vec; type SharedFreqMap = Arc>; pub struct ChaosEngine { @@ -27,7 +27,7 @@ impl ChaosEngine { ChaosEngine { width, height, - freq: Arc::new(RwLock::new(vec![vec![0; width]; height])), + freq: Arc::new(RwLock::new(vec![0; width * height])), params, curr, } @@ -46,7 +46,7 @@ impl ChaosEngine { self.curr = next; let (x, y) = self.coord_to_screen(next); let mut freqs = self.freq.write().unwrap(); - freqs[y][x] += 1; + freqs[y * self.width + x] += 1; } pub fn batch_step(&mut self, steps: usize) { @@ -70,6 +70,7 @@ pub struct Position { } pub struct Renderer { + pub sim_width: usize, pub win_width: usize, pub scale: f64, color_scale: Box, @@ -80,6 +81,7 @@ pub struct Renderer { impl Renderer { pub fn new( + sim_width: usize, win_width: usize, scale: f64, color_scale: Box, @@ -87,6 +89,7 @@ impl Renderer { freq: SharedFreqMap, ) -> Self { Renderer { + sim_width, win_width, scale, color_scale, @@ -98,18 +101,17 @@ impl Renderer { pub fn draw(&mut self, frame: &mut [u8]) { let freqs = self.freq.read().unwrap(); - self.color_scale.init_from_freq(&freqs); + self.color_scale.init_from_freq(&freqs[..]); // Render center of simulation in center of window let win_height = frame.len() / 4 / self.win_width; - let sim_width = freqs[0].len() as i64; - let sim_height = freqs.len() as i64; + let sim_height = freqs.len() as i64 / self.sim_width as i64; // Window size scaled, in sim units let scaled_win_width = self.win_width as f64 / self.scale; let scaled_win_height = win_height as f64 / self.scale; - let offset_x = (sim_width as f64 - scaled_win_width) / 2.0; + let offset_x = (self.sim_width as f64 - scaled_win_width) / 2.0; let offset_y = (sim_height as f64 - scaled_win_height) / 2.0; let freqs_per_px = (1.0 / self.scale).clamp(1.0, f64::MAX) as i64; @@ -127,11 +129,12 @@ impl Renderer { let freq = (sim_start_y.max(0)..(sim_start_y + freqs_per_px).clamp(0, sim_height - 1)) .map(|row| { - // let row_data = &freqs[row as usize]; - let start = sim_start_x.max(0) as usize; - let end = (sim_start_x + freqs_per_px).clamp(0, sim_width - 1) as usize; + let row_offset = row as usize * self.sim_width; + let start = row_offset + sim_start_x.max(0) as usize; + let end = row_offset + + (sim_start_x + freqs_per_px).clamp(0, self.sim_width as i64 - 1) as usize; if start < end { - freqs[row as usize][start..end].iter().sum::() + freqs[start..end].iter().sum::() } else { 0 } diff --git a/rust/chaosymmetry/src/color/scale.rs b/rust/chaosymmetry/src/color/scale.rs index db3f893..def3795 100644 --- a/rust/chaosymmetry/src/color/scale.rs +++ b/rust/chaosymmetry/src/color/scale.rs @@ -1,14 +1,32 @@ -use itertools::Itertools; -use itertools::MinMaxResult::{MinMax, NoElements, OneElement}; use serde::{Deserialize, Serialize}; +fn scan_min_max(freqs: &[u64]) -> (u64, u64) { + let mut min = u64::MAX; + let mut max = 0u64; + for &v in freqs { + if v > 0 { + if v < min { + min = v; + } + if v > max { + max = v; + } + } + } + if max == 0 { + (0, 0) + } else { + (min, max) + } +} + #[typetag::serde(tag = "type")] pub trait ColorScale { - fn init_from_freq(&mut self, freqs: &[Vec]); + fn init_from_freq(&mut self, freqs: &[u64]); fn freq_to_scale(&self, freq: u64) -> f64; } -#[derive(Deserialize, Serialize)] +#[derive(Deserialize, Serialize, Default)] pub struct LinearColorScale { #[serde(default)] min_freq: u64, @@ -18,24 +36,8 @@ pub struct LinearColorScale { #[typetag::serde] impl ColorScale for LinearColorScale { - fn init_from_freq(&mut self, freqs: &[Vec]) { - let mut min: u64 = u64::MAX; - let mut max: u64 = 0; - - for row in freqs { - match row.iter().filter(|v| **v > 0).minmax() { - NoElements => (), - OneElement(x) => { - min = min.min(*x); - max = max.max(*x); - } - MinMax(x, y) => { - min = min.min(*x); - max = max.max(*y); - } - } - } - + fn init_from_freq(&mut self, freqs: &[u64]) { + let (min, max) = scan_min_max(freqs); self.min_freq = min; self.max_freq = max; } @@ -48,7 +50,7 @@ impl ColorScale for LinearColorScale { } } -#[derive(Deserialize, Serialize)] +#[derive(Deserialize, Serialize, Default)] pub struct LogColorScale { #[serde(default)] min_log: u64, @@ -58,31 +60,10 @@ pub struct LogColorScale { #[typetag::serde] impl ColorScale for LogColorScale { - fn init_from_freq(&mut self, freqs: &[Vec]) { - let mut min: u64 = u64::MAX; - let mut max: u64 = 0; - - for row in freqs { - match row.iter().filter(|v| **v > 0).minmax() { - NoElements => (), - OneElement(x) => { - min = min.min(*x); - max = max.max(*x); - } - MinMax(x, y) => { - min = min.min(*x); - max = max.max(*y); - } - } - } - + fn init_from_freq(&mut self, freqs: &[u64]) { + let (min, max) = scan_min_max(freqs); self.min_log = min.ilog2() as u64; self.max_log = max.ilog2() as u64; - - // println!( - // "Min: {}, Min Log: {}, Max: {}, Max Log: {}", - // min, self.min_log, max, self.max_log - // ); } fn freq_to_scale(&self, freq: u64) -> f64 { @@ -102,7 +83,7 @@ mod tests { #[test] fn linear_scale_from_freq_simple() { let mut scale = LinearColorScale::default(); - let freqs = vec![vec![0, 1, 0], vec![1, 2, 1], vec![0, 1, 0]]; + let freqs = vec![0, 1, 0, 1, 2, 1, 0, 1, 0]; scale.init_from_freq(&freqs); assert_eq!(scale.min_freq, 1); @@ -124,7 +105,7 @@ mod tests { #[test] fn log_scale_from_freq_simple() { let mut scale = LogColorScale::default(); - let freqs = vec![vec![0, 1, 0], vec![1, 1024, 1], vec![0, 1, 0]]; + let freqs = vec![0, 1, 0, 1, 1024, 1, 0, 1, 0]; scale.init_from_freq(&freqs); assert_eq!(scale.min_log, 0); diff --git a/rust/chaosymmetry/src/main.rs b/rust/chaosymmetry/src/main.rs index c9b9e4c..c29b995 100644 --- a/rust/chaosymmetry/src/main.rs +++ b/rust/chaosymmetry/src/main.rs @@ -65,7 +65,14 @@ fn main() { let im = rng.random_range(0.001..0.005); let mut engine = ChaosEngine::new(SIM_WIDTH, SIM_HEIGHT, Complex64::new(re, im), figure); - let renderer = Renderer::new(WIDTH, 0.5, style.scale, style.palette, engine.freq.clone()); + let renderer = Renderer::new( + SIM_WIDTH, + WIDTH, + 0.5, + style.scale, + style.palette, + engine.freq.clone(), + ); // Simulate in background thread thread::spawn(move || { From d3ad82e1421e0306a132a6e65b73690dc99f3a79 Mon Sep 17 00:00:00 2001 From: Oliver Hofkens <23633993+OliverHofkens@users.noreply.github.com> Date: Sat, 7 Mar 2026 18:03:30 +0100 Subject: [PATCH 2/6] perf: don't update color scale every frame --- rust/chaosymmetry/src/chaos.rs | 13 ++++++++++++- rust/chaosymmetry/src/main.rs | 1 + 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/rust/chaosymmetry/src/chaos.rs b/rust/chaosymmetry/src/chaos.rs index d78eea8..8637bab 100644 --- a/rust/chaosymmetry/src/chaos.rs +++ b/rust/chaosymmetry/src/chaos.rs @@ -77,6 +77,8 @@ pub struct Renderer { color_palette: Box, freq: SharedFreqMap, pub position: Position, + frames_drawn: u64, + update_colors_every: u64, } impl Renderer { @@ -87,6 +89,7 @@ impl Renderer { color_scale: Box, color_palette: Box, freq: SharedFreqMap, + update_colors_every: u64, ) -> Self { Renderer { sim_width, @@ -96,12 +99,18 @@ impl Renderer { color_palette, freq, position: Position::default(), + frames_drawn: 0, + update_colors_every, } } pub fn draw(&mut self, frame: &mut [u8]) { let freqs = self.freq.read().unwrap(); - self.color_scale.init_from_freq(&freqs[..]); + + // Recalculating color scale is quite expensive, so don't do it every frame. + if self.frames_drawn.is_multiple_of(self.update_colors_every) { + self.color_scale.init_from_freq(&freqs[..]); + } // Render center of simulation in center of window let win_height = frame.len() / 4 / self.win_width; @@ -150,5 +159,7 @@ impl Renderer { px.copy_from_slice(&rgba); } + + self.frames_drawn += 1; } } diff --git a/rust/chaosymmetry/src/main.rs b/rust/chaosymmetry/src/main.rs index c29b995..2a37873 100644 --- a/rust/chaosymmetry/src/main.rs +++ b/rust/chaosymmetry/src/main.rs @@ -72,6 +72,7 @@ fn main() { style.scale, style.palette, engine.freq.clone(), + 10, ); // Simulate in background thread From 28e3bc64c1bbe33cf6a0ea9d24881c77a826ae35 Mon Sep 17 00:00:00 2001 From: Oliver Hofkens <23633993+OliverHofkens@users.noreply.github.com> Date: Sat, 7 Mar 2026 18:14:59 +0100 Subject: [PATCH 3/6] perf: add fast path to pixel freq calculations --- rust/chaosymmetry/src/chaos.rs | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/rust/chaosymmetry/src/chaos.rs b/rust/chaosymmetry/src/chaos.rs index 8637bab..7653f18 100644 --- a/rust/chaosymmetry/src/chaos.rs +++ b/rust/chaosymmetry/src/chaos.rs @@ -136,19 +136,25 @@ impl Renderer { let sim_start_y = ((win_y as f64 / self.scale) + offset_y + self.position.vertical as f64) as i64; - let freq = (sim_start_y.max(0)..(sim_start_y + freqs_per_px).clamp(0, sim_height - 1)) - .map(|row| { - let row_offset = row as usize * self.sim_width; - let start = row_offset + sim_start_x.max(0) as usize; - let end = row_offset - + (sim_start_x + freqs_per_px).clamp(0, self.sim_width as i64 - 1) as usize; - if start < end { - freqs[start..end].iter().sum::() - } else { - 0 - } - }) - .sum::(); + // Fast path if zoomed in sufficiently: + let freq = if freqs_per_px == 1 { + freqs[(sim_start_y * self.sim_width as i64 + sim_start_x) as usize] + } else { + (sim_start_y.max(0)..(sim_start_y + freqs_per_px).clamp(0, sim_height - 1)) + .map(|row| { + let row_offset = row as usize * self.sim_width; + let start = row_offset + sim_start_x.max(0) as usize; + let end = row_offset + + (sim_start_x + freqs_per_px).clamp(0, self.sim_width as i64 - 1) + as usize; + if start < end { + freqs[start..end].iter().sum::() + } else { + 0 + } + }) + .sum::() + }; let rgba = if freq == 0 { [u8::MAX; 4] From 2c2f9f255ae7ca5f29e72b281c914434fcea3f48 Mon Sep 17 00:00:00 2001 From: Oliver Hofkens <23633993+OliverHofkens@users.noreply.github.com> Date: Sun, 8 Mar 2026 10:10:37 +0100 Subject: [PATCH 4/6] perf: simplify freq bucketing in `draw()` --- rust/chaosymmetry/src/chaos.rs | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/rust/chaosymmetry/src/chaos.rs b/rust/chaosymmetry/src/chaos.rs index 7653f18..e54584e 100644 --- a/rust/chaosymmetry/src/chaos.rs +++ b/rust/chaosymmetry/src/chaos.rs @@ -140,20 +140,22 @@ impl Renderer { let freq = if freqs_per_px == 1 { freqs[(sim_start_y * self.sim_width as i64 + sim_start_x) as usize] } else { - (sim_start_y.max(0)..(sim_start_y + freqs_per_px).clamp(0, sim_height - 1)) - .map(|row| { - let row_offset = row as usize * self.sim_width; - let start = row_offset + sim_start_x.max(0) as usize; - let end = row_offset - + (sim_start_x + freqs_per_px).clamp(0, self.sim_width as i64 - 1) - as usize; - if start < end { - freqs[start..end].iter().sum::() - } else { - 0 - } - }) - .sum::() + let sim_x0 = sim_start_x.max(0) as usize; + let sim_y0 = sim_start_y.max(0) as usize; + let sim_x1 = (sim_start_x + freqs_per_px).clamp(0, self.sim_width as i64) as usize; + let sim_y1 = (sim_start_y + freqs_per_px).clamp(0, sim_height) as usize; + + let col_len = sim_x1.saturating_sub(sim_x0); + let mut res = 0u64; + + if sim_x0 < sim_x1 && sim_y0 < sim_y1 { + let mut row_start = sim_y0 * self.sim_width + sim_x0; + for _ in sim_y0..sim_y1 { + res += freqs[row_start..row_start + col_len].iter().sum::(); + row_start += self.sim_width; + } + } + res }; let rgba = if freq == 0 { From 276b1463f2494ef4f3fa1788bbba4b7d8652b572 Mon Sep 17 00:00:00 2001 From: Oliver Hofkens <23633993+OliverHofkens@users.noreply.github.com> Date: Sun, 8 Mar 2026 10:17:21 +0100 Subject: [PATCH 5/6] perf: add benchmarks on draw code --- rust/chaosymmetry/Cargo.toml | 5 + rust/chaosymmetry/benches/draw.rs | 225 ++++++++++++++++++++++++++++++ 2 files changed, 230 insertions(+) create mode 100644 rust/chaosymmetry/benches/draw.rs diff --git a/rust/chaosymmetry/Cargo.toml b/rust/chaosymmetry/Cargo.toml index 0ed4a6c..b312dc0 100644 --- a/rust/chaosymmetry/Cargo.toml +++ b/rust/chaosymmetry/Cargo.toml @@ -11,6 +11,11 @@ itertools = "0.14.0" name = "scan_min_max" harness = false +[[bench]] +name = "draw" +harness = false + + [dependencies] chrono = "0.4.40" clap = { version = "4.5.28", features = ["derive"] } diff --git a/rust/chaosymmetry/benches/draw.rs b/rust/chaosymmetry/benches/draw.rs new file mode 100644 index 0000000..1f715dd --- /dev/null +++ b/rust/chaosymmetry/benches/draw.rs @@ -0,0 +1,225 @@ +use criterion::{black_box, criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion}; + +// --------------------------------------------------------------------------- +// Shared input generation +// --------------------------------------------------------------------------- + +/// Builds a realistic freq map: ~50% of cells are zero (unvisited), the rest +/// are random u64s in a plausible hit-count range. +fn make_freq_map(width: usize, height: usize) -> Vec { + use std::collections::hash_map::DefaultHasher; + use std::hash::{Hash, Hasher}; + + let n = width * height; + let mut v = Vec::with_capacity(n); + for i in 0..n { + let mut h = DefaultHasher::new(); + i.hash(&mut h); + let r = h.finish(); + if r % 10 < 5 { + v.push(0); + } else { + v.push((r % 1000) + 1); + } + } + v +} + +// --------------------------------------------------------------------------- +// Variant A — original iterator-chain approach (from before the refactor) +// --------------------------------------------------------------------------- + +fn sum_block_iter( + freqs: &[u64], + sim_width: usize, + sim_start_x: i64, + sim_start_y: i64, + freqs_per_px: i64, + sim_height: i64, +) -> u64 { + let sim_x1 = (sim_start_x + freqs_per_px).clamp(0, sim_width as i64 - 1) as usize; + let sim_y1 = (sim_start_y + freqs_per_px).clamp(0, sim_height - 1) as usize; + + (sim_start_y.max(0)..sim_y1 as i64) + .map(|row| { + let row_offset = row as usize * sim_width; + let start = row_offset + sim_start_x.max(0) as usize; + let end = row_offset + sim_x1; + if start < end { + freqs[start..end].iter().sum::() + } else { + 0 + } + }) + .sum::() +} + +// --------------------------------------------------------------------------- +// Variant B — current incremental-slice approach +// --------------------------------------------------------------------------- + +fn sum_block_incremental( + freqs: &[u64], + sim_width: usize, + x0: usize, + x1: usize, + y0: usize, + y1: usize, +) -> u64 { + let col_len = x1.saturating_sub(x0); + if col_len == 0 || y0 >= y1 { + return 0; + } + let mut res = 0u64; + let mut row_start = y0 * sim_width + x0; + for _ in y0..y1 { + res += freqs[row_start..row_start + col_len].iter().sum::(); + row_start += sim_width; + } + res +} + +// --------------------------------------------------------------------------- +// Variant C — summed-area table (from perf/summed-area-table branch) +// --------------------------------------------------------------------------- + +struct SummedAreaTable { + table: Vec, + width: usize, + height: usize, +} + +impl SummedAreaTable { + fn new(width: usize, height: usize) -> Self { + Self { + table: vec![0; (width + 1) * (height + 1)], + width, + height, + } + } + + fn init_from_map(&mut self, map: &[u64]) { + let stride = self.width + 1; + for row in 1..self.height + 1 { + let map_row_offset = (row - 1) * self.width; + let tbl_row_offset = row * stride; + for col in 1..self.width + 1 { + let map_idx = map_row_offset + col - 1; + let tbl_idx = tbl_row_offset + col; + self.table[tbl_idx] = map[map_idx] + .wrapping_add(self.table[tbl_idx - 1]) + .wrapping_add(self.table[tbl_idx - stride]) + .wrapping_sub(self.table[tbl_idx - stride - 1]); + } + } + } + + /// Inclusive rectangle query: sums cells in [x0, x1] × [y0, y1]. + fn query(&self, x0: usize, y0: usize, x1: usize, y1: usize) -> u64 { + let stride = self.width + 1; + let x1 = x1 + 1; + let y1 = y1 + 1; + let idx_d = stride * y1 + x1; + let idx_b = stride * y0 + x1; + let idx_c = stride * y1 + x0; + let idx_a = stride * y0 + x0; + self.table[idx_d] + .wrapping_sub(self.table[idx_b]) + .wrapping_sub(self.table[idx_c]) + .wrapping_add(self.table[idx_a]) + } +} + +// --------------------------------------------------------------------------- +// Benchmarks +// --------------------------------------------------------------------------- + +/// Per-query cost of each approach — what draw() pays per output pixel. +/// The SAT is pre-built outside the timed loop (its best case / amortised cost). +fn bench_block_sum_query(c: &mut Criterion) { + const W: usize = 10_000; + const H: usize = 10_000; + + let freqs = make_freq_map(W, H); + + // Query a tile in the middle of the sim — representative of the common case. + let cx = W / 2; + let cy = H / 2; + + // Pre-build SAT once; only query cost is timed below. + let mut sat = SummedAreaTable::new(W, H); + sat.init_from_map(&freqs); + + let mut g = c.benchmark_group("block_sum_query"); + + for block_size in [2usize, 8, 32] { + let x0 = cx; + let x1 = cx + block_size; + let y0 = cy; + let y1 = cy + block_size; + let fpp = block_size as i64; + + g.bench_with_input(BenchmarkId::new("iter", block_size), &block_size, |b, _| { + b.iter(|| { + sum_block_iter( + black_box(&freqs), + W, + black_box(cx as i64), + black_box(cy as i64), + black_box(fpp), + H as i64, + ) + }) + }); + + g.bench_with_input( + BenchmarkId::new("incremental_slice", block_size), + &block_size, + |b, _| { + b.iter(|| { + sum_block_incremental( + black_box(&freqs), + W, + black_box(x0), + black_box(x1), + black_box(y0), + black_box(y1), + ) + }) + }, + ); + + // SAT query only — table already built, reflects amortised per-pixel cost. + g.bench_with_input( + BenchmarkId::new("sat_query", block_size), + &block_size, + |b, _| { + // x1/y1 are inclusive in the SAT API. + b.iter(|| black_box(sat.query(black_box(x0), black_box(y0), x1 - 1, y1 - 1))) + }, + ); + } + + g.finish(); +} + +/// SAT build cost in isolation — the one-off overhead paid per frame. +/// Uses iter_batched so the table is reset between samples without touching +/// the timed section. +fn bench_sat_build(c: &mut Criterion) { + const W: usize = 10_000; + const H: usize = 10_000; + + let freqs = make_freq_map(W, H); + + c.bench_function("sat_build/10000x10000", |b| { + b.iter_batched( + || SummedAreaTable::new(W, H), + |mut sat| sat.init_from_map(black_box(&freqs)), + BatchSize::LargeInput, + ) + }); +} + +criterion_group!(benches, bench_block_sum_query, bench_sat_build); +criterion_main!(benches); From 4ba195664796f6fd85dbe736fe5a0c7ba7ba279f Mon Sep 17 00:00:00 2001 From: Oliver Hofkens <23633993+OliverHofkens@users.noreply.github.com> Date: Sun, 8 Mar 2026 10:23:32 +0100 Subject: [PATCH 6/6] perf: parallelize `draw()` with `rayon` --- rust/chaosymmetry/Cargo.lock | 1 + rust/chaosymmetry/Cargo.toml | 1 + rust/chaosymmetry/src/chaos.rs | 92 ++++++++++++++------------ rust/chaosymmetry/src/color/palette.rs | 2 +- rust/chaosymmetry/src/color/scale.rs | 2 +- 5 files changed, 53 insertions(+), 45 deletions(-) diff --git a/rust/chaosymmetry/Cargo.lock b/rust/chaosymmetry/Cargo.lock index 9fadcd7..1132e41 100644 --- a/rust/chaosymmetry/Cargo.lock +++ b/rust/chaosymmetry/Cargo.lock @@ -335,6 +335,7 @@ dependencies = [ "pixels", "png", "rand", + "rayon", "serde", "toml", "typetag", diff --git a/rust/chaosymmetry/Cargo.toml b/rust/chaosymmetry/Cargo.toml index b312dc0..150e3b6 100644 --- a/rust/chaosymmetry/Cargo.toml +++ b/rust/chaosymmetry/Cargo.toml @@ -25,6 +25,7 @@ num = "0.4.3" pixels = "0.15.0" png = "0.17.16" rand = "0.9.0" +rayon = "1.10" serde = {version = "1.0.217", features = ["derive"]} toml = "0.8.20" typetag = "0.2.19" diff --git a/rust/chaosymmetry/src/chaos.rs b/rust/chaosymmetry/src/chaos.rs index e54584e..36c00c4 100644 --- a/rust/chaosymmetry/src/chaos.rs +++ b/rust/chaosymmetry/src/chaos.rs @@ -1,6 +1,7 @@ use std::sync::{Arc, RwLock}; use num::complex::Complex64; +use rayon::prelude::*; use crate::color::palette::Palette; use crate::color::scale::ColorScale; @@ -73,8 +74,8 @@ pub struct Renderer { pub sim_width: usize, pub win_width: usize, pub scale: f64, - color_scale: Box, - color_palette: Box, + color_scale: Box, + color_palette: Box, freq: SharedFreqMap, pub position: Position, frames_drawn: u64, @@ -86,8 +87,8 @@ impl Renderer { sim_width: usize, win_width: usize, scale: f64, - color_scale: Box, - color_palette: Box, + color_scale: Box, + color_palette: Box, freq: SharedFreqMap, update_colors_every: u64, ) -> Self { @@ -127,46 +128,51 @@ impl Renderer { // 1 pixel is 4 u8 values: R,G,B,A // So we iter in chunks of 4. - for (i, px) in frame.chunks_exact_mut(4).enumerate() { - let win_x = i % self.win_width; - let win_y = i / self.win_width; - - let sim_start_x = - ((win_x as f64 / self.scale) + offset_x + self.position.horizontal as f64) as i64; - let sim_start_y = - ((win_y as f64 / self.scale) + offset_y + self.position.vertical as f64) as i64; - - // Fast path if zoomed in sufficiently: - let freq = if freqs_per_px == 1 { - freqs[(sim_start_y * self.sim_width as i64 + sim_start_x) as usize] - } else { - let sim_x0 = sim_start_x.max(0) as usize; - let sim_y0 = sim_start_y.max(0) as usize; - let sim_x1 = (sim_start_x + freqs_per_px).clamp(0, self.sim_width as i64) as usize; - let sim_y1 = (sim_start_y + freqs_per_px).clamp(0, sim_height) as usize; - - let col_len = sim_x1.saturating_sub(sim_x0); - let mut res = 0u64; - - if sim_x0 < sim_x1 && sim_y0 < sim_y1 { - let mut row_start = sim_y0 * self.sim_width + sim_x0; - for _ in sim_y0..sim_y1 { - res += freqs[row_start..row_start + col_len].iter().sum::(); - row_start += self.sim_width; + frame + .par_chunks_exact_mut(4) + .enumerate() + .for_each(|(i, px)| { + let win_x = i % self.win_width; + let win_y = i / self.win_width; + + let sim_start_x = ((win_x as f64 / self.scale) + + offset_x + + self.position.horizontal as f64) as i64; + let sim_start_y = + ((win_y as f64 / self.scale) + offset_y + self.position.vertical as f64) as i64; + + // Fast path if zoomed in sufficiently: + let freq = if freqs_per_px == 1 { + freqs[(sim_start_y * self.sim_width as i64 + sim_start_x) as usize] + } else { + let sim_x0 = sim_start_x.max(0) as usize; + let sim_y0 = sim_start_y.max(0) as usize; + let sim_x1 = + (sim_start_x + freqs_per_px).clamp(0, self.sim_width as i64) as usize; + let sim_y1 = (sim_start_y + freqs_per_px).clamp(0, sim_height) as usize; + + let col_len = sim_x1.saturating_sub(sim_x0); + let mut res = 0u64; + + if sim_x0 < sim_x1 && sim_y0 < sim_y1 { + let mut row_start = sim_y0 * self.sim_width + sim_x0; + for _ in sim_y0..sim_y1 { + res += freqs[row_start..row_start + col_len].iter().sum::(); + row_start += self.sim_width; + } } - } - res - }; - - let rgba = if freq == 0 { - [u8::MAX; 4] - } else { - let color_scale = self.color_scale.freq_to_scale(freq); - self.color_palette.color_from_scale(color_scale) - }; - - px.copy_from_slice(&rgba); - } + res + }; + + let rgba = if freq == 0 { + [u8::MAX; 4] + } else { + let color_scale = self.color_scale.freq_to_scale(freq); + self.color_palette.color_from_scale(color_scale) + }; + + px.copy_from_slice(&rgba); + }); self.frames_drawn += 1; } diff --git a/rust/chaosymmetry/src/color/palette.rs b/rust/chaosymmetry/src/color/palette.rs index dddc91c..b3de881 100644 --- a/rust/chaosymmetry/src/color/palette.rs +++ b/rust/chaosymmetry/src/color/palette.rs @@ -1,7 +1,7 @@ use serde::{Deserialize, Serialize}; #[typetag::serde(tag = "type")] -pub trait Palette { +pub trait Palette: Sync { fn color_from_scale(&self, scale: f64) -> [u8; 4]; } diff --git a/rust/chaosymmetry/src/color/scale.rs b/rust/chaosymmetry/src/color/scale.rs index def3795..d5cfbcb 100644 --- a/rust/chaosymmetry/src/color/scale.rs +++ b/rust/chaosymmetry/src/color/scale.rs @@ -21,7 +21,7 @@ fn scan_min_max(freqs: &[u64]) -> (u64, u64) { } #[typetag::serde(tag = "type")] -pub trait ColorScale { +pub trait ColorScale: Sync { fn init_from_freq(&mut self, freqs: &[u64]); fn freq_to_scale(&self, freq: u64) -> f64; }