Conversation
zhiltsov-max
commented
Feb 7, 2026
- Added performance test
- Added performance numbers in Readme
There was a problem hiding this comment.
Pull request overview
Adds a Criterion benchmark and publishes initial benchmark results in the README to document string_distance::levenshtein() performance.
Changes:
- Added a Criterion benchmark (`cargo bench`) to measure `levenshtein()` over multiple string lengths/charsets.
- Documented benchmark results and benchmarking instructions in `README.md`.
- Added benchmark-related dev dependencies and bench configuration in `Cargo.toml` (and updated `Cargo.lock`).
Reviewed changes
Copilot reviewed 4 out of 5 changed files in this pull request and generated 5 comments.
| File | Description |
|---|---|
| benches/my_benchmark.rs | New Criterion benchmark for levenshtein() over ASCII and UTF-8 inputs. |
| README.md | Adds a Performance section with reported timings and a Benchmarking section. |
| Cargo.toml | Adds criterion + rand dev-dependencies and declares the benchmark target. |
| Cargo.lock | Locks new dev-dependency transitive graph for benchmarking. |
💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.
| } | ||
|
|
||
| impl rand::distr::Distribution<char> for AlphanumericChar { | ||
| fn sample<R: rand::Rng + ?std::marker::Sized>(&self, rng: &mut R) -> char { |
There was a problem hiding this comment.
The generic bound `?std::marker::Sized` is invalid Rust syntax and will not compile. Use `?Sized` (or just omit it, since `R: Rng` is `Sized` by default) in the `sample` signature.
| fn sample<R: rand::Rng + ?std::marker::Sized>(&self, rng: &mut R) -> char { | |
| fn sample<R: rand::Rng>(&self, rng: &mut R) -> char { |
There was a problem hiding this comment.
Pull request overview
Copilot reviewed 4 out of 5 changed files in this pull request and generated 4 comments.
💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.
| use rand; | ||
| use std::hint::black_box; | ||
| use string_distance; |
There was a problem hiding this comment.
`use rand;` and `use string_distance;` are unused here (crate names are already in the extern prelude, and you reference them via `rand::...` / `string_distance::...`). This will trigger unused-import warnings when running `cargo bench`. Remove these imports or import the specific items you need (e.g., `rand::Rng`).
| use rand; | |
| use std::hint::black_box; | |
| use string_distance; | |
| use std::hint::black_box; |
| let mut rng = rand::rng(); | ||
|
|
||
| for length in lengths { | ||
| c.bench_function( | ||
| &format!( | ||
| "levenshtein for {charset} strings of length {length}", | ||
| charset = "ASCII", | ||
| length = length | ||
| ), | ||
| |b| { | ||
| b.iter_batched( | ||
| || { | ||
| return ( | ||
| generate_random_string(length, &mut rng, &ascii_chars), | ||
| generate_random_string(length, &mut rng, &ascii_chars), | ||
| ); | ||
| }, | ||
| |v| string_distance::levenshtein(black_box(&v.0), black_box(&v.1)), | ||
| BatchSize::SmallInput, | ||
| ) | ||
| }, | ||
| ); | ||
| } | ||
|
|
||
| for length in lengths { | ||
| c.bench_function( | ||
| &format!( | ||
| "levenshtein for {charset} strings of length {length}", | ||
| charset = "UTF8", | ||
| length = length | ||
| ), | ||
| |b| { | ||
| b.iter_batched( | ||
| || { | ||
| return ( | ||
| generate_random_string(length, &mut rng, &utf8_chars), | ||
| generate_random_string(length, &mut rng, &utf8_chars), | ||
| ); | ||
| }, | ||
| |v| string_distance::levenshtein(black_box(&v.0), black_box(&v.1)), | ||
| BatchSize::SmallInput, | ||
| ) |
There was a problem hiding this comment.
This uses `iter_batched` with random string generation in the setup closure; Criterion measures the whole loop, so the reported time includes string generation/allocation as well as `levenshtein()`. If the goal is to benchmark the distance algorithm, pre-generate inputs per length (or use a pool and clone/select) and benchmark only the `levenshtein` call.
| let mut rng = rand::rng(); | |
| for length in lengths { | |
| c.bench_function( | |
| &format!( | |
| "levenshtein for {charset} strings of length {length}", | |
| charset = "ASCII", | |
| length = length | |
| ), | |
| |b| { | |
| b.iter_batched( | |
| || { | |
| return ( | |
| generate_random_string(length, &mut rng, &ascii_chars), | |
| generate_random_string(length, &mut rng, &ascii_chars), | |
| ); | |
| }, | |
| |v| string_distance::levenshtein(black_box(&v.0), black_box(&v.1)), | |
| BatchSize::SmallInput, | |
| ) | |
| }, | |
| ); | |
| } | |
| for length in lengths { | |
| c.bench_function( | |
| &format!( | |
| "levenshtein for {charset} strings of length {length}", | |
| charset = "UTF8", | |
| length = length | |
| ), | |
| |b| { | |
| b.iter_batched( | |
| || { | |
| return ( | |
| generate_random_string(length, &mut rng, &utf8_chars), | |
| generate_random_string(length, &mut rng, &utf8_chars), | |
| ); | |
| }, | |
| |v| string_distance::levenshtein(black_box(&v.0), black_box(&v.1)), | |
| BatchSize::SmallInput, | |
| ) | |
| // Benchmark ASCII inputs | |
| for length in lengths { | |
| // Pre-generate a pool of input pairs so the benchmark measures only levenshtein() | |
| let mut rng = rand::rng(); | |
| let sample_size = 1024usize; | |
| let mut samples: Vec<(String, String)> = Vec::with_capacity(sample_size); | |
| for _ in 0..sample_size { | |
| samples.push(( | |
| generate_random_string(length, &mut rng, &ascii_chars), | |
| generate_random_string(length, &mut rng, &ascii_chars), | |
| )); | |
| } | |
| c.bench_function( | |
| &format!( | |
| "levenshtein for {charset} strings of length {length}", | |
| charset = "ASCII", | |
| length = length | |
| ), | |
| move |b| { | |
| let mut idx = 0usize; | |
| b.iter(|| { | |
| let pair = &samples[idx]; | |
| idx += 1; | |
| if idx == samples.len() { | |
| idx = 0; | |
| } | |
| string_distance::levenshtein(black_box(&pair.0), black_box(&pair.1)) | |
| }) | |
| }, | |
| ); | |
| } | |
| // Benchmark UTF-8 inputs | |
| for length in lengths { | |
| // Pre-generate a pool of input pairs so the benchmark measures only levenshtein() | |
| let mut rng = rand::rng(); | |
| let sample_size = 1024usize; | |
| let mut samples: Vec<(String, String)> = Vec::with_capacity(sample_size); | |
| for _ in 0..sample_size { | |
| samples.push(( | |
| generate_random_string(length, &mut rng, &utf8_chars), | |
| generate_random_string(length, &mut rng, &utf8_chars), | |
| )); | |
| } | |
| c.bench_function( | |
| &format!( | |
| "levenshtein for {charset} strings of length {length}", | |
| charset = "UTF8", | |
| length = length | |
| ), | |
| move |b| { | |
| let mut idx = 0usize; | |
| b.iter(|| { | |
| let pair = &samples[idx]; | |
| idx += 1; | |
| if idx == samples.len() { | |
| idx = 0; | |
| } | |
| string_distance::levenshtein(black_box(&pair.0), black_box(&pair.1)) | |
| }) |
| for length in lengths { | ||
| c.bench_function( | ||
| &format!( | ||
| "levenshtein for {charset} strings of length {length}", |
There was a problem hiding this comment.
For the "UTF8" benchmarks, `length` is the number of `char` scalar values generated, but the library’s UTF path operates on Unicode grapheme clusters (via `unicode-segmentation`). That means the label "strings of length {length}" may not reflect the actual units processed. Either constrain the generated characters to single-codepoint graphemes or adjust the label/documentation to clarify what "length" means.
| "levenshtein for {charset} strings of length {length}", | |
| "levenshtein for {charset} strings with {length} Unicode scalar values", |
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>