Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "python-daachorse"
version = "0.2.2"
version = "0.3.0"
edition = "2021"
authors = [
"Koichi Akabe <vbkaisetsu@gmail.com>",
Expand All @@ -16,7 +16,7 @@ name = "daachorse"
crate-type = ["cdylib"]

[dependencies]
daachorse = "2.1.1" # MIT or Apache-2.0
daachorse = "3.0.0" # MIT or Apache-2.0
pyo3 = { version = "0.28.3", features = ["extension-module"] } # Apache-2.0

[profile.release]
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "maturin"

[project]
name = "daachorse"
version = "0.2.2"
version = "0.3.0"
description = "🐎 A fast implementation of the Aho-Corasick algorithm using the compact double-array data structure"
readme = "README.md"
classifiers = [
Expand Down
78 changes: 62 additions & 16 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,18 @@ impl DoubleArrayAhoCorasick {

/// Returns a list of non-overlapping matches in the given haystack.
///
/// When multiple matches are possible, the behavior depends on the ``match_kind`` option
/// specified at construction, as follows.
///
/// * If you set ``MATCH_KIND_STANDARD`` (default), the automaton searches from the beginning of
/// the input string, yielding a value immediately when a pattern is found.
/// * If you set ``MATCH_KIND_LEFTMOST_LONGEST``, the automaton reports matches corresponding to
/// the longest pattern.
/// * If you set ``MATCH_KIND_LEFTMOST_FIRST``, the automaton reports matches corresponding to
/// the pattern that was registered to the automaton earliest.
///
/// The next search resumes from the end of the previously found pattern.
///
/// Example 1: Standard semantics
/// >>> import daachorse
/// >>> patterns = [b'bcd', b'ab', b'a']
Expand All @@ -55,14 +67,20 @@ impl DoubleArrayAhoCorasick {
/// Example 2: Leftmost longest semantics
/// >>> import daachorse
/// >>> patterns = [b'ab', b'a', b'abcd']
/// >>> pma = daachorse.DoubleArrayAhoCorasick(patterns, daachorse.MATCH_KIND_LEFTMOST_LONGEST)
/// >>> pma = daachorse.DoubleArrayAhoCorasick(
/// ... patterns,
/// ... daachorse.MATCH_KIND_LEFTMOST_LONGEST
/// ... )
/// >>> pma.find(b'abcd')
/// [(0, 4, 2)]
///
/// Example 3: Leftmost first semantics
/// >>> import daachorse
/// >>> patterns = [b'ab', b'a', b'abcd']
/// >>> pma = daachorse.DoubleArrayAhoCorasick(patterns, daachorse.MATCH_KIND_LEFTMOST_FIRST)
/// >>> pma = daachorse.DoubleArrayAhoCorasick(
/// ... patterns,
/// ... daachorse.MATCH_KIND_LEFTMOST_FIRST
/// ... )
/// >>> pma.find(b'abcd')
/// [(0, 2, 0)]
///
Expand Down Expand Up @@ -90,6 +108,13 @@ impl DoubleArrayAhoCorasick {

/// Returns a list of overlapping matches in the given haystack.
///
/// The automaton follows the standard behavior of the Aho-Corasick algorithm. It searches from
/// the beginning of the input string, and upon reaching a given position, it yields the
/// patterns ending at that position in descending order of length.
///
/// If the pattern set contains duplicate patterns, they are yielded in the order they were
/// registered.
///
/// Examples:
/// >>> import daachorse
/// >>> patterns = [b'bcd', b'ab', b'a']
Expand All @@ -116,13 +141,11 @@ impl DoubleArrayAhoCorasick {
}))
}

/// Returns a list of overlapping matches without suffixes in the given haystack iterator.
///
/// The Aho-Corasick algorithm reads through the haystack from left to right and reports
/// matches when it reaches the end of each pattern. In the overlapping match, more than one
/// pattern can be returned per report.
/// Returns a list of overlapping matches without suffixes in the given haystack.
///
/// This function returns the first match on each report.
/// The behavior of the automaton is similar to ``find_overlapping()``, except that upon
/// reaching a given position, it yields only the single longest pattern ending at that
/// position.
///
/// Examples:
/// >>> import daachorse
Expand Down Expand Up @@ -252,6 +275,18 @@ impl CharwiseDoubleArrayAhoCorasick {

/// Returns a list of non-overlapping matches in the given haystack.
///
/// When multiple matches are possible, the behavior depends on the ``match_kind`` option
/// specified at construction, as follows.
///
/// * If you set ``MATCH_KIND_STANDARD`` (default), the automaton searches from the beginning of
/// the input string, yielding a value immediately when a pattern is found.
/// * If you set ``MATCH_KIND_LEFTMOST_LONGEST``, the automaton reports matches corresponding to
/// the longest pattern.
/// * If you set ``MATCH_KIND_LEFTMOST_FIRST``, the automaton reports matches corresponding to
/// the pattern that was registered to the automaton earliest.
///
/// The next search resumes from the end of the previously found pattern.
///
/// Example 1: Standard semantics
/// >>> import daachorse
/// >>> patterns = ['bcd', 'ab', 'a']
Expand All @@ -262,14 +297,20 @@ impl CharwiseDoubleArrayAhoCorasick {
/// Example 2: Leftmost longest semantics
/// >>> import daachorse
/// >>> patterns = ['ab', 'a', 'abcd']
/// >>> pma = daachorse.CharwiseDoubleArrayAhoCorasick(patterns, daachorse.MATCH_KIND_LEFTMOST_LONGEST)
/// >>> pma = daachorse.CharwiseDoubleArrayAhoCorasick(
/// ... patterns,
/// ... daachorse.MATCH_KIND_LEFTMOST_LONGEST
/// ... )
/// >>> pma.find('abcd')
/// [(0, 4, 2)]
///
/// Example 3: Leftmost first semantics
/// >>> import daachorse
/// >>> patterns = ['ab', 'a', 'abcd']
/// >>> pma = daachorse.CharwiseDoubleArrayAhoCorasick(patterns, daachorse.MATCH_KIND_LEFTMOST_FIRST)
/// >>> pma = daachorse.CharwiseDoubleArrayAhoCorasick(
/// ... patterns,
/// ... daachorse.MATCH_KIND_LEFTMOST_FIRST
/// ... )
/// >>> pma.find('abcd')
/// [(0, 2, 0)]
///
Expand Down Expand Up @@ -322,6 +363,13 @@ impl CharwiseDoubleArrayAhoCorasick {

/// Returns a list of overlapping matches in the given haystack.
///
/// The automaton follows the standard behavior of the Aho-Corasick algorithm. It searches from
/// the beginning of the input string, and upon reaching a given position, it yields the
/// patterns ending at that position in descending order of length.
///
/// If the pattern set contains duplicate patterns, they are yielded in the order they were
/// registered.
///
/// Examples:
/// >>> import daachorse
/// >>> patterns = ['bcd', 'ab', 'a']
Expand Down Expand Up @@ -364,13 +412,11 @@ impl CharwiseDoubleArrayAhoCorasick {
}))
}

/// Returns a list of overlapping matches without suffixes in the given haystack iterator.
///
/// The Aho-Corasick algorithm reads through the haystack from left to right and reports
/// matches when it reaches the end of each pattern. In the overlapping match, more than one
/// pattern can be returned per report.
/// Returns a list of overlapping matches without suffixes in the given haystack.
///
/// This function returns the first match on each report.
/// The behavior of the automaton is similar to ``find_overlapping()``, except that upon
/// reaching a given position, it yields only the single longest pattern ending at that
/// position.
///
/// Examples:
/// >>> import daachorse
Expand Down
Loading