Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 24 additions & 1 deletion benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,34 @@ fn some_links_without_scheme(c: &mut Criterion) {
});
}

/// Benchmarks link detection on inputs whose host starts with a bracketed IPv6 literal.
///
/// Registers two Criterion benchmarks on `c`:
/// * `ipv6_standard` — a well-formed literal with port and path; asserts one link is found.
/// * `ipv6_unclosed_bracket_fail_fast` — an opening bracket that is never closed, followed
///   by a long run of text; asserts no link is found.
fn ipv6_links(c: &mut Criterion) {
    // Well-formed IPv6 host with a port and a path.
    let well_formed = "http://[2001:db8::1]:8080/path";
    // The `[` is never closed. The trailing `\` joins both source lines into one string
    // without inserting a newline or the next line's leading whitespace.
    let unclosed = "http://[2001:db8::1_malformed_text_continues_on_and_on\
        notice how it just keeps going on but doesnt end because there hasnt been a closing bracket";

    let finder = LinkFinder::new();

    c.bench_function("ipv6_standard", |b| {
        b.iter(|| assert_eq!(finder.links(well_formed).count(), 1))
    });

    c.bench_function("ipv6_unclosed_bracket_fail_fast", |b| {
        b.iter(|| assert_eq!(finder.links(unclosed).count(), 0))
    });
}

// Collect the listed benchmark functions into the `benches` group and hand that group
// to Criterion's generated entry point. Each function appears exactly once and every
// entry is comma-separated.
criterion_group!(
    benches,
    no_links,
    some_links,
    heaps_of_links,
    some_links_without_scheme,
    ipv6_links
);
criterion_main!(benches);
83 changes: 75 additions & 8 deletions src/domains.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,65 @@

use std::char;

/// Returns `true` if `c` is an ASCII hex digit, `:` or `.`.
///
/// These are the characters accepted in the address part of a bracketed IPv6 literal;
/// `.` is included because an IPv4 address can be embedded inside an IPv6 address.
#[inline(always)]
fn is_ipv6_char(c: char) -> bool {
    c == ':' || c == '.' || c.is_ascii_hexdigit()
}

/// Returns `true` if `c` is an ASCII letter outside the hexadecimal range,
/// i.e. `g`–`z` or `G`–`Z`.
///
/// `find_ipv6_end` only accepts these letters after the `%` zone-id delimiter;
/// hex digits, `:` and `.` are classified by `is_ipv6_char` instead.
#[inline(always)]
fn is_zone_id_char(c: char) -> bool {
    // Folding ASCII case first lets a single range cover both letter cases;
    // non-ASCII characters are left untouched and therefore never match.
    matches!(c.to_ascii_lowercase(), 'g'..='z')
}

/// Scans the remainder of a bracketed IPv6 literal and returns where it ends.
///
/// `chars` must yield `(byte_offset, char)` pairs and be positioned on the first
/// character *after* the opening `[`. The iterator is advanced in place: on success it
/// is left just past the closing `]`, so the caller can keep reading from there.
///
/// Returns `Some(end)`, the byte offset one past the closing `]`, when the bracket
/// contents consist of:
/// * at least one address character (see `is_ipv6_char`), optionally followed by
/// * a single `%` and a non-empty zone id made of address characters and/or
///   `is_zone_id_char` letters.
///
/// Returns `None` as soon as an unexpected character is seen, when the input ends
/// before a `]`, when the address part is empty (`[]`, `[%eth0]`), when the zone id
/// is empty (`[fe80::1%]`), or when more than one `%` appears.
///
/// This is a lenient scan for link detection, not a full IPv6 syntax validator.
fn find_ipv6_end<I>(chars: &mut I) -> Option<usize>
where
    I: Iterator<Item = (usize, char)>,
{
    // At least one address character was seen before any `%`.
    let mut has_address = false;
    // The `%` zone-id delimiter has been consumed.
    let mut in_zone_id = false;
    // At least one character followed the `%`.
    let mut has_zone_id = false;

    for (inner_i, inner_c) in chars {
        match inner_c {
            ']' => {
                // A `%` on its own does not count as content: both the address and,
                // if a delimiter was present, the zone id must be non-empty.
                let complete = has_address && (!in_zone_id || has_zone_id);
                return if complete {
                    Some(inner_i + inner_c.len_utf8())
                } else {
                    None
                };
            }

            '%' => {
                // Reject a second delimiter, and a delimiter with no address before it.
                if in_zone_id || !has_address {
                    return None;
                }
                in_zone_id = true;
            }

            c if is_ipv6_char(c) => {
                // Hex digits, `:` and `.` are valid on either side of the `%`.
                if in_zone_id {
                    has_zone_id = true;
                } else {
                    has_address = true;
                }
            }

            c if is_zone_id_char(c) => {
                if !in_zone_id {
                    return None; // Non-hex letter found outside of the zone id
                }
                has_zone_id = true;
            }

            _ => return None, // Invalid character, abort
        }
    }

    // Ran out of input without finding the closing bracket.
    None
}

pub(crate) fn find_authority_end(
s: &str,
mut userinfo_allowed: bool,
require_host: bool,
port_allowed: bool,
iri_parsing_enabled: bool,
) -> (Option<usize>, Option<usize>) {
let mut end = Some(0);

let mut maybe_last_dot = None;
let mut last_dot = None;
let mut number_dots = 0;
Expand All @@ -44,8 +94,10 @@ pub(crate) fn find_authority_end(
let mut all_numeric = true;
let mut maybe_host = true;
let mut host_ended = false;
let mut end = Some(0);
let mut chars = s.char_indices();

for (i, c) in s.char_indices() {
while let Some((i, c)) = chars.next() {
let can_be_last = match c {
// ALPHA
'a'..='z' | 'A'..='Z' | '\u{80}'..=char::MAX => {
Expand Down Expand Up @@ -161,12 +213,27 @@ pub(crate) fn find_authority_end(
}
break;
}
_ => {
// Anything else, this might be the end of the authority (can be empty).
// Now let the rest of the code handle checking whether the end of the URL is
// valid.
Comment thread
andrewsuperlegit marked this conversation as resolved.
break;
'[' => {
if !maybe_host || host_ended {
break;
}
if let Some(bracket_end_index) = find_ipv6_end(&mut chars) {
all_numeric = false;
maybe_last_dot = None;
end = Some(bracket_end_index);
// The IPv6 helper finds the exact end index and consumes the closing ]
// We use continue below to satisfy the match statement's type requirements,
// and bypass the default `can_be_last` calculation for the opening [
// and immediately process whatever follows the address.
continue;
Comment thread
andrewsuperlegit marked this conversation as resolved.
} else {
break;
}
}
// Anything else, this might be the end of the authority (can be empty).
// Now let the rest of the code handle checking whether the end of the URL is
// valid.
_ => break,
};

if can_be_last {
Expand Down
20 changes: 19 additions & 1 deletion tests/common/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use linkify::LinkFinder;
use linkify::{LinkFinder, LinkKind};

pub fn assert_linked_with(finder: &LinkFinder, input: &str, expected: &str) {
let actual = show_links(input, finder);
Expand All @@ -19,3 +19,21 @@ pub fn show_links(input: &str, finder: &LinkFinder) -> String {
}
result
}

/// Asserts that `input` renders as `expected` when URLs are detected without
/// requiring a scheme.
///
/// Builds a fresh `LinkFinder`, restricts it to `LinkKind::Url`, turns off
/// `url_must_have_scheme`, and delegates the comparison to `assert_linked_with`.
pub fn assert_urls_without_protocol(input: &str, expected: &str) {
    let mut url_finder = LinkFinder::new();
    url_finder.kinds(&[LinkKind::Url]);
    url_finder.url_must_have_scheme(false);
    assert_linked_with(&url_finder, input, expected);
}

/// Asserts that `input` renders as `expected` using a `LinkFinder` left in its
/// default configuration.
///
/// Thin wrapper around `assert_linked_with` for tests that need no custom settings.
pub fn assert_linked(input: &str, expected: &str) {
    assert_linked_with(&LinkFinder::new(), input, expected);
}

/// Asserts that `s` comes back unchanged from the default link rendering.
///
/// Calls `assert_linked` with `s` as both the input and the expected output, so the
/// assertion fails if the rendered result differs from the input in any way.
pub fn assert_not_linked(s: &str) {
assert_linked(s, s);
}
Loading