Skip to content

Commit a69b8b8

Browse files
committed
Added some utility methods
1 parent 7e74657 commit a69b8b8

File tree

5 files changed

+1179
-3
lines changed

5 files changed

+1179
-3
lines changed

Changes

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
Revision history for Data-Random-String-Matches
22

3+
0.03
4+
Added some utility methods
5+
Added the test dashboard
6+
37
0.02 Tue Oct 28 20:16:01 EDT 2025
48
Added more obscure regex features
59
Fixed Unicode characters

MANIFEST

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,4 @@ t/generate.t
2121
t/pod-cm.t
2222
t/pod-synopsis.t
2323
t/pod.t
24+
t/utility_methods.t

lib/Data/Random/String/Matches.pm

Lines changed: 150 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,146 @@ sub generate_many {
358358
return @results;
359359
}
360360

361+
=head2 set_seed($seed)
362+
363+
Sets the random seed for reproducible generation
364+
365+
=cut
366+
367+
sub set_seed {
368+
my ($self, $seed) = @_;
369+
370+
croak 'Seed must be defined' unless defined $seed;
371+
372+
srand($seed);
373+
$self->{seed} = $seed;
374+
375+
return $self;
376+
}
377+
378+
=head2 validate($string)
379+
380+
Checks if a string matches the pattern without generating.
381+
382+
if ($gen->validate('1234')) {
383+
print "Valid!\n";
384+
}
385+
386+
=cut
387+
388+
sub validate {
389+
my ($self, $string) = @_;
390+
391+
croak 'String must be defined' unless defined $string;
392+
393+
my $regex = $self->{regex};
394+
return $string =~ /^$regex$/;
395+
}
396+
397+
=head2 pattern_info()
398+
399+
Returns detailed information about the pattern.
400+
401+
my $info = $gen->pattern_info();
402+
print "Complexity: $info->{complexity}\n";
403+
print "Min length: $info->{min_length}\n";
404+
print "Has Unicode: ", $info->{features}{has_unicode} ? "Yes" : "No", "\n";
405+
406+
=cut
407+
408+
sub pattern_info {
409+
my ($self) = @_;
410+
411+
my $pattern = $self->{regex_str};
412+
413+
# Calculate approximate min/max lengths
414+
my ($min_len, $max_len) = $self->_estimate_length($pattern);
415+
416+
# Detect pattern features
417+
my %features = (
418+
has_alternation => ($pattern =~ /\|/ ? 1 : 0),
419+
has_backreferences => ($pattern =~ /(\\[1-9]|\\k<)/ ? 1 : 0),
420+
has_unicode => ($pattern =~ /\\p\{/ ? 1 : 0),
421+
has_lookahead => ($pattern =~ /\(\?[=!]/ ? 1 : 0),
422+
has_lookbehind => ($pattern =~ /\(\?<[=!]/ ? 1 : 0),
423+
has_named_groups => ($pattern =~ /\(\?</ ? 1 : 0),
424+
has_possessive => ($pattern =~ /(?:[+*?]\+|\{\d+(?:,\d*)?\}\+)/ ? 1 : 0),
425+
);
426+
427+
return {
428+
pattern => $pattern,
429+
min_length => $min_len,
430+
max_length => $max_len,
431+
estimated_length => int(($min_len + $max_len) / 2),
432+
features => \%features,
433+
complexity => $self->_calculate_complexity(\%features, $pattern),
434+
};
435+
}
436+
437+
# FIXME: min_length can be one too small
438+
sub _estimate_length {
439+
my ($self, $pattern) = @_;
440+
441+
# Remove anchors and modifiers
442+
$pattern =~ s/^\(\?\^?[iumsx-]*:(.*)\)$/$1/;
443+
$pattern =~ s/^\^//;
444+
$pattern =~ s/\$//;
445+
446+
my $min = 0;
447+
my $max = 0;
448+
449+
# Simple heuristic - count fixed characters and quantifiers
450+
while ($pattern =~ /([^+*?{}\[\]\\])|\\[dwsWDN]|\[([^\]]+)\]|\{(\d+)(?:,(\d+))?\}/g) {
451+
if (defined $1 || (defined $2 && $2)) {
452+
# Fixed character or character class
453+
$min++;
454+
$max++;
455+
} elsif (defined $3) {
456+
# Quantifier {n} or {n,m}
457+
$min += $3;
458+
$max += defined $4 ? $4 : $3;
459+
}
460+
}
461+
462+
# Account for +, *, ?
463+
my $plus_count = () = $pattern =~ /\+/g;
464+
my $star_count = () = $pattern =~ /\*/g;
465+
my $question_count = () = $pattern =~ /\?/g;
466+
467+
$min += $plus_count; # + means at least 1
468+
$max += ($plus_count * 5) + ($star_count * 5); # Assume max 5 repetitions
469+
$min -= $question_count; # ? makes things optional
470+
471+
$min = 0 if $min < 0;
472+
$max = $min + 50 if $max < $min; # Ensure max >= min
473+
474+
return ($min, $max);
475+
}
476+
477+
sub _calculate_complexity {
478+
my ($self, $features, $pattern) = @_;
479+
480+
my $score = 0;
481+
482+
# Base complexity from pattern length
483+
$score += length($pattern) / 10;
484+
485+
# Add complexity for features
486+
$score += 2 if $features->{has_alternation};
487+
$score += 3 if $features->{has_backreferences};
488+
$score += 2 if $features->{has_unicode};
489+
$score += 2 if $features->{has_lookahead};
490+
$score += 2 if $features->{has_lookbehind};
491+
$score += 1 if $features->{has_named_groups};
492+
$score += 1 if $features->{has_possessive};
493+
494+
# Classify
495+
return 'simple' if $score < 3;
496+
return 'moderate' if $score < 7;
497+
return 'complex' if $score < 12;
498+
return 'very_complex';
499+
}
500+
361501
sub _build_from_pattern {
362502
my ($self, $pattern) = @_;
363503

@@ -840,10 +980,17 @@ sub create_random_string
840980
841981
Nigel Horne, C<< <njh at nigelhorne.com> >>
842982
843-
=head1 LICENSE
983+
=head1 SEE ALSO
984+
985+
=over 4
986+
987+
=item * Test coverage report: L<https://nigelhorne.github.io/Data-Random-String-Matches/coverage/>
988+
989+
=item * L<String::Random>
990+
991+
=item * L<Regexp::Genex>
844992
845-
This is free software; you can redistribute it and/or modify it under
846-
the same terms as Perl itself.
993+
=end
847994
848995
=head1 LICENCE AND COPYRIGHT
849996

0 commit comments

Comments
 (0)