Skip to content

Commit 2def38c

Browse files
committed
Fixed off by one bug in estimate length
1 parent 65e76e5 commit 2def38c

File tree

2 files changed

+61
-56
lines changed

2 files changed

+61
-56
lines changed

lib/Data/Random/String/Matches.pm

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -423,7 +423,7 @@ sub pattern_info {
423423
has_named_groups => ($pattern =~ /\(\?</ ? 1 : 0),
424424
has_possessive => ($pattern =~ /(?:[+*?]\+|\{\d+(?:,\d*)?\}\+)/ ? 1 : 0),
425425
);
426-
426+
427427
return {
428428
pattern => $pattern,
429429
min_length => $min_len,
@@ -434,7 +434,6 @@ sub pattern_info {
434434
};
435435
}
436436

437-
# FIXME: min_length can be one too small
438437
sub _estimate_length {
439438
my ($self, $pattern) = @_;
440439

@@ -447,15 +446,23 @@ sub _estimate_length {
447446
my $max = 0;
448447

449448
# Simple heuristic - count fixed characters and quantifiers
449+
my $last_was_atom = 0;
450450
while ($pattern =~ /([^+*?{}\[\]\\])|\\[dwsWDN]|\[([^\]]+)\]|\{(\d+)(?:,(\d+))?\}/g) {
451451
if (defined $1 || (defined $2 && $2)) {
452-
# Fixed character or character class
453452
$min++;
454453
$max++;
454+
$last_was_atom = 1;
455455
} elsif (defined $3) {
456-
# Quantifier {n} or {n,m}
457-
$min += $3;
458-
$max += defined $4 ? $4 : $3;
456+
if ($last_was_atom) {
457+
# Replace the last atom’s contribution
458+
$min += $3 - 1;
459+
$max += (defined $4 ? $4 : $3) - 1;
460+
$last_was_atom = 0;
461+
} else {
462+
# No preceding atom? assume standalone
463+
$min += $3;
464+
$max += defined $4 ? $4 : $3;
465+
}
459466
}
460467
}
461468

@@ -990,7 +997,7 @@ Nigel Horne, C<< <njh at nigelhorne.com> >>
990997
991998
=item * L<Regexp::Genex>
992999
993-
=end
1000+
=back
9941001
9951002
=head1 LICENCE AND COPYRIGHT
9961003

t/utility_methods.t

Lines changed: 47 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -14,58 +14,58 @@ BEGIN {
1414

1515
subtest 'set_seed - basic functionality' => sub {
1616
my $gen = Data::Random::String::Matches->new(qr/\d{10}/);
17-
17+
1818
# Set seed and generate
1919
$gen->set_seed(12345);
2020
my $str1 = $gen->generate();
21-
21+
2222
# Reset seed and generate again
2323
$gen->set_seed(12345);
2424
my $str2 = $gen->generate();
25-
25+
2626
is($str1, $str2, 'Same seed produces same result');
2727
like($str1, qr/^\d{10}$/, 'Generated string matches pattern');
2828
};
2929

3030
subtest 'set_seed - returns self for chaining' => sub {
3131
my $gen = Data::Random::String::Matches->new(qr/\d{4}/);
3232
my $result = $gen->set_seed(999);
33-
33+
3434
is($result, $gen, 'Returns self for method chaining');
35-
35+
3636
# Test chaining
3737
my $str = $gen->set_seed(999)->generate();
3838
like($str, qr/^\d{4}$/, 'Chaining works');
3939
};
4040

4141
subtest 'set_seed - different seeds produce different results' => sub {
4242
my $gen = Data::Random::String::Matches->new(qr/[A-Z]{10}/);
43-
43+
4444
$gen->set_seed(111);
4545
my $str1 = $gen->generate();
46-
46+
4747
$gen->set_seed(222);
4848
my $str2 = $gen->generate();
49-
49+
5050
isnt($str1, $str2, 'Different seeds produce different results');
5151
};
5252

5353
subtest 'set_seed - error handling' => sub {
5454
my $gen = Data::Random::String::Matches->new(qr/\d{4}/);
55-
55+
5656
eval { $gen->set_seed() };
5757
like($@, qr/Seed must be defined/, 'Dies without seed');
58-
58+
5959
eval { $gen->set_seed(undef) };
6060
like($@, qr/Seed must be defined/, 'Dies with undef seed');
6161
};
6262

6363
subtest 'set_seed - various seed types' => sub {
6464
my $gen = Data::Random::String::Matches->new(qr/\d{5}/);
65-
65+
6666
# Numeric seed
6767
ok($gen->set_seed(12345), 'Accepts numeric seed');
68-
68+
6969
# Zero seed
7070
ok($gen->set_seed(0), 'Accepts zero as seed');
7171
};
@@ -76,15 +76,15 @@ subtest 'set_seed - various seed types' => sub {
7676

7777
subtest 'validate - matching strings' => sub {
7878
my $gen = Data::Random::String::Matches->new(qr/\d{4}/);
79-
79+
8080
ok($gen->validate('1234'), 'Valid 4-digit string');
8181
ok($gen->validate('0000'), 'Valid with zeros');
8282
ok($gen->validate('9999'), 'Valid with nines');
8383
};
8484

8585
subtest 'validate - non-matching strings' => sub {
8686
my $gen = Data::Random::String::Matches->new(qr/\d{4}/);
87-
87+
8888
ok(!$gen->validate('123'), 'Too short');
8989
ok(!$gen->validate('12345'), 'Too long');
9090
ok(!$gen->validate('abcd'), 'Wrong characters');
@@ -93,7 +93,7 @@ subtest 'validate - non-matching strings' => sub {
9393

9494
subtest 'validate - complex patterns' => sub {
9595
my $gen = Data::Random::String::Matches->new(qr/[A-Z]{3}-\d{4}/);
96-
96+
9797
ok($gen->validate('ABC-1234'), 'Valid complex pattern');
9898
ok(!$gen->validate('ABC1234'), 'Missing dash');
9999
ok(!$gen->validate('abc-1234'), 'Wrong case');
@@ -102,7 +102,7 @@ subtest 'validate - complex patterns' => sub {
102102

103103
subtest 'validate - with alternation' => sub {
104104
my $gen = Data::Random::String::Matches->new(qr/(cat|dog|bird)/);
105-
105+
106106
ok($gen->validate('cat'), 'First alternative');
107107
ok($gen->validate('dog'), 'Second alternative');
108108
ok($gen->validate('bird'), 'Third alternative');
@@ -111,7 +111,7 @@ subtest 'validate - with alternation' => sub {
111111

112112
subtest 'validate - with backreferences' => sub {
113113
my $gen = Data::Random::String::Matches->new(qr/(\w{3})-\1/);
114-
114+
115115
ok($gen->validate('abc-abc'), 'Valid backreference');
116116
ok($gen->validate('XYZ-XYZ'), 'Valid uppercase backreference');
117117
ok(!$gen->validate('abc-xyz'), 'Backreference mismatch');
@@ -120,17 +120,17 @@ subtest 'validate - with backreferences' => sub {
120120

121121
subtest 'validate - error handling' => sub {
122122
my $gen = Data::Random::String::Matches->new(qr/\d{4}/);
123-
123+
124124
eval { $gen->validate() };
125125
like($@, qr/String must be defined/, 'Dies without string');
126-
126+
127127
eval { $gen->validate(undef) };
128128
like($@, qr/String must be defined/, 'Dies with undef');
129129
};
130130

131131
subtest 'validate - special cases' => sub {
132132
my $gen = Data::Random::String::Matches->new(qr/\d{4}/);
133-
133+
134134
ok($gen->validate('0000'), 'All zeros valid');
135135
ok(!$gen->validate(''), 'Empty string invalid');
136136
ok(!$gen->validate(' 1234'), 'Leading space invalid');
@@ -139,7 +139,7 @@ subtest 'validate - special cases' => sub {
139139

140140
subtest 'validate - generated strings always validate' => sub {
141141
my $gen = Data::Random::String::Matches->new(qr/[A-Z0-9]{8}/);
142-
142+
143143
for (1..10) {
144144
my $str = $gen->generate();
145145
ok($gen->validate($str), "Generated string validates: $str");
@@ -153,10 +153,10 @@ subtest 'validate - generated strings always validate' => sub {
153153
subtest 'pattern_info - basic structure' => sub {
154154
my $gen = Data::Random::String::Matches->new(qr/\d{4}/);
155155
my $info = $gen->pattern_info();
156-
156+
157157
ok(defined $info, 'Returns defined value');
158158
is(ref($info), 'HASH', 'Returns hashref');
159-
159+
160160
# Check required keys
161161
ok(exists $info->{pattern}, 'Has pattern key');
162162
ok(exists $info->{min_length}, 'Has min_length key');
@@ -169,7 +169,7 @@ subtest 'pattern_info - basic structure' => sub {
169169
subtest 'pattern_info - simple pattern' => sub {
170170
my $gen = Data::Random::String::Matches->new(qr/\d{4}/);
171171
my $info = $gen->pattern_info();
172-
172+
173173
is($info->{pattern}, '(?^:\d{4})', 'Pattern stored correctly');
174174
cmp_ok($info->{min_length}, '>=', 4, 'Min length reasonable');
175175
ok($info->{max_length} >= 4, 'Max length reasonable');
@@ -181,22 +181,22 @@ subtest 'pattern_info - features detection' => sub {
181181
my $gen1 = Data::Random::String::Matches->new(qr/(cat|dog)/);
182182
my $info1 = $gen1->pattern_info();
183183
ok($info1->{features}{has_alternation}, 'Detects alternation');
184-
184+
185185
# Backreferences
186186
my $gen2 = Data::Random::String::Matches->new(qr/(\w{3})-\1/);
187187
my $info2 = $gen2->pattern_info();
188188
ok($info2->{features}{has_backreferences}, 'Detects backreferences');
189-
189+
190190
# Unicode
191191
my $gen3 = Data::Random::String::Matches->new(qr/\p{L}{5}/);
192192
my $info3 = $gen3->pattern_info();
193193
ok($info3->{features}{has_unicode}, 'Detects Unicode properties');
194-
194+
195195
# Named groups
196196
my $gen4 = Data::Random::String::Matches->new(qr/(?<id>\d{3})/);
197197
my $info4 = $gen4->pattern_info();
198198
ok($info4->{features}{has_named_groups}, 'Detects named groups');
199-
199+
200200
# Possessive
201201
my $gen5 = Data::Random::String::Matches->new(qr/\d++/);
202202
my $info5 = $gen5->pattern_info();
@@ -209,7 +209,7 @@ subtest 'pattern_info - length estimation' => sub {
209209
my $info1 = $gen1->pattern_info();
210210
ok($info1->{min_length} <= 5, 'Min length <= 5');
211211
ok($info1->{max_length} >= 5, 'Max length >= 5');
212-
212+
213213
# Variable length
214214
my $gen2 = Data::Random::String::Matches->new(qr/\d{3,7}/);
215215
my $info2 = $gen2->pattern_info();
@@ -221,7 +221,7 @@ subtest 'pattern_info - complexity levels' => sub {
221221
# Simple
222222
my $gen1 = Data::Random::String::Matches->new(qr/\d{4}/);
223223
is($gen1->pattern_info()->{complexity}, 'simple', 'Simple pattern');
224-
224+
225225
# Complex (with multiple features)
226226
my $gen2 = Data::Random::String::Matches->new(qr/(?<id>\d{3})-(\w+)-\k<id>|[A-Z]{10}/);
227227
my $complexity = $gen2->pattern_info()->{complexity};
@@ -231,19 +231,17 @@ subtest 'pattern_info - complexity levels' => sub {
231231
subtest 'pattern_info - estimated length' => sub {
232232
my $gen = Data::Random::String::Matches->new(qr/\d{3,7}/);
233233
my $info = $gen->pattern_info();
234-
235-
ok($info->{estimated_length} >= $info->{min_length},
236-
'Estimated >= min');
237-
ok($info->{estimated_length} <= $info->{max_length},
238-
'Estimated <= max');
234+
235+
ok($info->{estimated_length} >= $info->{min_length}, 'Estimated >= min');
236+
ok($info->{estimated_length} <= $info->{max_length}, 'Estimated <= max');
239237
};
240238

241239
subtest 'pattern_info - features hash structure' => sub {
242240
my $gen = Data::Random::String::Matches->new(qr/\d{4}/);
243241
my $features = $gen->pattern_info()->{features};
244-
242+
245243
is(ref($features), 'HASH', 'Features is a hashref');
246-
244+
247245
# Check all expected feature keys exist
248246
ok(exists $features->{has_alternation}, 'has_alternation key');
249247
ok(exists $features->{has_backreferences}, 'has_backreferences key');
@@ -257,7 +255,7 @@ subtest 'pattern_info - features hash structure' => sub {
257255
subtest 'pattern_info - no features in simple pattern' => sub {
258256
my $gen = Data::Random::String::Matches->new(qr/[A-Z]{5}/);
259257
my $features = $gen->pattern_info()->{features};
260-
258+
261259
ok(!$features->{has_alternation}, 'No alternation');
262260
ok(!$features->{has_backreferences}, 'No backreferences');
263261
ok(!$features->{has_unicode}, 'No unicode');
@@ -270,19 +268,19 @@ subtest 'pattern_info - no features in simple pattern' => sub {
270268

271269
subtest 'Integration - set_seed with generate_many' => sub {
272270
my $gen = Data::Random::String::Matches->new(qr/\d{4}/);
273-
271+
274272
$gen->set_seed(42);
275273
my @batch1 = $gen->generate_many(5);
276-
274+
277275
$gen->set_seed(42);
278276
my @batch2 = $gen->generate_many(5);
279-
277+
280278
is_deeply(\@batch1, \@batch2, 'Seeded generate_many is reproducible');
281279
};
282280

283281
subtest 'Integration - validate with generate' => sub {
284282
my $gen = Data::Random::String::Matches->new(qr/[A-Z]{3}\d{4}/);
285-
283+
286284
for (1..20) {
287285
my $str = $gen->generate();
288286
ok($gen->validate($str), "Generated string $str validates");
@@ -292,35 +290,35 @@ subtest 'Integration - validate with generate' => sub {
292290
subtest 'Integration - pattern_info accuracy check' => sub {
293291
my $gen = Data::Random::String::Matches->new(qr/[A-Z]{3}\d{4}/);
294292
my $info = $gen->pattern_info();
295-
293+
296294
# Generate several strings and check lengths
297295
for (1..10) {
298296
my $str = $gen->generate();
299297
my $len = length($str);
300-
298+
301299
ok($len >= $info->{min_length}, "Length $len >= min $info->{min_length}");
302300
ok($len <= $info->{max_length}, "Length $len <= max $info->{max_length}");
303301
}
304302
};
305303

306304
subtest 'Integration - all methods together' => sub {
307305
my $gen = Data::Random::String::Matches->new(qr/\d{4}/);
308-
306+
309307
# Get info
310308
my $info = $gen->pattern_info();
311309
ok($info->{complexity}, 'Got pattern info');
312-
310+
313311
# Set seed for reproducibility
314312
$gen->set_seed(123);
315-
313+
316314
# Generate and validate
317315
my $str = $gen->generate();
318316
ok($gen->validate($str), 'Generated and validated');
319-
317+
320318
# Generate many
321319
my @many = $gen->generate_many(5);
322320
is(scalar @many, 5, 'Generated many');
323-
321+
324322
# Validate all
325323
for my $s (@many) {
326324
ok($gen->validate($s), "Batch string validates: $s");

0 commit comments

Comments
 (0)