@@ -146,22 +146,6 @@ a wide range of regex features.
146146
147147=over 4
148148
149- =item * Lookaheads and lookbehinds are not supported
150-
151- =item * Named groups are not supported
152-
153- =item * Possessive quantifiers (C<*+ > , C<++ > ) are not supported
154-
155- =item * Unicode properties (C<\p{} > ) are not supported
156-
157- =item * Some complex nested patterns may not work correctly
158-
159- =back
160-
161- =head1 LIMITATIONS
162-
163- =over 4
164-
165149=item * Lookaheads and lookbehinds ((?=...), (?!...)) are not supported
166150
167151=item * Named groups ((?<name>...)) are not supported
@@ -413,9 +397,11 @@ Finally, it calculates a rough "complexity" classification based on pattern leng
413397=cut
414398
415399sub pattern_info {
416- my ( $self ) = @_ ;
400+ my $self = $_ [0] ;
417401
418- my $pattern = $self -> {regex_str };
402+ return $self -> {' _pattern_info_cache' } if $self -> {' _pattern_info_cache' };
403+
404+ my $pattern = $self -> {' regex_str' };
419405
420406 # Calculate approximate min/max lengths
421407 my ($min_len , $max_len ) = $self -> _estimate_length($pattern );
@@ -431,14 +417,18 @@ sub pattern_info {
431417 has_possessive => ($pattern =~ / (?:[+*?]\+ |\{\d +(?:,\d *)?\}\+ )/ ? 1 : 0),
432418 );
433419
434- return {
420+ my $info = {
435421 pattern => $pattern ,
436422 min_length => $min_len ,
437423 max_length => $max_len ,
438424 estimated_length => int (($min_len + $max_len ) / 2),
439425 features => \%features ,
440426 complexity => $self -> _calculate_complexity(\%features , $pattern ),
441427 };
428+
429+ $self -> {' _pattern_info_cache' } = $info ;
430+
431+ return $info ;
442432}
443433
444434sub _estimate_length {
@@ -453,7 +443,7 @@ sub _estimate_length {
453443 my $max = 0;
454444
455445 # Simple heuristic - count fixed characters and quantifiers
456- my $last_was_atom = 0;
446+ my $last_was_atom = 0; # Handle cases like \d{3} where the quantifier modifies the atom count
457447 while ($pattern =~ / ([^+*?{}\[\]\\ ])|\\ [dwsWDN]|\[ ([^\] ]+)\] |\{ (\d +)(?:,(\d +))?\} /g ) {
458448 if (defined $1 || (defined $2 && $2 )) {
459449 $min ++;
@@ -581,11 +571,11 @@ sub _parse_sequence {
581571 my ($generated , $new_i ) = $self -> _handle_quantifier($pattern , $i , sub {
582572 my @chars = (' a' ..' z' , ' A' ..' Z' , ' 0' ..' 9' , ' _' );
583573 $chars [int (rand (@chars ))];
584- });
574+ }, 1 );
585575 $result .= $generated ;
586576 $i = $new_i ;
587577 } elsif ($next eq ' s' ) {
588- my ($generated , $new_i ) = $self -> _handle_quantifier($pattern , $i , sub { ' ' });
578+ my ($generated , $new_i ) = $self -> _handle_quantifier($pattern , $i , sub { ' ' }, 1 );
589579 $result .= $generated ;
590580 $i = $new_i ;
591581 } elsif ($next eq ' D' ) {
@@ -756,7 +746,7 @@ sub _handle_quantifier {
756746
757747 if ($next eq ' {' ) {
758748 my $end = index ($pattern , ' }' , $pos + 2);
759- croak " Unmatched '}' " if ($end == -1);
749+ croak " Unmatched '{' at position $pos in pattern: $pattern " if ($end == -1);
760750 my $quant = substr ($pattern , $pos + 2, $end - $pos - 2);
761751
762752 # Check for possessive after }
@@ -949,7 +939,7 @@ sub _unicode_property_chars {
949939 return (' 0' .. ' 9' );
950940 } elsif ($prop eq ' Lu' || $prop eq ' Uppercase_Letter' ) {
951941 # Uppercase letters, skip × which is not a letter
952- return (' A' .. ' Z' , map { chr ($_ ) } (ord (' À' ) .. ord (' Ö' ), ord (' ø ' ) .. ord (' Þ' )));
942+ return (' A' .. ' Z' , map { chr ($_ ) } (ord (' À' ) .. ord (' Ö' ), ord (' Ø ' ) .. ord (' Þ' )));
953943 } elsif ($prop eq ' Ll' || $prop eq ' Lowercase_Letter' ) {
954944 # Lowercase letters, skip ÷ which is not a letter
955945 return (' a' .. ' z' , map { chr ($_ ) } (ord (' à' ) .. ord (' ö' ), ord (' ø' ) .. ord (' ÿ' )));
0 commit comments