Skip to content

Commit 50811e2

Browse files
authored
Improve repeat support (#624)
Reduces duplication in the generated code for the ++ and +? cases, which is especially useful for large classes.
1 parent 45506a2 commit 50811e2

File tree

3 files changed

+42
-7
lines changed

3 files changed

+42
-7
lines changed

src/pcre2_jit_compile.c

Lines changed: 34 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -10624,7 +10624,7 @@ if (exact > 1)
1062410624
JUMPTO(SLJIT_NOT_ZERO, label);
1062510625
}
1062610626
}
10627-
else if (exact == 1)
10627+
else if (exact == 1 && opcode != OP_POSSTAR && opcode != OP_MINSTAR)
1062810628
compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
1062910629

1063010630
if (early_fail_type == type_fail_range)
@@ -10909,8 +10909,18 @@ switch(opcode)
1090910909
case OP_MINSTAR:
1091010910
if (private_data_ptr == 0)
1091110911
allocate_stack(common, 1);
10912-
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
10913-
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
10912+
10913+
if (exact == 1)
10914+
{
10915+
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
10916+
compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
10917+
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
10918+
}
10919+
else
10920+
{
10921+
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
10922+
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
10923+
}
1091410924
if (early_fail_ptr != 0)
1091510925
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
1091610926
break;
@@ -10945,6 +10955,9 @@ switch(opcode)
1094510955
if (type == OP_ALLANY)
1094610956
#endif
1094710957
{
10958+
if (exact == 1)
10959+
detect_partial_match(common, &backtrack->own_backtracks);
10960+
1094810961
OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
1094910962
process_partial_match(common);
1095010963
if (early_fail_ptr != 0)
@@ -10956,7 +10969,8 @@ switch(opcode)
1095610969
if (type == OP_EXTUNI || common->utf)
1095710970
{
1095810971
SLJIT_ASSERT(tmp_base == TMP3 || common->locals_size >= 3 * SSIZE_OF(sw));
10959-
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
10972+
10973+
OP1(SLJIT_MOV, tmp_base, tmp_offset, exact == 1 ? SLJIT_IMM : STR_PTR, 0);
1096010974
detect_partial_match(common, &no_match);
1096110975
label = LABEL();
1096210976
compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
@@ -10965,6 +10979,9 @@ switch(opcode)
1096510979

1096610980
set_jumps(no_match, LABEL());
1096710981
OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
10982+
if (exact == 1)
10983+
add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0));
10984+
1096810985
if (early_fail_ptr != 0)
1096910986
{
1097010987
if (!HAS_VIRTUAL_REGISTERS && tmp_base == TMP3)
@@ -10976,6 +10993,9 @@ switch(opcode)
1097610993
}
1097710994
#endif
1097810995

10996+
if (exact == 1)
10997+
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
10998+
1097910999
detect_partial_match(common, &no_match);
1098011000
label = LABEL();
1098111001
compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
@@ -10985,6 +11005,10 @@ switch(opcode)
1098511005
set_jumps(no_char1_match, LABEL());
1098611006
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
1098711007
set_jumps(no_match, LABEL());
11008+
11009+
if (exact == 1)
11010+
add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, tmp_base, tmp_offset, STR_PTR, 0));
11011+
1098811012
if (early_fail_ptr != 0)
1098911013
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
1099011014
break;
@@ -11643,10 +11667,13 @@ switch(opcode)
1164311667

1164411668
case OP_MINSTAR:
1164511669
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
11646-
compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
11647-
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11670+
if (exact != 1)
11671+
{
11672+
compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
11673+
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11674+
}
1164811675
JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
11649-
set_jumps(jumplist, LABEL());
11676+
set_jumps(exact == 1 ? CURRENT_AS(char_iterator_backtrack)->u.backtracks : jumplist, LABEL());
1165011677
if (private_data_ptr == 0)
1165111678
free_stack(common, 1);
1165211679
break;

testdata/testinput5

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3486,6 +3486,9 @@
34863486
\= Expect no match
34873487
a
34883488

3489+
/([\p{Lu}&&\p{sc=Hung}]+?\x{10c81})+#/utf,alt_extended_class
3490+
\x{10c80}\x{10cb2}\x{10c81}\x{10c85}\x{10cb0}\x{10cf2}\x{10c81}#\x{10c80}\x{10cb2}\x{10c81}\x{10c85}\x{10cb0}\x{10c81}##
3491+
34893492
/[[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]
34903493
&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]
34913494
&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]

testdata/testoutput5

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8023,6 +8023,11 @@ No match
80238023
a
80248024
No match
80258025

8026+
/([\p{Lu}&&\p{sc=Hung}]+?\x{10c81})+#/utf,alt_extended_class
8027+
\x{10c80}\x{10cb2}\x{10c81}\x{10c85}\x{10cb0}\x{10cf2}\x{10c81}#\x{10c80}\x{10cb2}\x{10c81}\x{10c85}\x{10cb0}\x{10c81}##
8028+
0: \x{10c80}\x{10cb2}\x{10c81}\x{10c85}\x{10cb0}\x{10c81}#
8029+
1: \x{10c85}\x{10cb0}\x{10c81}
8030+
80268031
/[[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]
80278032
&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]
80288033
&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]&&[\pN--[\pC||\x{9F5}]]

0 commit comments

Comments
 (0)