@@ -318,9 +318,9 @@ typedef struct char_iterator_backtrack {
318318 struct {
319319 unsigned int othercasebit ;
320320 PCRE2_UCHAR chr ;
321- BOOL enabled ;
322321 } charpos ;
323322 } u ;
323+ BOOL charpos_enabled ;
324324} char_iterator_backtrack ;
325325
326326typedef struct ref_iterator_backtrack {
@@ -10624,7 +10624,7 @@ if (exact > 1)
1062410624 JUMPTO (SLJIT_NOT_ZERO , label );
1062510625 }
1062610626 }
10627- else if (exact == 1 && opcode != OP_POSSTAR && opcode != OP_MINSTAR )
10627+ else if (exact == 1 && opcode != OP_STAR && opcode != OP_MINSTAR && opcode != OP_POSSTAR )
1062810628 compile_char1_matchingpath (common , type , cc , & backtrack -> own_backtracks , TRUE);
1062910629
1063010630if (early_fail_type == type_fail_range )
@@ -10642,18 +10642,30 @@ if (early_fail_type == type_fail_range)
1064210642
1064310643switch (opcode )
1064410644 {
10645- case OP_STAR :
1064610645 case OP_UPTO :
10646+ /* The exact count is ignored for OP_UPTO. */
10647+ exact = 0 ;
10648+ /* Fall through */
10649+ case OP_STAR :
1064710650 SLJIT_ASSERT (early_fail_ptr == 0 || opcode == OP_STAR );
1064810651
1064910652 if (type == OP_EXTUNI )
1065010653 {
1065110654 SLJIT_ASSERT (private_data_ptr == 0 );
1065210655 SLJIT_ASSERT (early_fail_ptr == 0 );
1065310656
10654- allocate_stack (common , 2 );
10655- OP1 (SLJIT_MOV , SLJIT_MEM1 (STACK_TOP ), STACK (0 ), STR_PTR , 0 );
10656- OP1 (SLJIT_MOV , SLJIT_MEM1 (STACK_TOP ), STACK (1 ), SLJIT_IMM , 0 );
10657+ if (exact == 1 )
10658+ {
10659+ SLJIT_ASSERT (opcode == OP_STAR );
10660+ allocate_stack (common , 1 );
10661+ OP1 (SLJIT_MOV , SLJIT_MEM1 (STACK_TOP ), STACK (0 ), SLJIT_IMM , 0 );
10662+ }
10663+ else
10664+ {
10665+ allocate_stack (common , 2 );
10666+ OP1 (SLJIT_MOV , SLJIT_MEM1 (STACK_TOP ), STACK (0 ), STR_PTR , 0 );
10667+ OP1 (SLJIT_MOV , SLJIT_MEM1 (STACK_TOP ), STACK (1 ), SLJIT_IMM , 0 );
10668+ }
1065710669
1065810670 if (opcode == OP_UPTO )
1065910671 {
@@ -10688,6 +10700,9 @@ switch(opcode)
1068810700 {
1068910701 if (opcode == OP_STAR )
1069010702 {
10703+ if (exact == 1 )
10704+ detect_partial_match (common , & backtrack -> own_backtracks );
10705+
1069110706 if (private_data_ptr == 0 )
1069210707 allocate_stack (common , 2 );
1069310708
@@ -10768,7 +10783,7 @@ switch(opcode)
1076810783 if (charpos_othercasebit != 0 )
1076910784 charpos_char |= charpos_othercasebit ;
1077010785
10771- BACKTRACK_AS (char_iterator_backtrack )-> u . charpos . enabled = TRUE;
10786+ BACKTRACK_AS (char_iterator_backtrack )-> charpos_enabled = TRUE;
1077210787 BACKTRACK_AS (char_iterator_backtrack )-> u .charpos .chr = charpos_char ;
1077310788 BACKTRACK_AS (char_iterator_backtrack )-> u .charpos .othercasebit = charpos_othercasebit ;
1077410789 }
@@ -10781,7 +10796,14 @@ switch(opcode)
1078110796 OP1 (SLJIT_MOV , tmp_base , tmp_offset , SLJIT_IMM , max + 1 );
1078210797
1078310798 /* Search the first instance of charpos_char. */
10784- jump = JUMP (SLJIT_JUMP );
10799+ if (exact == 1 )
10800+ {
10801+ SLJIT_ASSERT (opcode == OP_STAR );
10802+ detect_partial_match (common , & backtrack -> own_backtracks );
10803+ }
10804+ else
10805+ jump = JUMP (SLJIT_JUMP );
10806+
1078510807 label = LABEL ();
1078610808 if (opcode == OP_UPTO )
1078710809 {
@@ -10791,7 +10813,9 @@ switch(opcode)
1079110813 compile_char1_matchingpath (common , type , cc , & backtrack -> own_backtracks , FALSE);
1079210814 if (early_fail_ptr != 0 )
1079310815 OP1 (SLJIT_MOV , SLJIT_MEM1 (SLJIT_SP ), early_fail_ptr , STR_PTR , 0 );
10794- JUMPHERE (jump );
10816+
10817+ if (exact != 1 )
10818+ JUMPHERE (jump );
1079510819
1079610820 detect_partial_match (common , & backtrack -> own_backtracks );
1079710821 OP1 (MOV_UCHAR , TMP1 , 0 , SLJIT_MEM1 (STR_PTR ), IN_UCHARS (0 ));
@@ -10899,6 +10923,12 @@ switch(opcode)
1089910923 OP1 (SLJIT_MOV , base , offset0 , STR_PTR , 0 );
1090010924 }
1090110925
10926+ if (exact == 1 )
10927+ {
10928+ SLJIT_ASSERT (opcode == OP_STAR );
10929+ add_jump (compiler , & BACKTRACK_AS (char_iterator_backtrack )-> u .backtracks , CMP (SLJIT_EQUAL , base , offset1 , STR_PTR , 0 ));
10930+ }
10931+
1090210932 if (early_fail_ptr != 0 )
1090310933 OP1 (SLJIT_MOV , SLJIT_MEM1 (SLJIT_SP ), early_fail_ptr , STR_PTR , 0 );
1090410934 }
@@ -11580,16 +11610,18 @@ struct sljit_jump *jump;
1158011610
1158111611OP2 (SLJIT_SUB , TMP1 , 0 , STR_PTR , 0 , SLJIT_IMM , IN_UCHARS (1 ));
1158211612jump = CMP (SLJIT_LESS_EQUAL , TMP1 , 0 , TMP2 , 0 );
11613+ /* All newlines are single-byte, or their last byte
11614+ is not equal to CHAR_NL/CHAR_CR, even if UTF is enabled. */
11615+ OP1 (MOV_UCHAR , SLJIT_TMP_DEST_REG , 0 , SLJIT_MEM1 (STR_PTR ), IN_UCHARS (-2 ));
1158311616OP1 (MOV_UCHAR , TMP1 , 0 , SLJIT_MEM1 (STR_PTR ), IN_UCHARS (-1 ));
11584- OP2U (SLJIT_SUB | SLJIT_SET_Z , TMP1 , 0 , SLJIT_IMM , CHAR_NL );
11585- OP1 (MOV_UCHAR , TMP1 , 0 , SLJIT_MEM1 (STR_PTR ), IN_UCHARS (-2 ));
11586- OP_FLAGS (SLJIT_MOV , SLJIT_TMP_DEST_REG , 0 , SLJIT_EQUAL );
11587- OP2U (SLJIT_SUB | SLJIT_SET_Z , TMP1 , 0 , SLJIT_IMM , CHAR_CR );
11588- OP_FLAGS (SLJIT_AND , SLJIT_TMP_DEST_REG , 0 , SLJIT_EQUAL );
11617+ OP2 (SLJIT_SHL , SLJIT_TMP_DEST_REG , 0 , SLJIT_TMP_DEST_REG , 0 , SLJIT_IMM , 8 );
11618+ OP2 (SLJIT_OR , TMP1 , 0 , TMP1 , 0 , SLJIT_TMP_DEST_REG , 0 );
11619+ OP2U (SLJIT_SUB | SLJIT_SET_Z , TMP1 , 0 , SLJIT_IMM , CHAR_CR << 8 | CHAR_NL );
11620+ OP_FLAGS (SLJIT_MOV , TMP1 , 0 , SLJIT_EQUAL );
1158911621#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
11590- OP2 (SLJIT_SHL , SLJIT_TMP_DEST_REG , 0 , SLJIT_TMP_DEST_REG , 0 , SLJIT_IMM , UCHAR_SHIFT );
11622+ OP2 (SLJIT_SHL , TMP1 , 0 , TMP1 , 0 , SLJIT_IMM , UCHAR_SHIFT );
1159111623#endif
11592- OP2 (SLJIT_SUB , STR_PTR , 0 , STR_PTR , 0 , SLJIT_TMP_DEST_REG , 0 );
11624+ OP2 (SLJIT_SUB , STR_PTR , 0 , STR_PTR , 0 , TMP1 , 0 );
1159311625JUMPHERE (jump );
1159411626}
1159511627
@@ -11625,7 +11657,7 @@ switch(opcode)
1162511657 }
1162611658 else
1162711659 {
11628- if (CURRENT_AS (char_iterator_backtrack )-> u . charpos . enabled )
11660+ if (CURRENT_AS (char_iterator_backtrack )-> charpos_enabled )
1162911661 {
1163011662 OP1 (SLJIT_MOV , STR_PTR , 0 , base , offset0 );
1163111663 OP1 (SLJIT_MOV , TMP2 , 0 , base , offset1 );
@@ -11646,19 +11678,39 @@ switch(opcode)
1164611678 else
1164711679 {
1164811680 OP1 (SLJIT_MOV , STR_PTR , 0 , base , offset0 );
11649- if (type == OP_ANYNL )
11681+
11682+ if (opcode == OP_STAR && exact == 1 )
1165011683 {
11651- OP1 (SLJIT_MOV , TMP2 , 0 , base , offset1 );
11652- jump = CMP (SLJIT_LESS_EQUAL , STR_PTR , 0 , TMP2 , 0 );
11653- compile_newline_move_back (common );
11684+ if (type == OP_ANYNL )
11685+ {
11686+ OP1 (SLJIT_MOV , TMP2 , 0 , base , offset1 );
11687+ compile_newline_move_back (common );
11688+ }
11689+
11690+ move_back (common , NULL , TRUE);
11691+ jump = CMP (SLJIT_LESS_EQUAL , STR_PTR , 0 , base , offset1 );
1165411692 }
1165511693 else
11656- jump = CMP (SLJIT_LESS_EQUAL , STR_PTR , 0 , base , offset1 );
11694+ {
11695+ if (type == OP_ANYNL )
11696+ {
11697+ OP1 (SLJIT_MOV , TMP2 , 0 , base , offset1 );
11698+ jump = CMP (SLJIT_LESS_EQUAL , STR_PTR , 0 , TMP2 , 0 );
11699+ compile_newline_move_back (common );
11700+ }
11701+ else
11702+ jump = CMP (SLJIT_LESS_EQUAL , STR_PTR , 0 , base , offset1 );
11703+
11704+ move_back (common , NULL , TRUE);
11705+ }
1165711706
11658- move_back (common , NULL , TRUE);
1165911707 OP1 (SLJIT_MOV , base , offset0 , STR_PTR , 0 );
1166011708 JUMPTO (SLJIT_JUMP , CURRENT_AS (char_iterator_backtrack )-> matchingpath );
11709+
11710+ if (opcode == OP_STAR && exact == 1 )
11711+ set_jumps (CURRENT_AS (char_iterator_backtrack )-> u .backtracks , LABEL ());
1166111712 }
11713+
1166211714 JUMPHERE (jump );
1166311715 if (private_data_ptr == 0 )
1166411716 free_stack (common , 2 );
0 commit comments