@@ -1184,13 +1184,8 @@ while (cc < ccend)
11841184
11851185 case OP_TYPEPOSUPTO :
11861186#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1187- if (common -> utf )
1188- {
1189- if (cc [1 + IMM2_SIZE ] == OP_EXTUNI && locals_size <= 4 * SSIZE_OF (sw ))
1190- locals_size = 4 * SSIZE_OF (sw );
1191- else if (locals_size <= 3 * SSIZE_OF (sw ))
1192- locals_size = 3 * SSIZE_OF (sw );
1193- }
1187+ if (common -> utf && locals_size <= 3 * SSIZE_OF (sw ))
1188+ locals_size = 3 * SSIZE_OF (sw );
11941189#endif
11951190 if (cc [1 + IMM2_SIZE ] == OP_EXTUNI && locals_size <= 3 * SSIZE_OF (sw ))
11961191 locals_size = 3 * SSIZE_OF (sw );
@@ -1307,7 +1302,8 @@ while (cc < ccend)
13071302
13081303#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
13091304 case OP_CRPOSRANGE :
1310- if (GET2 (cc , 1 ) < GET2 (cc , 1 + IMM2_SIZE ) && locals_size <= 3 * SSIZE_OF (sw ))
1305+ /* The second value can be 0 for infinite repeats. */
1306+ if (common -> utf && GET2 (cc , 1 ) != GET2 (cc , 1 + IMM2_SIZE ) && locals_size <= 3 * SSIZE_OF (sw ))
13111307 locals_size = 3 * SSIZE_OF (sw );
13121308 cc += 1 + 2 * IMM2_SIZE ;
13131309 break ;
@@ -10447,8 +10443,10 @@ else
1044710443 * exact = 1 ;
1044810444 * opcode -= OP_PLUS - OP_STAR ;
1044910445 }
10446+ return cc ;
1045010447 }
10451- else if (* opcode >= OP_CRPOSSTAR && * opcode <= OP_CRPOSQUERY )
10448+
10449+ if (* opcode >= OP_CRPOSSTAR && * opcode <= OP_CRPOSQUERY )
1045210450 {
1045310451 * opcode -= OP_CRPOSSTAR - OP_POSSTAR ;
1045410452 * end = cc + class_len ;
@@ -10458,41 +10456,36 @@ else
1045810456 * exact = 1 ;
1045910457 * opcode = OP_POSSTAR ;
1046010458 }
10459+ return cc ;
1046110460 }
10462- else
10461+
10462+ SLJIT_ASSERT (* opcode == OP_CRRANGE || * opcode == OP_CRMINRANGE || * opcode == OP_CRPOSRANGE );
10463+ * max = GET2 (cc , (class_len + IMM2_SIZE ));
10464+ * exact = GET2 (cc , class_len );
10465+ * end = cc + class_len + 2 * IMM2_SIZE ;
10466+
10467+ if (* max == 0 )
1046310468 {
10464- SLJIT_ASSERT (* opcode == OP_CRRANGE || * opcode == OP_CRMINRANGE || * opcode == OP_CRPOSRANGE );
10465- * max = GET2 (cc , (class_len + IMM2_SIZE ));
10466- * exact = GET2 (cc , class_len );
10469+ SLJIT_ASSERT (* exact > 1 );
10470+ if (* opcode == OP_CRPOSRANGE )
10471+ * opcode = OP_POSUPTO ;
10472+ else
10473+ * opcode -= OP_CRRANGE - OP_STAR ;
10474+ return cc ;
10475+ }
1046710476
10468- if (* max == 0 )
10469- {
10470- if (* opcode == OP_CRPOSRANGE )
10471- * opcode = OP_POSSTAR ;
10472- else
10473- * opcode -= OP_CRRANGE - OP_STAR ;
10474- }
10477+ * max -= * exact ;
10478+ if (* max == 0 )
10479+ * opcode = OP_EXACT ;
10480+ else
10481+ {
10482+ SLJIT_ASSERT (* exact > 0 || * max > 1 );
10483+ if (* opcode == OP_CRPOSRANGE )
10484+ * opcode = OP_POSUPTO ;
10485+ else if (* max == 1 )
10486+ * opcode -= OP_CRRANGE - OP_QUERY ;
1047510487 else
10476- {
10477- * max -= * exact ;
10478- if (* max == 0 )
10479- * opcode = OP_EXACT ;
10480- else if (* max == 1 )
10481- {
10482- if (* opcode == OP_CRPOSRANGE )
10483- * opcode = OP_POSQUERY ;
10484- else
10485- * opcode -= OP_CRRANGE - OP_QUERY ;
10486- }
10487- else
10488- {
10489- if (* opcode == OP_CRPOSRANGE )
10490- * opcode = OP_POSUPTO ;
10491- else
10492- * opcode -= OP_CRRANGE - OP_UPTO ;
10493- }
10494- }
10495- * end = cc + class_len + 2 * IMM2_SIZE ;
10488+ * opcode -= OP_CRRANGE - OP_UPTO ;
1049610489 }
1049710490 return cc ;
1049810491 }
@@ -10593,36 +10586,49 @@ else
1059310586 }
1059410587
1059510588/* Handle fixed part first. */
10596- if (exact > 1 )
10589+ if (opcode != OP_POSUPTO )
1059710590 {
10598- SLJIT_ASSERT (early_fail_ptr == 0 );
10591+ if (exact > 1 )
10592+ {
10593+ SLJIT_ASSERT (early_fail_ptr == 0 );
1059910594
10600- if (common -> mode == PCRE2_JIT_COMPLETE
10601- #ifdef SUPPORT_UNICODE
10602- && !common -> utf
10595+ if (common -> mode == PCRE2_JIT_COMPLETE
10596+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
10597+ && !common -> utf
1060310598#endif
10604- && type != OP_ANYNL && type != OP_EXTUNI )
10605- {
10606- OP2 (SLJIT_ADD , TMP1 , 0 , STR_PTR , 0 , SLJIT_IMM , IN_UCHARS (exact ));
10607- add_jump (compiler , & backtrack -> own_backtracks , CMP (SLJIT_GREATER , TMP1 , 0 , STR_END , 0 ));
10608- OP1 (SLJIT_MOV , tmp_base , tmp_offset , SLJIT_IMM , exact );
10609- label = LABEL ();
10610- compile_char1_matchingpath (common , type , cc , & backtrack -> own_backtracks , FALSE);
10611- OP2 (SLJIT_SUB | SLJIT_SET_Z , tmp_base , tmp_offset , tmp_base , tmp_offset , SLJIT_IMM , 1 );
10612- JUMPTO (SLJIT_NOT_ZERO , label );
10599+ && type != OP_ANYNL && type != OP_EXTUNI )
10600+ {
10601+ OP2 (SLJIT_SUB , TMP1 , 0 , STR_END , 0 , STR_PTR , 0 );
10602+ add_jump (compiler , & backtrack -> own_backtracks , CMP (SLJIT_LESS , TMP1 , 0 , SLJIT_IMM , IN_UCHARS (exact )));
10603+
10604+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
10605+ if (type == OP_ALLANY && !common -> invalid_utf )
10606+ #else
10607+ if (type == OP_ALLANY )
10608+ #endif
10609+ OP2 (SLJIT_ADD , STR_PTR , 0 , STR_PTR , 0 , SLJIT_IMM , IN_UCHARS (exact ));
10610+ else
10611+ {
10612+ OP1 (SLJIT_MOV , tmp_base , tmp_offset , SLJIT_IMM , exact );
10613+ label = LABEL ();
10614+ compile_char1_matchingpath (common , type , cc , & backtrack -> own_backtracks , FALSE);
10615+ OP2 (SLJIT_SUB | SLJIT_SET_Z , tmp_base , tmp_offset , tmp_base , tmp_offset , SLJIT_IMM , 1 );
10616+ JUMPTO (SLJIT_NOT_ZERO , label );
10617+ }
10618+ }
10619+ else
10620+ {
10621+ SLJIT_ASSERT (tmp_base == TMP3 || common -> locals_size >= 3 * SSIZE_OF (sw ));
10622+ OP1 (SLJIT_MOV , tmp_base , tmp_offset , SLJIT_IMM , exact );
10623+ label = LABEL ();
10624+ compile_char1_matchingpath (common , type , cc , & backtrack -> own_backtracks , TRUE);
10625+ OP2 (SLJIT_SUB | SLJIT_SET_Z , tmp_base , tmp_offset , tmp_base , tmp_offset , SLJIT_IMM , 1 );
10626+ JUMPTO (SLJIT_NOT_ZERO , label );
10627+ }
1061310628 }
10614- else
10615- {
10616- SLJIT_ASSERT (tmp_base == TMP3 || common -> locals_size >= 3 * SSIZE_OF (sw ));
10617- OP1 (SLJIT_MOV , tmp_base , tmp_offset , SLJIT_IMM , exact );
10618- label = LABEL ();
10629+ else if (exact == 1 && opcode != OP_STAR && opcode != OP_MINSTAR && opcode != OP_POSSTAR )
1061910630 compile_char1_matchingpath (common , type , cc , & backtrack -> own_backtracks , TRUE);
10620- OP2 (SLJIT_SUB | SLJIT_SET_Z , tmp_base , tmp_offset , tmp_base , tmp_offset , SLJIT_IMM , 1 );
10621- JUMPTO (SLJIT_NOT_ZERO , label );
10622- }
1062310631 }
10624- else if (exact == 1 && opcode != OP_STAR && opcode != OP_MINSTAR && opcode != OP_POSSTAR )
10625- compile_char1_matchingpath (common , type , cc , & backtrack -> own_backtracks , TRUE);
1062610632
1062710633if (early_fail_type == type_fail_range )
1062810634 {
@@ -10987,29 +10993,34 @@ switch(opcode)
1098710993 }
1098810994
1098910995#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
10990- if (type == OP_EXTUNI || common -> utf )
10996+ if (common -> utf )
1099110997 {
1099210998 SLJIT_ASSERT (tmp_base == TMP3 || common -> locals_size >= 3 * SSIZE_OF (sw ));
1099310999
10994- OP1 (SLJIT_MOV , tmp_base , tmp_offset , exact == 1 ? SLJIT_IMM : STR_PTR , 0 );
11000+ if (tmp_base != TMP3 )
11001+ {
11002+ OP1 (SLJIT_MOV , SLJIT_MEM1 (SLJIT_SP ), LOCAL2 , COUNT_MATCH , 0 );
11003+ tmp_base = COUNT_MATCH ;
11004+ }
11005+
11006+ OP1 (SLJIT_MOV , tmp_base , 0 , exact == 1 ? SLJIT_IMM : STR_PTR , 0 );
1099511007 detect_partial_match (common , & no_match );
1099611008 label = LABEL ();
1099711009 compile_char1_matchingpath (common , type , cc , & no_match , FALSE);
10998- OP1 (SLJIT_MOV , tmp_base , tmp_offset , STR_PTR , 0 );
11010+ OP1 (SLJIT_MOV , tmp_base , 0 , STR_PTR , 0 );
1099911011 detect_partial_match_to (common , label );
1100011012
1100111013 set_jumps (no_match , LABEL ());
11002- OP1 (SLJIT_MOV , STR_PTR , 0 , tmp_base , tmp_offset );
11014+ OP1 (SLJIT_MOV , STR_PTR , 0 , tmp_base , 0 );
11015+
11016+ if (tmp_base != TMP3 )
11017+ OP1 (SLJIT_MOV , COUNT_MATCH , 0 , SLJIT_MEM1 (SLJIT_SP ), LOCAL2 );
11018+
1100311019 if (exact == 1 )
1100411020 add_jump (compiler , & backtrack -> own_backtracks , CMP (SLJIT_EQUAL , STR_PTR , 0 , SLJIT_IMM , 0 ));
1100511021
1100611022 if (early_fail_ptr != 0 )
11007- {
11008- if (!HAS_VIRTUAL_REGISTERS && tmp_base == TMP3 )
11009- OP1 (SLJIT_MOV , SLJIT_MEM1 (SLJIT_SP ), early_fail_ptr , TMP3 , 0 );
11010- else
11011- OP1 (SLJIT_MOV , SLJIT_MEM1 (SLJIT_SP ), early_fail_ptr , STR_PTR , 0 );
11012- }
11023+ OP1 (SLJIT_MOV , SLJIT_MEM1 (SLJIT_SP ), early_fail_ptr , STR_PTR , 0 );
1101311024 break ;
1101411025 }
1101511026#endif
@@ -11019,12 +11030,17 @@ switch(opcode)
1101911030
1102011031 detect_partial_match (common , & no_match );
1102111032 label = LABEL ();
11033+ /* Extuni never fails, so no_char1_match is not used in that case.
11034+ Anynl optionally reads an extra character on success. */
1102211035 compile_char1_matchingpath (common , type , cc , & no_char1_match , FALSE);
1102311036 detect_partial_match_to (common , label );
11024- OP2 (SLJIT_ADD , STR_PTR , 0 , STR_PTR , 0 , SLJIT_IMM , IN_UCHARS (1 ));
11037+ if (type != OP_EXTUNI )
11038+ OP2 (SLJIT_ADD , STR_PTR , 0 , STR_PTR , 0 , SLJIT_IMM , IN_UCHARS (1 ));
1102511039
1102611040 set_jumps (no_char1_match , LABEL ());
11027- OP2 (SLJIT_SUB , STR_PTR , 0 , STR_PTR , 0 , SLJIT_IMM , IN_UCHARS (1 ));
11041+ if (type != OP_EXTUNI )
11042+ OP2 (SLJIT_SUB , STR_PTR , 0 , STR_PTR , 0 , SLJIT_IMM , IN_UCHARS (1 ));
11043+
1102811044 set_jumps (no_match , LABEL ());
1102911045
1103011046 if (exact == 1 )
@@ -11036,65 +11052,86 @@ switch(opcode)
1103611052
1103711053 case OP_POSUPTO :
1103811054 SLJIT_ASSERT (early_fail_ptr == 0 );
11039- SLJIT_ASSERT (tmp_base == TMP3 || common -> locals_size >= 3 * SSIZE_OF (sw ));
11055+
11056+ max += exact ;
1104011057#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11041- if (common -> utf )
11058+ if (type == OP_EXTUNI || common -> utf )
11059+ #else
11060+ if (type == OP_EXTUNI )
11061+ #endif
1104211062 {
1104311063 SLJIT_ASSERT (common -> locals_size >= 3 * SSIZE_OF (sw ));
11044- if (tmp_base != TMP3 )
11045- {
11046- SLJIT_ASSERT (type == OP_EXTUNI && common -> locals_size >= 4 * SSIZE_OF (sw ));
11047- tmp_offset = LOCAL3 ;
11048- }
1104911064
11050- OP1 (SLJIT_MOV , SLJIT_MEM1 (SLJIT_SP ), LOCAL2 , STR_PTR , 0 );
11051- OP1 (SLJIT_MOV , tmp_base , tmp_offset , SLJIT_IMM , max );
11065+ /* Count match is not modified by compile_char1_matchingpath. */
11066+ OP1 (SLJIT_MOV , SLJIT_MEM1 (SLJIT_SP ), LOCAL2 , COUNT_MATCH , 0 );
11067+ OP1 (SLJIT_MOV , COUNT_MATCH , 0 , SLJIT_IMM , exact == max ? 0 : max );
1105211068
11053- detect_partial_match (common , & no_match );
1105411069 label = LABEL ();
11055- compile_char1_matchingpath (common , type , cc , & no_match , FALSE);
11056- OP1 (SLJIT_MOV , SLJIT_MEM1 (SLJIT_SP ), LOCAL2 , STR_PTR , 0 );
11057- OP2 (SLJIT_SUB | SLJIT_SET_Z , tmp_base , tmp_offset , tmp_base , tmp_offset , SLJIT_IMM , 1 );
11058- add_jump (compiler , & no_match , JUMP (SLJIT_ZERO ));
11059- detect_partial_match_to (common , label );
11060-
11061- set_jumps (no_match , LABEL ());
11062- OP1 (SLJIT_MOV , STR_PTR , 0 , SLJIT_MEM1 (SLJIT_SP ), LOCAL2 );
11063- break ;
11064- }
11065- #endif
11066-
11067- if (type == OP_ALLANY )
11068- {
11069- OP2 (SLJIT_ADD , STR_PTR , 0 , STR_PTR , 0 , SLJIT_IMM , IN_UCHARS (max ));
11070+ /* Extuni only modifies TMP3 on successful match. */
11071+ OP1 (SLJIT_MOV , TMP3 , 0 , STR_PTR , 0 );
11072+ compile_char1_matchingpath (common , type , cc , & no_match , TRUE);
1107011073
11071- if (common -> mode == PCRE2_JIT_COMPLETE )
11074+ if (exact == max )
1107211075 {
11073- OP2U ( SLJIT_SUB | SLJIT_SET_GREATER , STR_PTR , 0 , STR_END , 0 );
11074- SELECT ( SLJIT_GREATER , STR_PTR , STR_END , 0 , STR_PTR );
11076+ OP2 ( SLJIT_ADD , COUNT_MATCH , 0 , COUNT_MATCH , 0 , SLJIT_IMM , 1 );
11077+ JUMPTO ( SLJIT_JUMP , label );
1107511078 }
1107611079 else
1107711080 {
11078- jump = CMP (SLJIT_LESS_EQUAL , STR_PTR , 0 , STR_END , 0 );
11079- process_partial_match (common );
11080- JUMPHERE (jump );
11081+ OP2 (SLJIT_SUB | SLJIT_SET_Z , COUNT_MATCH , 0 , COUNT_MATCH , 0 , SLJIT_IMM , 1 );
11082+ JUMPTO (SLJIT_NOT_ZERO , label );
11083+ OP1 (SLJIT_MOV , TMP3 , 0 , STR_PTR , 0 );
11084+ }
11085+
11086+ set_jumps (no_match , LABEL ());
11087+
11088+ if (exact > 0 )
11089+ {
11090+ if (exact == max )
11091+ OP2U (SLJIT_SUB | SLJIT_SET_LESS , COUNT_MATCH , 0 , SLJIT_IMM , exact );
11092+ else
11093+ OP2U (SLJIT_SUB | SLJIT_SET_GREATER , COUNT_MATCH , 0 , SLJIT_IMM , max - exact );
1108111094 }
11095+
11096+ OP1 (SLJIT_MOV , COUNT_MATCH , 0 , SLJIT_MEM1 (SLJIT_SP ), LOCAL2 );
11097+
11098+ if (exact > 0 )
11099+ add_jump (compiler , & backtrack -> own_backtracks , JUMP (exact == max ? SLJIT_LESS : SLJIT_GREATER ));
11100+ OP1 (SLJIT_MOV , STR_PTR , 0 , TMP3 , 0 );
1108211101 break ;
1108311102 }
1108411103
11085- OP1 (SLJIT_MOV , tmp_base , tmp_offset , SLJIT_IMM , max );
11104+ SLJIT_ASSERT (tmp_base == TMP3 );
11105+
11106+ OP1 (SLJIT_MOV , TMP3 , 0 , SLJIT_IMM , exact == max ? 0 : max );
1108611107
1108711108 detect_partial_match (common , & no_match );
1108811109 label = LABEL ();
1108911110 compile_char1_matchingpath (common , type , cc , & no_char1_match , FALSE);
11090- OP2 (SLJIT_SUB | SLJIT_SET_Z , tmp_base , tmp_offset , tmp_base , tmp_offset , SLJIT_IMM , 1 );
11091- add_jump (compiler , & no_match , JUMP (SLJIT_ZERO ));
11111+
11112+ if (exact == max )
11113+ OP2 (SLJIT_ADD , TMP3 , 0 , TMP3 , 0 , SLJIT_IMM , 1 );
11114+ else
11115+ {
11116+ OP2 (SLJIT_SUB | SLJIT_SET_Z , TMP3 , 0 , TMP3 , 0 , SLJIT_IMM , 1 );
11117+ add_jump (compiler , & no_match , JUMP (SLJIT_ZERO ));
11118+ }
1109211119 detect_partial_match_to (common , label );
1109311120 OP2 (SLJIT_ADD , STR_PTR , 0 , STR_PTR , 0 , SLJIT_IMM , IN_UCHARS (1 ));
1109411121
1109511122 set_jumps (no_char1_match , LABEL ());
1109611123 OP2 (SLJIT_SUB , STR_PTR , 0 , STR_PTR , 0 , SLJIT_IMM , IN_UCHARS (1 ));
1109711124 set_jumps (no_match , LABEL ());
11125+
11126+ if (exact > 0 )
11127+ {
11128+ if (exact == max )
11129+ jump = CMP (SLJIT_LESS , TMP3 , 0 , SLJIT_IMM , exact );
11130+ else
11131+ jump = CMP (SLJIT_GREATER , TMP3 , 0 , SLJIT_IMM , max - exact );
11132+
11133+ add_jump (compiler , & backtrack -> own_backtracks , jump );
11134+ }
1109811135 break ;
1109911136
1110011137 case OP_POSQUERY :
0 commit comments