@@ -1769,7 +1769,7 @@ switch(*cc)
17691769 if (max == 0 )
17701770 return (* cc == OP_CRRANGE ) ? 2 : 1 ;
17711771 max -= min ;
1772- if (max > 2 )
1772+ if (max > ( * cc == OP_CRRANGE ? 0 : 1 ) )
17731773 max = 2 ;
17741774 return max ;
17751775
@@ -10467,10 +10467,12 @@ else
1046710467 if (* max == 0 )
1046810468 {
1046910469 SLJIT_ASSERT (* exact > 1 );
10470- if (* opcode == OP_CRPOSRANGE )
10470+ if (* opcode == OP_CRRANGE )
10471+ * opcode = OP_UPTO ;
10472+ else if (* opcode == OP_CRPOSRANGE )
1047110473 * opcode = OP_POSUPTO ;
1047210474 else
10473- * opcode -= OP_CRRANGE - OP_STAR ;
10475+ * opcode = OP_MINSTAR ;
1047410476 return cc ;
1047510477 }
1047610478
@@ -10480,12 +10482,14 @@ else
1048010482 else
1048110483 {
1048210484 SLJIT_ASSERT (* exact > 0 || * max > 1 );
10483- if (* opcode == OP_CRPOSRANGE )
10485+ if (* opcode == OP_CRRANGE )
10486+ * opcode = OP_UPTO ;
10487+ else if (* opcode == OP_CRPOSRANGE )
1048410488 * opcode = OP_POSUPTO ;
1048510489 else if (* max == 1 )
10486- * opcode -= OP_CRRANGE - OP_QUERY ;
10490+ * opcode = OP_MINQUERY ;
1048710491 else
10488- * opcode -= OP_CRRANGE - OP_UPTO ;
10492+ * opcode = OP_MINUPTO ;
1048910493 }
1049010494 return cc ;
1049110495 }
@@ -10586,7 +10590,7 @@ else
1058610590 }
1058710591
1058810592/* Handle fixed part first. */
10589- if (opcode != OP_POSUPTO )
10593+ if (opcode != OP_UPTO && opcode != OP_POSUPTO )
1059010594 {
1059110595 if (exact > 1 )
1059210596 {
@@ -10646,11 +10650,9 @@ if (early_fail_type == type_fail_range)
1064610650switch (opcode )
1064710651 {
1064810652 case OP_UPTO :
10649- /* Exact is ignored for upto. */
10650- exact = 0 ;
10651- /* Fall through */
1065210653 case OP_STAR :
1065310654 SLJIT_ASSERT (early_fail_ptr == 0 || opcode == OP_STAR );
10655+ max += exact ;
1065410656
1065510657 if (type == OP_EXTUNI )
1065610658 {
@@ -10665,6 +10667,9 @@ switch(opcode)
1066510667 }
1066610668 else
1066710669 {
10670+ /* If OP_EXTUNI is present, it has a separate EXACT opcode. */
10671+ SLJIT_ASSERT (exact == 0 );
10672+
1066810673 allocate_stack (common , 2 );
1066910674 OP1 (SLJIT_MOV , SLJIT_MEM1 (STACK_TOP ), STACK (0 ), STR_PTR , 0 );
1067010675 OP1 (SLJIT_MOV , SLJIT_MEM1 (STACK_TOP ), STACK (1 ), SLJIT_IMM , 0 );
@@ -10726,6 +10731,9 @@ switch(opcode)
1072610731 else
1072710732#endif
1072810733 {
10734+ /* If OP_ALLANY is present, it has a separate EXACT opcode. */
10735+ SLJIT_ASSERT (exact == 0 );
10736+
1072910737 if (private_data_ptr == 0 )
1073010738 allocate_stack (common , 2 );
1073110739
@@ -10794,71 +10802,86 @@ switch(opcode)
1079410802 if (private_data_ptr == 0 )
1079510803 allocate_stack (common , 2 );
1079610804
10797- use_tmp = (! HAS_VIRTUAL_REGISTERS && opcode == OP_STAR );
10805+ use_tmp = (opcode == OP_STAR );
1079810806
1079910807 if (use_tmp )
1080010808 {
10801- OP1 (SLJIT_MOV , tmp_base , tmp_offset , SLJIT_IMM , 0 );
10802- OP1 (SLJIT_MOV , base , offset0 , tmp_base , tmp_offset );
10809+ OP1 (SLJIT_MOV , TMP3 , 0 , SLJIT_IMM , 0 );
10810+ OP1 (SLJIT_MOV , base , offset0 , TMP3 , 0 );
1080310811 }
1080410812 else
1080510813 {
10806- OP1 (SLJIT_MOV , TMP1 , 0 , SLJIT_IMM , 0 );
10807- OP1 (SLJIT_MOV , base , offset0 , TMP1 , 0 );
10808- OP1 (SLJIT_MOV , base , offset1 , TMP1 , 0 );
10809- if (opcode == OP_UPTO )
10810- OP1 (SLJIT_MOV , tmp_base , tmp_offset , SLJIT_IMM , max + 1 );
10814+ OP1 (SLJIT_MOV , base , offset1 , COUNT_MATCH , 0 );
10815+ OP1 (SLJIT_MOV , COUNT_MATCH , 0 , SLJIT_IMM , 0 );
10816+ OP1 (SLJIT_MOV , base , offset0 , COUNT_MATCH , 0 );
10817+ OP1 (SLJIT_MOV , TMP3 , 0 , SLJIT_IMM , exact == max ? 0 : (max + 1 ));
1081110818 }
1081210819
1081310820 /* Search the first instance of charpos_char. */
10814- if (exact == 1 )
10815- {
10816- SLJIT_ASSERT (opcode == OP_STAR );
10821+ if (exact > 0 )
1081710822 detect_partial_match (common , & no_match );
10818- }
1081910823 else
1082010824 jump = JUMP (SLJIT_JUMP );
1082110825
1082210826 label = LABEL ();
10827+
1082310828 if (opcode == OP_UPTO )
1082410829 {
10825- OP2 (SLJIT_SUB | SLJIT_SET_Z , tmp_base , tmp_offset , tmp_base , tmp_offset , SLJIT_IMM , 1 );
10826- add_jump (compiler , & no_match , JUMP (SLJIT_ZERO ));
10830+ if (exact == max )
10831+ OP2 (SLJIT_ADD , TMP3 , 0 , TMP3 , 0 , SLJIT_IMM , 1 );
10832+ else
10833+ {
10834+ OP2 (SLJIT_SUB | SLJIT_SET_Z , TMP3 , 0 , TMP3 , 0 , SLJIT_IMM , 1 );
10835+ add_jump (compiler , & no_match , JUMP (SLJIT_ZERO ));
10836+ }
1082710837 }
10838+
1082810839 compile_char1_matchingpath (common , type , cc , & no_match , FALSE);
1082910840
1083010841 if (early_fail_ptr != 0 )
1083110842 OP1 (SLJIT_MOV , SLJIT_MEM1 (SLJIT_SP ), early_fail_ptr , STR_PTR , 0 );
1083210843
10833- if (exact != 1 )
10844+ if (exact == 0 )
1083410845 JUMPHERE (jump );
1083510846
1083610847 detect_partial_match (common , & no_match );
10848+
10849+ if (opcode == OP_UPTO && exact > 0 )
10850+ {
10851+ if (exact == max )
10852+ CMPTO (SLJIT_LESS , TMP3 , 0 , SLJIT_IMM , exact , label );
10853+ else
10854+ CMPTO (SLJIT_GREATER , TMP3 , 0 , SLJIT_IMM , (max + 1 ) - exact , label );
10855+ }
10856+
1083710857 OP1 (MOV_UCHAR , TMP1 , 0 , SLJIT_MEM1 (STR_PTR ), IN_UCHARS (0 ));
1083810858 if (charpos_othercasebit != 0 )
1083910859 OP2 (SLJIT_OR , TMP1 , 0 , TMP1 , 0 , SLJIT_IMM , charpos_othercasebit );
1084010860 CMPTO (SLJIT_NOT_EQUAL , TMP1 , 0 , SLJIT_IMM , charpos_char , label );
1084110861
10862+ OP1 (SLJIT_MOV , base , offset0 , STR_PTR , 0 );
1084210863 if (use_tmp )
1084310864 {
10844- OP1 (SLJIT_MOV , base , offset0 , STR_PTR , 0 );
10845- OP2U (SLJIT_SUB | SLJIT_SET_Z , tmp_base , tmp_offset , SLJIT_IMM , 0 );
10846- SELECT (SLJIT_EQUAL , tmp_base , STR_PTR , 0 , tmp_base );
10865+ OP2U (SLJIT_SUB | SLJIT_SET_Z , TMP3 , 0 , SLJIT_IMM , 0 );
10866+ SELECT (SLJIT_EQUAL , TMP3 , STR_PTR , 0 , TMP3 );
1084710867 }
1084810868 else
1084910869 {
10850- OP1 (SLJIT_MOV , TMP2 , 0 , base , offset1 );
10851- OP1 (SLJIT_MOV , base , offset0 , STR_PTR , 0 );
10852- OP2U (SLJIT_SUB | SLJIT_SET_Z , TMP2 , 0 , SLJIT_IMM , 0 );
10853- SELECT (SLJIT_EQUAL , TMP2 , STR_PTR , 0 , TMP2 );
10854- OP1 (SLJIT_MOV , base , offset1 , TMP2 , 0 );
10870+ OP2U (SLJIT_SUB | SLJIT_SET_Z , COUNT_MATCH , 0 , SLJIT_IMM , 0 );
10871+ SELECT (SLJIT_EQUAL , COUNT_MATCH , STR_PTR , 0 , COUNT_MATCH );
1085510872 }
1085610873 JUMPTO (SLJIT_JUMP , label );
1085710874
1085810875 set_jumps (no_match , LABEL ());
1085910876 OP1 (SLJIT_MOV , STR_PTR , 0 , base , offset0 );
1086010877 if (use_tmp )
10861- OP1 (SLJIT_MOV , base , offset1 , tmp_base , tmp_offset );
10878+ OP1 (SLJIT_MOV , base , offset1 , TMP3 , 0 );
10879+ else
10880+ {
10881+ OP1 (SLJIT_MOV , TMP1 , 0 , base , offset1 );
10882+ OP1 (SLJIT_MOV , base , offset1 , COUNT_MATCH , 0 );
10883+ OP1 (SLJIT_MOV , COUNT_MATCH , 0 , TMP1 , 0 );
10884+ }
1086210885
1086310886 add_jump (compiler , & backtrack -> own_backtracks , CMP (SLJIT_EQUAL , STR_PTR , 0 , SLJIT_IMM , 0 ));
1086410887
@@ -10872,60 +10895,110 @@ switch(opcode)
1087210895 if (private_data_ptr == 0 )
1087310896 allocate_stack (common , 2 );
1087410897
10875- OP1 (SLJIT_MOV , base , offset1 , STR_PTR , 0 );
1087610898#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
10877- use_tmp = (!HAS_VIRTUAL_REGISTERS && opcode == OP_STAR );
10878- SLJIT_ASSERT (!use_tmp || tmp_base == TMP3 );
10899+ use_tmp = (opcode == OP_STAR );
1087910900
1088010901 if (common -> utf )
10881- OP1 (SLJIT_MOV , use_tmp ? TMP3 : base , use_tmp ? 0 : offset0 , STR_PTR , 0 );
10902+ {
10903+ if (!use_tmp )
10904+ OP1 (SLJIT_MOV , base , offset0 , COUNT_MATCH , 0 );
10905+
10906+ OP1 (SLJIT_MOV , use_tmp ? TMP3 : COUNT_MATCH , 0 , STR_PTR , 0 );
10907+ }
1088210908#endif
10909+
1088310910 if (opcode == OP_UPTO )
10884- OP1 (SLJIT_MOV , tmp_base , tmp_offset , SLJIT_IMM , max );
10911+ OP1 (SLJIT_MOV , TMP3 , 0 , SLJIT_IMM , exact == max ? - ( sljit_sw ) exact : ( sljit_sw ) max );
1088510912
10886- detect_partial_match (common , & no_match );
10887- label = LABEL ();
10888- compile_char1_matchingpath (common , type , cc , & no_char1_match , FALSE);
10913+ if (opcode == OP_UPTO && exact > 0 )
10914+ {
10915+ label = LABEL ();
10916+ detect_partial_match (common , & no_match );
10917+ compile_char1_matchingpath (common , type , cc , & no_char1_match , FALSE);
1088910918#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
10890- if (common -> utf )
10891- OP1 (SLJIT_MOV , use_tmp ? TMP3 : base , use_tmp ? 0 : offset0 , STR_PTR , 0 );
10919+ if (common -> utf )
10920+ OP1 (SLJIT_MOV , use_tmp ? TMP3 : COUNT_MATCH , 0 , STR_PTR , 0 );
1089210921#endif
1089310922
10894- if (opcode == OP_UPTO )
10895- {
10896- OP2 (SLJIT_SUB | SLJIT_SET_Z , tmp_base , tmp_offset , tmp_base , tmp_offset , SLJIT_IMM , 1 );
10897- add_jump (compiler , & no_match , JUMP (SLJIT_ZERO ));
10923+ if (exact == max )
10924+ {
10925+ OP2 (SLJIT_ADD | SLJIT_SET_Z , TMP3 , 0 , TMP3 , 0 , SLJIT_IMM , 1 );
10926+ JUMPTO (SLJIT_NOT_ZERO , label );
10927+ }
10928+ else
10929+ {
10930+ OP2 (SLJIT_SUB | SLJIT_SET_Z , TMP3 , 0 , TMP3 , 0 , SLJIT_IMM , 1 );
10931+ add_jump (compiler , & no_match , JUMP (SLJIT_ZERO ));
10932+ CMPTO (SLJIT_NOT_EQUAL , TMP3 , 0 , SLJIT_IMM , max - exact , label );
10933+ }
10934+
10935+ OP1 (SLJIT_MOV , base , offset1 , STR_PTR , 0 );
10936+ JUMPTO (SLJIT_JUMP , label );
1089810937 }
10938+ else
10939+ {
10940+ OP1 (SLJIT_MOV , base , offset1 , STR_PTR , 0 );
1089910941
10900- detect_partial_match_to (common , label );
10901- OP2 (SLJIT_ADD , STR_PTR , 0 , STR_PTR , 0 , SLJIT_IMM , IN_UCHARS (1 ));
10942+ detect_partial_match (common , & no_match );
10943+ label = LABEL ();
10944+ compile_char1_matchingpath (common , type , cc , & no_char1_match , FALSE);
10945+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
10946+ if (common -> utf )
10947+ OP1 (SLJIT_MOV , use_tmp ? TMP3 : COUNT_MATCH , 0 , STR_PTR , 0 );
10948+ #endif
10949+
10950+ if (opcode == OP_UPTO )
10951+ {
10952+ OP2 (SLJIT_SUB | SLJIT_SET_Z , TMP3 , 0 , TMP3 , 0 , SLJIT_IMM , 1 );
10953+ add_jump (compiler , & no_match , JUMP (SLJIT_ZERO ));
10954+ }
10955+
10956+ detect_partial_match_to (common , label );
10957+ }
1090210958
10903- set_jumps (no_char1_match , LABEL ());
1090410959#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1090510960 if (common -> utf )
1090610961 {
10962+ set_jumps (no_char1_match , LABEL ());
1090710963 set_jumps (no_match , LABEL ());
1090810964 if (use_tmp )
1090910965 {
1091010966 OP1 (SLJIT_MOV , STR_PTR , 0 , TMP3 , 0 );
1091110967 OP1 (SLJIT_MOV , base , offset0 , TMP3 , 0 );
1091210968 }
1091310969 else
10914- OP1 (SLJIT_MOV , STR_PTR , 0 , base , offset0 );
10970+ {
10971+ OP1 (SLJIT_MOV , STR_PTR , 0 , COUNT_MATCH , 0 );
10972+ OP1 (SLJIT_MOV , COUNT_MATCH , 0 , base , offset0 );
10973+ OP1 (SLJIT_MOV , base , offset0 , STR_PTR , 0 );
10974+ }
1091510975 }
1091610976 else
1091710977#endif
1091810978 {
10979+ if (opcode != OP_UPTO || exact == 0 )
10980+ OP2 (SLJIT_ADD , STR_PTR , 0 , STR_PTR , 0 , SLJIT_IMM , IN_UCHARS (1 ));
10981+ set_jumps (no_char1_match , LABEL ());
10982+
1091910983 OP2 (SLJIT_SUB , STR_PTR , 0 , STR_PTR , 0 , SLJIT_IMM , IN_UCHARS (1 ));
1092010984 set_jumps (no_match , LABEL ());
1092110985 OP1 (SLJIT_MOV , base , offset0 , STR_PTR , 0 );
1092210986 }
1092310987
10924- if (exact == 1 )
10988+ if (opcode == OP_UPTO )
1092510989 {
10926- SLJIT_ASSERT (opcode == OP_STAR );
10927- add_jump (compiler , & BACKTRACK_AS (char_iterator_backtrack )-> u .backtracks , CMP (SLJIT_EQUAL , base , offset1 , STR_PTR , 0 ));
10990+ if (exact > 0 )
10991+ {
10992+ if (max == exact )
10993+ jump = CMP (SLJIT_GREATER_EQUAL , TMP3 , 0 , SLJIT_IMM , - (sljit_sw )exact );
10994+ else
10995+ jump = CMP (SLJIT_GREATER , TMP3 , 0 , SLJIT_IMM , max - exact );
10996+
10997+ add_jump (compiler , & BACKTRACK_AS (char_iterator_backtrack )-> u .backtracks , jump );
10998+ }
1092810999 }
11000+ else if (exact == 1 )
11001+ add_jump (compiler , & BACKTRACK_AS (char_iterator_backtrack )-> u .backtracks , CMP (SLJIT_EQUAL , base , offset1 , STR_PTR , 0 ));
1092911002
1093011003 if (early_fail_ptr != 0 )
1093111004 OP1 (SLJIT_MOV , SLJIT_MEM1 (SLJIT_SP ), early_fail_ptr , STR_PTR , 0 );
@@ -11052,8 +11125,8 @@ switch(opcode)
1105211125
1105311126 case OP_POSUPTO :
1105411127 SLJIT_ASSERT (early_fail_ptr == 0 );
11055-
1105611128 max += exact ;
11129+
1105711130#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
1105811131 if (type == OP_EXTUNI || common -> utf )
1105911132#else
@@ -11738,8 +11811,7 @@ switch(opcode)
1173811811 OP1 (SLJIT_MOV , base , offset0 , STR_PTR , 0 );
1173911812 JUMPTO (SLJIT_JUMP , CURRENT_AS (char_iterator_backtrack )-> matchingpath );
1174011813
11741- if (opcode == OP_STAR && exact == 1 )
11742- set_jumps (CURRENT_AS (char_iterator_backtrack )-> u .backtracks , LABEL ());
11814+ set_jumps (CURRENT_AS (char_iterator_backtrack )-> u .backtracks , LABEL ());
1174311815 }
1174411816
1174511817 JUMPHERE (jump );
0 commit comments