@@ -1269,34 +1269,37 @@ while (cc < ccend)
12691269return TRUE;
12701270}
12711271
1272- #define EARLY_FAIL_ENHANCE_MAX (1 + 3)
1272+ #define EARLY_FAIL_ENHANCE_MAX (3 + 3)
12731273
12741274/*
1275- start:
1276- 0 - skip / early fail allowed
1277- 1 - only early fail with range allowed
1278- >1 - (start - 1) early fail is processed
1275+ Start represents the number of allowed early fail enhancements
12791276
1280- return: current number of iterators enhanced with fast fail
1277+ The values 0-2 have a special meaning:
1278+ 0 - skip is allowed for all iterators
1279+ 1 - fail is allowed for all iterators
1280+ 2 - fail is allowed for greedy iterators
1281+ 3 - only ranged early fail is allowed
1282+ >3 - (start - 3) number of remaining ranged early fails allowed
1283+
1284+ return: the updated value of start
12811285*/
1282- static int detect_early_fail (compiler_common * common , PCRE2_SPTR cc , int * private_data_start ,
1283- sljit_s32 depth , int start , BOOL fast_forward_allowed )
1286+ static int detect_early_fail (compiler_common * common , PCRE2_SPTR cc ,
1287+ int * private_data_start , sljit_s32 depth , int start )
12841288{
12851289PCRE2_SPTR begin = cc ;
12861290PCRE2_SPTR next_alt ;
12871291PCRE2_SPTR end ;
12881292PCRE2_SPTR accelerated_start ;
1289- BOOL prev_fast_forward_allowed ;
12901293int result = 0 ;
1291- int count ;
1294+ int count , prev_count ;
12921295
12931296SLJIT_ASSERT (* cc == OP_ONCE || * cc == OP_BRA || * cc == OP_CBRA );
12941297SLJIT_ASSERT (* cc != OP_CBRA || common -> optimized_cbracket [GET2 (cc , 1 + LINK_SIZE )] != 0 );
12951298SLJIT_ASSERT (start < EARLY_FAIL_ENHANCE_MAX );
12961299
12971300next_alt = cc + GET (cc , 1 );
1298- if (* next_alt == OP_ALT )
1299- fast_forward_allowed = FALSE ;
1301+ if (* next_alt == OP_ALT && start < 1 )
1302+ start = 1 ;
13001303
13011304do
13021305 {
@@ -1339,39 +1342,45 @@ do
13391342 case OP_HSPACE :
13401343 case OP_NOT_VSPACE :
13411344 case OP_VSPACE :
1342- fast_forward_allowed = FALSE;
1345+ if (count < 1 )
1346+ count = 1 ;
13431347 cc ++ ;
13441348 continue ;
13451349
13461350 case OP_ANYNL :
13471351 case OP_EXTUNI :
1348- fast_forward_allowed = FALSE;
1349- if (count == 0 )
1350- count = 1 ;
1352+ if (count < 3 )
1353+ count = 3 ;
13511354 cc ++ ;
13521355 continue ;
13531356
13541357 case OP_NOTPROP :
13551358 case OP_PROP :
1356- fast_forward_allowed = FALSE;
1359+ if (count < 1 )
1360+ count = 1 ;
13571361 cc += 1 + 2 ;
13581362 continue ;
13591363
13601364 case OP_CHAR :
13611365 case OP_CHARI :
13621366 case OP_NOT :
13631367 case OP_NOTI :
1364- fast_forward_allowed = FALSE;
1368+ if (count < 1 )
1369+ count = 1 ;
13651370 cc += 2 ;
13661371#ifdef SUPPORT_UNICODE
13671372 if (common -> utf && HAS_EXTRALEN (cc [-1 ])) cc += GET_EXTRALEN (cc [-1 ]);
13681373#endif
13691374 continue ;
13701375
1371- case OP_TYPESTAR :
13721376 case OP_TYPEMINSTAR :
1373- case OP_TYPEPLUS :
13741377 case OP_TYPEMINPLUS :
1378+ if (count == 2 )
1379+ count = 3 ;
1380+ /* Fall through */
1381+
1382+ case OP_TYPESTAR :
1383+ case OP_TYPEPLUS :
13751384 case OP_TYPEPOSSTAR :
13761385 case OP_TYPEPOSPLUS :
13771386 /* The type or prop opcode is skipped in the next iteration. */
@@ -1383,14 +1392,18 @@ do
13831392 break ;
13841393 }
13851394
1386- if (count == 0 )
1395+ if (count < 3 )
1396+ count = 3 ;
1397+ continue ;
1398+
1399+ case OP_TYPEEXACT :
1400+ if (count < 1 )
13871401 count = 1 ;
1388- fast_forward_allowed = FALSE ;
1402+ cc += 1 + IMM2_SIZE ;
13891403 continue ;
13901404
13911405 case OP_TYPEUPTO :
13921406 case OP_TYPEMINUPTO :
1393- case OP_TYPEEXACT :
13941407 case OP_TYPEPOSUPTO :
13951408 cc += IMM2_SIZE ;
13961409 /* Fall through */
@@ -1399,37 +1412,40 @@ do
13991412 case OP_TYPEMINQUERY :
14001413 case OP_TYPEPOSQUERY :
14011414 /* The type or prop opcode is skipped in the next iteration. */
1402- fast_forward_allowed = FALSE;
1403- if (count == 0 )
1404- count = 1 ;
1415+ if (count < 3 )
1416+ count = 3 ;
14051417 cc += 1 ;
14061418 continue ;
14071419
1408- case OP_STAR :
14091420 case OP_MINSTAR :
1410- case OP_PLUS :
14111421 case OP_MINPLUS :
1422+ case OP_MINSTARI :
1423+ case OP_MINPLUSI :
1424+ case OP_NOTMINSTAR :
1425+ case OP_NOTMINPLUS :
1426+ case OP_NOTMINSTARI :
1427+ case OP_NOTMINPLUSI :
1428+ if (count == 2 )
1429+ count = 3 ;
1430+ /* Fall through */
1431+
1432+ case OP_STAR :
1433+ case OP_PLUS :
14121434 case OP_POSSTAR :
14131435 case OP_POSPLUS :
14141436
14151437 case OP_STARI :
1416- case OP_MINSTARI :
14171438 case OP_PLUSI :
1418- case OP_MINPLUSI :
14191439 case OP_POSSTARI :
14201440 case OP_POSPLUSI :
14211441
14221442 case OP_NOTSTAR :
1423- case OP_NOTMINSTAR :
14241443 case OP_NOTPLUS :
1425- case OP_NOTMINPLUS :
14261444 case OP_NOTPOSSTAR :
14271445 case OP_NOTPOSPLUS :
14281446
14291447 case OP_NOTSTARI :
1430- case OP_NOTMINSTARI :
14311448 case OP_NOTPLUSI :
1432- case OP_NOTMINPLUSI :
14331449 case OP_NOTPOSSTARI :
14341450 case OP_NOTPOSPLUSI :
14351451 accelerated_start = cc ;
@@ -1439,9 +1455,17 @@ do
14391455#endif
14401456 break ;
14411457
1458+ case OP_EXACT :
1459+ if (count < 1 )
1460+ count = 1 ;
1461+ cc += 2 + IMM2_SIZE ;
1462+ #ifdef SUPPORT_UNICODE
1463+ if (common -> utf && HAS_EXTRALEN (cc [-1 ])) cc += GET_EXTRALEN (cc [-1 ]);
1464+ #endif
1465+ continue ;
1466+
14421467 case OP_UPTO :
14431468 case OP_MINUPTO :
1444- case OP_EXACT :
14451469 case OP_POSUPTO :
14461470 case OP_UPTOI :
14471471 case OP_MINUPTOI :
14701494 case OP_NOTQUERYI :
14711495 case OP_NOTMINQUERYI :
14721496 case OP_NOTPOSQUERYI :
1473- fast_forward_allowed = FALSE;
1474- if (count == 0 )
1475- count = 1 ;
1497+ if (count < 3 )
1498+ count = 3 ;
14761499 cc += 2 ;
14771500#ifdef SUPPORT_UNICODE
14781501 if (common -> utf && HAS_EXTRALEN (cc [-1 ])) cc += GET_EXTRALEN (cc [-1 ]);
@@ -1492,10 +1515,14 @@ do
14921515
14931516 switch (* cc )
14941517 {
1495- case OP_CRSTAR :
14961518 case OP_CRMINSTAR :
1497- case OP_CRPLUS :
14981519 case OP_CRMINPLUS :
1520+ if (count == 2 )
1521+ count = 3 ;
1522+ /* Fall through */
1523+
1524+ case OP_CRSTAR :
1525+ case OP_CRPLUS :
14991526 case OP_CRPOSSTAR :
15001527 case OP_CRPOSPLUS :
15011528 cc ++ ;
@@ -1504,39 +1531,58 @@ do
15041531 case OP_CRRANGE :
15051532 case OP_CRMINRANGE :
15061533 case OP_CRPOSRANGE :
1534+ if (GET2 (cc , 1 ) == GET2 (cc , 1 + IMM2_SIZE ))
1535+ {
1536+ /* Exact repeat. */
1537+ cc += 1 + 2 * IMM2_SIZE ;
1538+ if (count < 1 )
1539+ count = 1 ;
1540+ continue ;
1541+ }
1542+
15071543 cc += 2 * IMM2_SIZE ;
15081544 /* Fall through */
15091545 case OP_CRQUERY :
15101546 case OP_CRMINQUERY :
15111547 case OP_CRPOSQUERY :
15121548 cc ++ ;
1513- if (count == 0 )
1514- count = 1 ;
1515- /* Fall through */
1549+ if (count < 3 )
1550+ count = 3 ;
1551+ continue ;
1552+
15161553 default :
1517- accelerated_start = NULL ;
1518- fast_forward_allowed = FALSE;
1554+ /* No repeat. */
1555+ if (count < 1 )
1556+ count = 1 ;
15191557 continue ;
15201558 }
15211559 break ;
15221560
15231561 case OP_ONCE :
1562+ if (count < 2 )
1563+ count = 2 ;
1564+ /* Fall through */
1565+
15241566 case OP_BRA :
15251567 case OP_CBRA :
1526- prev_fast_forward_allowed = fast_forward_allowed ;
1527- fast_forward_allowed = FALSE;
1568+ prev_count = count ;
1569+ if (count < 1 )
1570+ count = 1 ;
15281571
15291572 if (depth >= 4 )
15301573 break ;
15311574
1532- if (count == 0 && cc [GET (cc , 1 )] == OP_ALT )
1533- count = 1 ;
1575+ if (count < 3 && cc [GET (cc , 1 )] == OP_ALT )
1576+ count = 3 ;
15341577
15351578 end = bracketend (cc );
15361579 if (end [-1 - LINK_SIZE ] != OP_KET || (* cc == OP_CBRA && common -> optimized_cbracket [GET2 (cc , 1 + LINK_SIZE )] == 0 ))
15371580 break ;
15381581
1539- count = detect_early_fail (common , cc , private_data_start , depth + 1 , count , prev_fast_forward_allowed );
1582+ prev_count = detect_early_fail (common , cc , private_data_start , depth + 1 , prev_count );
1583+
1584+ if (prev_count > count )
1585+ count = prev_count ;
15401586
15411587 if (PRIVATE_DATA (cc ) != 0 )
15421588 common -> private_data_ptrs [begin - common -> start ] = 1 ;
@@ -1556,55 +1602,52 @@ do
15561602 continue ;
15571603 }
15581604
1559- if (accelerated_start != NULL )
1605+ if (accelerated_start == NULL )
1606+ break ;
1607+
1608+ if (count == 0 )
15601609 {
1561- if (count == 0 )
1562- {
1563- count ++ ;
1610+ common -> fast_forward_bc_ptr = accelerated_start ;
1611+ common -> private_data_ptrs [(accelerated_start + 1 ) - common -> start ] = ((* private_data_start ) << 3 ) | type_skip ;
1612+ * private_data_start += sizeof (sljit_sw );
1613+ count = 4 ;
1614+ }
1615+ else if (count < 3 )
1616+ {
1617+ common -> private_data_ptrs [(accelerated_start + 1 ) - common -> start ] = ((* private_data_start ) << 3 ) | type_fail ;
15641618
1565- if (fast_forward_allowed )
1566- {
1567- common -> fast_forward_bc_ptr = accelerated_start ;
1568- common -> private_data_ptrs [(accelerated_start + 1 ) - common -> start ] = ((* private_data_start ) << 3 ) | type_skip ;
1569- * private_data_start += sizeof (sljit_sw );
1570- }
1571- else
1572- {
1573- common -> private_data_ptrs [(accelerated_start + 1 ) - common -> start ] = ((* private_data_start ) << 3 ) | type_fail ;
1619+ if (common -> early_fail_start_ptr == 0 )
1620+ common -> early_fail_start_ptr = * private_data_start ;
15741621
1575- if ( common -> early_fail_start_ptr == 0 )
1576- common -> early_fail_start_ptr = * private_data_start ;
1622+ * private_data_start += sizeof ( sljit_sw );
1623+ common -> early_fail_end_ptr = * private_data_start ;
15771624
1578- * private_data_start += sizeof ( sljit_sw );
1579- common -> early_fail_end_ptr = * private_data_start ;
1625+ if ( * private_data_start > SLJIT_MAX_LOCAL_SIZE )
1626+ return EARLY_FAIL_ENHANCE_MAX ;
15801627
1581- if (* private_data_start > SLJIT_MAX_LOCAL_SIZE )
1582- return EARLY_FAIL_ENHANCE_MAX ;
1583- }
1584- }
1585- else
1586- {
1587- common -> private_data_ptrs [(accelerated_start + 1 ) - common -> start ] = ((* private_data_start ) << 3 ) | type_fail_range ;
1628+ count = 4 ;
1629+ }
1630+ else
1631+ {
1632+ common -> private_data_ptrs [(accelerated_start + 1 ) - common -> start ] = ((* private_data_start ) << 3 ) | type_fail_range ;
15881633
1589- if (common -> early_fail_start_ptr == 0 )
1590- common -> early_fail_start_ptr = * private_data_start ;
1634+ if (common -> early_fail_start_ptr == 0 )
1635+ common -> early_fail_start_ptr = * private_data_start ;
15911636
1592- * private_data_start += 2 * sizeof (sljit_sw );
1593- common -> early_fail_end_ptr = * private_data_start ;
1637+ * private_data_start += 2 * sizeof (sljit_sw );
1638+ common -> early_fail_end_ptr = * private_data_start ;
15941639
1595- if (* private_data_start > SLJIT_MAX_LOCAL_SIZE )
1596- return EARLY_FAIL_ENHANCE_MAX ;
1597- }
1640+ if (* private_data_start > SLJIT_MAX_LOCAL_SIZE )
1641+ return EARLY_FAIL_ENHANCE_MAX ;
15981642
1599- /* Cannot be part of a repeat. */
1600- common -> private_data_ptrs [begin - common -> start ] = 1 ;
16011643 count ++ ;
1602-
1603- if (count < EARLY_FAIL_ENHANCE_MAX )
1604- continue ;
16051644 }
16061645
1607- break ;
1646+ /* Cannot be part of a repeat. */
1647+ common -> private_data_ptrs [begin - common -> start ] = 1 ;
1648+
1649+ if (count >= EARLY_FAIL_ENHANCE_MAX )
1650+ break ;
16081651 }
16091652
16101653 if (* cc != OP_ALT && * cc != OP_KET )
@@ -14239,7 +14282,7 @@ memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
1423914282private_data_size = common -> cbra_ptr + (re -> top_bracket + 1 ) * sizeof (sljit_sw );
1424014283
1424114284if ((re -> overall_options & PCRE2_ANCHORED ) == 0 && (re -> overall_options & PCRE2_NO_START_OPTIMIZE ) == 0 && !common -> has_skip_in_assert_back )
14242- detect_early_fail (common , common -> start , & private_data_size , 0 , 0 , TRUE );
14285+ detect_early_fail (common , common -> start , & private_data_size , 0 , 0 );
1424314286
1424414287set_private_data_ptrs (common , & private_data_size , ccend );
1424514288
0 commit comments