@@ -1463,6 +1463,25 @@ static inline void json_value_completed(json_frame *frame)
14631463 UNREACHABLE ;
14641464}
14651465
1466+ static inline bool json_match_keyword (JSON_ParserState * state , bool resumable , const char * keyword , size_t offset )
1467+ {
1468+ // Matches the literal `keyword` at the cursor. On a full match the cursor is advanced past the keyword and true is returned.
1469+ // If `resumable` is set and fewer than `len` bytes remain but they match the start of `keyword`, false is returned without advancing.
1470+ // `keyword` is assumed to always be a literal, so the compiler should be able to constant-fold this `strlen` and other computations in this routine, such as eliminating the `if (resumable)` branch.
1471+ size_t len = strlen (keyword );
1472+
1473+ // Note: memcmp with a small power-of-two length and a literal string compiles to an integer comparison.
1474+ // That's why we sometimes compare starting from the first byte and sometimes from the second: the leading `offset` bytes are skipped in the full-match comparison (presumably already checked by the caller).
1475+ if (rest (state ) >= len && (memcmp (state -> cursor + offset , keyword + offset , len - offset ) == 0 )) {
1476+ state -> cursor += len ;
1477+ return true;
1478+ } else if (resumable && rest (state ) < len && memcmp (state -> cursor , keyword , rest (state )) == 0 ) {
1479+ return false;
1480+ }
1481+ // Neither a full match nor a resumable partial match. NOTE(review): no return follows, so raise_parse_error is presumably noreturn; confirm.
1482+ raise_parse_error ("unexpected token %s" , state );
1483+ }
1484+
14661485// Parse an arbitrary JSON value iteratively. This is a state machine driven
14671486// entirely by the top frame's phase so it can stop at any value boundary and
14681487// resume purely from the frame stack. A JSON_FRAME_ROOT frame sits at the
@@ -1484,88 +1503,71 @@ ALWAYS_INLINE(static) bool json_parse_any(JSON_ParserState *state, JSON_ParserCo
14841503
14851504 switch (peek (state )) {
14861505 case 'n' :
1487- if (rest (state ) >= 4 && (memcmp (state -> cursor , "null" , 4 ) == 0 )) {
1488- state -> cursor += 4 ;
1506+ if (json_match_keyword (state , resumable , "null" , 0 )) {
14891507 json_push_value (state , config , Qnil );
14901508 json_value_completed (frame );
14911509 break ;
1492- } else if (resumable && rest (state ) < 4 && memcmp (state -> cursor , "null" , rest (state )) == 0 ) {
1493- return false;
14941510 }
1511+ return false;
14951512
1496- raise_parse_error ("unexpected token %s" , state );
14971513 case 't' :
1498- if (rest (state ) >= 4 && (memcmp (state -> cursor , "true" , 4 ) == 0 )) {
1499- state -> cursor += 4 ;
1514+ if (json_match_keyword (state , resumable , "true" , 0 )) {
15001515 json_push_value (state , config , Qtrue );
15011516 json_value_completed (frame );
15021517 break ;
1503- } else if (resumable && rest (state ) < 4 && memcmp (state -> cursor , "true" , rest (state )) == 0 ) {
1504- return false;
15051518 }
1519+ return false;
15061520
1507- raise_parse_error ("unexpected token %s" , state );
15081521 case 'f' :
1509- // Note: memcmp with a small power of two compile to an integer comparison
1510- if (rest (state ) >= 5 && (memcmp (state -> cursor + 1 , "alse" , 4 ) == 0 )) {
1511- state -> cursor += 5 ;
1522+ if (json_match_keyword (state , resumable , "false" , 1 )) {
15121523 json_push_value (state , config , Qfalse );
15131524 json_value_completed (frame );
15141525 break ;
1515- } else if (resumable && rest (state ) < 5 && memcmp (state -> cursor , "false" , rest (state )) == 0 ) {
1516- return false;
15171526 }
1527+ return false;
15181528
1519- raise_parse_error ("unexpected token %s" , state );
15201529 case 'N' :
15211530 if (config -> allow_nan ) {
1522- // Note: memcmp with a small power of two compile to an integer comparison
1523- if (rest (state ) >= 3 && (memcmp (state -> cursor + 1 , "aN" , 2 ) == 0 )) {
1524- state -> cursor += 3 ;
1531+ if (json_match_keyword (state , resumable , "NaN" , 1 )) {
15251532 json_push_value (state , config , CNaN );
15261533 json_value_completed (frame );
15271534 break ;
1528- } else if (resumable && rest (state ) < 3 && memcmp (state -> cursor , "NaN" , rest (state )) == 0 ) {
1529- return false;
15301535 }
1536+ return false;
15311537 }
15321538
15331539 raise_parse_error ("unexpected token %s" , state );
15341540 case 'I' :
15351541 if (config -> allow_nan ) {
1536- if (rest (state ) >= 8 && (memcmp (state -> cursor , "Infinity" , 8 ) == 0 )) {
1537- state -> cursor += 8 ;
1542+ if (json_match_keyword (state , resumable , "Infinity" , 0 )) {
15381543 json_push_value (state , config , CInfinity );
15391544 json_value_completed (frame );
15401545 break ;
1541- } else if (resumable && memcmp (state -> cursor , "Infinity" , rest (state )) == 0 ) {
1542- return false;
15431546 }
1547+ return false;
15441548 }
15451549
15461550 raise_parse_error ("unexpected token %s" , state );
15471551 case '-' : {
15481552 const char * start = state -> cursor ;
1549- // Note: memcmp with a small power of two compile to an integer comparison
1550- if (rest (state ) >= 9 && (memcmp (state -> cursor + 1 , "Infinity" , 8 ) == 0 )) {
1551- if (config -> allow_nan ) {
1552- state -> cursor += 9 ;
1553- json_push_value (state , config , CMinusInfinity );
1554- json_value_completed (frame );
1555- break ;
1556- } else {
1557- raise_parse_error ("unexpected token %s" , state );
1558- }
1559- }
15601553 VALUE number = json_parse_negative_number (state , config );
15611554
1562- if (resumable && peek (state ) == 0 ) {
1555+ if (RB_UNLIKELY ( resumable && peek (state ) == 0 ) ) {
15631556 state -> cursor = start ;
15641557 return false;
15651558 }
15661559
1567- if (UNDEF_P (number )) {
1568- raise_parse_error_at ("invalid number: %s" , state , start );
1560+ if (RB_UNLIKELY (UNDEF_P (number ))) {
1561+ if (config -> allow_nan && peek (state ) == 'I' ) {
1562+ if (json_match_keyword (state , resumable , "Infinity" , 0 )) {
1563+ number = CMinusInfinity ;
1564+ } else {
1565+ state -> cursor = start ;
1566+ return false;
1567+ }
1568+ } else {
1569+ raise_parse_error_at ("invalid number: %s" , state , start );
1570+ }
15691571 }
15701572
15711573 json_push_value (state , config , number );
0 commit comments