Skip to content

Commit 497df78

Browse files
committed
Extract json_match_keyword
1 parent 2435bc2 commit 497df78

2 files changed

Lines changed: 42 additions & 40 deletions

File tree

ext/json/ext/parser/parser.c

Lines changed: 41 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1463,6 +1463,25 @@ static inline void json_value_completed(json_frame *frame)
14631463
UNREACHABLE;
14641464
}
14651465

1466+
static inline bool json_match_keyword(JSON_ParserState *state, bool resumable, const char *keyword, size_t offset)
1467+
{
1468+
// It is assumed that since `keyword` is always a literal, the compiler is able to constant-fold this
1469+
// `strlen` and several other computations in this routine, such as eliminating the `if (resumable)` branch.
1470+
1471+
size_t len = strlen(keyword);
1472+
1473+
// Note: memcmp with a small power-of-two length and a literal string compiles to an integer comparison.
1474+
// That's why we sometimes compare starting from the first byte and sometimes from the second.
1475+
if (rest(state) >= len && (memcmp(state->cursor + offset, keyword + offset, len - offset) == 0)) {
1476+
state->cursor += len;
1477+
return true;
1478+
} else if (resumable && rest(state) < len && memcmp(state->cursor, keyword, rest(state)) == 0) {
1479+
return false;
1480+
}
1481+
1482+
raise_parse_error("unexpected token %s", state);
1483+
}
1484+
14661485
// Parse an arbitrary JSON value iteratively. This is a state machine driven
14671486
// entirely by the top frame's phase so it can stop at any value boundary and
14681487
// resume purely from the frame stack. A JSON_FRAME_ROOT frame sits at the
@@ -1484,88 +1503,71 @@ ALWAYS_INLINE(static) bool json_parse_any(JSON_ParserState *state, JSON_ParserCo
14841503

14851504
switch (peek(state)) {
14861505
case 'n':
1487-
if (rest(state) >= 4 && (memcmp(state->cursor, "null", 4) == 0)) {
1488-
state->cursor += 4;
1506+
if (json_match_keyword(state, resumable, "null", 0)) {
14891507
json_push_value(state, config, Qnil);
14901508
json_value_completed(frame);
14911509
break;
1492-
} else if (resumable && rest(state) < 4 && memcmp(state->cursor, "null", rest(state)) == 0) {
1493-
return false;
14941510
}
1511+
return false;
14951512

1496-
raise_parse_error("unexpected token %s", state);
14971513
case 't':
1498-
if (rest(state) >= 4 && (memcmp(state->cursor, "true", 4) == 0)) {
1499-
state->cursor += 4;
1514+
if (json_match_keyword(state, resumable, "true", 0)) {
15001515
json_push_value(state, config, Qtrue);
15011516
json_value_completed(frame);
15021517
break;
1503-
} else if (resumable && rest(state) < 4 && memcmp(state->cursor, "true", rest(state)) == 0) {
1504-
return false;
15051518
}
1519+
return false;
15061520

1507-
raise_parse_error("unexpected token %s", state);
15081521
case 'f':
1509-
// Note: memcmp with a small power of two compile to an integer comparison
1510-
if (rest(state) >= 5 && (memcmp(state->cursor + 1, "alse", 4) == 0)) {
1511-
state->cursor += 5;
1522+
if (json_match_keyword(state, resumable, "false", 1)) {
15121523
json_push_value(state, config, Qfalse);
15131524
json_value_completed(frame);
15141525
break;
1515-
} else if (resumable && rest(state) < 5 && memcmp(state->cursor, "false", rest(state)) == 0) {
1516-
return false;
15171526
}
1527+
return false;
15181528

1519-
raise_parse_error("unexpected token %s", state);
15201529
case 'N':
15211530
if (config->allow_nan) {
1522-
// Note: memcmp with a small power of two compile to an integer comparison
1523-
if (rest(state) >= 3 && (memcmp(state->cursor + 1, "aN", 2) == 0)) {
1524-
state->cursor += 3;
1531+
if (json_match_keyword(state, resumable, "NaN", 1)) {
15251532
json_push_value(state, config, CNaN);
15261533
json_value_completed(frame);
15271534
break;
1528-
} else if (resumable && rest(state) < 3 && memcmp(state->cursor, "NaN", rest(state)) == 0) {
1529-
return false;
15301535
}
1536+
return false;
15311537
}
15321538

15331539
raise_parse_error("unexpected token %s", state);
15341540
case 'I':
15351541
if (config->allow_nan) {
1536-
if (rest(state) >= 8 && (memcmp(state->cursor, "Infinity", 8) == 0)) {
1537-
state->cursor += 8;
1542+
if (json_match_keyword(state, resumable, "Infinity", 0)) {
15381543
json_push_value(state, config, CInfinity);
15391544
json_value_completed(frame);
15401545
break;
1541-
} else if (resumable && memcmp(state->cursor, "Infinity", rest(state)) == 0) {
1542-
return false;
15431546
}
1547+
return false;
15441548
}
15451549

15461550
raise_parse_error("unexpected token %s", state);
15471551
case '-': {
15481552
const char *start = state->cursor;
1549-
// Note: memcmp with a small power of two compile to an integer comparison
1550-
if (rest(state) >= 9 && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
1551-
if (config->allow_nan) {
1552-
state->cursor += 9;
1553-
json_push_value(state, config, CMinusInfinity);
1554-
json_value_completed(frame);
1555-
break;
1556-
} else {
1557-
raise_parse_error("unexpected token %s", state);
1558-
}
1559-
}
15601553
VALUE number = json_parse_negative_number(state, config);
15611554

1562-
if (resumable && peek(state) == 0) {
1555+
if (RB_UNLIKELY(resumable && peek(state) == 0)) {
15631556
state->cursor = start;
15641557
return false;
15651558
}
15661559

1567-
if (UNDEF_P(number)) {
1568-
raise_parse_error_at("invalid number: %s", state, start);
1560+
if (RB_UNLIKELY(UNDEF_P(number))) {
1561+
if (config->allow_nan && peek(state) == 'I') {
1562+
if (json_match_keyword(state, resumable, "Infinity", 0)) {
1563+
number = CMinusInfinity;
1564+
} else {
1565+
state->cursor = start;
1566+
return false;
1567+
}
1568+
} else {
1569+
raise_parse_error_at("invalid number: %s", state, start);
1570+
}
15691571
}
15701572

15711573
json_push_value(state, config, number);

test/json/json_ext_parser_test.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ def test_error_messages
2626

2727
ex = assert_raise(ParserError) { parse('-Infinity something') }
2828
unless RUBY_PLATFORM =~ /java/
29-
assert_equal "unexpected token '-Infinity' at line 1 column 1", ex.message
29+
assert_equal "invalid number: '-Infinity' at line 1 column 1", ex.message
3030
end
3131

3232
ex = assert_raise(ParserError) { parse('NaN something') }

0 commit comments

Comments
 (0)