Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -653,10 +653,6 @@ tokens:
comment: "a separator between words in a list"
- name: __END__
comment: "marker for the point in the file at which the parser should stop"
- name: MISSING
comment: "a token that was expected but not found"
- name: NOT_PROVIDED
comment: "a token that was not present but it is okay"
flags:
- name: ArgumentsNodeFlags
values:
Expand Down
2 changes: 1 addition & 1 deletion docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ The available values for `type` are:
* `constant` - A field that is an integer that represents an index in the constant pool. This is a `pm_constant_id_t` in C.
* `constant[]` - A field that is an array of constants. This is a `pm_constant_id_list_t` in C.
* `location` - A field that is a location. This is a `pm_location_t` in C.
* `location?` - A field that is a location that is optionally present. This is a `pm_location_t` in C, but if the value is not present then the `start` and `end` fields will be `NULL`.
* `location?` - A field that is a location that is optionally present. This is a `pm_location_t` in C, but if the value is not present then the `length` field will be `0`.
* `uint8` - A field that is an 8-bit unsigned integer. This is a `uint8_t` in C.
* `uint32` - A field that is a 32-bit unsigned integer. This is a `uint32_t` in C.

Expand Down
30 changes: 15 additions & 15 deletions ext/prism/extension.c
Original file line number Diff line number Diff line change
Expand Up @@ -455,23 +455,23 @@ rb_class_new_instance_freeze(int argc, const VALUE *argv, VALUE klass, bool free
* Create a new Location instance from the given parser and bounds.
*/
static inline VALUE
parser_location(const pm_parser_t *parser, VALUE source, bool freeze, const uint8_t *start, size_t length) {
VALUE argv[] = { source, LONG2FIX(start - parser->start), LONG2FIX(length) };
parser_location(VALUE source, bool freeze, uint32_t start, uint32_t length) {
VALUE argv[] = { source, LONG2FIX(start), LONG2FIX(length) };
return rb_class_new_instance_freeze(3, argv, rb_cPrismLocation, freeze);
}

/**
* Create a new Location instance from the given parser and location.
*/
#define PARSER_LOCATION_LOC(parser, source, freeze, loc) \
parser_location(parser, source, freeze, loc.start, (size_t) (loc.end - loc.start))
#define PARSER_LOCATION(source, freeze, location) \
parser_location(source, freeze, location.start, location.length)

/**
* Build a new Comment instance from the given parser and comment.
*/
static inline VALUE
parser_comment(const pm_parser_t *parser, VALUE source, bool freeze, const pm_comment_t *comment) {
VALUE argv[] = { PARSER_LOCATION_LOC(parser, source, freeze, comment->location) };
parser_comment(VALUE source, bool freeze, const pm_comment_t *comment) {
VALUE argv[] = { PARSER_LOCATION(source, freeze, comment->location) };
VALUE type = (comment->type == PM_COMMENT_EMBDOC) ? rb_cPrismEmbDocComment : rb_cPrismInlineComment;
return rb_class_new_instance_freeze(1, argv, type, freeze);
}
Expand All @@ -488,7 +488,7 @@ parser_comments(const pm_parser_t *parser, VALUE source, bool freeze) {
comment != NULL;
comment = (const pm_comment_t *) comment->node.next
) {
VALUE value = parser_comment(parser, source, freeze, comment);
VALUE value = parser_comment(source, freeze, comment);
rb_ary_push(comments, value);
}

Expand All @@ -500,9 +500,9 @@ parser_comments(const pm_parser_t *parser, VALUE source, bool freeze) {
* Build a new MagicComment instance from the given parser and magic comment.
*/
static inline VALUE
parser_magic_comment(const pm_parser_t *parser, VALUE source, bool freeze, const pm_magic_comment_t *magic_comment) {
VALUE key_loc = parser_location(parser, source, freeze, magic_comment->key_start, magic_comment->key_length);
VALUE value_loc = parser_location(parser, source, freeze, magic_comment->value_start, magic_comment->value_length);
parser_magic_comment(VALUE source, bool freeze, const pm_magic_comment_t *magic_comment) {
VALUE key_loc = parser_location(source, freeze, magic_comment->key.start, magic_comment->key.length);
VALUE value_loc = parser_location(source, freeze, magic_comment->value.start, magic_comment->value.length);
VALUE argv[] = { key_loc, value_loc };
return rb_class_new_instance_freeze(2, argv, rb_cPrismMagicComment, freeze);
}
Expand All @@ -519,7 +519,7 @@ parser_magic_comments(const pm_parser_t *parser, VALUE source, bool freeze) {
magic_comment != NULL;
magic_comment = (const pm_magic_comment_t *) magic_comment->node.next
) {
VALUE value = parser_magic_comment(parser, source, freeze, magic_comment);
VALUE value = parser_magic_comment(source, freeze, magic_comment);
rb_ary_push(magic_comments, value);
}

Expand All @@ -533,10 +533,10 @@ parser_magic_comments(const pm_parser_t *parser, VALUE source, bool freeze) {
*/
static VALUE
parser_data_loc(const pm_parser_t *parser, VALUE source, bool freeze) {
if (parser->data_loc.end == NULL) {
if (parser->data_loc.length == 0) {
return Qnil;
} else {
return PARSER_LOCATION_LOC(parser, source, freeze, parser->data_loc);
return parser_location(source, freeze, parser->data_loc.start, parser->data_loc.length);
}
}

Expand All @@ -554,7 +554,7 @@ parser_errors(const pm_parser_t *parser, rb_encoding *encoding, VALUE source, bo
) {
VALUE type = ID2SYM(rb_intern(pm_diagnostic_id_human(error->diag_id)));
VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(error->message, encoding));
VALUE location = PARSER_LOCATION_LOC(parser, source, freeze, error->location);
VALUE location = PARSER_LOCATION(source, freeze, error->location);

VALUE level = Qnil;
switch (error->level) {
Expand Down Expand Up @@ -594,7 +594,7 @@ parser_warnings(const pm_parser_t *parser, rb_encoding *encoding, VALUE source,
) {
VALUE type = ID2SYM(rb_intern(pm_diagnostic_id_human(warning->diag_id)));
VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(warning->message, encoding));
VALUE location = PARSER_LOCATION_LOC(parser, source, freeze, warning->location);
VALUE location = PARSER_LOCATION(source, freeze, warning->location);

VALUE level = Qnil;
switch (warning->level) {
Expand Down
3 changes: 1 addition & 2 deletions include/prism.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,11 +143,10 @@ PRISM_EXPORTED_FUNCTION void pm_serialize_parse_stream(pm_buffer_t *buffer, void
/**
* Serialize the given list of comments to the given buffer.
*
* @param parser The parser to serialize.
* @param list The list of comments to serialize.
* @param buffer The buffer to serialize to.
*/
void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer);
void pm_serialize_comment_list(pm_list_t *list, pm_buffer_t *buffer);

/**
* Serialize the name of the encoding to the buffer.
Expand Down
33 changes: 33 additions & 0 deletions include/prism/defines.h
Original file line number Diff line number Diff line change
Expand Up @@ -257,4 +257,37 @@
#define PRISM_FALLTHROUGH
#endif

/**
 * We need to align nodes in the AST to a pointer boundary so that they can be
 * safely cast to different node types. Use PRISM_ALIGNAS/PRISM_ALIGNOF to
 * specify alignment in a compiler-agnostic way.
 *
 * The branches below are tested in order, so a compiler that reports C11 (or
 * later) always gets the standard spelling, even if it would also match one of
 * the compiler-specific branches that follow.
 */
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L /* C11 or later */
#include <stdalign.h>

/** Specify alignment for a type or variable (standard C11 alignas). */
#define PRISM_ALIGNAS(size) alignas(size)

/** Get the alignment requirement of a type (standard C11 alignof). */
#define PRISM_ALIGNOF(type) alignof(type)
#elif defined(__GNUC__) || defined(__clang__)
/** Specify alignment for a type or variable (GCC/Clang aligned attribute). */
#define PRISM_ALIGNAS(size) __attribute__((aligned(size)))

/** Get the alignment requirement of a type (GCC/Clang __alignof__). */
#define PRISM_ALIGNOF(type) __alignof__(type)
#elif defined(_MSC_VER)
/** Specify alignment for a type or variable (MSVC __declspec(align)). */
#define PRISM_ALIGNAS(size) __declspec(align(size))

/** Get the alignment requirement of a type (MSVC __alignof). */
#define PRISM_ALIGNOF(type) __alignof(type)
#else
/**
 * Expands to nothing: none of the branches above matched, so no alignment
 * specifier is available and declarations keep their default alignment.
 */
#define PRISM_ALIGNAS(size)

/**
 * Fall back to sizeof(type) as an approximation of the alignment requirement
 * of a type, since no alignof-style operator is available here.
 */
#define PRISM_ALIGNOF(type) sizeof(type)
#endif

#endif
14 changes: 4 additions & 10 deletions include/prism/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -479,17 +479,11 @@ typedef struct {
/** The embedded base node. */
pm_list_node_t node;

/** A pointer to the start of the key in the source. */
const uint8_t *key_start;
/** The key of the magic comment. */
pm_location_t key;

/** A pointer to the start of the value in the source. */
const uint8_t *value_start;

/** The length of the key in the source. */
uint32_t key_length;

/** The length of the value in the source. */
uint32_t value_length;
/** The value of the magic comment. */
pm_location_t value;
} pm_magic_comment_t;

/**
Expand Down
6 changes: 4 additions & 2 deletions include/prism/static_literals.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,13 +92,14 @@ typedef struct {
* Add a node to the set of static literals.
*
* @param newline_list The list of newline offsets to use to calculate lines.
* @param start The start of the source being parsed.
* @param start_line The line number that the parser starts on.
* @param literals The set of static literals to add the node to.
* @param node The node to add to the set.
* @param replace Whether to replace the previous node if one already exists.
* @return A pointer to the node that is being overwritten, if there is one.
*/
pm_node_t * pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace);
pm_node_t * pm_static_literals_add(const pm_newline_list_t *newline_list, const uint8_t *start, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace);

/**
* Free the internal memory associated with the given static literals set.
Expand All @@ -112,10 +113,11 @@ void pm_static_literals_free(pm_static_literals_t *literals);
*
* @param buffer The buffer to write the string to.
* @param newline_list The list of newline offsets to use to calculate lines.
* @param start The start of the source being parsed.
* @param start_line The line number that the parser starts on.
* @param encoding_name The name of the encoding of the source being parsed.
* @param node The node to create a string representation of.
*/
void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, int32_t start_line, const char *encoding_name, const pm_node_t *node);
void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, const uint8_t *start, int32_t start_line, const char *encoding_name, const pm_node_t *node);

#endif
4 changes: 3 additions & 1 deletion include/prism/util/pm_char.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,12 @@ size_t pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length);
* @param string The string to search.
* @param length The maximum number of characters to search.
* @param newline_list The list of newlines to populate.
* @param start_offset The offset at which the string occurs in the source, for
* the purpose of tracking newlines.
* @return The number of characters at the start of the string that are
* whitespace.
*/
size_t pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list);
size_t pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list, uint32_t start_offset);

/**
* Returns the number of characters at the start of the string that are inline
Expand Down
23 changes: 9 additions & 14 deletions include/prism/util/pm_newline_list.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,14 @@
* sorted/inserted in ascending order.
*/
typedef struct {
/** A pointer to the start of the source string. */
const uint8_t *start;

/** The number of offsets in the list. */
size_t size;

/** The capacity of the list that has been allocated. */
size_t capacity;

/** The list of offsets. */
size_t *offsets;
uint32_t *offsets;
} pm_newline_list_t;

/**
Expand All @@ -55,53 +52,51 @@ typedef struct {
* allocation of the offsets succeeds, otherwise returns false.
*
* @param list The list to initialize.
* @param start A pointer to the start of the source string.
* @param capacity The initial capacity of the list.
* @return True if the allocation of the offsets succeeds, otherwise false.
*/
bool pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity);
bool pm_newline_list_init(pm_newline_list_t *list, size_t capacity);

/**
* Clear out the newlines that have been appended to the list.
*
* @param list The list to clear.
*/
void
pm_newline_list_clear(pm_newline_list_t *list);
void pm_newline_list_clear(pm_newline_list_t *list);

/**
* Append a new offset to the newline list. Returns true if the reallocation of
* the offsets succeeds (if one was necessary), otherwise returns false.
*
* @param list The list to append to.
* @param cursor A pointer to the offset to append.
* @param cursor The offset to append.
* @return True if the reallocation of the offsets succeeds (if one was
* necessary), otherwise false.
*/
bool pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor);
bool pm_newline_list_append(pm_newline_list_t *list, uint32_t cursor);

/**
* Returns the line of the given offset. If the offset is not in the list, the
* line of the closest offset less than the given offset is returned.
*
* @param list The list to search.
* @param cursor A pointer to the offset to search for.
* @param cursor The offset to search for.
* @param start_line The line to start counting from.
* @return The line of the given offset.
*/
int32_t pm_newline_list_line(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line);
int32_t pm_newline_list_line(const pm_newline_list_t *list, uint32_t cursor, int32_t start_line);

/**
* Returns the line and column of the given offset. If the offset is not in the
* list, the line and column of the closest offset less than the given offset
* are returned.
*
* @param list The list to search.
* @param cursor A pointer to the offset to search for.
* @param cursor The offset to search for.
* @param start_line The line to start counting from.
* @return The line and column of the given offset.
*/
pm_line_column_t pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line);
pm_line_column_t pm_newline_list_line_column(const pm_newline_list_t *list, uint32_t cursor, int32_t start_line);

/**
* Free the internal memory allocated for the newline list.
Expand Down
2 changes: 1 addition & 1 deletion lib/prism/translation/parser/compiler.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1767,7 +1767,7 @@ def visit_symbol_node(node)
end
else
parts =
if node.value == ""
if node.value_loc.nil?
[]
elsif node.value.include?("\n")
string_nodes_from_line_continuations(node.unescaped, node.value, node.value_loc.start_offset, node.opening)
Expand Down
1 change: 0 additions & 1 deletion lib/prism/translation/parser/lexer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ class Lexer
TYPES = {
# These tokens should never appear in the output of the lexer.
MISSING: nil,
NOT_PROVIDED: nil,
EMBDOC_END: nil,
EMBDOC_LINE: nil,

Expand Down
13 changes: 6 additions & 7 deletions lib/prism/translation/ripper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3106,14 +3106,13 @@ def visit_super_node(node)
# :foo
# ^^^^
def visit_symbol_node(node)
if (opening = node.opening)&.match?(/^%s|['"]:?$/)
if node.value_loc.nil?
bounds(node.location)
on_dyna_symbol(on_string_content)
elsif (opening = node.opening)&.match?(/^%s|['"]:?$/)
bounds(node.value_loc)
content = on_string_content

if !(value = node.value).empty?
content = on_string_add(content, on_tstring_content(value))
end

content = on_string_add(on_string_content, on_tstring_content(node.value))
bounds(node.location)
on_dyna_symbol(content)
elsif (closing = node.closing) == ":"
bounds(node.location)
Expand Down
8 changes: 4 additions & 4 deletions rust/ruby-prism-sys/tests/parser_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ fn comments_test() {
assert_eq!((*comment).type_, pm_comment_type_t::PM_COMMENT_INLINE);

let location = {
let start = (*comment).location.start.offset_from(parser.start);
let end = (*comment).location.end.offset_from(parser.start);
let start = (*comment).location.start;
let end = (*comment).location.start + (*comment).location.length;
start..end
};
assert_eq!(location, 0..7);
Expand Down Expand Up @@ -89,8 +89,8 @@ fn diagnostics_test() {
);

let location = {
let start = (*error).location.start.offset_from(parser.start);
let end = (*error).location.end.offset_from(parser.start);
let start = (*error).location.start;
let end = (*error).location.start + (*error).location.length;
start..end
};
assert_eq!(location, 10..10);
Expand Down
8 changes: 4 additions & 4 deletions rust/ruby-prism/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -372,8 +372,8 @@ fn write_node(file: &mut File, flags: &[Flags], node: &Node) -> Result<(), Box<d
NodeFieldType::OptionalLocation => {
writeln!(file, " pub fn {}(&self) -> Option<Location<'pr>> {{", field.name)?;
writeln!(file, " let pointer: *mut pm_location_t = unsafe {{ &raw mut (*self.pointer).{} }};", field.name)?;
writeln!(file, " let start = unsafe {{ (*pointer).start }};")?;
writeln!(file, " if start.is_null() {{")?;
writeln!(file, " let length = unsafe {{ (*pointer).length }};")?;
writeln!(file, " if length == 0 {{")?;
writeln!(file, " None")?;
writeln!(file, " }} else {{")?;
writeln!(file, " Some(Location::new(self.parser, unsafe {{ &(*pointer) }}))")?;
Expand Down Expand Up @@ -605,9 +605,9 @@ impl<'pr> Node<'pr> {{
/// Panics if the node type cannot be read.
///
#[allow(clippy::not_unsafe_ptr_arg_deref)]
#[allow(clippy::cast_ptr_alignment)]
pub(crate) fn new(parser: NonNull<pm_parser_t>, node: *mut pm_node_t) -> Self {{
match unsafe {{ (*node).type_ }} {{
"
match unsafe {{ (*node).type_ }} {{"
)?;

for node in &config.nodes {
Expand Down
Loading
Loading