ruby · kddnewton · Dec 1, 2025 · Dec 1, 2025 · Dec 1, 2025 · Dec 1, 2025
diff --git a/config.yml b/config.yml
@@ -653,10 +653,6 @@ tokens:
     comment: "a separator between words in a list"
   - name: __END__
     comment: "marker for the point in the file at which the parser should stop"
-  - name: MISSING
-    comment: "a token that was expected but not found"
-  - name: NOT_PROVIDED
-    comment: "a token that was not present but it is okay"
 flags:
   - name: ArgumentsNodeFlags
     values:

diff --git a/docs/configuration.md b/docs/configuration.md
@@ -61,7 +61,7 @@ The available values for `type` are:
 * `constant` - A field that is an integer that represents an index in the constant pool. This is a `pm_constant_id_t` in C.
 * `constant[]` - A field that is an array of constants. This is a `pm_constant_id_list_t` in C.
 * `location` - A field that is a location. This is a `pm_location_t` in C.
-* `location?` - A field that is a location that is optionally present. This is a `pm_location_t` in C, but if the value is not present then the `start` and `end` fields will be `NULL`.
+* `location?` - A field that is a location that is optionally present. This is a `pm_location_t` in C, but if the value is not present then the `length` field will be `0`.
 * `uint8` - A field that is an 8-bit unsigned integer. This is a `uint8_t` in C.
 * `uint32` - A field that is a 32-bit unsigned integer. This is a `uint32_t` in C.
 

diff --git a/ext/prism/extension.c b/ext/prism/extension.c
@@ -455,23 +455,23 @@ rb_class_new_instance_freeze(int argc, const VALUE *argv, VALUE klass, bool free
  * Create a new Location instance from the given parser and bounds.
  */
 static inline VALUE
-parser_location(const pm_parser_t *parser, VALUE source, bool freeze, const uint8_t *start, size_t length) {
-    VALUE argv[] = { source, LONG2FIX(start - parser->start), LONG2FIX(length) };
+parser_location(VALUE source, bool freeze, uint32_t start, uint32_t length) {
+    VALUE argv[] = { source, LONG2FIX(start), LONG2FIX(length) };
     return rb_class_new_instance_freeze(3, argv, rb_cPrismLocation, freeze);
 }
 
 /**
  * Create a new Location instance from the given parser and location.
  */
-#define PARSER_LOCATION_LOC(parser, source, freeze, loc) \
-    parser_location(parser, source, freeze, loc.start, (size_t) (loc.end - loc.start))
+#define PARSER_LOCATION(source, freeze, location) \
+    parser_location(source, freeze, location.start, location.length)
 
 /**
  * Build a new Comment instance from the given parser and comment.
  */
 static inline VALUE
-parser_comment(const pm_parser_t *parser, VALUE source, bool freeze, const pm_comment_t *comment) {
-    VALUE argv[] = { PARSER_LOCATION_LOC(parser, source, freeze, comment->location) };
+parser_comment(VALUE source, bool freeze, const pm_comment_t *comment) {
+    VALUE argv[] = { PARSER_LOCATION(source, freeze, comment->location) };
     VALUE type = (comment->type == PM_COMMENT_EMBDOC) ? rb_cPrismEmbDocComment : rb_cPrismInlineComment;
     return rb_class_new_instance_freeze(1, argv, type, freeze);
 }
@@ -488,7 +488,7 @@ parser_comments(const pm_parser_t *parser, VALUE source, bool freeze) {
         comment != NULL;
         comment = (const pm_comment_t *) comment->node.next
     ) {
-        VALUE value = parser_comment(parser, source, freeze, comment);
+        VALUE value = parser_comment(source, freeze, comment);
         rb_ary_push(comments, value);
     }
 
@@ -500,9 +500,9 @@ parser_comments(const pm_parser_t *parser, VALUE source, bool freeze) {
  * Build a new MagicComment instance from the given parser and magic comment.
  */
 static inline VALUE
-parser_magic_comment(const pm_parser_t *parser, VALUE source, bool freeze, const pm_magic_comment_t *magic_comment) {
-    VALUE key_loc = parser_location(parser, source, freeze, magic_comment->key_start, magic_comment->key_length);
-    VALUE value_loc = parser_location(parser, source, freeze, magic_comment->value_start, magic_comment->value_length);
+parser_magic_comment(VALUE source, bool freeze, const pm_magic_comment_t *magic_comment) {
+    VALUE key_loc = parser_location(source, freeze, magic_comment->key.start, magic_comment->key.length);
+    VALUE value_loc = parser_location(source, freeze, magic_comment->value.start, magic_comment->value.length);
     VALUE argv[] = { key_loc, value_loc };
     return rb_class_new_instance_freeze(2, argv, rb_cPrismMagicComment, freeze);
 }
@@ -519,7 +519,7 @@ parser_magic_comments(const pm_parser_t *parser, VALUE source, bool freeze) {
         magic_comment != NULL;
         magic_comment = (const pm_magic_comment_t *) magic_comment->node.next
     ) {
-        VALUE value = parser_magic_comment(parser, source, freeze, magic_comment);
+        VALUE value = parser_magic_comment(source, freeze, magic_comment);
         rb_ary_push(magic_comments, value);
     }
 
@@ -533,10 +533,10 @@ parser_magic_comments(const pm_parser_t *parser, VALUE source, bool freeze) {
  */
 static VALUE
 parser_data_loc(const pm_parser_t *parser, VALUE source, bool freeze) {
-    if (parser->data_loc.end == NULL) {
+    if (parser->data_loc.length == 0) {
         return Qnil;
     } else {
-        return PARSER_LOCATION_LOC(parser, source, freeze, parser->data_loc);
+        return parser_location(source, freeze, parser->data_loc.start, parser->data_loc.length);
     }
 }
 
@@ -554,7 +554,7 @@ parser_errors(const pm_parser_t *parser, rb_encoding *encoding, VALUE source, bo
     ) {
         VALUE type = ID2SYM(rb_intern(pm_diagnostic_id_human(error->diag_id)));
         VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(error->message, encoding));
-        VALUE location = PARSER_LOCATION_LOC(parser, source, freeze, error->location);
+        VALUE location = PARSER_LOCATION(source, freeze, error->location);
 
         VALUE level = Qnil;
         switch (error->level) {
@@ -594,7 +594,7 @@ parser_warnings(const pm_parser_t *parser, rb_encoding *encoding, VALUE source,
     ) {
         VALUE type = ID2SYM(rb_intern(pm_diagnostic_id_human(warning->diag_id)));
         VALUE message = rb_obj_freeze(rb_enc_str_new_cstr(warning->message, encoding));
-        VALUE location = PARSER_LOCATION_LOC(parser, source, freeze, warning->location);
+        VALUE location = PARSER_LOCATION(source, freeze, warning->location);
 
         VALUE level = Qnil;
         switch (warning->level) {

diff --git a/include/prism.h b/include/prism.h
@@ -143,11 +143,10 @@ PRISM_EXPORTED_FUNCTION void pm_serialize_parse_stream(pm_buffer_t *buffer, void
 /**
  * Serialize the given list of comments to the given buffer.
  *
- * @param parser The parser to serialize.
  * @param list The list of comments to serialize.
  * @param buffer The buffer to serialize to.
  */
-void pm_serialize_comment_list(pm_parser_t *parser, pm_list_t *list, pm_buffer_t *buffer);
+void pm_serialize_comment_list(pm_list_t *list, pm_buffer_t *buffer);
 
 /**
  * Serialize the name of the encoding to the buffer.

diff --git a/include/prism/defines.h b/include/prism/defines.h
@@ -257,4 +257,37 @@
     #define PRISM_FALLTHROUGH
 #endif
 
+/**
+ * We need to align nodes in the AST to a pointer boundary so that they can be
+ * safely cast to different node types. Use PRISM_ALIGNAS to specify alignment
+ * and PRISM_ALIGNOF to query it in a compiler-agnostic way.
+ */
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L /* C11 or later */
+    #include <stdalign.h>
+
+    /** Specify alignment for a type or variable. */
+    #define PRISM_ALIGNAS(size) alignas(size)
+
+    /** Get the alignment requirement of a type. */
+    #define PRISM_ALIGNOF(type) alignof(type)
+#elif defined(__GNUC__) || defined(__clang__)
+    /** Specify alignment for a type or variable. */
+    #define PRISM_ALIGNAS(size) __attribute__((aligned(size)))
+
+    /** Get the alignment requirement of a type. */
+    #define PRISM_ALIGNOF(type) __alignof__(type)
+#elif defined(_MSC_VER)
+    /** Specify alignment for a type or variable. */
+    #define PRISM_ALIGNAS(size) __declspec(align(size))
+
+    /** Get the alignment requirement of a type. */
+    #define PRISM_ALIGNOF(type) __alignof(type)
+#else
+    /** Expands to nothing because this platform does not support specifying alignment. */
+    #define PRISM_ALIGNAS(size)
+
+    /** Fall back to sizeof as the alignment requirement of a type. */
+    #define PRISM_ALIGNOF(type) sizeof(type)
+#endif
+
 #endif
diff --git a/include/prism/parser.h b/include/prism/parser.h
@@ -479,17 +479,11 @@ typedef struct {
     /** The embedded base node. */
     pm_list_node_t node;
 
-    /** A pointer to the start of the key in the source. */
-    const uint8_t *key_start;
+    /** The key of the magic comment. */
+    pm_location_t key;
 
-    /** A pointer to the start of the value in the source. */
-    const uint8_t *value_start;
-
-    /** The length of the key in the source. */
-    uint32_t key_length;
-
-    /** The length of the value in the source. */
-    uint32_t value_length;
+    /** The value of the magic comment. */
+    pm_location_t value;
 } pm_magic_comment_t;
 
 /**

diff --git a/include/prism/static_literals.h b/include/prism/static_literals.h
@@ -92,13 +92,14 @@ typedef struct {
  * Add a node to the set of static literals.
  *
  * @param newline_list The list of newline offsets to use to calculate lines.
+ * @param start The start of the source being parsed.
  * @param start_line The line number that the parser starts on.
  * @param literals The set of static literals to add the node to.
  * @param node The node to add to the set.
  * @param replace Whether to replace the previous node if one already exists.
  * @return A pointer to the node that is being overwritten, if there is one.
  */
-pm_node_t * pm_static_literals_add(const pm_newline_list_t *newline_list, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace);
+pm_node_t * pm_static_literals_add(const pm_newline_list_t *newline_list, const uint8_t *start, int32_t start_line, pm_static_literals_t *literals, pm_node_t *node, bool replace);
 
 /**
  * Free the internal memory associated with the given static literals set.
@@ -112,10 +113,11 @@ void pm_static_literals_free(pm_static_literals_t *literals);
  *
  * @param buffer The buffer to write the string to.
  * @param newline_list The list of newline offsets to use to calculate lines.
+ * @param start The start of the source being parsed.
  * @param start_line The line number that the parser starts on.
  * @param encoding_name The name of the encoding of the source being parsed.
  * @param node The node to create a string representation of.
  */
-void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, int32_t start_line, const char *encoding_name, const pm_node_t *node);
+void pm_static_literal_inspect(pm_buffer_t *buffer, const pm_newline_list_t *newline_list, const uint8_t *start, int32_t start_line, const char *encoding_name, const pm_node_t *node);
 
 #endif
diff --git a/include/prism/util/pm_char.h b/include/prism/util/pm_char.h
@@ -31,10 +31,12 @@ size_t pm_strspn_whitespace(const uint8_t *string, ptrdiff_t length);
  * @param string The string to search.
  * @param length The maximum number of characters to search.
  * @param newline_list The list of newlines to populate.
+ * @param start_offset The offset at which the string occurs in the source, for
+ *   the purpose of tracking newlines.
  * @return The number of characters at the start of the string that are
  *     whitespace.
  */
-size_t pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list);
+size_t pm_strspn_whitespace_newlines(const uint8_t *string, ptrdiff_t length, pm_newline_list_t *newline_list, uint32_t start_offset);
 
 /**
  * Returns the number of characters at the start of the string that are inline

diff --git a/include/prism/util/pm_newline_list.h b/include/prism/util/pm_newline_list.h
@@ -26,17 +26,14 @@
  * sorted/inserted in ascending order.
  */
 typedef struct {
-    /** A pointer to the start of the source string. */
-    const uint8_t *start;
-
     /** The number of offsets in the list. */
     size_t size;
 
     /** The capacity of the list that has been allocated. */
     size_t capacity;
 
     /** The list of offsets. */
-    size_t *offsets;
+    uint32_t *offsets;
 } pm_newline_list_t;
 
 /**
@@ -55,53 +52,51 @@ typedef struct {
  * allocation of the offsets succeeds, otherwise returns false.
  *
  * @param list The list to initialize.
- * @param start A pointer to the start of the source string.
  * @param capacity The initial capacity of the list.
  * @return True if the allocation of the offsets succeeds, otherwise false.
  */
-bool pm_newline_list_init(pm_newline_list_t *list, const uint8_t *start, size_t capacity);
+bool pm_newline_list_init(pm_newline_list_t *list, size_t capacity);
 
 /**
  * Clear out the newlines that have been appended to the list.
  *
  * @param list The list to clear.
  */
-void
-pm_newline_list_clear(pm_newline_list_t *list);
+void pm_newline_list_clear(pm_newline_list_t *list);
 
 /**
  * Append a new offset to the newline list. Returns true if the reallocation of
  * the offsets succeeds (if one was necessary), otherwise returns false.
  *
  * @param list The list to append to.
- * @param cursor A pointer to the offset to append.
+ * @param cursor The offset to append.
  * @return True if the reallocation of the offsets succeeds (if one was
  *     necessary), otherwise false.
  */
-bool pm_newline_list_append(pm_newline_list_t *list, const uint8_t *cursor);
+bool pm_newline_list_append(pm_newline_list_t *list, uint32_t cursor);
 
 /**
  * Returns the line of the given offset. If the offset is not in the list, the
  * line of the closest offset less than the given offset is returned.
  *
  * @param list The list to search.
- * @param cursor A pointer to the offset to search for.
+ * @param cursor The offset to search for.
  * @param start_line The line to start counting from.
  * @return The line of the given offset.
  */
-int32_t pm_newline_list_line(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line);
+int32_t pm_newline_list_line(const pm_newline_list_t *list, uint32_t cursor, int32_t start_line);
 
 /**
  * Returns the line and column of the given offset. If the offset is not in the
  * list, the line and column of the closest offset less than the given offset
  * are returned.
  *
  * @param list The list to search.
- * @param cursor A pointer to the offset to search for.
+ * @param cursor The offset to search for.
  * @param start_line The line to start counting from.
  * @return The line and column of the given offset.
  */
-pm_line_column_t pm_newline_list_line_column(const pm_newline_list_t *list, const uint8_t *cursor, int32_t start_line);
+pm_line_column_t pm_newline_list_line_column(const pm_newline_list_t *list, uint32_t cursor, int32_t start_line);
 
 /**
  * Free the internal memory allocated for the newline list.

diff --git a/lib/prism/translation/parser/compiler.rb b/lib/prism/translation/parser/compiler.rb
@@ -1767,7 +1767,7 @@ def visit_symbol_node(node)
             end
           else
             parts =
-              if node.value == ""
+              if node.value_loc.nil?
                 []
               elsif node.value.include?("\n")
                 string_nodes_from_line_continuations(node.unescaped, node.value, node.value_loc.start_offset, node.opening)

diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb
@@ -19,7 +19,6 @@ class Lexer
         TYPES = {
           # These tokens should never appear in the output of the lexer.
           MISSING: nil,
-          NOT_PROVIDED: nil,
           EMBDOC_END: nil,
           EMBDOC_LINE: nil,
 

diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb
@@ -3106,14 +3106,13 @@ def visit_super_node(node)
       # :foo
       # ^^^^
       def visit_symbol_node(node)
-        if (opening = node.opening)&.match?(/^%s|['"]:?$/)
+        if node.value_loc.nil?
+          bounds(node.location)
+          on_dyna_symbol(on_string_content)
+        elsif (opening = node.opening)&.match?(/^%s|['"]:?$/)
           bounds(node.value_loc)
-          content = on_string_content
-
-          if !(value = node.value).empty?
-            content = on_string_add(content, on_tstring_content(value))
-          end
-
+          content = on_string_add(on_string_content, on_tstring_content(node.value))
+          bounds(node.location)
           on_dyna_symbol(content)
         elsif (closing = node.closing) == ":"
           bounds(node.location)

diff --git a/rust/ruby-prism-sys/tests/parser_tests.rs b/rust/ruby-prism-sys/tests/parser_tests.rs
@@ -52,8 +52,8 @@ fn comments_test() {
         assert_eq!((*comment).type_, pm_comment_type_t::PM_COMMENT_INLINE);
 
         let location = {
-            let start = (*comment).location.start.offset_from(parser.start);
-            let end = (*comment).location.end.offset_from(parser.start);
+            let start = (*comment).location.start;
+            let end = (*comment).location.start + (*comment).location.length;
             start..end
         };
         assert_eq!(location, 0..7);
@@ -89,8 +89,8 @@ fn diagnostics_test() {
         );
 
         let location = {
-            let start = (*error).location.start.offset_from(parser.start);
-            let end = (*error).location.end.offset_from(parser.start);
+            let start = (*error).location.start;
+            let end = (*error).location.start + (*error).location.length;
             start..end
         };
         assert_eq!(location, 10..10);

diff --git a/rust/ruby-prism/build.rs b/rust/ruby-prism/build.rs
@@ -372,8 +372,8 @@ fn write_node(file: &mut File, flags: &[Flags], node: &Node) -> Result<(), Box<d
             NodeFieldType::OptionalLocation => {
                 writeln!(file, "    pub fn {}(&self) -> Option<Location<'pr>> {{", field.name)?;
                 writeln!(file, "        let pointer: *mut pm_location_t = unsafe {{ &raw mut (*self.pointer).{} }};", field.name)?;
-                writeln!(file, "        let start = unsafe {{ (*pointer).start }};")?;
-                writeln!(file, "        if start.is_null() {{")?;
+                writeln!(file, "        let length = unsafe {{ (*pointer).length }};")?;
+                writeln!(file, "        if length == 0 {{")?;
                 writeln!(file, "            None")?;
                 writeln!(file, "        }} else {{")?;
                 writeln!(file, "            Some(Location::new(self.parser, unsafe {{ &(*pointer) }}))")?;
@@ -605,9 +605,9 @@ impl<'pr> Node<'pr> {{
     /// Panics if the node type cannot be read.
     ///
     #[allow(clippy::not_unsafe_ptr_arg_deref)]
+    #[allow(clippy::cast_ptr_alignment)]
     pub(crate) fn new(parser: NonNull<pm_parser_t>, node: *mut pm_node_t) -> Self {{
-        match unsafe {{ (*node).type_ }} {{
-"
+        match unsafe {{ (*node).type_ }} {{"
     )?;
 
     for node in &config.nodes {