Skip to content

Commit 02d92c7

Browse files
committed
Introduce using slices instead of locations
In the C API, we want to use slices instead of locations in the AST. In this case a "slice" is effectively the same thing as the location, expect it is represented using a 32-bit offset and a 32-bit length. This will cut down on half of the space of all of the locations in the AST. I am introducing this as a new field type to ease the migration path, so that this can be merged on its own and then fields can be moved one at a time. Note that from the Ruby/Java/JavaScript side, this is effectively an invisible change. This only impacts the C/Rust side.
1 parent eb4a823 commit 02d92c7

File tree

25 files changed

+183
-23
lines changed

25 files changed

+183
-23
lines changed

config.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -819,7 +819,7 @@ nodes:
819819
alias $foo $bar
820820
^^^^
821821
- name: keyword_loc
822-
type: location
822+
type: slice
823823
comment: |
824824
The location of the `alias` keyword.
825825

rbi/prism/reflection.rbi

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ end
3535
class Prism::Reflection::LocationField < Prism::Reflection::Field
3636
end
3737

38+
class Prism::Reflection::SliceField < Prism::Reflection::Field
39+
end
40+
3841
class Prism::Reflection::OptionalLocationField < Prism::Reflection::Field
3942
end
4043

rust/ruby-prism/build.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ enum NodeFieldType {
3131
#[serde(rename = "location")]
3232
Location,
3333

34+
#[serde(rename = "slice")]
35+
Slice,
36+
3437
#[serde(rename = "location?")]
3538
OptionalLocation,
3639

@@ -369,6 +372,12 @@ fn write_node(file: &mut File, flags: &[Flags], node: &Node) -> Result<(), Box<d
369372
writeln!(file, " Location::new(self.parser, unsafe {{ &(*pointer) }})")?;
370373
writeln!(file, " }}")?;
371374
},
375+
NodeFieldType::Slice => {
376+
writeln!(file, " pub fn {}(&self) -> Slice<'pr> {{", field.name)?;
377+
writeln!(file, " let pointer: *mut pm_slice_t = unsafe {{ &raw mut (*self.pointer).{} }};", field.name)?;
378+
writeln!(file, " Slice::new(self.parser, unsafe {{ &(*pointer) }})")?;
379+
writeln!(file, " }}")?;
380+
},
372381
NodeFieldType::OptionalLocation => {
373382
writeln!(file, " pub fn {}(&self) -> Option<Location<'pr>> {{", field.name)?;
374383
writeln!(file, " let pointer: *mut pm_location_t = unsafe {{ &raw mut (*self.pointer).{} }};", field.name)?;
@@ -558,7 +567,7 @@ use std::ptr::NonNull;
558567
559568
#[allow(clippy::wildcard_imports)]
560569
use ruby_prism_sys::*;
561-
use crate::{{ConstantId, ConstantList, Integer, Location, NodeList}};
570+
use crate::{{ConstantId, ConstantList, Integer, Location, Slice, NodeList}};
562571
"
563572
)?;
564573

rust/ruby-prism/src/lib.rs

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ use std::mem::MaybeUninit;
1919
use std::ptr::NonNull;
2020

2121
pub use self::bindings::*;
22-
use ruby_prism_sys::{pm_comment_t, pm_comment_type_t, pm_constant_id_list_t, pm_constant_id_t, pm_diagnostic_t, pm_integer_t, pm_location_t, pm_magic_comment_t, pm_node_destroy, pm_node_list, pm_node_t, pm_parse, pm_parser_free, pm_parser_init, pm_parser_t};
22+
use ruby_prism_sys::{pm_comment_t, pm_comment_type_t, pm_constant_id_list_t, pm_constant_id_t, pm_diagnostic_t, pm_integer_t, pm_location_t, pm_magic_comment_t, pm_node_destroy, pm_node_list, pm_node_t, pm_parse, pm_parser_free, pm_parser_init, pm_parser_t, pm_slice_t};
2323

2424
/// A range in the source file.
2525
pub struct Location<'pr> {
@@ -109,6 +109,53 @@ impl std::fmt::Debug for Location<'_> {
109109
}
110110
}
111111

112+
/// A range in the source file, represented as a start offset and length.
113+
pub struct Slice<'pr> {
114+
parser: NonNull<pm_parser_t>,
115+
pub(crate) start: u32,
116+
pub(crate) length: u32,
117+
marker: PhantomData<&'pr [u8]>,
118+
}
119+
120+
impl<'pr> Slice<'pr> {
121+
/// Returns a byte slice for the range.
122+
#[must_use]
123+
pub fn as_slice(&self) -> &'pr [u8] {
124+
unsafe {
125+
let parser_start = (*self.parser.as_ptr()).start;
126+
std::slice::from_raw_parts(parser_start.add(self.start as usize), self.length as usize)
127+
}
128+
}
129+
130+
/// Return a Slice from the given `pm_slice_t`.
131+
#[must_use]
132+
pub(crate) const fn new(parser: NonNull<pm_parser_t>, slice: &'pr pm_slice_t) -> Self {
133+
Slice {
134+
parser,
135+
start: slice.start,
136+
length: slice.length,
137+
marker: PhantomData,
138+
}
139+
}
140+
}
141+
142+
impl std::fmt::Debug for Slice<'_> {
143+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
144+
let slice: &[u8] = self.as_slice();
145+
146+
let mut visible = String::new();
147+
visible.push('"');
148+
149+
for &byte in slice {
150+
let part: Vec<u8> = std::ascii::escape_default(byte).collect();
151+
visible.push_str(std::str::from_utf8(&part).unwrap());
152+
}
153+
154+
visible.push('"');
155+
write!(f, "{visible}")
156+
}
157+
}
158+
112159
/// An iterator over the nodes in a list.
113160
pub struct NodeListIter<'pr> {
114161
parser: NonNull<pm_parser_t>,

sig/prism/reflection.rbs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ module Prism
3030
class LocationField < Field
3131
end
3232

33+
class SliceField < Field
34+
end
35+
3336
class OptionalLocationField < Field
3437
end
3538

src/prism.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1947,6 +1947,8 @@ pm_missing_node_create(pm_parser_t *parser, const uint8_t *start, const uint8_t
19471947
return node;
19481948
}
19491949

1950+
#define TOKEN2SLICE(parser_, token_) ((pm_slice_t) { .start = (uint32_t) ((token_)->start - (parser_)->start), .length = (uint32_t) ((token_)->end - (token_)->start) })
1951+
19501952
/**
19511953
* Allocate and initialize a new AliasGlobalVariableNode node.
19521954
*/
@@ -1966,7 +1968,7 @@ pm_alias_global_variable_node_create(pm_parser_t *parser, const pm_token_t *keyw
19661968
},
19671969
.new_name = new_name,
19681970
.old_name = old_name,
1969-
.keyword_loc = PM_LOCATION_TOKEN_VALUE(keyword)
1971+
.keyword_loc = TOKEN2SLICE(parser, keyword)
19701972
};
19711973

19721974
return node;

templates/ext/prism/api_node.c.erb

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,22 @@ pm_location_new(const pm_parser_t *parser, const uint8_t *start, const uint8_t *
2727
}
2828
}
2929

30+
static VALUE
31+
pm_slice_new(const uint32_t start, const uint32_t length, VALUE source, bool freeze) {
32+
if (freeze) {
33+
VALUE slice_argv[] = {
34+
source,
35+
LONG2FIX(start),
36+
LONG2FIX(length)
37+
};
38+
39+
return rb_obj_freeze(rb_class_new_instance(3, slice_argv, rb_cPrismLocation));
40+
} else {
41+
uint64_t value = ((((uint64_t) start) << 32) | ((uint64_t) length));
42+
return ULL2NUM(value);
43+
}
44+
}
45+
3046
VALUE
3147
pm_token_new(const pm_parser_t *parser, const pm_token_t *token, rb_encoding *encoding, VALUE source, bool freeze) {
3248
ID type = rb_intern(pm_token_type_name(token->type));
@@ -241,6 +257,9 @@ pm_ast_new(const pm_parser_t *parser, const pm_node_t *node, rb_encoding *encodi
241257
<%- when Prism::Template::OptionalLocationField -%>
242258
#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
243259
argv[<%= index %>] = cast-><%= field.name %>.start == NULL ? Qnil : pm_location_new(parser, cast-><%= field.name %>.start, cast-><%= field.name %>.end, source, freeze);
260+
<%- when Prism::Template::SliceField -%>
261+
#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
262+
argv[<%= index %>] = pm_slice_new(cast-><%= field.name %>.start, cast-><%= field.name %>.length, source, freeze);
244263
<%- when Prism::Template::UInt8Field -%>
245264
#line <%= __LINE__ + 1 %> "prism/templates/ext/prism/<%= File.basename(__FILE__) %>"
246265
argv[<%= index %>] = UINT2NUM(cast-><%= field.name %>);

templates/include/prism/ast.h.erb

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,22 @@ typedef struct {
5757
const uint8_t *end;
5858
} pm_location_t;
5959

60+
/**
61+
* This struct represents a slice in the source code, defined by an offset and
62+
* a length. Note that we have confirmation that we can represent all slices
63+
* within Ruby source files using 32-bit integers per:
64+
*
65+
* https://bugs.ruby-lang.org/issues/20488#note-1
66+
*
67+
*/
68+
typedef struct {
69+
/** The offset of the slice from the start of the source. */
70+
uint32_t start;
71+
72+
/** The length of the slice. */
73+
uint32_t length;
74+
} pm_slice_t;
75+
6076
struct pm_node;
6177

6278
/**
@@ -191,6 +207,7 @@ typedef struct pm_<%= node.human %> {
191207
when Prism::Template::ConstantListField then "pm_constant_id_list_t #{field.name}"
192208
when Prism::Template::StringField then "pm_string_t #{field.name}"
193209
when Prism::Template::LocationField, Prism::Template::OptionalLocationField then "pm_location_t #{field.name}"
210+
when Prism::Template::SliceField then "pm_slice_t #{field.name}"
194211
when Prism::Template::UInt8Field then "uint8_t #{field.name}"
195212
when Prism::Template::UInt32Field then "uint32_t #{field.name}"
196213
when Prism::Template::IntegerField then "pm_integer_t #{field.name}"

templates/java/org/prism/Loader.java.erb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -386,7 +386,7 @@ public class Loader {
386386
when Prism::Template::ConstantField then "loadConstant()"
387387
when Prism::Template::OptionalConstantField then "loadOptionalConstant()"
388388
when Prism::Template::ConstantListField then "loadConstants()"
389-
when Prism::Template::LocationField then "loadLocation()"
389+
when Prism::Template::LocationField, Prism::Template::SliceField then "loadLocation()"
390390
when Prism::Template::OptionalLocationField then "loadOptionalLocation()"
391391
when Prism::Template::UInt8Field then "buffer.get()"
392392
when Prism::Template::UInt32Field then "loadVarUInt()"

templates/java/org/prism/Nodes.java.erb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,7 @@ public abstract class Nodes {
342342
<%- if node.fields.any?(Prism::Template::NodeListField) or node.fields.any?(Prism::Template::ConstantListField) -%>
343343
String nextNextIndent = nextIndent + " ";
344344
<%- end -%>
345-
<%- [*node.flags, *node.fields.grep_v(Prism::Template::LocationField).grep_v(Prism::Template::OptionalLocationField)].each do |field| -%>
345+
<%- [*node.flags, *node.fields.grep_v(Prism::Template::LocationField).grep_v(Prism::Template::SliceField).grep_v(Prism::Template::OptionalLocationField)].each do |field| -%>
346346
builder.append(nextIndent);
347347
builder.append("<%= field.name %>: ");
348348
<%- case field -%>

0 commit comments

Comments
 (0)