Skip to content

Commit 4d63af7

Browse files
committed
Split parser implementation into several files
gcc/rust/ChangeLog: * parse/rust-parse-impl.h (Parser::parse_inner_attributes): Move to rust-parse-impl-XXX.hxx implementation file. (Parser::parse_doc_comment): Likewise. Change return type to AttributeBody. (Parser::parse_inner_attribute): Likewise. (Parser::parse_attribute_body): Likewise. (Parser::parse_identifier_or_keyword_token): Likewise. (Parser::parse_outer_attributes): Likewise. (Parser::is_macro_rules_def): Likewise. (Parser::parse_simple_path): Likewise. (Parser::parse_item): Likewise. (Parser::parse_simple_path_segment): Likewise. (Parser::parse_path_ident_segment): Likewise. (Parser::parse_vis_item): Likewise. (Parser::parse_attr_input): Likewise. (Parser::parse_async_item): Likewise. (Parser::parse_delim_token_tree): Likewise. (Parser::parse_macro_rules_def): Likewise. (Parser::parse_decl_macro_def): Likewise. (Parser::parse_token_tree): Likewise. (Parser::parse_visibility): Likewise. (Parser::parse_module): Likewise. (Parser::parse_outer_attribute): Likewise. (Parser::parse_extern_crate): Likewise. (Parser::parse_use_decl): Likewise. (Parser::parse_use_tree): Likewise. (Parser::parse_function): Likewise. (Parser::parse_macro_invocation): Likewise. (Parser::parse_macro_rule): Likewise. (Parser::parse_function_qualifiers): Likewise. (Parser::parse_macro_matcher): Likewise. (Parser::parse_generic_params_in_angles): Likewise. (Parser::parse_macro_match): Likewise. (Parser::parse_generic_param): Likewise. (Parser::parse_macro_match_fragment): Likewise. (Parser::parse_macro_match_repetition): Likewise. (Parser::parse_generic_params): Likewise. (Parser::parse_lifetime_params): Likewise. (Parser::parse_lifetime_params_objs): Likewise. (Parser::parse_lifetime_param): Likewise. (Parser::parse_type_params): Likewise. (Parser::parse_type_param): Likewise. (Parser::parse_function_params): Likewise. (Parser::parse_function_param): Likewise. (Parser::parse_function_return_type): Likewise. (Parser::parse_where_clause): Likewise. (Parser::parse_lifetime_where_clause_item): Likewise. (Parser::parse_for_lifetimes): Likewise. (Parser::parse_type_param_bounds): Likewise. (Parser::parse_type_param_bound): Likewise. (Parser::parse_trait_bound): Likewise. (Parser::parse_lifetime_bounds): Likewise. (Parser::parse_lifetime): Likewise. (Parser::lifetime_from_token): Likewise. (Parser::parse_external_type_item): Likewise. (Parser::parse_type_alias): Likewise. (Parser::parse_struct): Likewise. (Parser::parse_struct_fields): Likewise. (Parser::parse_struct_field): Likewise. (Parser::parse_tuple_fields): Likewise. (Parser::parse_tuple_field): Likewise. (Parser::parse_enum): Likewise. (Parser::parse_enum_items): Likewise. (Parser::parse_enum_item): Likewise. (Parser::parse_union): Likewise. (Parser::parse_const_item): Likewise. (Parser::parse_static_item): Likewise. (Parser::parse_trait): Likewise. (Parser::parse_trait_item): Likewise. (Parser::parse_trait_type): Likewise. (Parser::parse_trait_const): Likewise. (Parser::parse_impl): Likewise. (Parser::parse_inherent_impl_item): Likewise. (Parser::parse_trait_impl_item): Likewise. (Parser::parse_extern_block): Likewise. (Parser::parse_external_item): Likewise. (Parser::parse_stmt): Likewise. (Parser::parse_let_stmt): Likewise. (Parser::parse_generic_arg): Likewise. (Parser::parse_path_generic_args): Likewise. (Parser::parse_generic_args_binding): Likewise. (Parser::parse_self_param): Likewise. (Parser::parse_expr_stmt): Likewise. (Parser::parse_anon_const): Likewise. (Parser::parse_const_block_expr): Likewise. (Parser::parse_grouped_expr): Likewise. (Parser::parse_type_path): Likewise. (Parser::parse_closure_expr): Likewise. (Parser::parse_literal_expr): Likewise. (Parser::parse_box_expr): Likewise. (Parser::parse_return_expr): Likewise. (Parser::parse_try_expr): Likewise. (Parser::parse_break_expr): Likewise. (Parser::parse_continue_expr): Likewise. (Parser::parse_type_path_segment): Likewise. (Parser::parse_loop_label): Likewise. (Parser::parse_type_path_function): Likewise. (Parser::parse_if_expr): Likewise. (Parser::parse_path_in_expression): Likewise. (Parser::parse_path_expr_segment): Likewise. (Parser::parse_if_let_expr): Likewise. (Parser::parse_loop_expr): Likewise. (Parser::parse_qualified_path_in_type): Likewise. (Parser::parse_labelled_loop_expr): Likewise. (Parser::parse_match_expr): Likewise. (Parser::parse_match_arm): Likewise. (Parser::parse_match_arm_patterns): Likewise. (Parser::parse_async_block_expr): Likewise. (Parser::parse_array_expr): Likewise. (Parser::parse_closure_param): Likewise. (Parser::parse_type): Likewise. (Parser::parse_paren_prefixed_type): Likewise. (Parser::parse_for_prefixed_type): Likewise. (Parser::parse_maybe_named_param): Likewise. (Parser::parse_reference_type_inner): Likewise. (Parser::parse_reference_type): Likewise. (Parser::parse_raw_pointer_type): Likewise. (Parser::parse_slice_or_array_type): Likewise. (Parser::parse_type_no_bounds): Likewise. (Parser::parse_paren_prefixed_type_no_bounds): Likewise. (Parser::parse_literal_or_range_pattern): Likewise. (Parser::parse_range_pattern_bound): Likewise. (Parser::parse_pattern): Likewise. (Parser::parse_pattern_no_alt): Likewise. (Parser::parse_reference_pattern): Likewise. (Parser::parse_grouped_or_tuple_pattern): Likewise. (Parser::parse_slice_pattern): Likewise. (Parser::parse_identifier_pattern): Likewise. (Parser::parse_ident_leading_pattern): Likewise. (Parser::parse_struct_pattern_elems): Likewise. (Parser::parse_struct_pattern_field): Likewise. (Parser::unexpected_token): Likewise. (Parser::skip_after_semicolon): Likewise. (Parser::skip_token): Likewise. (Parser::maybe_skip_token): Likewise. (Parser::expect_token): Likewise. (Parser::skip_after_end): Likewise. (Parser::skip_after_end_block): Likewise. (Parser::skip_after_next_block): Likewise. (Parser::skip_after_end_attribute): Likewise. (Parser::done_end_or_else): Likewise. (Parser::done_end): Likewise. * parse/rust-parse.h: Change function return type to AttributeBody instead of a tuple (parse_doc_comment). Move enum binding powers from impl file. * parse/rust-parse-impl-attribute.hxx: New file. * parse/rust-parse-impl-macro.hxx: New file. * parse/rust-parse-impl-path.hxx: New file. * parse/rust-parse-impl-pattern.hxx: New file. * parse/rust-parse-impl-ttree.hxx: New file. * parse/rust-parse-impl-utils.hxx: New file. * parse/rust-parse-impl-expr.hxx: New file. Signed-off-by: Pierre-Emmanuel Patry <pierre-emmanuel.patry@embecosm.com>
1 parent 071eaf3 commit 4d63af7

9 files changed

+12869
-12670
lines changed
Lines changed: 390 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,390 @@
1+
// Copyright (C) 2025 Free Software Foundation, Inc.
2+
3+
// This file is part of GCC.
4+
5+
// GCC is free software; you can redistribute it and/or modify it under
6+
// the terms of the GNU General Public License as published by the Free
7+
// Software Foundation; either version 3, or (at your option) any later
8+
// version.
9+
10+
// GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11+
// WARRANTY; without even the implied warranty of MERCHANTABILITY or
12+
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13+
// for more details.
14+
15+
// You should have received a copy of the GNU General Public License
16+
// along with GCC; see the file COPYING3. If not see
17+
// <http://www.gnu.org/licenses/>.
18+
19+
/* DO NOT INCLUDE ANYWHERE - this is automatically included
20+
* by rust-parse-impl*.h
21+
* This is also the reason why there are no include guards. */
22+
23+
#include "rust-parse.h"
24+
#include "rust-parse-error.h"
25+
#include "rust-attribute-values.h"
26+
#include "expected.h"
27+
28+
namespace Rust {
29+
30+
// Parse a inner or outer doc comment into an doc attribute
31+
template <typename ManagedTokenSource>
32+
Parse::AttributeBody
33+
Parser<ManagedTokenSource>::parse_doc_comment ()
34+
{
35+
const_TokenPtr token = lexer.peek_token ();
36+
location_t locus = token->get_locus ();
37+
AST::SimplePathSegment segment (Values::Attributes::DOC, locus);
38+
std::vector<AST::SimplePathSegment> segments;
39+
segments.push_back (std::move (segment));
40+
AST::SimplePath attr_path (std::move (segments), false, locus);
41+
AST::LiteralExpr lit_expr (token->get_str (), AST::Literal::STRING,
42+
PrimitiveCoreType::CORETYPE_STR, {}, locus);
43+
std::unique_ptr<AST::AttrInput> attr_input (
44+
new AST::AttrInputLiteral (std::move (lit_expr)));
45+
lexer.skip_token ();
46+
47+
return Parse::AttributeBody{std::move (attr_path), std::move (attr_input),
48+
locus};
49+
}
50+
51+
// Parse a single inner attribute.
52+
template <typename ManagedTokenSource>
53+
tl::expected<AST::Attribute, Parse::Error::Attribute>
54+
Parser<ManagedTokenSource>::parse_inner_attribute ()
55+
{
56+
if (lexer.peek_token ()->get_id () == INNER_DOC_COMMENT)
57+
{
58+
auto body = parse_doc_comment ();
59+
return AST::Attribute (std::move (body.path), std::move (body.input),
60+
body.locus, true);
61+
}
62+
63+
rust_assert (lexer.peek_token ()->get_id () == HASH);
64+
65+
lexer.skip_token ();
66+
67+
if (lexer.peek_token ()->get_id () != EXCLAM)
68+
{
69+
Error error (lexer.peek_token ()->get_locus (),
70+
"expected %<!%> or %<[%> for inner attribute");
71+
add_error (std::move (error));
72+
73+
return Parse::Error::Attribute::make_malformed ();
74+
}
75+
lexer.skip_token ();
76+
77+
if (!skip_token (LEFT_SQUARE))
78+
return Parse::Error::Attribute::make_malformed ();
79+
80+
auto body_res = parse_attribute_body ();
81+
if (!body_res)
82+
return Parse::Error::Attribute::make_malformed ();
83+
auto body = std::move (body_res.value ());
84+
85+
auto actual_attribute
86+
= AST::Attribute (std::move (body.path), std::move (body.input), body.locus,
87+
true);
88+
89+
if (!skip_token (RIGHT_SQUARE))
90+
return Parse::Error::Attribute::make_malformed ();
91+
92+
return actual_attribute;
93+
}
94+
95+
// Parse a single outer attribute.
96+
template <typename ManagedTokenSource>
97+
tl::expected<AST::Attribute, Parse::Error::Attribute>
98+
Parser<ManagedTokenSource>::parse_outer_attribute ()
99+
{
100+
if (lexer.peek_token ()->get_id () == OUTER_DOC_COMMENT)
101+
{
102+
auto body = parse_doc_comment ();
103+
return AST::Attribute (std::move (body.path), std::move (body.input),
104+
body.locus, false);
105+
}
106+
107+
if (lexer.peek_token ()->get_id () == INNER_DOC_COMMENT)
108+
{
109+
Error error (
110+
lexer.peek_token ()->get_locus (), ErrorCode::E0753,
111+
"expected outer doc comment, inner doc (%<//!%> or %</*!%>) only "
112+
"allowed at start of item "
113+
"and before any outer attribute or doc (%<#[%>, %<///%> or %</**%>)");
114+
add_error (std::move (error));
115+
lexer.skip_token ();
116+
return Parse::Error::Attribute::make_unexpected_inner ();
117+
}
118+
119+
/* OuterAttribute -> '#' '[' Attr ']' */
120+
121+
if (lexer.peek_token ()->get_id () != HASH)
122+
return Parse::Error::Attribute::make_malformed ();
123+
124+
lexer.skip_token ();
125+
126+
TokenId id = lexer.peek_token ()->get_id ();
127+
if (id != LEFT_SQUARE)
128+
{
129+
if (id == EXCLAM)
130+
{
131+
// this is inner attribute syntax, so throw error
132+
// inner attributes were either already parsed or not allowed here.
133+
Error error (
134+
lexer.peek_token ()->get_locus (),
135+
"token %<!%> found, indicating inner attribute definition. Inner "
136+
"attributes are not possible at this location");
137+
add_error (std::move (error));
138+
}
139+
return Parse::Error::Attribute::make_unexpected_inner ();
140+
}
141+
142+
lexer.skip_token ();
143+
144+
auto body_res = parse_attribute_body ();
145+
if (!body_res)
146+
return Parse::Error::Attribute::make_malformed_body ();
147+
auto body = std::move (body_res.value ());
148+
149+
auto actual_attribute
150+
= AST::Attribute (std::move (body.path), std::move (body.input), body.locus,
151+
false);
152+
153+
if (lexer.peek_token ()->get_id () != RIGHT_SQUARE)
154+
return Parse::Error::Attribute::make_malformed ();
155+
156+
lexer.skip_token ();
157+
158+
return actual_attribute;
159+
}
160+
161+
// Parses the body of an attribute (inner or outer).
162+
template <typename ManagedTokenSource>
163+
tl::expected<Parse::AttributeBody, Parse::Error::AttributeBody>
164+
Parser<ManagedTokenSource>::parse_attribute_body ()
165+
{
166+
location_t locus = lexer.peek_token ()->get_locus ();
167+
168+
auto attr_path = parse_simple_path ();
169+
// ensure path is valid to parse attribute input
170+
if (!attr_path)
171+
{
172+
Error error (lexer.peek_token ()->get_locus (),
173+
"empty simple path in attribute");
174+
add_error (std::move (error));
175+
176+
// Skip past potential further info in attribute (i.e. attr_input)
177+
skip_after_end_attribute ();
178+
return Parse::Error::AttributeBody::make_invalid_path ();
179+
}
180+
181+
auto attr_input = parse_attr_input ();
182+
// AttrInput is allowed to be null, so no checks here
183+
if (attr_input)
184+
return Parse::AttributeBody{std::move (attr_path.value ()),
185+
std::move (attr_input.value ()), locus};
186+
else if (attr_input.error ().kind == Parse::Error::AttrInput::Kind::MISSING)
187+
return Parse::AttributeBody{std::move (attr_path.value ()), nullptr, locus};
188+
else
189+
return Parse::Error::AttributeBody::make_invalid_attrinput ();
190+
}
191+
192+
// Parse a contiguous block of inner attributes.
193+
template <typename ManagedTokenSource>
194+
AST::AttrVec
195+
Parser<ManagedTokenSource>::parse_inner_attributes ()
196+
{
197+
AST::AttrVec inner_attributes;
198+
199+
auto has_valid_inner_attribute_prefix = [&] () {
200+
auto id = lexer.peek_token ()->get_id ();
201+
/* Outer attribute `#[` is not allowed, only accepts `#!` */
202+
return (id == HASH && lexer.peek_token (1)->get_id () == EXCLAM)
203+
|| id == INNER_DOC_COMMENT;
204+
};
205+
206+
while (has_valid_inner_attribute_prefix ())
207+
{
208+
auto inner_attr = parse_inner_attribute ();
209+
210+
/* Ensure only valid inner attributes are added to the inner_attributes
211+
* list */
212+
if (inner_attr)
213+
{
214+
inner_attributes.push_back (std::move (inner_attr.value ()));
215+
}
216+
else
217+
{
218+
/* If no more valid inner attributes, break out of loop (only
219+
* contiguous inner attributes parsed). */
220+
break;
221+
}
222+
}
223+
224+
inner_attributes.shrink_to_fit ();
225+
return inner_attributes;
226+
}
227+
228+
// Parses a contiguous block of outer attributes.
229+
template <typename ManagedTokenSource>
230+
AST::AttrVec
231+
Parser<ManagedTokenSource>::parse_outer_attributes ()
232+
{
233+
AST::AttrVec outer_attributes;
234+
235+
auto has_valid_attribute_prefix = [&] () {
236+
auto id = lexer.peek_token ()->get_id ();
237+
/* We allow inner attributes `#!` and catch the error later */
238+
return id == HASH || id == OUTER_DOC_COMMENT || id == INNER_DOC_COMMENT;
239+
};
240+
241+
while (has_valid_attribute_prefix ()) /* For error handling. */
242+
{
243+
auto outer_attr = parse_outer_attribute ();
244+
245+
/* Ensure only valid outer attributes are added to the outer_attributes
246+
* list */
247+
if (outer_attr)
248+
{
249+
outer_attributes.push_back (std::move (outer_attr.value ()));
250+
}
251+
else
252+
{
253+
/* If no more valid outer attributes, break out of loop (only
254+
* contiguous outer attributes parsed). */
255+
break;
256+
}
257+
}
258+
259+
outer_attributes.shrink_to_fit ();
260+
return outer_attributes;
261+
262+
/* TODO: this shares basically all code with parse_inner_attributes except
263+
* function call - find way of making it more modular? function pointer? */
264+
}
265+
266+
// Parses an AttrInput AST node (polymorphic, as AttrInput is abstract)
267+
template <typename ManagedTokenSource>
268+
tl::expected<std::unique_ptr<AST::AttrInput>, Parse::Error::AttrInput>
269+
Parser<ManagedTokenSource>::parse_attr_input ()
270+
{
271+
const_TokenPtr t = lexer.peek_token ();
272+
switch (t->get_id ())
273+
{
274+
case LEFT_PAREN:
275+
case LEFT_SQUARE:
276+
case LEFT_CURLY:
277+
{
278+
auto dtoken_tree = parse_delim_token_tree ();
279+
if (!dtoken_tree)
280+
return Parse::Error::AttrInput::make_bad_token_tree ();
281+
282+
// must be a delimited token tree, so parse that
283+
std::unique_ptr<AST::AttrInput> input_tree (
284+
new AST::DelimTokenTree (dtoken_tree.value ()));
285+
286+
return tl::expected<std::unique_ptr<AST::AttrInput>,
287+
Parse::Error::AttrInput>{std::move (input_tree)};
288+
}
289+
case EQUAL:
290+
{
291+
// = LiteralExpr
292+
lexer.skip_token ();
293+
294+
t = lexer.peek_token ();
295+
296+
// attempt to parse macro
297+
// TODO: macros may/may not be allowed in attributes
298+
// this is needed for "#[doc = include_str!(...)]"
299+
if (Parse::Utils::is_simple_path_segment (t->get_id ()))
300+
{
301+
std::unique_ptr<AST::MacroInvocation> invoke
302+
= parse_macro_invocation ({});
303+
304+
if (!invoke)
305+
return Parse::Error::AttrInput::make_bad_macro_invocation ();
306+
307+
return std::unique_ptr<AST::AttrInput> (
308+
new AST::AttrInputMacro (std::move (invoke)));
309+
}
310+
311+
/* Ensure token is a "literal expression" (literally only a literal
312+
* token of any type) */
313+
if (!t->is_literal ())
314+
{
315+
Error error (
316+
t->get_locus (),
317+
"unknown token %qs in attribute body - literal expected",
318+
t->get_token_description ());
319+
add_error (std::move (error));
320+
321+
skip_after_end_attribute ();
322+
return Parse::Error::AttrInput::make_malformed ();
323+
}
324+
325+
AST::Literal::LitType lit_type = AST::Literal::STRING;
326+
// Crappy mapping of token type to literal type
327+
switch (t->get_id ())
328+
{
329+
case INT_LITERAL:
330+
lit_type = AST::Literal::INT;
331+
break;
332+
case FLOAT_LITERAL:
333+
lit_type = AST::Literal::FLOAT;
334+
break;
335+
case CHAR_LITERAL:
336+
lit_type = AST::Literal::CHAR;
337+
break;
338+
case BYTE_CHAR_LITERAL:
339+
lit_type = AST::Literal::BYTE;
340+
break;
341+
case BYTE_STRING_LITERAL:
342+
lit_type = AST::Literal::BYTE_STRING;
343+
break;
344+
case RAW_STRING_LITERAL:
345+
lit_type = AST::Literal::RAW_STRING;
346+
break;
347+
case STRING_LITERAL:
348+
default:
349+
lit_type = AST::Literal::STRING;
350+
break; // TODO: raw string? don't eliminate it from lexer?
351+
}
352+
353+
// create actual LiteralExpr
354+
AST::LiteralExpr lit_expr (t->get_str (), lit_type, t->get_type_hint (),
355+
{}, t->get_locus ());
356+
lexer.skip_token ();
357+
358+
std::unique_ptr<AST::AttrInput> attr_input_lit (
359+
new AST::AttrInputLiteral (std::move (lit_expr)));
360+
361+
// do checks or whatever? none required, really
362+
363+
// FIXME: shouldn't a skip token be required here?
364+
365+
return tl::expected<std::unique_ptr<AST::AttrInput>,
366+
Parse::Error::AttrInput>{
367+
std::move (attr_input_lit)};
368+
}
369+
break;
370+
case RIGHT_PAREN:
371+
case RIGHT_SQUARE:
372+
case RIGHT_CURLY:
373+
case END_OF_FILE:
374+
// means AttrInput is missing, which is allowed
375+
return Parse::Error::AttrInput::make_missing_attrinput ();
376+
default:
377+
add_error (
378+
Error (t->get_locus (),
379+
"unknown token %qs in attribute body - attribute input or "
380+
"none expected",
381+
t->get_token_description ()));
382+
383+
skip_after_end_attribute ();
384+
return Parse::Error::AttrInput::make_malformed ();
385+
}
386+
rust_unreachable ();
387+
// TODO: find out how to stop gcc error on "no return value"
388+
}
389+
390+
} // namespace Rust

0 commit comments

Comments
 (0)