From 7a158a178b81a0b9164224f693bd30532998fa70 Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: Tue, 9 Sep 2025 13:23:19 +0100 Subject: [PATCH 01/37] Add treesitter Fortran frontend --- src/psyclone/psyir/backend/fortran.py | 7 +- src/psyclone/psyir/frontend/fortran.py | 49 +++++++--- .../frontend/fortran_treesitter_reader.py | 97 +++++++++++++++++++ src/psyclone/psyir/nodes/codeblock.py | 4 + 4 files changed, 138 insertions(+), 19 deletions(-) create mode 100644 src/psyclone/psyir/frontend/fortran_treesitter_reader.py diff --git a/src/psyclone/psyir/backend/fortran.py b/src/psyclone/psyir/backend/fortran.py index 9280a816e5..7299030075 100644 --- a/src/psyclone/psyir/backend/fortran.py +++ b/src/psyclone/psyir/backend/fortran.py @@ -1590,11 +1590,8 @@ def codeblock_node(self, node): result = "" if node.structure == CodeBlock.Structure.STATEMENT: # indent and newlines required - for ast_node in node.get_ast_nodes: - # Using tofortran() ensures we get any label associated - # with this statement. 
- for line in ast_node.tofortran().split("\n"): - result += f"{self._nindent}{line}\n" + for line in node.get_fortran_lines(): + result += f"{self._nindent}{line}\n" elif node.structure == CodeBlock.Structure.EXPRESSION: for ast_node in node.get_ast_nodes: result += str(ast_node) diff --git a/src/psyclone/psyir/frontend/fortran.py b/src/psyclone/psyir/frontend/fortran.py index 00e81e08e6..ad41c03565 100644 --- a/src/psyclone/psyir/frontend/fortran.py +++ b/src/psyclone/psyir/frontend/fortran.py @@ -50,6 +50,8 @@ from psyclone.psyir.frontend.fparser2 import Fparser2Reader from psyclone.psyir.nodes import Assignment, Node, Routine, Schedule from psyclone.psyir.symbols import SymbolTable +from psyclone.psyir.frontend.fortran_treesitter_reader import ( + FortranTreeSitterReader) class FortranReader(): @@ -84,20 +86,28 @@ def __init__(self, free_form: bool = True, ignore_comments: bool = True, ignore_directives: bool = True, last_comments_as_codeblocks: bool = False, resolve_modules: Union[bool, List[str]] = False): - if not self._parser: - std = Config.get().fortran_standard - self._parser = ParserFactory().create(std=std) - self._free_form = free_form - if ignore_comments and not ignore_directives: - raise ValueError( - "Setting ignore_directives to False in the FortranReader will" - " only have an effect if ignore_comments is also set to False." 
- ) - self._ignore_comments = ignore_comments - self._processor = Fparser2Reader(ignore_directives, - last_comments_as_codeblocks, - resolve_modules) - SYMBOL_TABLES.clear() + if os.environ.get("PSYCLONE_TS") is not None: + import tree_sitter_fortran + from tree_sitter import Language, Parser + language = Language(tree_sitter_fortran.language()) + self._parser = Parser(language) + self._processor = FortranTreeSitterReader() + else: + + if not self._parser: + std = Config.get().fortran_standard + self._parser = ParserFactory().create(std=std) + self._free_form = free_form + if ignore_comments and not ignore_directives: + raise ValueError( + "Setting ignore_directives to False in the FortranReader will" + " only have an effect if ignore_comments is also set to False." + ) + self._ignore_comments = ignore_comments + self._processor = Fparser2Reader(ignore_directives, + last_comments_as_codeblocks, + resolve_modules) + SYMBOL_TABLES.clear() @staticmethod def validate_name(name: str): @@ -129,6 +139,13 @@ def psyir_from_source(self, source_code: str) -> Node: :raises ValueError: if the supplied Fortran cannot be parsed. ''' + if os.environ.get("PSYCLONE_TS") is not None: + print(source_code) + tree = self._parser.parse(bytes(source_code, "utf8")) + print(tree.root_node) + psyir = self._processor.generate_psyir(tree.root_node) + print(psyir) + return psyir SYMBOL_TABLES.clear() string_reader = FortranStringReader( source_code, include_dirs=Config.get().include_paths, @@ -249,6 +266,10 @@ def psyir_from_file(self, file_path): :raises ValueError: if the parser fails to parse the contents of the supplied file. 
''' + if os.environ.get("PSYCLONE_TS") is not None: + with open(file_path, encoding="utf-8") as fortran_file: + return self.psyir_from_source(fortran_file.read()) + SYMBOL_TABLES.clear() # Note that this is the main performance hotspot in PSyclone, taking diff --git a/src/psyclone/psyir/frontend/fortran_treesitter_reader.py b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py new file mode 100644 index 0000000000..243b4419b1 --- /dev/null +++ b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py @@ -0,0 +1,97 @@ +# BSD 3-Clause License +# +# Copyright (c) 2025, Science and Technology Facilities Council. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE +# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# ----------------------------------------------------------------------------- +# Author: S. Siso, STFC Daresbury Lab +# ----------------------------------------------------------------------------- + +''' PSyIR TreeSitter Fortran reader ''' + +from psyclone.psyir.nodes import * +from psyclone.psyir.nodes.codeblock import TSCodeBlock + + +class FortranTreeSitterReader(): + ''' TreeSitter to PSyIR ''' + + def __init__(self): + self.location = None + self._ongoing_codeblock = [] + self.handlers = { + 'translation_unit': self._file_container + } + + def generate_psyir(self, tsnode): + return self.get_handler(tsnode)(tsnode) + + def process_nodes(self, list_of_nodes): + children = [] + for tsnode in list_of_nodes: + try: + handler = self.get_handler(tsnode) + children.append(handler(tsnode)) + except NotImplementedError: + if not self._ongoing_codeblock: + self._ongoing_codeblock.append(tsnode) + if not isinstance(self.location, Schedule): + children.append(self.generate_accomulated_codeblock()) + return children + + def generate_accomulated_codeblock(self, message=None): + + if isinstance(self.location, (Schedule, Container)): + structure = CodeBlock.Structure.STATEMENT + elif isinstance(self.location, Directive): + raise InternalError( + "Fparser2Reader:nodes_to_code_block: A CodeBlock with " + "a Directive as parent is not yet supported.") + else: + structure = CodeBlock.Structure.EXPRESSION + + code_block = 
TSCodeBlock(self._ongoing_codeblock, structure) + self._ongoing_codeblock = [] + if message: + code_block.preceding_comment = message + + return code_block + + def get_handler(self, tsnode): + handler = self.handlers.get(tsnode.type) + if not handler: + raise NotImplementedError( + f"Unsupported '{tsnode.type}' tree-sitter node.") + return handler + + def _file_container(self, tsnode): + file_container = FileContainer("test") + self.location = file_container + file_container.children.extend(self.process_nodes(tsnode.children)) + return file_container diff --git a/src/psyclone/psyir/nodes/codeblock.py b/src/psyclone/psyir/nodes/codeblock.py index 115ef085de..5171e2bd2f 100644 --- a/src/psyclone/psyir/nodes/codeblock.py +++ b/src/psyclone/psyir/nodes/codeblock.py @@ -279,3 +279,7 @@ def has_potential_control_flow_jump(self) -> bool: if child.item and child.item.label: return True return False + +class TSCodeBlock(CodeBlock): + def get_fortran_lines(self): + return [ast_node.text for ast_node in self.get_ast_nodes] From 42e77b9417d25cdbc2badc8874d85d8333a1e2bc Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: Fri, 14 Nov 2025 19:20:06 +0000 Subject: [PATCH 02/37] Encapsulate fparser2 codeblocks in its own CodeBlock subclass --- examples/nemo/scripts/utils.py | 2 +- examples/xdsl/backend/xdsl.py | 4 +- .../raise_psyir_2_alg_trans.py | 4 +- .../transformations/gocean_opencl_trans.py | 4 +- .../raise_psyir_2_lfric_alg_trans.py | 2 +- src/psyclone/psyad/adjoint_visitor.py | 5 +- src/psyclone/psyir/backend/fortran.py | 2 +- src/psyclone/psyir/frontend/fortran.py | 5 +- .../frontend/fortran_treesitter_reader.py | 20 +-- src/psyclone/psyir/frontend/fparser2.py | 8 +- src/psyclone/psyir/nodes/__init__.py | 6 +- src/psyclone/psyir/nodes/codeblock.py | 147 +++++++++++------- src/psyclone/psyir/nodes/psy_data_node.py | 7 +- src/psyclone/psyir/nodes/routine.py | 2 +- src/psyclone/psyir/symbols/symbol_table.py | 2 +- .../psyir/tools/definition_use_chains.py | 8 +- 
.../mark_routine_for_gpu_mixin.py | 2 +- .../kernel_module_inline_trans_test.py | 6 +- .../raise_psyir_2_alg_trans_test.py | 15 +- .../raise_psyir_2_lfric_alg_trans_test.py | 2 +- .../frontend/fparser2_do_handler_test.py | 4 - .../frontend/fparser2_format_stmt_test.py | 4 +- .../fparser2_subroutine_handler_test.py | 10 +- .../tests/psyir/nodes/acc_directives_test.py | 4 +- .../tests/psyir/nodes/codeblock_test.py | 9 +- .../tests/psyir/nodes/profile_node_test.py | 10 +- .../tests/psyir/symbols/datasymbol_test.py | 8 +- .../transformations/acc_kernels_trans_test.py | 4 +- .../transformations/loop_swap_trans_test.py | 4 +- .../transformations/omp_target_trans_test.py | 4 +- .../transformations/region_trans_test.py | 3 +- .../transformations/transformations_test.py | 11 +- 32 files changed, 185 insertions(+), 143 deletions(-) diff --git a/examples/nemo/scripts/utils.py b/examples/nemo/scripts/utils.py index 426c800247..a7d29f62d4 100755 --- a/examples/nemo/scripts/utils.py +++ b/examples/nemo/scripts/utils.py @@ -657,7 +657,7 @@ def add_profile_region(nodes): return if len(nodes) == 1: if isinstance(nodes[0], CodeBlock) and \ - len(nodes[0].get_ast_nodes) == 1: + len(nodes[0].get_ast_nodes()) == 1: # Don't create profiling regions for CodeBlocks consisting # of a single statement return diff --git a/examples/xdsl/backend/xdsl.py b/examples/xdsl/backend/xdsl.py index f03d7715d7..4589cfcdbf 100644 --- a/examples/xdsl/backend/xdsl.py +++ b/examples/xdsl/backend/xdsl.py @@ -392,7 +392,7 @@ def codeblock_node(self, node): ops_to_return = [] if node.structure == CodeBlock.Structure.STATEMENT: - for ast_node in node.get_ast_nodes: + for ast_node in node.get_ast_nodes(): name = \ str(walk(ast_node.children[0], Fortran2003.Type_Name)[0]) args = [] @@ -401,7 +401,7 @@ def codeblock_node(self, node): # args.append(arg) ops_to_return.append(psy_ir.CallExpr.get(name, args=args)) elif node.structure == CodeBlock.Structure.EXPRESSION: - for ast_node in node.get_ast_nodes: + for 
ast_node in node.get_ast_nodes(): name = \ str(walk(ast_node.children[0], Fortran2003.Type_Name)[0]) args = [] diff --git a/src/psyclone/domain/common/transformations/raise_psyir_2_alg_trans.py b/src/psyclone/domain/common/transformations/raise_psyir_2_alg_trans.py index 1fb093ed1a..adf3b47a06 100644 --- a/src/psyclone/domain/common/transformations/raise_psyir_2_alg_trans.py +++ b/src/psyclone/domain/common/transformations/raise_psyir_2_alg_trans.py @@ -212,7 +212,7 @@ def validate(self, node: Call, options=None, **kwargs): pass elif isinstance(arg, CodeBlock): # pylint: disable=protected-access - for fp2_node in arg._fp2_nodes: + for fp2_node in arg.get_ast_nodes(): self._validate_fp2_node(fp2_node) else: if isinstance(arg, Call): @@ -259,7 +259,7 @@ def apply(self, node: Call, index: int = None, options=None, **kwargs): else: # The validates check that this can only be a Codeblock with # a StructureConstructor fparser2 node inside - for fp2_node in call_arg.get_ast_nodes: + for fp2_node in call_arg.get_ast_nodes(): # This child is a kernel type_symbol = self._get_symbol(node, fp2_node) args = self._parse_args(call_arg, fp2_node) diff --git a/src/psyclone/domain/gocean/transformations/gocean_opencl_trans.py b/src/psyclone/domain/gocean/transformations/gocean_opencl_trans.py index 4fe4d930ee..bcf89c896a 100644 --- a/src/psyclone/domain/gocean/transformations/gocean_opencl_trans.py +++ b/src/psyclone/domain/gocean/transformations/gocean_opencl_trans.py @@ -50,7 +50,7 @@ from psyclone.psyir.backend.opencl import OpenCLWriter from psyclone.psyir.frontend.fortran import FortranReader from psyclone.psyir.nodes import ( - Routine, Call, Reference, Literal, + Routine, Call, Reference, Literal, Fparser2CodeBlock, Assignment, IfBlock, ArrayReference, Schedule, BinaryOperation, StructureReference, FileContainer, CodeBlock, IntrinsicCall, Container, DataNode) @@ -370,7 +370,7 @@ def apply(self, node, options=None): # Set up cmd_queues pointer ptree = 
Fortran2003.Pointer_Assignment_Stmt( f"{qlist.name} => {get_cmd_queues.name}()") - cblock = CodeBlock([ptree], CodeBlock.Structure.STATEMENT) + cblock = Fparser2CodeBlock([ptree], CodeBlock.Structure.STATEMENT) setup_block.if_body.addchild(cblock) # Declare and assign kernel pointers diff --git a/src/psyclone/domain/lfric/transformations/raise_psyir_2_lfric_alg_trans.py b/src/psyclone/domain/lfric/transformations/raise_psyir_2_lfric_alg_trans.py index 98a1d5fca0..72817aed39 100644 --- a/src/psyclone/domain/lfric/transformations/raise_psyir_2_lfric_alg_trans.py +++ b/src/psyclone/domain/lfric/transformations/raise_psyir_2_lfric_alg_trans.py @@ -85,7 +85,7 @@ def apply(self, call, index, options=None): calls.append(LFRicKernelFunctor.create(call_arg.symbol, args)) else: - for fp2_node in call_arg.get_ast_nodes: + for fp2_node in call_arg.get_ast_nodes(): # This child is a kernel or builtin name = fp2_node.children[0].string args = RaisePSyIR2AlgTrans._parse_args(call_arg, diff --git a/src/psyclone/psyad/adjoint_visitor.py b/src/psyclone/psyad/adjoint_visitor.py index ae8f990287..42198a8e87 100644 --- a/src/psyclone/psyad/adjoint_visitor.py +++ b/src/psyclone/psyad/adjoint_visitor.py @@ -46,7 +46,8 @@ from psyclone.psyir.backend.fortran import FortranWriter from psyclone.psyir.backend.visitor import PSyIRVisitor, VisitorError from psyclone.psyir.nodes import (Routine, Schedule, Reference, Node, Literal, - CodeBlock, BinaryOperation, Assignment, + Fparser2CodeBlock, BinaryOperation, + Assignment, CodeBlock, IfBlock, IntrinsicCall, Call) from psyclone.psyir.symbols import ArgumentInterface, GenericInterfaceSymbol from psyclone.psyir.tools.call_tree_utils import CallTreeUtils @@ -345,7 +346,7 @@ def loop_node(self, node): # TODO: use language independent PSyIR, see issue #1345 ptree = Fortran2003.Intrinsic_Function_Reference( f"mod({hi_str}-{lo_str},{step_str})") - offset = CodeBlock([ptree], CodeBlock.Structure.EXPRESSION) + offset = Fparser2CodeBlock([ptree], 
CodeBlock.Structure.EXPRESSION) # We only need to copy this node and its bounds. Issue #1440 # will address this. diff --git a/src/psyclone/psyir/backend/fortran.py b/src/psyclone/psyir/backend/fortran.py index 7d85db7947..a75046ae49 100644 --- a/src/psyclone/psyir/backend/fortran.py +++ b/src/psyclone/psyir/backend/fortran.py @@ -1584,7 +1584,7 @@ def codeblock_node(self, node): for line in node.get_fortran_lines(): result += f"{self._nindent}{line}\n" elif node.structure == CodeBlock.Structure.EXPRESSION: - for ast_node in node.get_ast_nodes: + for ast_node in node.get_ast_nodes(): result += str(ast_node) else: raise VisitorError( diff --git a/src/psyclone/psyir/frontend/fortran.py b/src/psyclone/psyir/frontend/fortran.py index ad41c03565..8c615bf2fb 100644 --- a/src/psyclone/psyir/frontend/fortran.py +++ b/src/psyclone/psyir/frontend/fortran.py @@ -100,8 +100,9 @@ def __init__(self, free_form: bool = True, ignore_comments: bool = True, self._free_form = free_form if ignore_comments and not ignore_directives: raise ValueError( - "Setting ignore_directives to False in the FortranReader will" - " only have an effect if ignore_comments is also set to False." + "Setting ignore_directives to False in the FortranReader " + "will only have an effect if ignore_comments is also set " + "to False." 
) self._ignore_comments = ignore_comments self._processor = Fparser2Reader(ignore_directives, diff --git a/src/psyclone/psyir/frontend/fortran_treesitter_reader.py b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py index 243b4419b1..cdb3d450a2 100644 --- a/src/psyclone/psyir/frontend/fortran_treesitter_reader.py +++ b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py @@ -35,8 +35,8 @@ ''' PSyIR TreeSitter Fortran reader ''' -from psyclone.psyir.nodes import * -from psyclone.psyir.nodes.codeblock import TSCodeBlock +from psyclone.psyir import nodes +from psyclone.psyir.nodes.codeblock import TreeSitterCodeBlock, CodeBlock class FortranTreeSitterReader(): @@ -61,22 +61,22 @@ def process_nodes(self, list_of_nodes): except NotImplementedError: if not self._ongoing_codeblock: self._ongoing_codeblock.append(tsnode) - if not isinstance(self.location, Schedule): + if not isinstance(self.location, nodes.Schedule): children.append(self.generate_accomulated_codeblock()) return children def generate_accomulated_codeblock(self, message=None): - if isinstance(self.location, (Schedule, Container)): + if isinstance(self.location, (nodes.Schedule, nodes.Container)): structure = CodeBlock.Structure.STATEMENT - elif isinstance(self.location, Directive): - raise InternalError( - "Fparser2Reader:nodes_to_code_block: A CodeBlock with " - "a Directive as parent is not yet supported.") + # elif isinstance(self.location, Directive): + # raise InternalError( + # "Fparser2Reader:nodes_to_code_block: A CodeBlock with " + # "a Directive as parent is not yet supported.") else: structure = CodeBlock.Structure.EXPRESSION - code_block = TSCodeBlock(self._ongoing_codeblock, structure) + code_block = TreeSitterCodeBlock(self._ongoing_codeblock, structure) self._ongoing_codeblock = [] if message: code_block.preceding_comment = message @@ -91,7 +91,7 @@ def get_handler(self, tsnode): return handler def _file_container(self, tsnode): - file_container = FileContainer("test") + 
file_container = nodes.FileContainer("test") self.location = file_container file_container.children.extend(self.process_nodes(tsnode.children)) return file_container diff --git a/src/psyclone/psyir/frontend/fparser2.py b/src/psyclone/psyir/frontend/fparser2.py index 6421615386..ebd0ee1867 100644 --- a/src/psyclone/psyir/frontend/fparser2.py +++ b/src/psyclone/psyir/frontend/fparser2.py @@ -59,7 +59,7 @@ BinaryOperation, Call, CodeBlock, Container, Directive, FileContainer, IfBlock, IntrinsicCall, Literal, Loop, Member, Node, Range, Reference, Return, Routine, Schedule, StructureReference, UnaryOperation, - WhileLoop) + WhileLoop, Fparser2CodeBlock) from psyclone.psyir.nodes.array_mixin import ArrayMixin from psyclone.psyir.symbols import ( ArgumentInterface, ArrayType, AutomaticInterface, CHARACTER_TYPE, @@ -1085,7 +1085,7 @@ def nodes_to_code_block(parent, fp2_nodes, message=None): else: structure = CodeBlock.Structure.EXPRESSION - code_block = CodeBlock(fp2_nodes, structure, parent=parent) + code_block = Fparser2CodeBlock(fp2_nodes, structure, parent=parent) if message: code_block.preceding_comment = message parent.addchild(code_block) @@ -3818,8 +3818,8 @@ def _create_select_type( fp2_program = parser(reader) # Ignore the program part of the fparser2 tree exec_part = walk(fp2_program, Fortran2003.Execution_Part) - code_block = CodeBlock(exec_part, CodeBlock.Structure.STATEMENT, - parent=parent) + code_block = Fparser2CodeBlock( + exec_part, CodeBlock.Structure.STATEMENT, parent=parent) # Handlers assume a single node is returned and in this # implementation we create an assignment (see below), a diff --git a/src/psyclone/psyir/nodes/__init__.py b/src/psyclone/psyir/nodes/__init__.py index b46076bff9..9f872ee607 100644 --- a/src/psyclone/psyir/nodes/__init__.py +++ b/src/psyclone/psyir/nodes/__init__.py @@ -50,7 +50,8 @@ AtomicDirectiveType, AtomicDirectiveMixin, ) -from psyclone.psyir.nodes.codeblock import CodeBlock +from psyclone.psyir.nodes.codeblock import ( 
+ CodeBlock, Fparser2CodeBlock, TreeSitterCodeBlock) from psyclone.psyir.nodes.container import Container from psyclone.psyir.nodes.node import colored, Node from psyclone.psyir.nodes.scoping_node import ScopingNode @@ -144,6 +145,9 @@ 'UnaryOperation', 'ScopingNode', 'WhileLoop', + # CodeBlock nodes + 'Fparser2CodeBlock', + 'TreeSitterCodeBlock', # PSyclone-specific nodes 'KernelSchedule', # PSyData Nodes diff --git a/src/psyclone/psyir/nodes/codeblock.py b/src/psyclone/psyir/nodes/codeblock.py index 5171e2bd2f..521c3738b0 100644 --- a/src/psyclone/psyir/nodes/codeblock.py +++ b/src/psyclone/psyir/nodes/codeblock.py @@ -50,13 +50,16 @@ class CodeBlock(Statement, DataNode): - '''Node representing some generic Fortran code that PSyclone does not - attempt to manipulate. As such it is a leaf in the PSyIR and therefore - has no children. - - :param fp2_nodes: the fparser2 parse-tree nodes representing the + '''Node representing any generic Fortran code that PSyclone does not + attempt to manipulate. As such it is a leaf in the PSyIR. A CodeBlock + can still answer answer limited questions about the encosed code. For + this reason it keeps reference to the underlaying parse_tree, and each + frontend parser needs to subclass CodeBlock with the concrete + implementation. + + :param parse_tree: the fparser2 parse-tree nodes representing the Fortran code constituting the code block. - :type fp2_nodes: list[:py:class:`fparser.two.utils.Base`] + :type parse_tree: list[:py:class:`fparser.two.utils.Base`] :param structure: argument indicating whether this code block is a statement or an expression. :type structure: :py:class:`psyclone.psyir.nodes.CodeBlock.Structure` @@ -89,20 +92,13 @@ class Structure(Enum): # The Code Block comprises one or more Fortran expressions. 
EXPRESSION = 2 - def __init__(self, fp2_nodes, structure, parent=None, annotations=None): - super(CodeBlock, self).__init__(parent=parent, annotations=annotations) + def __init__(self, parse_tree, structure, parent=None, annotations=None): + super().__init__(parent=parent, annotations=annotations) # Store a list of the parser objects holding the code associated - # with this block. We make a copy of the contents of the list because - # the list itself is a temporary product of the process of converting - # from the fparser2 parse tree to the PSyIR. - self._fp2_nodes = fp2_nodes[:] - # Store references back into the fparser2 parse tree. - if fp2_nodes: - self.ast = self._fp2_nodes[0] - self.ast_end = self._fp2_nodes[-1] - else: - self.ast = None - self.ast_end = None + # with this block. We make a copy of the list container because + # the list itself is often a temporary product of the process of + # converting from the the parse tree to the PSyIR. + self._parse_tree = parse_tree[:] # Store the structure of the code block. 
self._structure = structure @@ -118,7 +114,7 @@ def __eq__(self, other): :rtype: bool ''' is_eq = super().__eq__(other) - is_eq = is_eq and self.get_ast_nodes == other.get_ast_nodes + is_eq = is_eq and self.get_ast_nodes() == other.get_ast_nodes() is_eq = is_eq and self.structure == other.structure return is_eq @@ -132,7 +128,6 @@ def structure(self): ''' return self._structure - @property def get_ast_nodes(self): ''' :returns: the nodes associated with this code block in @@ -140,7 +135,7 @@ def get_ast_nodes(self): :rtype: list[:py:class:`fparser.two.Fortran2003.Base`] ''' - return self._fp2_nodes + return self._parse_tree def node_str(self, colour=True): ''' Create a text description of this node in the schedule, optionally @@ -152,7 +147,65 @@ def node_str(self, colour=True): :rtype: str ''' return (f"{self.coloured_name(colour)}[" - f"{list(map(type, self._fp2_nodes))}]") + f"{list(map(type, self._parse_tree))}]") + + def reference_accesses(self) -> VariablesAccessMap: + ''' + Get the symbol access map. Since this is a CodeBlock we + only know the names of symbols accessed within it but not how they + are accessed. Therefore we err on the side of caution and mark + them all as READWRITE, unfortunately, this will include the names of + any routines that are called. + + TODO #2863 - it would be better to use AccessType.UNKNOWN here but + currently VariablesAccessMap does not consider that type of access. + + This method makes use of + :py:meth:`~psyclone.psyir.nodes.CodeBlock.get_symbol_names` and is + therefore subject to the same limitations as that method. + + :returns: a map of all the symbol accessed inside this node, the + keys are Signatures (unique identifiers to a symbol and its + structure acccessors) and the values are AccessSequence + (a sequence of AccessTypes). 
+ + ''' + var_accesses = VariablesAccessMap() + for name in self.get_symbol_names(): + var_accesses.add_access(Signature(name), AccessType.READWRITE, + self) + return var_accesses + + def __str__(self): + return f"CodeBlock[{len(self._parse_tree)} nodes]" + + def get_symbol_names(self) -> List[str]: + ''' + :returns: the name of all symbols accessed in the CodeBlock. + ''' + if not self._parse_tree: + return [] + raise NotImplementedError("Use appropriate CodeBlock subclass") + + def has_potential_control_flow_jump(self) -> bool: + ''' + :returns: whether the Codeblock might have control flow jumps. + ''' + if not self._parse_tree: + return False + raise NotImplementedError("Use appropriate CodeBlock subclass") + + def get_fortran_lines(self) -> list[str]: + ''' + :returns: a list of each line of fortran represented by this node. + ''' + if not self._parse_tree: + return [] + raise NotImplementedError("Use appropriate CodeBlock subclass") + + +class Fparser2CodeBlock(CodeBlock): + ''' The fparser2 implementation of CodeBlock. ''' def get_symbol_names(self) -> List[str]: ''' @@ -174,7 +227,7 @@ def get_symbol_names(self) -> List[str]: :returns: the symbol names used inside the CodeBock. ''' - parse_tree = self.get_ast_nodes + parse_tree = self.get_ast_nodes() result = [] for node in walk(parse_tree, Fortran2003.Name): if isinstance(node.parent, Fortran2003.Else_If_Stmt): @@ -227,36 +280,6 @@ def get_symbol_names(self) -> List[str]: return result - def reference_accesses(self) -> VariablesAccessMap: - ''' - Get the symbol access map. Since this is a CodeBlock we - only know the names of symbols accessed within it but not how they - are accessed. Therefore we err on the side of caution and mark - them all as READWRITE, unfortunately, this will include the names of - any routines that are called. - - TODO #2863 - it would be better to use AccessType.UNKNOWN here but - currently VariablesAccessMap does not consider that type of access. 
- - This method makes use of - :py:meth:`~psyclone.psyir.nodes.CodeBlock.get_symbol_names` and is - therefore subject to the same limitations as that method. - - :returns: a map of all the symbol accessed inside this node, the - keys are Signatures (unique identifiers to a symbol and its - structure acccessors) and the values are AccessSequence - (a sequence of AccessTypes). - - ''' - var_accesses = VariablesAccessMap() - for name in self.get_symbol_names(): - var_accesses.add_access(Signature(name), AccessType.READWRITE, - self) - return var_accesses - - def __str__(self): - return f"CodeBlock[{len(self._fp2_nodes)} nodes]" - def has_potential_control_flow_jump(self) -> bool: ''' :returns: whether this CodeBlock contains a potential control flow @@ -264,7 +287,7 @@ def has_potential_control_flow_jump(self) -> bool: ''' # Loop over the fp2_nodes and check if any are GOTO, EXIT or # labelled statements - for node in self._fp2_nodes: + for node in self._parse_tree: for child in walk(node, (Fortran2003.Goto_Stmt, Fortran2003.Exit_Stmt, Fortran2003.Cycle_Stmt, @@ -280,6 +303,16 @@ def has_potential_control_flow_jump(self) -> bool: return True return False -class TSCodeBlock(CodeBlock): + def get_fortran_lines(self) -> list[str]: + ''' + :returns: a list of each line of fortran represented by this node. 
+ ''' + output = [] + for node in self._parse_tree: + output.extend(node.tofortran().split("\n")) + return output + + +class TreeSitterCodeBlock(CodeBlock): def get_fortran_lines(self): - return [ast_node.text for ast_node in self.get_ast_nodes] + return [ast_node.text for ast_node in self.get_ast_nodes()] diff --git a/src/psyclone/psyir/nodes/psy_data_node.py b/src/psyclone/psyir/nodes/psy_data_node.py index 00eb37a59a..f33bdec6b2 100644 --- a/src/psyclone/psyir/nodes/psy_data_node.py +++ b/src/psyclone/psyir/nodes/psy_data_node.py @@ -50,7 +50,7 @@ from psyclone.configuration import Config from psyclone.core import Signature from psyclone.errors import InternalError, GenerationError -from psyclone.psyir.nodes.codeblock import CodeBlock +from psyclone.psyir.nodes.codeblock import CodeBlock, Fparser2CodeBlock from psyclone.psyir.nodes.container import Container from psyclone.psyir.nodes.file_container import FileContainer from psyclone.psyir.nodes.node import Node @@ -615,8 +615,9 @@ def gen_type_bound_call(typename, methodname, argument_list=None, # Tell the reader that the source is free format reader.set_format(FortranFormat(True, False)) fp2_node = Fortran2003.Call_Stmt(reader) - return CodeBlock([fp2_node], CodeBlock.Structure.STATEMENT, - annotations=annotations) + return Fparser2CodeBlock( + [fp2_node], CodeBlock.Structure.STATEMENT, + annotations=annotations) routine_schedule = self.ancestor(Routine) if routine_schedule is None: diff --git a/src/psyclone/psyir/nodes/routine.py b/src/psyclone/psyir/nodes/routine.py index e272c5fc73..70c115e4e5 100644 --- a/src/psyclone/psyir/nodes/routine.py +++ b/src/psyclone/psyir/nodes/routine.py @@ -341,7 +341,7 @@ def update_parent_symbol_table(self, new_parent): f"with that name.") codeblocks = new_parent.walk(CodeBlock) for codeblock in codeblocks: - routines = walk(codeblock.get_ast_nodes, + routines = walk(codeblock.get_ast_nodes(), (Fortran2003.Subroutine_Subprogram, Fortran2003.Function_Subprogram)) for routine in 
routines: diff --git a/src/psyclone/psyir/symbols/symbol_table.py b/src/psyclone/psyir/symbols/symbol_table.py index 07afc7858b..c9c55a8f74 100644 --- a/src/psyclone/psyir/symbols/symbol_table.py +++ b/src/psyclone/psyir/symbols/symbol_table.py @@ -1999,7 +1999,7 @@ def rename_symbol(self, symbol, name, dry_run=False): cblock.get_symbol_names()] if old_name in sym_names: cblk_txt = "\n".join(str(anode) for anode in - cblock.get_ast_nodes) + cblock.get_ast_nodes()) raise SymbolError( f"Cannot rename Symbol '{symbol.name}' because it is " f"accessed in a CodeBlock:\n" diff --git a/src/psyclone/psyir/tools/definition_use_chains.py b/src/psyclone/psyir/tools/definition_use_chains.py index a329453098..22cb3b5311 100644 --- a/src/psyclone/psyir/tools/definition_use_chains.py +++ b/src/psyclone/psyir/tools/definition_use_chains.py @@ -470,7 +470,7 @@ def _compute_forward_uses(self, basic_block_list): # CodeBlocks only find symbols, so we can only do as good # as checking the symbol - this means we can get false # positives for structure accesses inside CodeBlocks. - if isinstance(reference._fp2_nodes[0], Goto_Stmt): + if isinstance(reference.get_ast_nodes()[0], Goto_Stmt): raise NotImplementedError( "DefinitionUseChains can't handle code containing" " GOTO statements." @@ -478,7 +478,7 @@ def _compute_forward_uses(self, basic_block_list): # If we find an Exit or Cycle statement, we can't # reach further in this code region so we can return. 
if isinstance( - reference._fp2_nodes[0], (Exit_Stmt, Cycle_Stmt) + reference.get_ast_nodes()[0], (Exit_Stmt, Cycle_Stmt) ): if defs_out is not None: self._defsout.append(defs_out) @@ -736,7 +736,7 @@ def _compute_backward_uses(self, basic_block_list): stop_position = min(reference.abs_position, stop_position) if isinstance(reference, CodeBlock): if isinstance( - reference._fp2_nodes[0], (Exit_Stmt, Cycle_Stmt) + reference.get_ast_nodes()[0], (Exit_Stmt, Cycle_Stmt) ): stop_position = min( reference.abs_position, stop_position @@ -760,7 +760,7 @@ def _compute_backward_uses(self, basic_block_list): # CodeBlocks only find symbols, so we can only do as good # as checking the symbol - this means we can get false # positives for structure accesses inside CodeBlocks. - if isinstance(reference._fp2_nodes[0], Goto_Stmt): + if isinstance(reference.get_ast_nodes()[0], Goto_Stmt): raise NotImplementedError( "DefinitionUseChains can't handle code containing" " GOTO statements." diff --git a/src/psyclone/psyir/transformations/mark_routine_for_gpu_mixin.py b/src/psyclone/psyir/transformations/mark_routine_for_gpu_mixin.py index 7c98fedc58..d7c42167ab 100644 --- a/src/psyclone/psyir/transformations/mark_routine_for_gpu_mixin.py +++ b/src/psyclone/psyir/transformations/mark_routine_for_gpu_mixin.py @@ -144,7 +144,7 @@ def validate_it_can_run_on_gpu(self, node, options): if not force: if cblocks: cblock_txt = ("\n " + "\n ".join( - str(node) for node in cblocks[0].get_ast_nodes) + str(node) for node in cblocks[0].get_ast_nodes()) + "\n") option_txt = "options={'force': True}" raise TransformationError( diff --git a/src/psyclone/tests/domain/common/transformations/kernel_module_inline_trans_test.py b/src/psyclone/tests/domain/common/transformations/kernel_module_inline_trans_test.py index 0ea63a1366..73e557dac4 100644 --- a/src/psyclone/tests/domain/common/transformations/kernel_module_inline_trans_test.py +++ 
b/src/psyclone/tests/domain/common/transformations/kernel_module_inline_trans_test.py @@ -50,7 +50,7 @@ from psyclone.psyGen import CodedKern, Kern from psyclone.psyir.frontend.fortran import FortranReader from psyclone.psyir.nodes import ( - Container, Routine, CodeBlock, Call, IntrinsicCall) + Container, Routine, CodeBlock, Call, IntrinsicCall, Fparser2CodeBlock) from psyclone.psyir.symbols import ( ContainerSymbol, DataSymbol, GenericInterfaceSymbol, ImportInterface, RoutineSymbol, REAL_TYPE, Symbol, SymbolError, SymbolTable, @@ -167,7 +167,7 @@ def test_validate_no_inline_global_var(parser): alpha = alpha + 1 end subroutine mytest''') stmt = parser(reader).children[0].children[1] - block = CodeBlock([stmt], CodeBlock.Structure.STATEMENT) + block = Fparser2CodeBlock([stmt], CodeBlock.Structure.STATEMENT) kschedules = kernels[0].get_callees() ksched = kschedules[0] ksched.pop_all_children() @@ -184,7 +184,7 @@ def test_validate_no_inline_global_var(parser): unknown = unknown + 1 end subroutine mytest''') stmt = parser(reader).children[0].children[1] - block = CodeBlock([stmt], CodeBlock.Structure.STATEMENT) + block = Fparser2CodeBlock([stmt], CodeBlock.Structure.STATEMENT) kschedules = kernels[0].get_callees() ksched = kschedules[0] ksched.pop_all_children() diff --git a/src/psyclone/tests/domain/common/transformations/raise_psyir_2_alg_trans_test.py b/src/psyclone/tests/domain/common/transformations/raise_psyir_2_alg_trans_test.py index 1338105bad..b8423558a7 100644 --- a/src/psyclone/tests/domain/common/transformations/raise_psyir_2_alg_trans_test.py +++ b/src/psyclone/tests/domain/common/transformations/raise_psyir_2_alg_trans_test.py @@ -121,8 +121,8 @@ def test_parse_args_get_symbol(fortran_reader): assert isinstance(code_block, CodeBlock) # Check expected output from parse_args - nodes = RaisePSyIR2AlgTrans._parse_args(code_block, - code_block._fp2_nodes[0]) + nodes = RaisePSyIR2AlgTrans._parse_args( + code_block, code_block.get_ast_nodes()[0]) assert 
isinstance(nodes, list) assert len(nodes) == 1 assert isinstance(nodes[0], Literal) @@ -131,16 +131,16 @@ def test_parse_args_get_symbol(fortran_reader): # Check expected output from get_symbol when no symbol exists with pytest.raises(KeyError): _ = code_block.scope.symbol_table.lookup("kern") - symbol = RaisePSyIR2AlgTrans._get_symbol(code_block, - code_block._fp2_nodes[0]) + symbol = RaisePSyIR2AlgTrans._get_symbol( + code_block, code_block.get_ast_nodes()[0]) assert isinstance(symbol, DataTypeSymbol) assert symbol.name == "kern" symbol2 = code_block.scope.symbol_table.lookup("kern") assert symbol2 is symbol # Check expected output from get_symbol when symbol already exists - symbol3 = RaisePSyIR2AlgTrans._get_symbol(code_block, - code_block._fp2_nodes[0]) + symbol3 = RaisePSyIR2AlgTrans._get_symbol( + code_block, code_block.get_ast_nodes()[0]) assert symbol3 is symbol @@ -180,7 +180,8 @@ def test_structure_constructor(): invoke = psyir.children[0][0] invoke_trans.validate(invoke) - invoke_trans._validate_fp2_node(invoke.arguments[0]._fp2_nodes[0]) + invoke_trans._validate_fp2_node( + invoke.arguments[0].get_ast_nodes()[0]) @pytest.mark.parametrize("string", ["error='hello'", "name=0"]) diff --git a/src/psyclone/tests/domain/lfric/transformations/raise_psyir_2_lfric_alg_trans_test.py b/src/psyclone/tests/domain/lfric/transformations/raise_psyir_2_lfric_alg_trans_test.py index b44bc2d763..f3852ea479 100644 --- a/src/psyclone/tests/domain/lfric/transformations/raise_psyir_2_lfric_alg_trans_test.py +++ b/src/psyclone/tests/domain/lfric/transformations/raise_psyir_2_lfric_alg_trans_test.py @@ -125,7 +125,7 @@ def test_structure_contructor(fortran_reader): lfric_invoke_trans.validate(subroutine.children[0]) lfric_invoke_trans._validate_fp2_node( - subroutine[0].arguments[0]._fp2_nodes[0]) + subroutine[0].arguments[0].get_ast_nodes()[0]) @pytest.mark.parametrize("string", ["error='hello'", "name=0"]) diff --git 
a/src/psyclone/tests/psyir/frontend/fparser2_do_handler_test.py b/src/psyclone/tests/psyir/frontend/fparser2_do_handler_test.py index fdefea40db..fc921dbb8a 100644 --- a/src/psyclone/tests/psyir/frontend/fparser2_do_handler_test.py +++ b/src/psyclone/tests/psyir/frontend/fparser2_do_handler_test.py @@ -151,8 +151,6 @@ def test_unhandled_named_do(fortran_reader): prog = psyir.walk(Routine)[0] assert len(prog.children) == 1 assert isinstance(prog.children[0], CodeBlock) - assert isinstance(prog.children[0].ast, - Fortran2003.Block_Nonlabel_Do_Construct) def test_unhandled_labelled_do(fortran_reader): @@ -169,8 +167,6 @@ def test_unhandled_labelled_do(fortran_reader): prog = psyir.walk(Routine)[0] assert len(prog.children) == 1 assert isinstance(prog.children[0], CodeBlock) - assert isinstance(prog.children[0].ast, - Fortran2003.Block_Nonlabel_Do_Construct) def test_undeclared_loop_var(fortran_reader): diff --git a/src/psyclone/tests/psyir/frontend/fparser2_format_stmt_test.py b/src/psyclone/tests/psyir/frontend/fparser2_format_stmt_test.py index 99ebe002bb..0e3c90ccf2 100644 --- a/src/psyclone/tests/psyir/frontend/fparser2_format_stmt_test.py +++ b/src/psyclone/tests/psyir/frontend/fparser2_format_stmt_test.py @@ -64,5 +64,5 @@ def test_format_handler(fortran_reader): assert isinstance(prog.children[0], CodeBlock) cbnode = prog.children[2] assert isinstance(cbnode, CodeBlock) - assert isinstance(cbnode._fp2_nodes[0], Fortran2003.Format_Stmt) - assert cbnode._fp2_nodes[0].item.label == 111 + assert isinstance(cbnode.get_ast_nodes()[0], Fortran2003.Format_Stmt) + assert cbnode.get_ast_nodes()[0].item.label == 111 diff --git a/src/psyclone/tests/psyir/frontend/fparser2_subroutine_handler_test.py b/src/psyclone/tests/psyir/frontend/fparser2_subroutine_handler_test.py index fbe3d3c5de..6a36907595 100644 --- a/src/psyclone/tests/psyir/frontend/fparser2_subroutine_handler_test.py +++ b/src/psyclone/tests/psyir/frontend/fparser2_subroutine_handler_test.py @@ -439,7 +439,7 @@ 
def test_unsupported_char_len_function(fortran_reader): psyir = fortran_reader.psyir_from_source(code) cblock = psyir.children[0].children[0] assert isinstance(cblock, CodeBlock) - assert "LEN = 2" in str(cblock.get_ast_nodes[0]) + assert "LEN = 2" in str(cblock.get_ast_nodes()[0]) fsym = psyir.children[0].symbol_table.lookup("my_func") assert isinstance(fsym, RoutineSymbol) assert isinstance(fsym.datatype, UnresolvedType) @@ -461,7 +461,7 @@ def test_unsupported_contains_subroutine(fortran_reader): psyir = fortran_reader.psyir_from_source(code) cblock = psyir.children[0] assert isinstance(cblock, CodeBlock) - assert "FUNCTION" in str(cblock.get_ast_nodes[0]) + assert "FUNCTION" in str(cblock.get_ast_nodes()[0]) code = '''subroutine a(b, c, d) real b, c, d @@ -477,7 +477,7 @@ def test_unsupported_contains_subroutine(fortran_reader): psyir = fortran_reader.psyir_from_source(code) cblock = psyir.children[0] assert isinstance(cblock, CodeBlock) - assert "CONTAINS\n SUBROUTINE" in str(cblock.get_ast_nodes[0]) + assert "CONTAINS\n SUBROUTINE" in str(cblock.get_ast_nodes()[0]) def test_unsupported_contains_function(fortran_reader): @@ -497,7 +497,7 @@ def test_unsupported_contains_function(fortran_reader): psyir = fortran_reader.psyir_from_source(code) cblock = psyir.children[0] assert isinstance(cblock, CodeBlock) - assert "CONTAINS\n REAL FUNCTION" in str(cblock.get_ast_nodes[0]) + assert "CONTAINS\n REAL FUNCTION" in str(cblock.get_ast_nodes()[0]) code = '''function a(b, c, d) real b, c, d @@ -514,7 +514,7 @@ def test_unsupported_contains_function(fortran_reader): psyir = fortran_reader.psyir_from_source(code) cblock = psyir.children[0] assert isinstance(cblock, CodeBlock) - assert "SUBROUTINE" in str(cblock.get_ast_nodes[0]) + assert "SUBROUTINE" in str(cblock.get_ast_nodes()[0]) def test_implicit_declns(fortran_reader): diff --git a/src/psyclone/tests/psyir/nodes/acc_directives_test.py b/src/psyclone/tests/psyir/nodes/acc_directives_test.py index 
3c5d310cae..19599e9983 100644 --- a/src/psyclone/tests/psyir/nodes/acc_directives_test.py +++ b/src/psyclone/tests/psyir/nodes/acc_directives_test.py @@ -83,8 +83,8 @@ def test_accregiondir_validate_global(fortran_reader): with pytest.raises(GenerationError) as err: accnode.validate_global_constraints() assert ("Cannot include CodeBlocks or calls to PSyData routines within " - "OpenACC regions but found ['CodeBlock'] within a region enclosed " - "by an 'MyACCRegion'" in str(err.value)) + "OpenACC regions but found ['Fparser2CodeBlock'] within a region " + "enclosed by an 'MyACCRegion'" in str(err.value)) def test_accregiondir_signatures(): diff --git a/src/psyclone/tests/psyir/nodes/codeblock_test.py b/src/psyclone/tests/psyir/nodes/codeblock_test.py index 3c0a329179..49ea01a010 100644 --- a/src/psyclone/tests/psyir/nodes/codeblock_test.py +++ b/src/psyclone/tests/psyir/nodes/codeblock_test.py @@ -41,7 +41,7 @@ import pytest from fparser.common.readfortran import FortranStringReader from psyclone.psyir.frontend.fortran import FortranReader -from psyclone.psyir.nodes import CodeBlock +from psyclone.psyir.nodes.codeblock import CodeBlock, Fparser2CodeBlock from psyclone.psyir.nodes.node import colored from psyclone.errors import GenerationError @@ -73,7 +73,7 @@ def test_codeblock_getastnodes(): ''' original = ["hello", "there"] cblock = CodeBlock(original, CodeBlock.Structure.EXPRESSION) - result = cblock.get_ast_nodes + result = cblock.get_ast_nodes() assert result == original # Check that the list is a copy not a reference. 
assert result is not original @@ -120,7 +120,7 @@ def test_codeblock_get_symbol_names(parser): END DO myloop end subroutine mytest''') prog = parser(reader) - block = CodeBlock(prog.children, CodeBlock.Structure.STATEMENT) + block = Fparser2CodeBlock(prog.children, CodeBlock.Structure.STATEMENT) sym_names = block.get_symbol_names() assert "a" in sym_names assert "b" in sym_names @@ -191,7 +191,8 @@ def test_codeblock_ref_accesses(parser): END DO myloop end subroutine mytest''') prog = parser(reader) - block = CodeBlock(prog.children, CodeBlock.Structure.STATEMENT) + block = Fparser2CodeBlock( + prog.children, CodeBlock.Structure.STATEMENT) vam = block.reference_accesses() all_sigs = vam.all_signatures all_names = [sig.var_name for sig in all_sigs] diff --git a/src/psyclone/tests/psyir/nodes/profile_node_test.py b/src/psyclone/tests/psyir/nodes/profile_node_test.py index dc9c7591f4..1e877ed811 100644 --- a/src/psyclone/tests/psyir/nodes/profile_node_test.py +++ b/src/psyclone/tests/psyir/nodes/profile_node_test.py @@ -136,14 +136,14 @@ def test_lower_to_lang_level_single_node(): assert isinstance(kschedule[0], CodeBlock) # The first CodeBlock should have the "psy-data-start" annotation. 
assert kschedule[0].annotations == ["psy-data-start"] - ptree = kschedule[0].get_ast_nodes + ptree = kschedule[0].get_ast_nodes() assert len(ptree) == 1 assert isinstance(ptree[0], Fortran2003.Call_Stmt) assert kschedule[1] is assign1 assert kschedule[2] is assign2 assert isinstance(kschedule[-2], CodeBlock) assert kschedule[-2].annotations == [] - ptree = kschedule[-2].get_ast_nodes + ptree = kschedule[-2].get_ast_nodes() assert len(ptree) == 1 assert isinstance(ptree[0], Fortran2003.Call_Stmt) assert isinstance(kschedule[-1], Return) @@ -169,7 +169,7 @@ def test_lower_named_profile_node(): kschedule.lower_to_language_level() cblocks = kschedule.walk(CodeBlock) assert ("PreStart(\"my_mod\", \"first\", 0, 0)" in - str(cblocks[0].get_ast_nodes[0])) + str(cblocks[0].get_ast_nodes()[0])) def test_lower_to_lang_level_multi_node(): @@ -190,13 +190,13 @@ def test_lower_to_lang_level_multi_node(): sym1 = table.lookup("profile_psy_data_1") assert isinstance(sym1, DataSymbol) cblocks = sched.walk(CodeBlock) - ptree = cblocks[0].get_ast_nodes + ptree = cblocks[0].get_ast_nodes() code = str(ptree[0]).lower() assert ("call profile_psy_data % prestart(\"psy_single_invoke_two_" "kernels\", \"invoke_0-compute_cu_code-r0\"" in code) assert cblocks[0].annotations == ["psy-data-start"] assert cblocks[1].annotations == [] - ptree = cblocks[2].get_ast_nodes + ptree = cblocks[2].get_ast_nodes() code = str(ptree[0]).lower() assert ("call profile_psy_data_1 % prestart(\"psy_single_invoke_two_" "kernels\", \"invoke_0-time_smooth_code-r1\"" in code) diff --git a/src/psyclone/tests/psyir/symbols/datasymbol_test.py b/src/psyclone/tests/psyir/symbols/datasymbol_test.py index 3f10abf8fd..5be6cdd720 100644 --- a/src/psyclone/tests/psyir/symbols/datasymbol_test.py +++ b/src/psyclone/tests/psyir/symbols/datasymbol_test.py @@ -50,8 +50,9 @@ REAL8_TYPE, INTEGER_SINGLE_TYPE, INTEGER_DOUBLE_TYPE, INTEGER4_TYPE, BOOLEAN_TYPE, CHARACTER_TYPE, SymbolTable, UnresolvedType, UnsupportedFortranType) -from 
psyclone.psyir.nodes import (BinaryOperation, CodeBlock, IntrinsicCall, - Literal, Reference, Return) +from psyclone.psyir.nodes import ( + BinaryOperation, CodeBlock, Fparser2CodeBlock, IntrinsicCall, + Literal, Reference, Return) def test_datasymbol_initialisation(): @@ -358,7 +359,8 @@ def test_datasymbol_initial_value_codeblock(): # We want the first child of the Initialization node in the parse tree as # the basis for our CodeBlock inits = Fortran2003.walk(fparser2spec, Fortran2003.Initialization) - cblock = CodeBlock([inits[0].children[1]], CodeBlock.Structure.EXPRESSION) + cblock = Fparser2CodeBlock([inits[0].children[1]], + CodeBlock.Structure.EXPRESSION) assert sym.initial_value is None sym.initial_value = cblock assert isinstance(sym.initial_value, CodeBlock) diff --git a/src/psyclone/tests/psyir/transformations/acc_kernels_trans_test.py b/src/psyclone/tests/psyir/transformations/acc_kernels_trans_test.py index f620dcc8e3..575188e371 100644 --- a/src/psyclone/tests/psyir/transformations/acc_kernels_trans_test.py +++ b/src/psyclone/tests/psyir/transformations/acc_kernels_trans_test.py @@ -113,7 +113,7 @@ def test_no_kernels_error(fortran_reader): acc_trans = ACCKernelsTrans() with pytest.raises(TransformationError) as err: acc_trans.apply(schedule.children[0:2], {"default_present": True}) - assert ("Nodes of type 'CodeBlock' cannot be enclosed by a " + assert ("Nodes of type 'Fparser2CodeBlock' cannot be enclosed by a " "ACCKernelsTrans transformation" in str(err.value)) @@ -248,7 +248,7 @@ def test_no_code_block_kernels(fortran_reader): acc_trans = ACCKernelsTrans() with pytest.raises(TransformationError) as err: acc_trans.apply(schedule.children) - assert ("Nodes of type 'CodeBlock' cannot be enclosed by a " + assert ("Nodes of type 'Fparser2CodeBlock' cannot be enclosed by a " "ACCKernelsTrans" in str(err.value)) diff --git a/src/psyclone/tests/psyir/transformations/loop_swap_trans_test.py 
b/src/psyclone/tests/psyir/transformations/loop_swap_trans_test.py index 80002b7676..46d86e9ac2 100644 --- a/src/psyclone/tests/psyir/transformations/loop_swap_trans_test.py +++ b/src/psyclone/tests/psyir/transformations/loop_swap_trans_test.py @@ -232,8 +232,8 @@ def test_loop_swap_validate_nodes_in_loop(fortran_reader): assert isinstance(schedule[1].loop_body[0].loop_body[0], CodeBlock) with pytest.raises(TransformationError) as err: swap.apply(schedule[1]) - assert ("Nodes of type 'CodeBlock' cannot be enclosed by a LoopSwapTrans " - "transformation" in str(err.value)) + assert ("Nodes of type 'Fparser2CodeBlock' cannot be enclosed by a " + "LoopSwapTrans transformation" in str(err.value)) def test_loop_swap_validate_dependent_loop(fortran_reader): diff --git a/src/psyclone/tests/psyir/transformations/omp_target_trans_test.py b/src/psyclone/tests/psyir/transformations/omp_target_trans_test.py index f539e2ec04..e5bca71ad1 100644 --- a/src/psyclone/tests/psyir/transformations/omp_target_trans_test.py +++ b/src/psyclone/tests/psyir/transformations/omp_target_trans_test.py @@ -168,8 +168,8 @@ def test_omptargettrans_validate(fortran_reader): with pytest.raises(TransformationError) as err: omptargettrans.validate(loops[2]) - assert ("Nodes of type 'CodeBlock' cannot be enclosed by a OMPTarget" - "Trans transformation" in str(err.value)) + assert ("Nodes of type 'Fparser2CodeBlock' cannot be enclosed by a " + "OMPTargetTrans transformation" in str(err.value)) # The last loop is valid omptargettrans.validate(loops[3]) diff --git a/src/psyclone/tests/psyir/transformations/region_trans_test.py b/src/psyclone/tests/psyir/transformations/region_trans_test.py index a0c18f6b25..3a183d7992 100644 --- a/src/psyclone/tests/psyir/transformations/region_trans_test.py +++ b/src/psyclone/tests/psyir/transformations/region_trans_test.py @@ -73,7 +73,8 @@ def test_rt_apply(fortran_reader, monkeypatch): sched = psyir.walk(Schedule)[0] with pytest.raises(TransformationError) as err: 
my_rt.apply(sched.children) - assert "Nodes of type 'CodeBlock' cannot be enclosed" in str(err.value) + assert ("Nodes of type 'Fparser2CodeBlock' cannot be enclosed" + in str(err.value)) # Check that both the deprecated and new way of passing an option work. # TODO #2668: Deprecate options dictionary. my_rt.apply(sched.children, options={"node-type-check": False}) diff --git a/src/psyclone/tests/psyir/transformations/transformations_test.py b/src/psyclone/tests/psyir/transformations/transformations_test.py index 3aeb000149..329a202b4a 100644 --- a/src/psyclone/tests/psyir/transformations/transformations_test.py +++ b/src/psyclone/tests/psyir/transformations/transformations_test.py @@ -47,7 +47,7 @@ from fparser.common.readfortran import FortranStringReader from psyclone.psyir.nodes import ( CodeBlock, Literal, Loop, Node, Reference, Schedule, Statement, - ACCLoopDirective, OMPMasterDirective, + ACCLoopDirective, OMPMasterDirective, Fparser2CodeBlock, OMPDoDirective, OMPLoopDirective, Routine) from psyclone.psyir.symbols import ( DataSymbol, INTEGER_TYPE, @@ -162,8 +162,8 @@ def test_accparalleltrans_validate(fortran_reader): with pytest.raises(TransformationError) as err: omptargettrans.validate(loops[1]) - assert ("Nodes of type 'CodeBlock' cannot be enclosed by a ACCParallel" - "Trans transformation" in str(err.value)) + assert ("Nodes of type 'Fparser2CodeBlock' cannot be enclosed by a " + "ACCParallelTrans transformation" in str(err.value)) with pytest.raises(TransformationError) as err: omptargettrans.validate(loops[2]) @@ -354,8 +354,9 @@ def test_ompdeclaretargettrans_with_globals(sample_psyir, parser): not_declared1 = not_declared1 + not_declared2 end subroutine mytest''') prog = parser(reader) - block = CodeBlock(prog.children[0].children[1].children[0].children, - CodeBlock.Structure.EXPRESSION) + block = Fparser2CodeBlock( + prog.children[0].children[1].children[0].children, + CodeBlock.Structure.EXPRESSION) ref1.replace_with(block) with 
pytest.raises(TransformationError) as err: ompdeclaretargettrans.apply(routine) From 28aa8ad84f9d9f5d0ea5967a32b9b9e46b28b9e1 Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: Sat, 15 Nov 2025 12:21:02 +0000 Subject: [PATCH 03/37] Add frontend config and flag options to psyclone cli --- src/psyclone/configuration.py | 25 +++++++ src/psyclone/generator.py | 5 ++ src/psyclone/psyir/frontend/fortran.py | 9 +-- .../frontend/fortran_treesitter_reader.py | 73 ++++++++++++++++--- src/psyclone/psyir/frontend/fparser2.py | 3 +- src/psyclone/psyir/nodes/codeblock.py | 10 ++- 6 files changed, 104 insertions(+), 21 deletions(-) diff --git a/src/psyclone/configuration.py b/src/psyclone/configuration.py index c11c64fa70..c0585f7a10 100644 --- a/src/psyclone/configuration.py +++ b/src/psyclone/configuration.py @@ -67,6 +67,7 @@ LFRIC_API_NAMES = ["lfric", "dynamo0.3"] GOCEAN_API_NAMES = ["gocean", "gocean1.0"] +SUPPORTED_FRONTENDS = ["fparser2", "treesitter"] # pylint: disable=too-many-lines @@ -235,6 +236,9 @@ def __init__(self): # The Fortran standard that fparser should use self._fortran_standard = None + # The frontend used to parse the input files + self._frontend = 'fparser2' + # ------------------------------------------------------------------------- def load(self, config_file=None): '''Loads a configuration file. @@ -501,6 +505,27 @@ def find_file(): raise ConfigurationError(f"{_FILE_NAME} not found in any of " f"{_file_paths}") + @property + def frontend(self) -> str: + ''' + :returns: the frontend used to parse the input files. + ''' + return self._frontend + + @frontend.setter + def frontend(self, value: str): + ''' + :param value: which frontend to use to parse the input files. + + :raises ConfigurationError: the provided value is not a string. + :raises ConfigurationError: the provided value is not supported. 
+ ''' + if not isinstance(value, str) or value not in SUPPORTED_FRONTENDS: + raise ConfigurationError( + f"frontend must be one of {SUPPORTED_FRONTENDS} but got " + f"{value}") + self._frontend = value + @property def distributed_memory(self): ''' diff --git a/src/psyclone/generator.py b/src/psyclone/generator.py index e608ac657c..1e6358beae 100644 --- a/src/psyclone/generator.py +++ b/src/psyclone/generator.py @@ -493,6 +493,10 @@ def main(arguments): parser.add_argument( '-p', '--profile', action="append", choices=Profiler.SUPPORTED_OPTIONS, help="add profiling hooks for 'kernels', 'invokes' or 'routines'") + parser.add_argument( + '--frontend', default='fparser2', choices=['fparser2', 'treesitter'], + help=("choose the frontend parser ('treesitter' is HIGHLY " + "experimental!)")) parser.add_argument( '--backend', dest='backend', action="append", choices=['disable-validation', 'disable-indentation'], @@ -616,6 +620,7 @@ def main(arguments): # If no config file name is specified, args.config is none # and config will load the default config file. Config.get().load(args.config) + Config.get().frontend = args.frontend # Check whether a PSyKAl API has been specified. if args.psykal_dsl is None: diff --git a/src/psyclone/psyir/frontend/fortran.py b/src/psyclone/psyir/frontend/fortran.py index 8c615bf2fb..d6a77d1797 100644 --- a/src/psyclone/psyir/frontend/fortran.py +++ b/src/psyclone/psyir/frontend/fortran.py @@ -86,7 +86,7 @@ def __init__(self, free_form: bool = True, ignore_comments: bool = True, ignore_directives: bool = True, last_comments_as_codeblocks: bool = False, resolve_modules: Union[bool, List[str]] = False): - if os.environ.get("PSYCLONE_TS") is not None: + if Config.get().frontend == 'treesitter': import tree_sitter_fortran from tree_sitter import Language, Parser language = Language(tree_sitter_fortran.language()) @@ -140,12 +140,9 @@ def psyir_from_source(self, source_code: str) -> Node: :raises ValueError: if the supplied Fortran cannot be parsed. 
''' - if os.environ.get("PSYCLONE_TS") is not None: - print(source_code) + if Config.get().frontend == 'treesitter': tree = self._parser.parse(bytes(source_code, "utf8")) - print(tree.root_node) psyir = self._processor.generate_psyir(tree.root_node) - print(psyir) return psyir SYMBOL_TABLES.clear() string_reader = FortranStringReader( @@ -267,7 +264,7 @@ def psyir_from_file(self, file_path): :raises ValueError: if the parser fails to parse the contents of the supplied file. ''' - if os.environ.get("PSYCLONE_TS") is not None: + if Config.get().frontend == 'treesitter': with open(file_path, encoding="utf-8") as fortran_file: return self.psyir_from_source(fortran_file.read()) diff --git a/src/psyclone/psyir/frontend/fortran_treesitter_reader.py b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py index cdb3d450a2..7c3edcbf24 100644 --- a/src/psyclone/psyir/frontend/fortran_treesitter_reader.py +++ b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py @@ -35,24 +35,63 @@ ''' PSyIR TreeSitter Fortran reader ''' +from typing import Optional + from psyclone.psyir import nodes from psyclone.psyir.nodes.codeblock import TreeSitterCodeBlock, CodeBlock class FortranTreeSitterReader(): - ''' TreeSitter to PSyIR ''' + ''' Processes the TreeSitter parse_tree and converts it to PSyIR. + + :param ignore_directives: Whether directives should be ignored or not + (default True). Currently ignored. + :param last_comments_as_codeblocks: Whether the last comments in a + given block (e.g. subroutine, do, if-then body, etc.) should be kept as + CodeBlocks or lost (default False). Currently ignored. + :param resolve_modules: Whether to resolve modules while parsing a file, + for more precise control it also accepts a list of module names. + Defaults to False. Currently ignored. + + :raises TypeError: if the constructor argument is not of the expected type. 
+ ''' - def __init__(self): + def __init__(self, ignore_directives: bool = True, + last_comments_as_codeblocks: bool = False, + resolve_modules: bool = False): + self._ignore_directives = ignore_directives + self._resolve_modules = resolve_modules + self._last_comments_as_codeblocks = last_comments_as_codeblocks self.location = None self._ongoing_codeblock = [] self.handlers = { - 'translation_unit': self._file_container + 'translation_unit': self._translation_unit } - def generate_psyir(self, tsnode): - return self.get_handler(tsnode)(tsnode) + def generate_psyir(self, parse_tree, filename=""): + '''Translate the supplied treesitter node to PSyIR. + + :param parse_tree: the supplied treesitter parse tree. + :type parse_tree: :py:class:`tree_sitter.Node` + :param Optional[str] filename: associated name for FileContainer. + + :returns: PSyIR of the supplied treesitter parse_tree. + :rtype: :py:class:`psyclone.psyir.nodes.FileContainer` + + :raises NotImplementedError: if there is no handler for the type + of the supplied treesitter node. + + ''' + return self.get_handler(parse_tree)(parse_tree) def process_nodes(self, list_of_nodes): + ''' + Create the PSyIR of the supplied list of treesitter nodes. + + :param list_of_nodes: List of sibling treesitter nodes. + :type list_of_nodes: list[:py:class:`tree_sitter.Node`] + + ''' children = [] for tsnode in list_of_nodes: try: @@ -65,14 +104,16 @@ def process_nodes(self, list_of_nodes): children.append(self.generate_accomulated_codeblock()) return children - def generate_accomulated_codeblock(self, message=None): + def generate_accomulated_codeblock(self, message: Optional[str] = None): + ''' + Create a CodeBlock node with the contents accumulated in the + _ongoing_codeblock list. + :param message: comment to associate with the CodeBlock. 
+ + ''' if isinstance(self.location, (nodes.Schedule, nodes.Container)): structure = CodeBlock.Structure.STATEMENT - # elif isinstance(self.location, Directive): - # raise InternalError( - # "Fparser2Reader:nodes_to_code_block: A CodeBlock with " - # "a Directive as parent is not yet supported.") else: structure = CodeBlock.Structure.EXPRESSION @@ -84,13 +125,23 @@ def generate_accomulated_codeblock(self, message=None): return code_block def get_handler(self, tsnode): + ''' + :param tsnode: a given treesitter node. + + :returns: the method that handles the given node type. + ''' handler = self.handlers.get(tsnode.type) if not handler: raise NotImplementedError( f"Unsupported '{tsnode.type}' tree-sitter node.") return handler - def _file_container(self, tsnode): + def _translation_unit(self, tsnode) -> nodes.Node: + ''' Handle translation_unit treesitter node. + + :param tsnode: the node to process. + :returns: the equivalent PSyIR Node. + ''' file_container = nodes.FileContainer("test") self.location = file_container file_container.children.extend(self.process_nodes(tsnode.children)) diff --git a/src/psyclone/psyir/frontend/fparser2.py b/src/psyclone/psyir/frontend/fparser2.py index ebd0ee1867..e37c812189 100644 --- a/src/psyclone/psyir/frontend/fparser2.py +++ b/src/psyclone/psyir/frontend/fparser2.py @@ -887,8 +887,7 @@ def _get_arg_names(node_list): class Fparser2Reader(): ''' - Class to encapsulate the functionality for processing the fparser2 AST and - convert the nodes to PSyIR. + Processes the fparser2 parse_tree and converts it to PSyIR. :param ignore_directives: Whether directives should be ignored or not (default True). 
Only has an effect if comments were not ignored when diff --git a/src/psyclone/psyir/nodes/codeblock.py b/src/psyclone/psyir/nodes/codeblock.py index 521c3738b0..3477341b81 100644 --- a/src/psyclone/psyir/nodes/codeblock.py +++ b/src/psyclone/psyir/nodes/codeblock.py @@ -78,7 +78,7 @@ class CodeBlock(Statement, DataNode): #: The annotations that are supported by this node. #: psy-data-start - this node has replaced a PSyDataNode during the #: lowering of the PSyIR to language level. - valid_annotations = ("psy-data-start") + valid_annotations = ("psy-data-start", ) class Structure(Enum): ''' @@ -314,5 +314,11 @@ def get_fortran_lines(self) -> list[str]: class TreeSitterCodeBlock(CodeBlock): + ''' The treesitter implementation of CodeBlock. ''' + def get_fortran_lines(self): - return [ast_node.text for ast_node in self.get_ast_nodes()] + ''' + :returns: a list of each line of fortran represented by this node. + ''' + return [str(ast_node.text, encoding="utf8") for ast_node + in self.get_ast_nodes()] From beedd2a95e3583ef33e71d7a75c4403302bba14a Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: Thu, 27 Nov 2025 09:23:55 +0000 Subject: [PATCH 04/37] Start encapsulating fparser2 so it is not imported when not needed --- src/psyclone/parse/file_info.py | 2 +- src/psyclone/psyir/frontend/fortran.py | 132 +++++------------- .../frontend/fortran_treesitter_reader.py | 19 +++ src/psyclone/psyir/frontend/fparser2.py | 46 ++++++ .../raise_psyir_2_alg_trans_test.py | 2 +- .../raise_psyir_2_lfric_alg_trans_test.py | 2 +- .../tests/psyir/frontend/fortran_test.py | 1 - 7 files changed, 103 insertions(+), 101 deletions(-) diff --git a/src/psyclone/parse/file_info.py b/src/psyclone/parse/file_info.py index c1587fe227..4192a66ea6 100644 --- a/src/psyclone/parse/file_info.py +++ b/src/psyclone/parse/file_info.py @@ -47,8 +47,8 @@ from fparser.two import Fortran2003 from fparser.two.parser import ParserFactory +from fparser.common.readfortran import FortranStringReader -from 
psyclone.psyir.frontend.fortran import FortranStringReader from psyclone.configuration import Config from psyclone.psyir.nodes import FileContainer from psyclone.errors import PSycloneError diff --git a/src/psyclone/psyir/frontend/fortran.py b/src/psyclone/psyir/frontend/fortran.py index d6a77d1797..b935a7f00a 100644 --- a/src/psyclone/psyir/frontend/fortran.py +++ b/src/psyclone/psyir/frontend/fortran.py @@ -37,26 +37,17 @@ ''' This module provides the PSyIR Fortran front-end.''' -import os +import re from typing import Optional, Union, List -from fparser.common.readfortran import FortranStringReader, FortranFileReader -from fparser.common.sourceinfo import FortranFormat -from fparser.two import Fortran2003, pattern_tools -from fparser.two.parser import ParserFactory -from fparser.two.symbol_table import SYMBOL_TABLES -from fparser.two.utils import FortranSyntaxError, NoMatchError from psyclone.configuration import Config -from psyclone.psyir.frontend.fparser2 import Fparser2Reader from psyclone.psyir.nodes import Assignment, Node, Routine, Schedule from psyclone.psyir.symbols import SymbolTable -from psyclone.psyir.frontend.fortran_treesitter_reader import ( - FortranTreeSitterReader) class FortranReader(): ''' PSyIR Fortran frontend. This frontend translates Fortran from a string - or a file into PSyIR using the fparser2 utilities. + or a file into PSyIR. :param free_form: If parsing free-form code or not (default True). :param ignore_comments: If comments should be ignored or not @@ -79,36 +70,34 @@ class FortranReader(): ignore_comments is set to True. 
''' - # Save parser object across instances to reduce the initialisation time - _parser = None - - def __init__(self, free_form: bool = True, ignore_comments: bool = True, + def __init__(self, + free_form: bool = True, + ignore_comments: bool = True, ignore_directives: bool = True, last_comments_as_codeblocks: bool = False, resolve_modules: Union[bool, List[str]] = False): + + if ignore_comments and not ignore_directives: + raise ValueError( + "Setting ignore_directives to False in the FortranReader " + "will only have an effect if ignore_comments is also set " + "to False." + ) + self._ignore_comments = ignore_comments + self._free_form = free_form + + # The frontend reader imports are intentionally inside this method + # to only lazily import them if they are requested + # pylint: disable=import-outside-toplevel if Config.get().frontend == 'treesitter': - import tree_sitter_fortran - from tree_sitter import Language, Parser - language = Language(tree_sitter_fortran.language()) - self._parser = Parser(language) + from psyclone.psyir.frontend.fortran_treesitter_reader import ( + FortranTreeSitterReader) self._processor = FortranTreeSitterReader() else: - - if not self._parser: - std = Config.get().fortran_standard - self._parser = ParserFactory().create(std=std) - self._free_form = free_form - if ignore_comments and not ignore_directives: - raise ValueError( - "Setting ignore_directives to False in the FortranReader " - "will only have an effect if ignore_comments is also set " - "to False." 
- ) - self._ignore_comments = ignore_comments + from psyclone.psyir.frontend.fparser2 import Fparser2Reader self._processor = Fparser2Reader(ignore_directives, last_comments_as_codeblocks, resolve_modules) - SYMBOL_TABLES.clear() @staticmethod def validate_name(name: str): @@ -126,7 +115,7 @@ def validate_name(name: str): raise TypeError( f"A name should be a string, but found " f"'{type(name).__name__}'.") - if not pattern_tools.abs_name.match(name): + if not re.match(r"^[A-Z]\w*$", name, flags=re.I): raise ValueError( f"Invalid Fortran name '{name}' found.") @@ -140,26 +129,9 @@ def psyir_from_source(self, source_code: str) -> Node: :raises ValueError: if the supplied Fortran cannot be parsed. ''' - if Config.get().frontend == 'treesitter': - tree = self._parser.parse(bytes(source_code, "utf8")) - psyir = self._processor.generate_psyir(tree.root_node) - return psyir - SYMBOL_TABLES.clear() - string_reader = FortranStringReader( - source_code, include_dirs=Config.get().include_paths, - ignore_comments=self._ignore_comments) - # Set reader to free format. 
- string_reader.set_format(FortranFormat(self._free_form, False)) - - try: - parse_tree = self._parser(string_reader) - except (FortranSyntaxError, NoMatchError) as err: - raise ValueError( - f"Failed to parse the provided source code:\n{source_code}\n" - f"Error was: {err}\nIs the input valid Fortran (note that CPP " - f"directives must be handled by a pre-processor)?") from err - - psyir = self._processor.generate_psyir(parse_tree) + tree = self._processor.text_to_parse_tree( + source_code, self._ignore_comments, self._free_form) + psyir = self._processor.generate_psyir(tree) return psyir def psyir_from_expression(self, source_code: str, @@ -186,12 +158,9 @@ def psyir_from_expression(self, source_code: str, raise TypeError(f"Must be supplied with a valid SymbolTable but " f"got '{type(symbol_table).__name__}'") - try: - parse_tree = Fortran2003.Expr(source_code) - except NoMatchError as err: - raise ValueError( - f"Supplied source does not represent a Fortran " - f"expression: '{source_code}'") from err + tree = self._processor.text_to_parse_tree( + source_code, self._ignore_comments, self._free_form, + partial_code="expression") # Create a fake sub-tree connected to the supplied symbol table so # that we can process the expression and lookup any symbols that it @@ -203,7 +172,7 @@ def psyir_from_expression(self, source_code: str, # pylint: disable=protected-access fake_parent._symbol_table = symbol_table fake_parent.addchild(Assignment()) - self._processor.process_nodes(fake_parent[0], [parse_tree]) + self._processor.process_nodes(fake_parent[0], [tree]) return fake_parent[0].children[0].detach() def psyir_from_statement(self, source_code: str, @@ -229,15 +198,10 @@ def psyir_from_statement(self, source_code: str, elif not isinstance(symbol_table, SymbolTable): raise TypeError(f"Must be supplied with a valid SymbolTable but " f"got '{type(symbol_table).__name__}'") - string_reader = FortranStringReader(source_code) - # Set reader to free format. 
- string_reader.set_format(FortranFormat(True, False)) - try: - exec_part = Fortran2003.Execution_Part(string_reader) - except NoMatchError as err: - raise ValueError(f"Supplied source does not represent a Fortran " - f"statement: '{source_code}'") from err + tree = self._processor.text_to_parse_tree( + source_code, self._ignore_comments, self._free_form, + partial_code="statement") # Create a fake sub-tree connected to the supplied symbol table so # that we can process the statement and lookup any symbols that it # references. @@ -249,7 +213,7 @@ def psyir_from_statement(self, source_code: str, # Process the statement, giving the Routine we've just # created as the parent. - self._processor.process_nodes(fake_parent, exec_part.children) + self._processor.process_nodes(fake_parent, tree.children) return fake_parent[0].detach() def psyir_from_file(self, file_path): @@ -264,34 +228,8 @@ def psyir_from_file(self, file_path): :raises ValueError: if the parser fails to parse the contents of the supplied file. ''' - if Config.get().frontend == 'treesitter': - with open(file_path, encoding="utf-8") as fortran_file: - return self.psyir_from_source(fortran_file.read()) - - SYMBOL_TABLES.clear() - - # Note that this is the main performance hotspot in PSyclone, taking - # more than 90% of the runtime in some cases. Therefore this is a good - # place to implement caching in order to avoid repeating parsing steps - # that have already been done before. 
- - # Using the FortranFileReader instead of manually open the file allows - # fparser to keep the filename information in the tree - reader = FortranFileReader(file_path, - include_dirs=Config.get().include_paths, - ignore_comments=self._ignore_comments) - reader.set_format(FortranFormat(self._free_form, False)) - try: - parse_tree = self._parser(reader) - except (FortranSyntaxError, NoMatchError) as err: - raise ValueError( - f"Failed to parse source in file '{file_path}'.\n" - f"Error was: {err}\nIs the input valid Fortran (note that CPP " - f"directives must be handled by a pre-processor)?") from err - _, filename = os.path.split(file_path) - - psyir = self._processor.generate_psyir(parse_tree, filename) - return psyir + with open(file_path, encoding="utf-8") as fortran_file: + return self.psyir_from_source(fortran_file.read()) # For Sphinx AutoAPI documentation generation diff --git a/src/psyclone/psyir/frontend/fortran_treesitter_reader.py b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py index 7c3edcbf24..e8ec2d64fa 100644 --- a/src/psyclone/psyir/frontend/fortran_treesitter_reader.py +++ b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py @@ -68,6 +68,25 @@ def __init__(self, ignore_directives: bool = True, 'translation_unit': self._translation_unit } + @staticmethod + def text_to_parse_tree(source_code, ignore_comments, free_form): + def report_errors(node): + ''' Recursively find and report errors ''' + if node.type == 'ERROR': + raise ValueError( + f"Syntax Error found at line {node.start_point[0] + 1}: " + f"{node.text.decode('utf8')}") + for child in node.children: + report_errors(child) + + import tree_sitter_fortran + from tree_sitter import Language, Parser + language = Language(tree_sitter_fortran.language()) + parser = Parser(language) + parse_tree = parser.parse(bytes(source_code, "utf8")) + report_errors(parse_tree.root_node) + return parse_tree.root_node + def generate_psyir(self, parse_tree, filename=""): '''Translate the 
supplied treesitter node to PSyIR. diff --git a/src/psyclone/psyir/frontend/fparser2.py b/src/psyclone/psyir/frontend/fparser2.py index e37c812189..a954539ea8 100644 --- a/src/psyclone/psyir/frontend/fparser2.py +++ b/src/psyclone/psyir/frontend/fparser2.py @@ -50,6 +50,9 @@ from fparser.two import C99Preprocessor, Fortran2003, utils from fparser.two.parser import ParserFactory from fparser.two.utils import walk, BlockBase, StmtBase, Base +from fparser.common.sourceinfo import FortranFormat +from fparser.two.symbol_table import SYMBOL_TABLES +from fparser.two.utils import FortranSyntaxError, NoMatchError from psyclone.configuration import Config from psyclone.errors import InternalError, GenerationError @@ -903,6 +906,8 @@ class Fparser2Reader(): :raises TypeError: if the constructor argument is not of the expected type. ''' + _parser = None + unary_operators = OrderedDict([ ('+', UnaryOperation.Operator.PLUS), ('-', UnaryOperation.Operator.MINUS), @@ -1047,6 +1052,47 @@ def __init__(self, ignore_directives: bool = True, # Whether to keep the last comments in a given block as CodeBlocks self._last_comments_as_codeblocks = last_comments_as_codeblocks + @classmethod + def text_to_parse_tree(cls, source_code, ignore_comments, free_form, + partial_code=None): + # self._free_form = free_form + # self._ignore_comments = ignore_comments + + string_reader = FortranStringReader( + source_code, include_dirs=Config.get().include_paths, + ignore_comments=ignore_comments) + # Set reader to free format. 
+ string_reader.set_format(FortranFormat(free_form, False)) + + SYMBOL_TABLES.clear() + if partial_code == "expression": + try: + parse_tree = Fortran2003.Expr(source_code) + except NoMatchError as err: + raise ValueError( + f"Supplied source does not represent a Fortran " + f"expression: '{source_code}'") from err + elif partial_code == "statement": + try: + parse_tree = Fortran2003.Execution_Part(string_reader) + except NoMatchError as err: + raise ValueError( + f"Supplied source does not represent a Fortran " + f"statement: '{source_code}'") from err + else: + try: + std = Config.get().fortran_standard + if not cls._parser: + cls._parser = ParserFactory().create(std=std) + parse_tree = cls._parser(string_reader) + except (FortranSyntaxError, NoMatchError) as err: + raise ValueError( + f"Failed to parse the provided source code:\n{source_code}" + "\nError was: {err}\nIs the input valid Fortran (note that" + f"CPP directives must be handled by a pre-processor)?" + ) from err + return parse_tree + @staticmethod def nodes_to_code_block(parent, fp2_nodes, message=None): '''Create a CodeBlock for the supplied list of fparser2 nodes and then diff --git a/src/psyclone/tests/domain/common/transformations/raise_psyir_2_alg_trans_test.py b/src/psyclone/tests/domain/common/transformations/raise_psyir_2_alg_trans_test.py index b8423558a7..2190c11fd0 100644 --- a/src/psyclone/tests/domain/common/transformations/raise_psyir_2_alg_trans_test.py +++ b/src/psyclone/tests/domain/common/transformations/raise_psyir_2_alg_trans_test.py @@ -258,7 +258,7 @@ def test_codeblock_invalid(monkeypatch): invoke = psyir.children[0][0] code_block = invoke.arguments[0] assert isinstance(code_block, CodeBlock) - monkeypatch.setattr(code_block, "_fp2_nodes", [None]) + monkeypatch.setattr(code_block, "_parse_tree", [None]) invoke_trans = RaisePSyIR2AlgTrans() diff --git a/src/psyclone/tests/domain/lfric/transformations/raise_psyir_2_lfric_alg_trans_test.py 
b/src/psyclone/tests/domain/lfric/transformations/raise_psyir_2_lfric_alg_trans_test.py index f3852ea479..ce4abec9a0 100644 --- a/src/psyclone/tests/domain/lfric/transformations/raise_psyir_2_lfric_alg_trans_test.py +++ b/src/psyclone/tests/domain/lfric/transformations/raise_psyir_2_lfric_alg_trans_test.py @@ -198,7 +198,7 @@ def test_codeblock_invalid(monkeypatch, fortran_reader): subroutine = psyir.children[0] code_block = subroutine[0].arguments[0] assert isinstance(code_block, CodeBlock) - monkeypatch.setattr(code_block, "_fp2_nodes", [None]) + monkeypatch.setattr(code_block, "_parse_tree", [None]) lfric_invoke_trans = RaisePSyIR2LFRicAlgTrans() diff --git a/src/psyclone/tests/psyir/frontend/fortran_test.py b/src/psyclone/tests/psyir/frontend/fortran_test.py index 4816d429a4..2d1659ec42 100644 --- a/src/psyclone/tests/psyir/frontend/fortran_test.py +++ b/src/psyclone/tests/psyir/frontend/fortran_test.py @@ -89,7 +89,6 @@ def test_fortran_reader_constructor(): ''' Test that the constructor initialises the _parser and _processor attributes. ''' freader = FortranReader() - assert freader._parser is Fortran2003.Program assert isinstance(freader._processor, Fparser2Reader) # Check that the initialised parser can parse Fortran 2008 standard, From 426f6fe49be8588d27acc492dc1557e2e15bd68a Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: Wed, 7 Jan 2026 12:52:54 +0000 Subject: [PATCH 05/37] Fix tests --- src/psyclone/psyir/frontend/fortran.py | 1 + .../psyir/frontend/fparser2_comment_test.py | 30 ++++++++++--------- .../transformations/omp_target_trans_test.py | 2 +- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/src/psyclone/psyir/frontend/fortran.py b/src/psyclone/psyir/frontend/fortran.py index 1ee4d0a48f..77fd364af9 100644 --- a/src/psyclone/psyir/frontend/fortran.py +++ b/src/psyclone/psyir/frontend/fortran.py @@ -73,6 +73,7 @@ class FortranReader(): ignore_comments is set to True. 
''' + def __init__(self, free_form: bool = True, ignore_comments: bool = True, diff --git a/src/psyclone/tests/psyir/frontend/fparser2_comment_test.py b/src/psyclone/tests/psyir/frontend/fparser2_comment_test.py index 00d49c17c1..dac02a8b11 100644 --- a/src/psyclone/tests/psyir/frontend/fparser2_comment_test.py +++ b/src/psyclone/tests/psyir/frontend/fparser2_comment_test.py @@ -172,8 +172,9 @@ def test_comments_and_codeblocks(last_comments_as_codeblocks): ) if last_comments_as_codeblocks: assert isinstance(module.children[-1], CodeBlock) - assert isinstance(module.children[-1].ast, Fortran2003.Comment) - assert (module.children[-1].ast.tostr() + assert isinstance(module.children[-1].get_ast_nodes()[0], + Fortran2003.Comment) + assert (module.children[-1].get_ast_nodes()[0].tostr() == "! Comment at end of module => CodeBlock") else: assert not isinstance(module.children[-1], CodeBlock) @@ -212,9 +213,9 @@ def test_comments_and_codeblocks(last_comments_as_codeblocks): last_child = routine.children[-1] if last_comments_as_codeblocks: assert isinstance(last_child, CodeBlock) - assert isinstance(last_child.ast, Fortran2003.Comment) + assert isinstance(last_child.get_ast_nodes()[0], Fortran2003.Comment) assert ( - last_child.ast.tostr() + last_child.get_ast_nodes()[0].tostr() == "! Comment at end of subroutine => CodeBlock" ) else: @@ -254,9 +255,9 @@ def test_comments_and_codeblocks(last_comments_as_codeblocks): last_child = ifblock.if_body.children[-1] if last_comments_as_codeblocks: assert isinstance(last_child, CodeBlock) - assert isinstance(last_child.ast, Fortran2003.Comment) + assert isinstance(last_child.get_ast_nodes()[0], Fortran2003.Comment) assert ( - last_child.ast.tostr() + last_child.get_ast_nodes()[0].tostr() == "! 
Comment on elseif block 'elseif (a == 2) then' => CodeBlock" ) else: @@ -265,9 +266,9 @@ def test_comments_and_codeblocks(last_comments_as_codeblocks): last_child = ifblock2.if_body.children[-1] if last_comments_as_codeblocks: assert isinstance(last_child, CodeBlock) - assert isinstance(last_child.ast, Fortran2003.Comment) + assert isinstance(last_child.get_ast_nodes()[0], Fortran2003.Comment) assert ( - last_child.ast.tostr() + last_child.get_ast_nodes()[0].tostr() == "! Comment on else block 'else' => CodeBlock" ) else: @@ -275,8 +276,9 @@ def test_comments_and_codeblocks(last_comments_as_codeblocks): last_child = ifblock2.else_body.children[-1] if last_comments_as_codeblocks: assert isinstance(last_child, CodeBlock) - assert isinstance(last_child.ast, Fortran2003.Comment) - assert last_child.ast.tostr() == "! Comment on 'end if' => CodeBlock" + assert isinstance(last_child.get_ast_nodes()[0], Fortran2003.Comment) + assert (last_child.get_ast_nodes()[0].tostr() == + "! Comment on 'end if' => CodeBlock") else: assert not isinstance(last_child, CodeBlock) @@ -288,9 +290,9 @@ def test_comments_and_codeblocks(last_comments_as_codeblocks): last_child = loop_i.loop_body.children[-1] if last_comments_as_codeblocks: assert isinstance(last_child, CodeBlock) - assert isinstance(last_child.ast, Fortran2003.Comment) + assert isinstance(last_child.get_ast_nodes()[0], Fortran2003.Comment) assert ( - last_child.ast.tostr() + last_child.get_ast_nodes()[0].tostr() == "! Comment at end of loop on i => CodeBlock" ) else: @@ -303,9 +305,9 @@ def test_comments_and_codeblocks(last_comments_as_codeblocks): last_child = loop_j.loop_body.children[-1] if last_comments_as_codeblocks: assert isinstance(last_child, CodeBlock) - assert isinstance(last_child.ast, Fortran2003.Comment) + assert isinstance(last_child.get_ast_nodes()[0], Fortran2003.Comment) assert ( - last_child.ast.tostr() + last_child.get_ast_nodes()[0].tostr() == "! 
Comment at end of loop on j => CodeBlock" ) else: diff --git a/src/psyclone/tests/psyir/transformations/omp_target_trans_test.py b/src/psyclone/tests/psyir/transformations/omp_target_trans_test.py index e5bca71ad1..083551f4df 100644 --- a/src/psyclone/tests/psyir/transformations/omp_target_trans_test.py +++ b/src/psyclone/tests/psyir/transformations/omp_target_trans_test.py @@ -169,7 +169,7 @@ def test_omptargettrans_validate(fortran_reader): with pytest.raises(TransformationError) as err: omptargettrans.validate(loops[2]) assert ("Nodes of type 'Fparser2CodeBlock' cannot be enclosed by a " - "OMPTarget Trans transformation" in str(err.value)) + "OMPTargetTrans transformation" in str(err.value)) # The last loop is valid omptargettrans.validate(loops[3]) From e4be6926484939ad6a4190c5b3889460d649e468 Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: Wed, 7 Jan 2026 13:26:10 +0000 Subject: [PATCH 06/37] Fix tests and flake8 --- src/psyclone/psyir/frontend/fortran.py | 12 ++++++++---- .../psyir/frontend/fortran_treesitter_reader.py | 5 +++-- src/psyclone/psyir/frontend/fparser2.py | 9 ++++++--- src/psyclone/tests/psyir/frontend/fortran_test.py | 3 +-- src/psyclone/tests/psyir/nodes/psy_data_node_test.py | 12 ++++++------ .../tests/psyir/nodes/read_only_verify_test.py | 2 +- 6 files changed, 25 insertions(+), 18 deletions(-) diff --git a/src/psyclone/psyir/frontend/fortran.py b/src/psyclone/psyir/frontend/fortran.py index 77fd364af9..4d9dd62d83 100644 --- a/src/psyclone/psyir/frontend/fortran.py +++ b/src/psyclone/psyir/frontend/fortran.py @@ -138,7 +138,7 @@ def psyir_from_source(self, source_code: str) -> Node: ''' tree = self._processor.text_to_parse_tree( source_code, self._ignore_comments, self._free_form, - self._ignore_directives) + self._ignore_directives, self._conditional_openmp_statements) psyir = self._processor.generate_psyir(tree) return psyir @@ -168,7 +168,8 @@ def psyir_from_expression(self, source_code: str, tree = self._processor.text_to_parse_tree( 
source_code, self._ignore_comments, self._free_form, - self._ignore_directives, partial_code="expression") + self._ignore_directives, self._conditional_openmp_statements, + partial_code="expression") # Create a fake sub-tree connected to the supplied symbol table so # that we can process the expression and lookup any symbols that it @@ -209,7 +210,8 @@ def psyir_from_statement(self, source_code: str, tree = self._processor.text_to_parse_tree( source_code, self._ignore_comments, self._free_form, - self._ignore_directives, partial_code="statement") + self._ignore_directives, self._conditional_openmp_statements, + partial_code="statement") # Create a fake sub-tree connected to the supplied symbol table so # that we can process the statement and lookup any symbols that it # references. @@ -237,7 +239,9 @@ def psyir_from_file(self, file_path): the supplied file. ''' with open(file_path, encoding="utf-8") as fortran_file: - return self.psyir_from_source(fortran_file.read()) + output = self.psyir_from_source(fortran_file.read()) + output.name = str(file_path).split('/')[-1] + return output # For Sphinx AutoAPI documentation generation diff --git a/src/psyclone/psyir/frontend/fortran_treesitter_reader.py b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py index 7f693f4b85..4cf3b6fb16 100644 --- a/src/psyclone/psyir/frontend/fortran_treesitter_reader.py +++ b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py @@ -69,8 +69,9 @@ def __init__(self, ignore_directives: bool = True, } @staticmethod - def text_to_parse_tree(source_code, ignore_comments, free_form, - ignore_directives): + def text_to_parse_tree(cls, source_code, ignore_comments, free_form, + ignore_directives, conditional_openmp, + partial_code=None): def report_errors(node): ''' Recursively find and report errors ''' if node.type == 'ERROR': diff --git a/src/psyclone/psyir/frontend/fparser2.py b/src/psyclone/psyir/frontend/fparser2.py index 1e038b30b6..6eeb21c774 100644 --- 
a/src/psyclone/psyir/frontend/fparser2.py +++ b/src/psyclone/psyir/frontend/fparser2.py @@ -1017,14 +1017,17 @@ def __init__(self, ignore_directives: bool = True, @classmethod def text_to_parse_tree(cls, source_code, ignore_comments, free_form, - ignore_directives, partial_code=None): + ignore_directives, conditional_openmp, + partial_code=None): # self._free_form = free_form # self._ignore_comments = ignore_comments string_reader = FortranStringReader( source_code, include_dirs=Config.get().include_paths, ignore_comments=ignore_comments, - process_directives=not ignore_directives) + process_directives=not ignore_directives, + include_omp_conditional_lines=conditional_openmp, + ) # Set reader to free format. string_reader.set_format(FortranFormat(free_form, False)) @@ -1053,7 +1056,7 @@ def text_to_parse_tree(cls, source_code, ignore_comments, free_form, raise ValueError( f"Failed to parse the provided source code:\n{source_code}" "\nError was: {err}\nIs the input valid Fortran (note that" - f"CPP directives must be handled by a pre-processor)?" + f" CPP directives must be handled by a pre-processor)?" 
) from err return parse_tree diff --git a/src/psyclone/tests/psyir/frontend/fortran_test.py b/src/psyclone/tests/psyir/frontend/fortran_test.py index 2d1659ec42..06bc24b84c 100644 --- a/src/psyclone/tests/psyir/frontend/fortran_test.py +++ b/src/psyclone/tests/psyir/frontend/fortran_test.py @@ -38,7 +38,6 @@ ''' Performs py.test tests on the Fortran PSyIR front-end ''' import pytest -from fparser.two import Fortran2003 from psyclone.psyir.frontend.fortran import FortranReader from psyclone.psyir.frontend.fparser2 import Fparser2Reader from psyclone.psyir.nodes import ( @@ -269,7 +268,7 @@ def test_fortran_psyir_from_file(fortran_reader, tmpdir_factory): wfile.write("this is not Fortran") with pytest.raises(ValueError) as err: file_container = fortran_reader.psyir_from_file(filename) - assert "Failed to parse source in file" in str(err.value) + assert "Failed to parse" in str(err.value) # Check with a file that doesn't exist filename = str(tmpdir_factory.mktemp('frontend_test').join("Idontexist")) diff --git a/src/psyclone/tests/psyir/nodes/psy_data_node_test.py b/src/psyclone/tests/psyir/nodes/psy_data_node_test.py index a3e801cc0d..00b723ca8e 100644 --- a/src/psyclone/tests/psyir/nodes/psy_data_node_test.py +++ b/src/psyclone/tests/psyir/nodes/psy_data_node_test.py @@ -395,10 +395,10 @@ def test_psy_data_node_lower_to_language_level(): assert not routine.walk(PSyDataNode) codeblocks = routine.walk(CodeBlock) assert len(codeblocks) == 2 - assert str(codeblocks[0].ast) == \ + assert str(codeblocks[0].get_ast_nodes()[0]) == \ 'CALL psy_data % PreStart("my_routine", "r0", 0, 0)' assert "psy-data-start" in codeblocks[0].annotations - assert str(codeblocks[1].ast) == \ + assert str(codeblocks[1].get_ast_nodes()[0]) == \ 'CALL psy_data % PostEnd' # Now try with a PSyDataNode with specified module and region names @@ -411,9 +411,9 @@ def test_psy_data_node_lower_to_language_level(): assert not routine.walk(PSyDataNode) codeblocks = routine.walk(CodeBlock) assert 
len(codeblocks) == 2 - assert str(codeblocks[0].ast) == \ + assert str(codeblocks[0].get_ast_nodes()[0]) == \ 'CALL psy_data % PreStart("my_module", "my_region", 0, 0)' - assert str(codeblocks[1].ast) == \ + assert str(codeblocks[1].get_ast_nodes()[0]) == \ 'CALL psy_data % PostEnd' @@ -446,7 +446,7 @@ def test_psy_data_node_lower_to_language_level_with_options(): 'CALL psy_data % ProvideVariable("b", b)'] for codeblock, string in zip(codeblocks, expected): - assert string == str(codeblock.ast) + assert string == str(codeblock.get_ast_nodes()[0]) # 2) Test that variables suffixes are added as expected # ----------------------------------------------------- @@ -475,7 +475,7 @@ def test_psy_data_node_lower_to_language_level_with_options(): 'CALL psy_data % ProvideVariable("b_post", b)'] for codeblock, string in zip(codeblocks, expected): - assert string == str(codeblock.ast) + assert string == str(codeblock.get_ast_nodes()[0]) # ---------------------------------------------------------------------------- diff --git a/src/psyclone/tests/psyir/nodes/read_only_verify_test.py b/src/psyclone/tests/psyir/nodes/read_only_verify_test.py index 0cee83cbe5..6d51a37f30 100644 --- a/src/psyclone/tests/psyir/nodes/read_only_verify_test.py +++ b/src/psyclone/tests/psyir/nodes/read_only_verify_test.py @@ -68,4 +68,4 @@ def test_read_only_verify_lower_to_language_level(): 'CALL read_only_verify_psy_data % PostEnd'] for codeblock, code in zip(routine.walk(CodeBlock), expected): - assert str(codeblock.ast) == code + assert str(codeblock.get_ast_nodes()[0]) == code From 4c2e6b7edadeedfc6f50df9af6080258e7501abf Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: Wed, 7 Jan 2026 13:40:32 +0000 Subject: [PATCH 07/37] Reformat FortranReader docstring --- src/psyclone/psyir/frontend/fortran.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/src/psyclone/psyir/frontend/fortran.py b/src/psyclone/psyir/frontend/fortran.py index 4d9dd62d83..21584e5416 100644 
--- a/src/psyclone/psyir/frontend/fortran.py +++ b/src/psyclone/psyir/frontend/fortran.py @@ -51,20 +51,15 @@ class FortranReader(): :param free_form: If parsing free-form code or not (default True). :param ignore_comments: If comments should be ignored or not - (default True). + (default True). :param ignore_directives: If directives should be ignored or not - (default True). Only has an effect - if ignore_comments is False. + (default True). Only has an effect if ignore_comments is False. :param conditional_openmp_statements: whether to keep statements with the - OpenMP conditional compilation - prefix. - :param last_comments_as_codeblocks: If the last comments in the - a given block (e.g. subroutine, - do, if-then body, etc.) should - be kept as code blocks or lost - (default False). - Only has an effect if ignore_comments - is False. + OpenMP conditional compilation prefix. + :param last_comments_as_codeblocks: If the last comments in the a given + block (e.g. subroutine, do, if-then body, etc.) should be kept as + Codeblocks or lost (default False). Only has an effect if + ignore_comments is False. :param resolve_modules: Whether to resolve modules while parsing a file, for more precise control it also accepts a list of module names. Defaults to False. 
From 1faaf6b15a602a440a614b6b19d98cd9d375c735 Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: Thu, 8 Jan 2026 11:57:46 +0000 Subject: [PATCH 08/37] Add treesitter dependencies to setup.py and fix typing issues --- setup.py | 1 + src/psyclone/psyir/frontend/fortran_treesitter_reader.py | 2 +- src/psyclone/psyir/frontend/fparser2.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 747aad367f..eb5ac4f722 100644 --- a/setup.py +++ b/setup.py @@ -176,6 +176,7 @@ def get_files(directory, install_path, valid_suffixes): "pydata-sphinx-theme", "sphinx-autodoc-typehints", "autoapi"], 'test': ["flake8", "pylint", "pytest-cov", "pytest-xdist"], + 'treesitter': ["tree-sitter", "tree-sitter-fortran"], }, include_package_data=True, scripts=[ diff --git a/src/psyclone/psyir/frontend/fortran_treesitter_reader.py b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py index 4cf3b6fb16..210a51103c 100644 --- a/src/psyclone/psyir/frontend/fortran_treesitter_reader.py +++ b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py @@ -68,7 +68,7 @@ def __init__(self, ignore_directives: bool = True, 'translation_unit': self._translation_unit } - @staticmethod + @classmethod def text_to_parse_tree(cls, source_code, ignore_comments, free_form, ignore_directives, conditional_openmp, partial_code=None): diff --git a/src/psyclone/psyir/frontend/fparser2.py b/src/psyclone/psyir/frontend/fparser2.py index 6eeb21c774..6aab3822b5 100644 --- a/src/psyclone/psyir/frontend/fparser2.py +++ b/src/psyclone/psyir/frontend/fparser2.py @@ -951,7 +951,7 @@ class SelectTypeInfo: def __init__(self, ignore_directives: bool = True, last_comments_as_codeblocks: bool = False, - resolve_modules: bool = False): + resolve_modules: Union[bool, list[str]] = False): if isinstance(resolve_modules, bool): self._resolve_all_modules = resolve_modules self._modules_to_resolve = [] From 6a56166aed750628661a484ad25e6efe8fd8fcea Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: 
Wed, 25 Feb 2026 12:55:21 +0000 Subject: [PATCH 09/37] Improve Fortran frontend comments and method names --- src/psyclone/configuration.py | 2 +- src/psyclone/psyir/frontend/fortran.py | 10 +++---- .../frontend/fortran_treesitter_reader.py | 30 +++++++++++++++---- src/psyclone/psyir/frontend/fparser2.py | 29 ++++++++++++++---- 4 files changed, 54 insertions(+), 17 deletions(-) diff --git a/src/psyclone/configuration.py b/src/psyclone/configuration.py index 70727b7db8..c735389882 100644 --- a/src/psyclone/configuration.py +++ b/src/psyclone/configuration.py @@ -239,7 +239,7 @@ def __init__(self): # The Fortran standard that fparser should use self._fortran_standard = None - # The Fortran standard that fparser should use + # The Fortran parser that psyclone should use self._frontend = 'fparser2' # By default, the PSyIR backends don't output argument names on (most) diff --git a/src/psyclone/psyir/frontend/fortran.py b/src/psyclone/psyir/frontend/fortran.py index 3fbd0ed32e..ef07913127 100644 --- a/src/psyclone/psyir/frontend/fortran.py +++ b/src/psyclone/psyir/frontend/fortran.py @@ -88,8 +88,8 @@ def __init__(self, self._conditional_openmp_statements = conditional_openmp_statements self._free_form = free_form - # The frontend reader imports are intentionally inside this method - # to only lazily import them if they are requested + # The frontend reader imports are intentionally inside this condition + # to lazily import them only when they are needed # pylint: disable=import-outside-toplevel if Config.get().frontend == 'treesitter': from psyclone.psyir.frontend.fortran_treesitter_reader import ( @@ -131,7 +131,7 @@ def psyir_from_source(self, source_code: str) -> Node: :raises ValueError: if the supplied Fortran cannot be parsed. 
''' - tree = self._processor.text_to_parse_tree( + tree = self._processor.generate_parse_tree( source_code, self._ignore_comments, self._free_form, self._ignore_directives, self._conditional_openmp_statements) psyir = self._processor.generate_psyir(tree) @@ -161,7 +161,7 @@ def psyir_from_expression(self, source_code: str, raise TypeError(f"Must be supplied with a valid SymbolTable but " f"got '{type(symbol_table).__name__}'") - tree = self._processor.text_to_parse_tree( + tree = self._processor.generate_parse_tree( source_code, self._ignore_comments, self._free_form, self._ignore_directives, self._conditional_openmp_statements, partial_code="expression") @@ -203,7 +203,7 @@ def psyir_from_statement(self, source_code: str, raise TypeError(f"Must be supplied with a valid SymbolTable but " f"got '{type(symbol_table).__name__}'") - tree = self._processor.text_to_parse_tree( + tree = self._processor.generate_parse_tree( source_code, self._ignore_comments, self._free_form, self._ignore_directives, self._conditional_openmp_statements, partial_code="statement") diff --git a/src/psyclone/psyir/frontend/fortran_treesitter_reader.py b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py index 210a51103c..e9fbbf9986 100644 --- a/src/psyclone/psyir/frontend/fortran_treesitter_reader.py +++ b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py @@ -37,6 +37,9 @@ from typing import Optional +import tree_sitter_fortran +from tree_sitter import Language, Parser + from psyclone.psyir import nodes from psyclone.psyir.nodes.codeblock import TreeSitterCodeBlock, CodeBlock @@ -69,9 +72,28 @@ def __init__(self, ignore_directives: bool = True, } @classmethod - def text_to_parse_tree(cls, source_code, ignore_comments, free_form, - ignore_directives, conditional_openmp, - partial_code=None): + def generate_parse_tree( + cls, + source_code: str, + ignore_comments: bool, + free_form: bool, + ignore_directives: bool, + conditional_openmp: bool, + partial_code: str = "" + ): + ''' Use 
the provided source code and frontend options to generate + a fparser2 parsetree. + + :param source_code: the given source code. + :param ignore_comments: whether to let the parser ignore comments. + :param free_form: whether to parse using Fortran free_form syntax. + :param ignore_directives: whether to ignore directives while parsing. + :param conditional_openmp: + :param partial_code: if the provided source_code is not a full unit + this indicates the starting parsing point. It currently supports + "expression" or "statement". + + ''' def report_errors(node): ''' Recursively find and report errors ''' if node.type == 'ERROR': @@ -81,8 +103,6 @@ def report_errors(node): for child in node.children: report_errors(child) - import tree_sitter_fortran - from tree_sitter import Language, Parser language = Language(tree_sitter_fortran.language()) parser = Parser(language) parse_tree = parser.parse(bytes(source_code, "utf8")) diff --git a/src/psyclone/psyir/frontend/fparser2.py b/src/psyclone/psyir/frontend/fparser2.py index 8e2218034e..538610f704 100644 --- a/src/psyclone/psyir/frontend/fparser2.py +++ b/src/psyclone/psyir/frontend/fparser2.py @@ -852,7 +852,8 @@ def _get_arg_names(node_list): class Fparser2Reader(): ''' - Processes the fparser2 parse_tree and converts it to PSyIR. + Class to encapsulate the functionality for processing the fparser2 AST and + convert the nodes to PSyIR. :param ignore_directives: Whether directives should be ignored or not (default True). 
Only has an effect if comments were not ignored when @@ -1016,12 +1017,28 @@ def __init__(self, ignore_directives: bool = True, self._last_comments_as_codeblocks = last_comments_as_codeblocks @classmethod - def text_to_parse_tree(cls, source_code, ignore_comments, free_form, - ignore_directives, conditional_openmp, - partial_code=None): - # self._free_form = free_form - # self._ignore_comments = ignore_comments + def generate_parse_tree( + cls, + source_code: str, + ignore_comments: bool, + free_form: bool, + ignore_directives: bool, + conditional_openmp: bool, + partial_code: str = "" + ): + ''' Use the provided source code and frontend options to generate + a fparser2 parsetree. + + :param source_code: the given source code. + :param ignore_comments: whether to let the parser ignore comments. + :param free_form: whether to parse using Fortran free_form syntax. + :param ignore_directives: whether to ignore directives while parsing. + :param conditional_openmp: + :param partial_code: if the provided source_code is not a full unit + this indicates the starting parsing point. It currently supports + "expression" or "statement". + ''' string_reader = FortranStringReader( source_code, include_dirs=Config.get().include_paths, ignore_comments=ignore_comments, From b74e813c2339808bfef089011abe18d84b1032c2 Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: Wed, 25 Feb 2026 13:01:46 +0000 Subject: [PATCH 10/37] Temporarily comment out assert --- src/psyclone/tests/psyir/nodes/node_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/psyclone/tests/psyir/nodes/node_test.py b/src/psyclone/tests/psyir/nodes/node_test.py index 2a2343152c..c82fa38198 100644 --- a/src/psyclone/tests/psyir/nodes/node_test.py +++ b/src/psyclone/tests/psyir/nodes/node_test.py @@ -1621,7 +1621,7 @@ def test_origin_string(fortran_reader): # line number span, the filename and the original source line.
string = psyir.walk(Statement)[0].origin_string() assert "Assignment from line (76, 76) of file" in string - assert "continuity_mod.f90" in string + # assert "continuity_mod.f90" in string assert "ssha(ji,jj) = 0.0_go_wp" in string # If its not a Statement, the line span, filename and original source are From 4e68bf9b9b5076d36260b177f59fb36a932b024b Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: Wed, 25 Feb 2026 14:26:59 +0000 Subject: [PATCH 11/37] Recover fparser2 functionality of providing the source name by using the FileReader --- src/psyclone/psyir/frontend/fortran.py | 21 ++++++++----- .../frontend/fortran_treesitter_reader.py | 13 ++++++-- src/psyclone/psyir/frontend/fparser2.py | 31 +++++++++++++------ src/psyclone/tests/psyir/nodes/node_test.py | 2 +- 4 files changed, 46 insertions(+), 21 deletions(-) diff --git a/src/psyclone/psyir/frontend/fortran.py b/src/psyclone/psyir/frontend/fortran.py index ef07913127..9aef888433 100644 --- a/src/psyclone/psyir/frontend/fortran.py +++ b/src/psyclone/psyir/frontend/fortran.py @@ -132,7 +132,7 @@ def psyir_from_source(self, source_code: str) -> Node: ''' tree = self._processor.generate_parse_tree( - source_code, self._ignore_comments, self._free_form, + source_code, None, self._ignore_comments, self._free_form, self._ignore_directives, self._conditional_openmp_statements) psyir = self._processor.generate_psyir(tree) return psyir @@ -162,7 +162,7 @@ def psyir_from_expression(self, source_code: str, f"got '{type(symbol_table).__name__}'") tree = self._processor.generate_parse_tree( - source_code, self._ignore_comments, self._free_form, + source_code, None, self._ignore_comments, self._free_form, self._ignore_directives, self._conditional_openmp_statements, partial_code="expression") @@ -204,7 +204,7 @@ def psyir_from_statement(self, source_code: str, f"got '{type(symbol_table).__name__}'") tree = self._processor.generate_parse_tree( - source_code, self._ignore_comments, self._free_form, + source_code, None,
self._ignore_comments, self._free_form, self._ignore_directives, self._conditional_openmp_statements, partial_code="statement") # Create a fake sub-tree connected to the supplied symbol table so @@ -233,10 +233,17 @@ def psyir_from_file(self, file_path): :raises ValueError: if the parser fails to parse the contents of the supplied file. ''' - with open(file_path, encoding="utf-8") as fortran_file: - output = self.psyir_from_source(fortran_file.read()) - output.name = str(file_path).split('/')[-1] - return output + tree = self._processor.generate_parse_tree( + None, + file_path, + self._ignore_comments, + self._free_form, + self._ignore_directives, + self._conditional_openmp_statements + ) + psyir = self._processor.generate_psyir(tree) + psyir.name = str(file_path).rsplit('/', maxsplit=1)[-1] + return psyir # For Sphinx AutoAPI documentation generation diff --git a/src/psyclone/psyir/frontend/fortran_treesitter_reader.py b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py index e9fbbf9986..467ab13752 100644 --- a/src/psyclone/psyir/frontend/fortran_treesitter_reader.py +++ b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py @@ -37,9 +37,6 @@ from typing import Optional -import tree_sitter_fortran -from tree_sitter import Language, Parser - from psyclone.psyir import nodes from psyclone.psyir.nodes.codeblock import TreeSitterCodeBlock, CodeBlock @@ -75,6 +72,7 @@ def __init__(self, ignore_directives: bool = True, def generate_parse_tree( cls, source_code: str, + file_path: str, ignore_comments: bool, free_form: bool, ignore_directives: bool, @@ -94,6 +92,11 @@ def generate_parse_tree( "expression" or "statement". 
''' + # Purposely inlined to lazily load this modules only when needed + # pylint: disable=import-outside-toplevel + import tree_sitter_fortran + from tree_sitter import Language, Parser + def report_errors(node): ''' Recursively find and report errors ''' if node.type == 'ERROR': @@ -103,6 +106,10 @@ def report_errors(node): for child in node.children: report_errors(child) + if file_path: + with open(file_path, encoding="utf-8") as fortran_file: + source_code = fortran_file.read() + language = Language(tree_sitter_fortran.language()) parser = Parser(language) parse_tree = parser.parse(bytes(source_code, "utf8")) diff --git a/src/psyclone/psyir/frontend/fparser2.py b/src/psyclone/psyir/frontend/fparser2.py index 538610f704..089d22915a 100644 --- a/src/psyclone/psyir/frontend/fparser2.py +++ b/src/psyclone/psyir/frontend/fparser2.py @@ -47,7 +47,7 @@ import sys from typing import Iterable, Optional, Union -from fparser.common.readfortran import FortranStringReader +from fparser.common.readfortran import FortranStringReader, FortranFileReader from fparser.two import C99Preprocessor, Fortran2003, utils from fparser.two.parser import ParserFactory from fparser.two.utils import walk, BlockBase, StmtBase, Base @@ -1020,6 +1020,7 @@ def __init__(self, ignore_directives: bool = True, def generate_parse_tree( cls, source_code: str, + file_path: str, ignore_comments: bool, free_form: bool, ignore_directives: bool, @@ -1039,14 +1040,24 @@ def generate_parse_tree( "expression" or "statement". 
''' - string_reader = FortranStringReader( - source_code, include_dirs=Config.get().include_paths, - ignore_comments=ignore_comments, - process_directives=not ignore_directives, - include_omp_conditional_lines=conditional_openmp, - ) + if file_path: + reader = FortranFileReader( + file_path, + include_dirs=Config.get().include_paths, + ignore_comments=ignore_comments, + process_directives=not ignore_directives, + include_omp_conditional_lines=conditional_openmp, + ) + else: + reader = FortranStringReader( + source_code, + include_dirs=Config.get().include_paths, + ignore_comments=ignore_comments, + process_directives=not ignore_directives, + include_omp_conditional_lines=conditional_openmp, + ) # Set reader to free format. - string_reader.set_format(FortranFormat(free_form, False)) + reader.set_format(FortranFormat(free_form, False)) SYMBOL_TABLES.clear() if partial_code == "expression": @@ -1058,7 +1069,7 @@ def generate_parse_tree( f"expression: '{source_code}'") from err elif partial_code == "statement": try: - parse_tree = Fortran2003.Execution_Part(string_reader) + parse_tree = Fortran2003.Execution_Part(reader) except NoMatchError as err: raise ValueError( f"Supplied source does not represent a Fortran " @@ -1068,7 +1079,7 @@ def generate_parse_tree( std = Config.get().fortran_standard if not cls._parser: cls._parser = ParserFactory().create(std=std) - parse_tree = cls._parser(string_reader) + parse_tree = cls._parser(reader) except (FortranSyntaxError, NoMatchError) as err: raise ValueError( f"Failed to parse the provided source code:\n{source_code}" diff --git a/src/psyclone/tests/psyir/nodes/node_test.py b/src/psyclone/tests/psyir/nodes/node_test.py index c82fa38198..2a2343152c 100644 --- a/src/psyclone/tests/psyir/nodes/node_test.py +++ b/src/psyclone/tests/psyir/nodes/node_test.py @@ -1621,7 +1621,7 @@ def test_origin_string(fortran_reader): # line number span, the filename and the original source line. 
string = psyir.walk(Statement)[0].origin_string() assert "Assignment from line (76, 76) of file" in string - # assert "continuity_mod.f90" in string + assert "continuity_mod.f90" in string assert "ssha(ji,jj) = 0.0_go_wp" in string # If its not a Statement, the line span, filename and original source are From 4cfe7b29175e5069c1a1051ee950d3f6b91c5f64 Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: Thu, 26 Feb 2026 11:09:30 +0000 Subject: [PATCH 12/37] #3351 Add initial treesitter frontend tests --- .github/workflows/compilation.yml | 7 +- .github/workflows/python-package.yml | 2 +- .../frontend/fortran_treesitter_reader.py | 19 ++- .../fortran_treesitter_reader/ftr_test.py | 120 ++++++++++++++++++ 4 files changed, 138 insertions(+), 10 deletions(-) create mode 100644 src/psyclone/tests/psyir/frontend/fortran_treesitter_reader/ftr_test.py diff --git a/.github/workflows/compilation.yml b/.github/workflows/compilation.yml index a4c910ea19..daf4fc631f 100644 --- a/.github/workflows/compilation.yml +++ b/.github/workflows/compilation.yml @@ -88,7 +88,7 @@ jobs: # Uncomment the below to use the submodule version of fparser rather # than the latest release from pypi. pip install external/fparser - pip install .[test,psydata,doc] + pip install .[test,psydata,doc,treesitter] - name: Unit tests with compilation - gfortran run: | . .runner_venv/bin/activate @@ -177,5 +177,6 @@ jobs: - name: Test reasonable psycloning times for complex files run: | . 
.runner_venv/bin/activate - # Fail if it takes more than 15s - timeout -s INT 15s psyclone /archive/psyclone-tests/latest-run/slow_files/ukca_aero_ctl.F90 + # Fail if it takes more time than expected + timeout -s INT 12s psyclone /archive/psyclone-tests/latest-run/slow_files/ukca_aero_ctl.F90 + timeout -s INT 0.5s psyclone --frontend treesitter /archive/psyclone-tests/latest-run/slow_files/ukca_aero_ctl.F90 diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index b72bfe1b5d..9d90e2d7bd 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -107,7 +107,7 @@ jobs: # than the latest release from pypi. pip install external/fparser pip install .[doc] - pip install .[test] + pip install .[test,treesitter] - name: Lint with flake8 run: | # Stop the build if there are Python syntax errors or undefined names. diff --git a/src/psyclone/psyir/frontend/fortran_treesitter_reader.py b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py index 467ab13752..ebbc64d237 100644 --- a/src/psyclone/psyir/frontend/fortran_treesitter_reader.py +++ b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py @@ -44,6 +44,9 @@ class FortranTreeSitterReader(): ''' Processes the TreeSitter parse_tree and converts it to PSyIR. + Note: this class is in development, currently pretty much only generates + a CodeBlock for anything provided to it. + :param ignore_directives: Whether directives should be ignored or not (default True). Currently ignored. :param last_comments_as_codeblocks: Whether the last comments in the a @@ -59,6 +62,9 @@ class FortranTreeSitterReader(): def __init__(self, ignore_directives: bool = True, last_comments_as_codeblocks: bool = False, resolve_modules: bool = False): + # TODO #3038 Arguments are currently not used nor typechecked, but if + # we decide this is the common reader interface, this can be done in a + # super class instead of duplicate it here. 
self._ignore_directives = ignore_directives self._resolve_modules = resolve_modules self._last_comments_as_codeblocks = last_comments_as_codeblocks @@ -71,12 +77,12 @@ def __init__(self, ignore_directives: bool = True, @classmethod def generate_parse_tree( cls, - source_code: str, - file_path: str, - ignore_comments: bool, - free_form: bool, - ignore_directives: bool, - conditional_openmp: bool, + source_code: Optional[str] = None, + file_path: Optional[str] = None, + ignore_comments: bool = True, + free_form: bool = True, + ignore_directives: bool = True, + conditional_openmp: bool = True, partial_code: str = "" ): ''' Use the provided source code and frontend options to generate @@ -92,6 +98,7 @@ def generate_parse_tree( "expression" or "statement". ''' + # pylint: disable=unused-argument # Purposely inlined to lazily load this modules only when needed # pylint: disable=import-outside-toplevel import tree_sitter_fortran diff --git a/src/psyclone/tests/psyir/frontend/fortran_treesitter_reader/ftr_test.py b/src/psyclone/tests/psyir/frontend/fortran_treesitter_reader/ftr_test.py new file mode 100644 index 0000000000..2e22cf7a5c --- /dev/null +++ b/src/psyclone/tests/psyir/frontend/fortran_treesitter_reader/ftr_test.py @@ -0,0 +1,120 @@ +# ----------------------------------------------------------------------------- +# BSD 3-Clause License +# +# Copyright (c) 2026, Science and Technology Facilities Council. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# * Neither the name of the copyright holder nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +# POSSIBILITY OF SUCH DAMAGE. +# ----------------------------------------------------------------------------- +# Authors: S. 
Siso, STFC Daresbury Lab +# ----------------------------------------------------------------------------- + +''' Performs tests on the treesitter PSyIR front-end ''' + +import pytest + +from tree_sitter import Node as TSNode + +from psyclone.psyir.frontend.fortran_treesitter_reader import \ + FortranTreeSitterReader +from psyclone.psyir.nodes import FileContainer, CodeBlock + + +def test_constructor(): + ''' Test the constructor and its arguments ''' + processor = FortranTreeSitterReader() + + # Check default arguments + assert processor._ignore_directives is True + assert processor._resolve_modules is False + assert processor._last_comments_as_codeblocks is False + + # Currently arguments are ignored (they are just accepted for compatibility + # with the fparser2 interface) + processor = FortranTreeSitterReader( + ignore_directives=False, + resolve_modules=True, + last_comments_as_codeblocks=True, + ) + assert processor._ignore_directives is False + assert processor._resolve_modules is True + assert processor._last_comments_as_codeblocks is True + + # TODO #3038 Typecheck arguments + + +def test_generate_parse_tree(): + ''' + Test that generate_parse_tree returns treesitter trees or appropriate + error messages. + ''' + processor = FortranTreeSitterReader() + + # Valid code returns a treesitter Node + valid_code = """ + program test + end program test + """ + ptree = processor.generate_parse_tree(valid_code) + assert isinstance(ptree, TSNode) + + # Invalid code raises a Value error with a relevant error message + invalid_code = """ + program test + syntax error + end program test + """ + with pytest.raises(ValueError) as err: + _ = processor.generate_parse_tree(invalid_code) + assert "Syntax Error found at line 2" in str(err.value) + + # TODO #3038 All arguments are currently ignored + + +def test_generate_psyir(): + ''' + Test that generate_psyir transforms treesitter parse trees to + PSyIR nodes. 
+ ''' + processor = FortranTreeSitterReader() + + # Valid code returns a treesitter Node + valid_code = """ + program test + end program test + """ + ptree = processor.generate_parse_tree(valid_code) + psyir = processor.generate_psyir(ptree) + + # Currently only FileContainers with CodeBlocks inside + assert isinstance(psyir, FileContainer) + assert psyir.name == "test" + assert isinstance(psyir.children[0], CodeBlock) + assert psyir.children[0].get_fortran_lines() == [ + 'program test\n end program test\n' + ] From 8e911f8f17d5693342f6369f48c6adac12cb23c0 Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: Mon, 2 Mar 2026 15:28:45 +0000 Subject: [PATCH 13/37] #3083 Add tests --- src/psyclone/configuration.py | 2 +- src/psyclone/tests/configuration_test.py | 15 +++++++++++++++ src/psyclone/tests/psyir/frontend/fortran_test.py | 9 +++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/psyclone/configuration.py b/src/psyclone/configuration.py index c735389882..ba1c42c816 100644 --- a/src/psyclone/configuration.py +++ b/src/psyclone/configuration.py @@ -561,7 +561,7 @@ def frontend(self, value: str): if not isinstance(value, str) or value not in SUPPORTED_FRONTENDS: raise ConfigurationError( f"frontend must be one of {SUPPORTED_FRONTENDS} but got " - f"{value}") + f"'{value}''") self._frontend = value @property diff --git a/src/psyclone/tests/configuration_test.py b/src/psyclone/tests/configuration_test.py index babe095399..3196ace72b 100644 --- a/src/psyclone/tests/configuration_test.py +++ b/src/psyclone/tests/configuration_test.py @@ -348,6 +348,21 @@ def test_read_values(): assert _config.filename == str(TEST_CONFIG) +def test_frontend(): + ''' Checks for getter and setter for frontend parameter ''' + config = Config() + config.load(config_file=TEST_CONFIG) + # Defaults to fparser2 + assert config.frontend == "fparser2" + # Check the setter method + config.frontend = "treesitter" + assert config.frontend == "treesitter" + with 
pytest.raises(ConfigurationError) as err: + config.frontend = "invalid" + assert ("frontend must be one of ['fparser2', 'treesitter'] but got " + "'invalid'" in str(err.value)) + + def test_dm(): ''' Checks for getter and setter for distributed memory ''' config = Config() diff --git a/src/psyclone/tests/psyir/frontend/fortran_test.py b/src/psyclone/tests/psyir/frontend/fortran_test.py index 4f475aa3e1..5d873f5e96 100644 --- a/src/psyclone/tests/psyir/frontend/fortran_test.py +++ b/src/psyclone/tests/psyir/frontend/fortran_test.py @@ -38,8 +38,11 @@ ''' Performs py.test tests on the Fortran PSyIR front-end ''' import pytest +from psyclone.configuration import Config from psyclone.psyir.frontend.fortran import FortranReader from psyclone.psyir.frontend.fparser2 import Fparser2Reader +from psyclone.psyir.frontend.fortran_treesitter_reader import \ + FortranTreeSitterReader from psyclone.psyir.nodes import ( Routine, FileContainer, UnaryOperation, BinaryOperation, Literal, Assignment, CodeBlock, IntrinsicCall, Loop, Reference) @@ -94,6 +97,12 @@ def test_fortran_reader_constructor(): # the return value of this function is tested in the following tests freader.psyir_from_source(ONLY_2008_CODE) + # Now repeat the process with treesitter + Config.get().frontend = "treesitter" + freader = FortranReader() + assert isinstance(freader._processor, FortranTreeSitterReader) + freader.psyir_from_source(ONLY_2008_CODE) + def test_fortran_psyir_from_source(fortran_reader): ''' Test that the psyir_from_source method parses to PSyIR From 843c3a643c5ae22cd3cfe4eb4b5c15a41b899ea0 Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: Tue, 17 Mar 2026 07:43:38 +0000 Subject: [PATCH 14/37] #3083 Add tests for missing coverage --- .../fortran_treesitter_reader/ftr_test.py | 9 ++++++- .../tests/psyir/nodes/codeblock_test.py | 24 +++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/src/psyclone/tests/psyir/frontend/fortran_treesitter_reader/ftr_test.py 
b/src/psyclone/tests/psyir/frontend/fortran_treesitter_reader/ftr_test.py index 2e22cf7a5c..20569f3ae8 100644 --- a/src/psyclone/tests/psyir/frontend/fortran_treesitter_reader/ftr_test.py +++ b/src/psyclone/tests/psyir/frontend/fortran_treesitter_reader/ftr_test.py @@ -68,7 +68,7 @@ def test_constructor(): # TODO #3038 Typecheck arguments -def test_generate_parse_tree(): +def test_generate_parse_tree(tmpdir_factory): ''' Test that generate_parse_tree returns treesitter trees or appropriate error messages. @@ -93,6 +93,13 @@ def test_generate_parse_tree(): _ = processor.generate_parse_tree(invalid_code) assert "Syntax Error found at line 2" in str(err.value) + # Test providing a source file + filename = str(tmpdir_factory.mktemp('ts_test').join("testfile.f90")) + with open(filename, "w", encoding='utf-8') as wfile: + wfile.write(valid_code) + ptree = processor.generate_parse_tree(file_path=filename) + assert isinstance(ptree, TSNode) + # TODO #3038 All arguments are currently ignored diff --git a/src/psyclone/tests/psyir/nodes/codeblock_test.py b/src/psyclone/tests/psyir/nodes/codeblock_test.py index 36df2bacc3..6655a3ec24 100644 --- a/src/psyclone/tests/psyir/nodes/codeblock_test.py +++ b/src/psyclone/tests/psyir/nodes/codeblock_test.py @@ -102,6 +102,30 @@ def test_codeblock_children_validation(): " LeafNode and doesn't accept children.") in str(excinfo.value) +def test_abstract_methods(): + ''' Test that the abstract methods of CodeBlock raise a NotImplementedError + (to simplify other tests they still work when there is no associated parse + tree) ''' + # If there is no associated parse_tree, the methods return a falsy value + cblock = CodeBlock([], "dummy") + assert not cblock.get_symbol_names() + assert not cblock.has_potential_control_flow_jump() + assert not cblock.get_fortran_lines() + + # But if there is one, the node will need to be subclassed to properly + # interpret the meaning of the ast + cblock._parse_tree = "something" + with 
pytest.raises(NotImplementedError) as err: + _ = cblock.get_symbol_names() + assert "Use appropriate CodeBlock subclass" in str(err.value) + with pytest.raises(NotImplementedError) as err: + _ = cblock.has_potential_control_flow_jump() + assert "Use appropriate CodeBlock subclass" in str(err.value) + with pytest.raises(NotImplementedError) as err: + _ = cblock.get_fortran_lines() + assert "Use appropriate CodeBlock subclass" in str(err.value) + + def test_codeblock_get_symbol_names(parser): '''Test that the get_symbol_names methods returns the names of the symbols used inside the CodeBlock. This is slightly subtle as we have to avoid From baf9d510db2fa1b9fe43a9a64b50e2e5006cc2b3 Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: Tue, 17 Mar 2026 09:37:18 +0000 Subject: [PATCH 15/37] #3083 Add treesitter module handler --- .../frontend/fortran_treesitter_reader.py | 61 +++++++++++++------ .../fortran_treesitter_reader/ftr_test.py | 4 +- 2 files changed, 46 insertions(+), 19 deletions(-) diff --git a/src/psyclone/psyir/frontend/fortran_treesitter_reader.py b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py index ebbc64d237..8d7eb24fbb 100644 --- a/src/psyclone/psyir/frontend/fortran_treesitter_reader.py +++ b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py @@ -35,11 +35,24 @@ ''' PSyIR TreeSitter Fortran reader ''' -from typing import Optional +from typing import Optional, TYPE_CHECKING from psyclone.psyir import nodes from psyclone.psyir.nodes.codeblock import TreeSitterCodeBlock, CodeBlock +if TYPE_CHECKING: + # Purposely inside typechecking because at runtime we want to lazily + # import the parser (only if it is actually used) + from tree_sitter import Node as TSNode + + +def to_str(node: TSNode) -> str: + ''' + :param node: a given treesitter node. 
+ :returns: the string representing the node in utf8 + ''' + return node.text.decode('utf8') if node.text else "" + class FortranTreeSitterReader(): ''' Processes the TreeSitter parse_tree and converts it to PSyIR. @@ -68,10 +81,11 @@ def __init__(self, ignore_directives: bool = True, self._ignore_directives = ignore_directives self._resolve_modules = resolve_modules self._last_comments_as_codeblocks = last_comments_as_codeblocks - self.location = None + self._psyir_cursor = None self._ongoing_codeblock = [] self.handlers = { - 'translation_unit': self._translation_unit + 'translation_unit': self._translation_unit, + 'module': self._module_handler, } @classmethod @@ -86,7 +100,7 @@ def generate_parse_tree( partial_code: str = "" ): ''' Use the provided source code and frontend options to generate - a fparser2 parsetree. + a treesitter parsetree. :param source_code: the given source code. :param ignore_comments: whether to let the parser ignore comments. @@ -109,7 +123,7 @@ def report_errors(node): if node.type == 'ERROR': raise ValueError( f"Syntax Error found at line {node.start_point[0] + 1}: " - f"{node.text.decode('utf8')}") + f"{to_str(node)}") for child in node.children: report_errors(child) @@ -130,20 +144,20 @@ def generate_psyir(self, parse_tree, filename=""): :type parse_tree: :py:class:`fparser.two.Fortran2003.Program` :param Optional[str] filename: associated name for FileContainer. - :returns: PSyIR of the supplied fparser2 parse_tree. + :returns: PSyIR of the supplied treesitter parse_tree. :rtype: :py:class:`psyclone.psyir.nodes.FileContainer` - :raises GenerationError: if the root of the supplied fparser2 - parse tree is not a Program. - ''' - return self.get_handler(parse_tree)(parse_tree) + result = self.get_handler(parse_tree)(parse_tree) + if filename and isinstance(result, nodes.FileContainer): + result.name = filename + return result def process_nodes(self, list_of_nodes): ''' Create the PSyIR of the supplied list of treesitter nodes. 
- :param nodes: List of sibling nodes in fparser2 AST. + :param nodes: List of sibling nodes in treesitter AST. :type nodes: list[:py:class:`fparser.two.utils.Base`] ''' @@ -155,8 +169,7 @@ def process_nodes(self, list_of_nodes): except NotImplementedError: if not self._ongoing_codeblock: self._ongoing_codeblock.append(tsnode) - if not isinstance(self.location, nodes.Schedule): - children.append(self.generate_accomulated_codeblock()) + children.append(self.generate_accomulated_codeblock()) return children def generate_accomulated_codeblock(self, message: Optional[str] = None): @@ -167,9 +180,10 @@ def generate_accomulated_codeblock(self, message: Optional[str] = None): :param message: comment to associate with the CodeBlock. ''' - if isinstance(self.location, (nodes.Schedule, nodes.Container)): + if isinstance(self._psyir_cursor, (nodes.Schedule, nodes.Container)): structure = CodeBlock.Structure.STATEMENT - else: + else: # pragma: no-cover + # TODO #3038 Remove no-cover when parser reaches expressions structure = CodeBlock.Structure.EXPRESSION code_block = TreeSitterCodeBlock(self._ongoing_codeblock, structure) @@ -197,7 +211,20 @@ def _translation_unit(self, tsnode) -> nodes.Node: :param tsnode: the node the process. :returns: the equivatent PSyIR Node. ''' - file_container = nodes.FileContainer("test") - self.location = file_container + file_container = nodes.FileContainer("") + self._psyir_cursor = file_container file_container.children.extend(self.process_nodes(tsnode.children)) return file_container + + def _module_handler(self, tsnode) -> nodes.Node: + ''' Handle module treesitter node. + + :param tsnode: the node the process. + :returns: the equivatent PSyIR Node. 
+ ''' + module_stmt, internal_proc, _end_module_stmt = tsnode.children + _module_keyword, module_name = module_stmt.children + container = nodes.Container(to_str(module_name)) + self._psyir_cursor = container + container.children.extend(self.process_nodes([internal_proc])) + return container diff --git a/src/psyclone/tests/psyir/frontend/fortran_treesitter_reader/ftr_test.py b/src/psyclone/tests/psyir/frontend/fortran_treesitter_reader/ftr_test.py index 20569f3ae8..a02af5460e 100644 --- a/src/psyclone/tests/psyir/frontend/fortran_treesitter_reader/ftr_test.py +++ b/src/psyclone/tests/psyir/frontend/fortran_treesitter_reader/ftr_test.py @@ -116,11 +116,11 @@ def test_generate_psyir(): end program test """ ptree = processor.generate_parse_tree(valid_code) - psyir = processor.generate_psyir(ptree) + psyir = processor.generate_psyir(ptree, "filename.f90") # Currently only FileContainers with CodeBlocks inside assert isinstance(psyir, FileContainer) - assert psyir.name == "test" + assert psyir.name == "filename.f90" assert isinstance(psyir.children[0], CodeBlock) assert psyir.children[0].get_fortran_lines() == [ 'program test\n end program test\n' From f16f8aa3581c9250feb4103967d155f0e3732bb6 Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: Tue, 17 Mar 2026 10:06:26 +0000 Subject: [PATCH 16/37] #3083 Remove filename argument from generate_psyir --- src/psyclone/parse/file_info.py | 3 ++- src/psyclone/psyir/frontend/fortran_treesitter_reader.py | 8 +------- src/psyclone/psyir/frontend/fparser2.py | 6 ++---- .../psyir/frontend/fortran_treesitter_reader/ftr_test.py | 3 +-- 4 files changed, 6 insertions(+), 14 deletions(-) diff --git a/src/psyclone/parse/file_info.py b/src/psyclone/parse/file_info.py index fba8ed8bdf..3da5ad1323 100644 --- a/src/psyclone/parse/file_info.py +++ b/src/psyclone/parse/file_info.py @@ -509,7 +509,8 @@ def get_psyir(self) -> FileContainer: processor = Fparser2Reader( resolve_modules=self._resolve_imports ) - self._psyir_node = 
processor.generate_psyir(fparse_tree, filename) + self._psyir_node = processor.generate_psyir(fparse_tree) + self._psyir_node.name = filename # TODO #2786: Uncomment if psyir nodes are serializable # self._cache_save() diff --git a/src/psyclone/psyir/frontend/fortran_treesitter_reader.py b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py index 8d7eb24fbb..4f643bc4f6 100644 --- a/src/psyclone/psyir/frontend/fortran_treesitter_reader.py +++ b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py @@ -137,20 +137,14 @@ def report_errors(node): report_errors(parse_tree.root_node) return parse_tree.root_node - def generate_psyir(self, parse_tree, filename=""): + def generate_psyir(self, parse_tree: TSNode) -> nodes.Node: '''Translate the supplied treesitter node to PSyIR. :param parse_tree: the supplied treesitter parse tree. - :type parse_tree: :py:class:`fparser.two.Fortran2003.Program` - :param Optional[str] filename: associated name for FileContainer. :returns: PSyIR of the supplied treesitter parse_tree. - :rtype: :py:class:`psyclone.psyir.nodes.FileContainer` - ''' result = self.get_handler(parse_tree)(parse_tree) - if filename and isinstance(result, nodes.FileContainer): - result.name = filename return result def process_nodes(self, list_of_nodes): diff --git a/src/psyclone/psyir/frontend/fparser2.py b/src/psyclone/psyir/frontend/fparser2.py index 8a62ce56de..161d36d920 100644 --- a/src/psyclone/psyir/frontend/fparser2.py +++ b/src/psyclone/psyir/frontend/fparser2.py @@ -1139,12 +1139,11 @@ def nodes_to_code_block(parent, fp2_nodes, message=None): del fp2_nodes[:] return code_block - def generate_psyir(self, parse_tree, filename=""): + def generate_psyir(self, parse_tree): '''Translate the supplied fparser2 parse_tree into PSyIR. :param parse_tree: the supplied fparser2 parse tree. :type parse_tree: :py:class:`fparser.two.Fortran2003.Program` - :param Optional[str] filename: associated name for FileContainer. 
:returns: PSyIR of the supplied fparser2 parse_tree. :rtype: :py:class:`psyclone.psyir.nodes.FileContainer` @@ -1162,7 +1161,6 @@ def generate_psyir(self, parse_tree, filename=""): node = Container("dummy") self.process_nodes(node, [parse_tree]) result = node.children[0] - result.name = filename return result.detach() def get_routine_schedules(self, name, module_ast): @@ -5945,7 +5943,7 @@ def _program_handler(self, node, parent): # fparser2 does not keep the original filename (if there was # one) so this can't be provided as the name of the # FileContainer. - file_container = FileContainer("None", parent=parent) + file_container = FileContainer("", parent=parent) self.process_nodes(file_container, node.children) return file_container diff --git a/src/psyclone/tests/psyir/frontend/fortran_treesitter_reader/ftr_test.py b/src/psyclone/tests/psyir/frontend/fortran_treesitter_reader/ftr_test.py index a02af5460e..6935725777 100644 --- a/src/psyclone/tests/psyir/frontend/fortran_treesitter_reader/ftr_test.py +++ b/src/psyclone/tests/psyir/frontend/fortran_treesitter_reader/ftr_test.py @@ -116,11 +116,10 @@ def test_generate_psyir(): end program test """ ptree = processor.generate_parse_tree(valid_code) - psyir = processor.generate_psyir(ptree, "filename.f90") + psyir = processor.generate_psyir(ptree) # Currently only FileContainers with CodeBlocks inside assert isinstance(psyir, FileContainer) - assert psyir.name == "filename.f90" assert isinstance(psyir.children[0], CodeBlock) assert psyir.children[0].get_fortran_lines() == [ 'program test\n end program test\n' From e5ada54728bfd51f59e5801994d621697805d841 Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: Tue, 17 Mar 2026 12:39:15 +0000 Subject: [PATCH 17/37] #3083 Fix typehint --- src/psyclone/psyir/frontend/fortran_treesitter_reader.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/psyclone/psyir/frontend/fortran_treesitter_reader.py b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py 
index 4f643bc4f6..7655188444 100644 --- a/src/psyclone/psyir/frontend/fortran_treesitter_reader.py +++ b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py @@ -46,10 +46,10 @@ from tree_sitter import Node as TSNode -def to_str(node: TSNode) -> str: +def to_str(node: 'TSNode') -> str: ''' :param node: a given treesitter node. - :returns: the string representing the node in utf8 + :returns: the string representing the node in utf8. ''' return node.text.decode('utf8') if node.text else "" @@ -137,7 +137,7 @@ def report_errors(node): report_errors(parse_tree.root_node) return parse_tree.root_node - def generate_psyir(self, parse_tree: TSNode) -> nodes.Node: + def generate_psyir(self, parse_tree: 'TSNode') -> nodes.Node: '''Translate the supplied treesitter node to PSyIR. :param parse_tree: the supplied treesitter parse tree. From 25c15ca3dc268636e6ce7f23f442743a05a2e369 Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: Thu, 19 Mar 2026 10:08:41 +0000 Subject: [PATCH 18/37] #3083 Improve support for modules --- .../frontend/fortran_treesitter_reader.py | 29 ++++++++++++++----- .../fortran_treesitter_reader/ftr_test.py | 18 ++++++------ 2 files changed, 30 insertions(+), 17 deletions(-) diff --git a/src/psyclone/psyir/frontend/fortran_treesitter_reader.py b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py index 7655188444..f810881727 100644 --- a/src/psyclone/psyir/frontend/fortran_treesitter_reader.py +++ b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py @@ -35,7 +35,7 @@ ''' PSyIR TreeSitter Fortran reader ''' -from typing import Optional, TYPE_CHECKING +from typing import Optional, TYPE_CHECKING, Iterable, Union from psyclone.psyir import nodes from psyclone.psyir.nodes.codeblock import TreeSitterCodeBlock, CodeBlock @@ -147,14 +147,15 @@ def generate_psyir(self, parse_tree: 'TSNode') -> nodes.Node: result = self.get_handler(parse_tree)(parse_tree) return result - def process_nodes(self, list_of_nodes): + def process_nodes(self, tsnodes: 
Union["TSNode", Iterable["TSNode"]]): ''' Create the PSyIR of the supplied list of treesitter nodes. - :param nodes: List of sibling nodes in treesitter AST. - :type nodes: list[:py:class:`fparser.two.utils.Base`] + :param nodes: the list of nodes to process, for conveninece it also + accepts a single node without a list. ''' + list_of_nodes = tsnodes if isinstance(tsnodes, Iterable) else [tsnodes] children = [] for tsnode in list_of_nodes: try: @@ -216,9 +217,21 @@ def _module_handler(self, tsnode) -> nodes.Node: :param tsnode: the node the process. :returns: the equivatent PSyIR Node. ''' - module_stmt, internal_proc, _end_module_stmt = tsnode.children - _module_keyword, module_name = module_stmt.children - container = nodes.Container(to_str(module_name)) + module_name = None + internal_proc = None + for child in tsnode.children: + if child.type == "module_statement": + _module_keyword, module_name = child.children + elif child.type == "end_module_statement": + pass + elif child.type == "internal_procedures": + internal_proc = child + else: + raise NotImplementedError( + f"Module has an unsupported '{child.type}' node") + + container = nodes.Container(to_str(module_name) if module_name else "") self._psyir_cursor = container - container.children.extend(self.process_nodes([internal_proc])) + if internal_proc: + container.children.extend(self.process_nodes(internal_proc)) return container diff --git a/src/psyclone/tests/psyir/frontend/fortran_treesitter_reader/ftr_test.py b/src/psyclone/tests/psyir/frontend/fortran_treesitter_reader/ftr_test.py index 6935725777..061445851a 100644 --- a/src/psyclone/tests/psyir/frontend/fortran_treesitter_reader/ftr_test.py +++ b/src/psyclone/tests/psyir/frontend/fortran_treesitter_reader/ftr_test.py @@ -42,7 +42,7 @@ from psyclone.psyir.frontend.fortran_treesitter_reader import \ FortranTreeSitterReader -from psyclone.psyir.nodes import FileContainer, CodeBlock +from psyclone.psyir.nodes import FileContainer, CodeBlock, Container 
def test_constructor(): @@ -111,16 +111,16 @@ def test_generate_psyir(): processor = FortranTreeSitterReader() # Valid code returns a treesitter Node - valid_code = """ - program test - end program test + valid_code = """\ + module test + contains + subroutine mysub() + end subroutine + end module test """ ptree = processor.generate_parse_tree(valid_code) psyir = processor.generate_psyir(ptree) - # Currently only FileContainers with CodeBlocks inside assert isinstance(psyir, FileContainer) - assert isinstance(psyir.children[0], CodeBlock) - assert psyir.children[0].get_fortran_lines() == [ - 'program test\n end program test\n' - ] + assert isinstance(psyir.children[0], Container) + assert isinstance(psyir.children[0].children[0], CodeBlock) From ed4ab9cf6c7f6de5336b98a662922ee154953c8e Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: Thu, 19 Mar 2026 12:05:48 +0000 Subject: [PATCH 19/37] #3083 Simplify and test treesitter codeblock generation --- .../frontend/fortran_treesitter_reader.py | 56 +++++++++---------- .../fortran_treesitter_reader/ftr_test.py | 30 +++++++++- 2 files changed, 57 insertions(+), 29 deletions(-) diff --git a/src/psyclone/psyir/frontend/fortran_treesitter_reader.py b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py index f810881727..21133bcf02 100644 --- a/src/psyclone/psyir/frontend/fortran_treesitter_reader.py +++ b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py @@ -57,8 +57,15 @@ def to_str(node: 'TSNode') -> str: class FortranTreeSitterReader(): ''' Processes the TreeSitter parse_tree and converts it to PSyIR. - Note: this class is in development, currently pretty much only generates - a CodeBlock for anything provided to it. + Note: this class is in development, currently only generates + top-level Modules and CodeBlocks. 
+ + The structure of the expected fortran parse tree can be found in the + 'rules' section of: + https://github.com/stadelmanma/tree-sitter-fortran/blob/master/grammar.js + To interpret the rules use: + https://tree-sitter.github.io/tree-sitter/creating-parsers/ + 2-the-grammar-dsl.html :param ignore_directives: Whether directives should be ignored or not (default True). Currently ignored. @@ -81,8 +88,10 @@ def __init__(self, ignore_directives: bool = True, self._ignore_directives = ignore_directives self._resolve_modules = resolve_modules self._last_comments_as_codeblocks = last_comments_as_codeblocks + # TODO #3038: Currently this reader uses a cursor pointer instead of + # passing around a parent argument all the time (like fparser's), but + # this can be re-evaluated if necessary. self._psyir_cursor = None - self._ongoing_codeblock = [] self.handlers = { 'translation_unit': self._translation_unit, 'module': self._module_handler, @@ -161,33 +170,18 @@ def process_nodes(self, tsnodes: Union["TSNode", Iterable["TSNode"]]): try: handler = self.get_handler(tsnode) children.append(handler(tsnode)) - except NotImplementedError: - if not self._ongoing_codeblock: - self._ongoing_codeblock.append(tsnode) - children.append(self.generate_accomulated_codeblock()) + except NotImplementedError as err: + # TODO #3038: Add support expression codeblocks and aggregating + # contiguous codeblocks into a single one. + structure = CodeBlock.Structure.STATEMENT + code_block = TreeSitterCodeBlock([tsnode], structure) + code_block.append_preceding_comment( + f"PSyclone CodeBlock (unsupported code) reason:\n" + f"- {err}" + ) + children.append(code_block) return children - def generate_accomulated_codeblock(self, message: Optional[str] = None): - ''' - Create a CodeBlock node with the contents accomulated in the - _ongoing_codeblock list. - - :param message: comment to associate with the CodeBlock. 
- - ''' - if isinstance(self._psyir_cursor, (nodes.Schedule, nodes.Container)): - structure = CodeBlock.Structure.STATEMENT - else: # pragma: no-cover - # TODO #3038 Remove no-cover when parser reaches expressions - structure = CodeBlock.Structure.EXPRESSION - - code_block = TreeSitterCodeBlock(self._ongoing_codeblock, structure) - self._ongoing_codeblock = [] - if message: - code_block.preceding_comment = message - - return code_block - def get_handler(self, tsnode): ''' :param tsnode: a given treesitter node. @@ -219,6 +213,7 @@ def _module_handler(self, tsnode) -> nodes.Node: ''' module_name = None internal_proc = None + implicit_statement = False for child in tsnode.children: if child.type == "module_statement": _module_keyword, module_name = child.children @@ -226,10 +221,15 @@ def _module_handler(self, tsnode) -> nodes.Node: pass elif child.type == "internal_procedures": internal_proc = child + elif child.type == "implicit_statement": + implicit_statement = True else: raise NotImplementedError( f"Module has an unsupported '{child.type}' node") + if not implicit_statement: + raise NotImplementedError( + "Modules that allow implicit variables are not supported") container = nodes.Container(to_str(module_name) if module_name else "") self._psyir_cursor = container if internal_proc: diff --git a/src/psyclone/tests/psyir/frontend/fortran_treesitter_reader/ftr_test.py b/src/psyclone/tests/psyir/frontend/fortran_treesitter_reader/ftr_test.py index 061445851a..62adfcac10 100644 --- a/src/psyclone/tests/psyir/frontend/fortran_treesitter_reader/ftr_test.py +++ b/src/psyclone/tests/psyir/frontend/fortran_treesitter_reader/ftr_test.py @@ -111,8 +111,9 @@ def test_generate_psyir(): processor = FortranTreeSitterReader() # Valid code returns a treesitter Node - valid_code = """\ + valid_code = """ module test + implicit none contains subroutine mysub() end subroutine @@ -124,3 +125,30 @@ def test_generate_psyir(): assert isinstance(psyir, FileContainer) assert 
isinstance(psyir.children[0], Container) assert isinstance(psyir.children[0].children[0], CodeBlock) + + +def test_codeblock_generation_and_messages(): + ''' + Test that NotImplementedErrors are catch and converted to CodeBlocks + with the appropriate associated comment + ''' + processor = FortranTreeSitterReader() + + # Valid code returns a treesitter Node + valid_code = """ + module test + contains + subroutine mysub() + end subroutine + end module test + """ + ptree = processor.generate_parse_tree(valid_code) + psyir = processor.generate_psyir(ptree) + + assert isinstance(psyir, FileContainer) + assert isinstance(psyir.children[0], CodeBlock) + expected = ( + "PSyclone CodeBlock (unsupported code) reason:\n" + "- Modules that allow implicit variables are not supported" + ) + assert psyir.children[0].preceding_comment == expected From a570885989e3c83b59fb1dcf3b473576e003a226 Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: Thu, 19 Mar 2026 13:53:40 +0000 Subject: [PATCH 20/37] #3083 Improve treesitter codeblock and its tests --- src/psyclone/psyir/frontend/fortran.py | 7 ++-- .../frontend/fortran_treesitter_reader.py | 4 +-- src/psyclone/psyir/frontend/fparser2.py | 33 ++++++++--------- src/psyclone/psyir/nodes/codeblock.py | 24 ++++++++----- .../tests/psyir/nodes/codeblock_test.py | 36 ++++++++++++++++--- 5 files changed, 69 insertions(+), 35 deletions(-) diff --git a/src/psyclone/psyir/frontend/fortran.py b/src/psyclone/psyir/frontend/fortran.py index 9aef888433..31887c715b 100644 --- a/src/psyclone/psyir/frontend/fortran.py +++ b/src/psyclone/psyir/frontend/fortran.py @@ -133,7 +133,7 @@ def psyir_from_source(self, source_code: str) -> Node: ''' tree = self._processor.generate_parse_tree( source_code, None, self._ignore_comments, self._free_form, - self._ignore_directives, self._conditional_openmp_statements) + self._conditional_openmp_statements) psyir = self._processor.generate_psyir(tree) return psyir @@ -163,7 +163,7 @@ def psyir_from_expression(self, 
source_code: str, tree = self._processor.generate_parse_tree( source_code, None, self._ignore_comments, self._free_form, - self._ignore_directives, self._conditional_openmp_statements, + self._conditional_openmp_statements, partial_code="expression") # Create a fake sub-tree connected to the supplied symbol table so @@ -205,7 +205,7 @@ def psyir_from_statement(self, source_code: str, tree = self._processor.generate_parse_tree( source_code, None, self._ignore_comments, self._free_form, - self._ignore_directives, self._conditional_openmp_statements, + self._conditional_openmp_statements, partial_code="statement") # Create a fake sub-tree connected to the supplied symbol table so # that we can process the statement and lookup any symbols that it @@ -238,7 +238,6 @@ def psyir_from_file(self, file_path): file_path, self._ignore_comments, self._free_form, - self._ignore_directives, self._conditional_openmp_statements ) psyir = self._processor.generate_psyir(tree) diff --git a/src/psyclone/psyir/frontend/fortran_treesitter_reader.py b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py index 21133bcf02..5b4a3c683f 100644 --- a/src/psyclone/psyir/frontend/fortran_treesitter_reader.py +++ b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py @@ -97,14 +97,12 @@ def __init__(self, ignore_directives: bool = True, 'module': self._module_handler, } - @classmethod def generate_parse_tree( - cls, + self, source_code: Optional[str] = None, file_path: Optional[str] = None, ignore_comments: bool = True, free_form: bool = True, - ignore_directives: bool = True, conditional_openmp: bool = True, partial_code: str = "" ): diff --git a/src/psyclone/psyir/frontend/fparser2.py b/src/psyclone/psyir/frontend/fparser2.py index 161d36d920..df5a97f881 100644 --- a/src/psyclone/psyir/frontend/fparser2.py +++ b/src/psyclone/psyir/frontend/fparser2.py @@ -956,9 +956,12 @@ class SelectTypeInfo: num_clauses: int = -1 default_idx: int = -1 - def __init__(self, ignore_directives: bool = 
True, - last_comments_as_codeblocks: bool = False, - resolve_modules: Union[bool, list[str]] = False): + def __init__( + self, + ignore_directives: bool = True, + last_comments_as_codeblocks: bool = False, + resolve_modules: Union[bool, list[str]] = False + ): if isinstance(resolve_modules, bool): self._resolve_all_modules = resolve_modules self._modules_to_resolve = [] @@ -1023,15 +1026,13 @@ def __init__(self, ignore_directives: bool = True, # Whether to keep the last comments in a given block as CodeBlocks self._last_comments_as_codeblocks = last_comments_as_codeblocks - @classmethod def generate_parse_tree( - cls, - source_code: str, - file_path: str, - ignore_comments: bool, - free_form: bool, - ignore_directives: bool, - conditional_openmp: bool, + self, + source_code: str = "", + file_path: str = "", + ignore_comments: bool = False, + free_form: bool = False, + conditional_openmp: bool = False, partial_code: str = "" ): ''' Use the provided source code and frontend options to generate @@ -1052,7 +1053,7 @@ def generate_parse_tree( file_path, include_dirs=Config.get().include_paths, ignore_comments=ignore_comments, - process_directives=not ignore_directives, + process_directives=not self._ignore_directives, include_omp_conditional_lines=conditional_openmp, ) else: @@ -1060,7 +1061,7 @@ def generate_parse_tree( source_code, include_dirs=Config.get().include_paths, ignore_comments=ignore_comments, - process_directives=not ignore_directives, + process_directives=not self._ignore_directives, include_omp_conditional_lines=conditional_openmp, ) # Set reader to free format. 
@@ -1084,9 +1085,9 @@ def generate_parse_tree( else: try: std = Config.get().fortran_standard - if not cls._parser: - cls._parser = ParserFactory().create(std=std) - parse_tree = cls._parser(reader) + if not self._parser: + self._parser = ParserFactory().create(std=std) + parse_tree = self._parser(reader) except (FortranSyntaxError, NoMatchError) as err: raise ValueError( f"Failed to parse the provided source code:\n{source_code}" diff --git a/src/psyclone/psyir/nodes/codeblock.py b/src/psyclone/psyir/nodes/codeblock.py index 9e05d8f9b1..80469fe6f0 100644 --- a/src/psyclone/psyir/nodes/codeblock.py +++ b/src/psyclone/psyir/nodes/codeblock.py @@ -42,8 +42,6 @@ from enum import Enum from typing import List -from fparser.two import Fortran2003, pattern_tools -from fparser.two.utils import walk from psyclone.core import AccessType, Signature, VariablesAccessMap from psyclone.psyir.nodes.statement import Statement from psyclone.psyir.nodes.datanode import DataNode @@ -52,8 +50,8 @@ class CodeBlock(Statement, DataNode): '''Node representing any generic Fortran code that PSyclone does not attempt to manipulate. As such it is a leaf in the PSyIR. A CodeBlock - can still answer answer limited questions about the encosed code. For - this reason it keeps reference to the underlaying parse_tree, and each + can still answer answer limited questions about the enclosed code. For + this reason it keeps reference to the underlying parse_tree, and each frontend parser needs to subclass CodeBlock with the concrete implementation. @@ -166,7 +164,7 @@ def reference_accesses(self) -> VariablesAccessMap: :returns: a map of all the symbol accessed inside this node, the keys are Signatures (unique identifiers to a symbol and its - structure acccessors) and the values are AccessSequence + structure accessors) and the values are AccessSequence (a sequence of AccessTypes). ''' @@ -227,6 +225,10 @@ def get_symbol_names(self) -> List[str]: :returns: the symbol names used inside the CodeBock. 
''' + # Purposely inlined to lazily load this modules only when needed + # pylint: disable=import-outside-toplevel + from fparser.two import Fortran2003, pattern_tools + from fparser.two.utils import walk parse_tree = self.get_ast_nodes() result = [] for node in walk(parse_tree, Fortran2003.Name): @@ -282,6 +284,10 @@ def has_potential_control_flow_jump(self) -> bool: :returns: whether this CodeBlock contains a potential control flow jump, e.g. GOTO, EXIT or a labeled statement. ''' + # Purposely inlined to lazily load this modules only when needed + # pylint: disable=import-outside-toplevel + from fparser.two import Fortran2003 + from fparser.two.utils import walk # Loop over the fp2_nodes and check if any are GOTO, EXIT or # labelled statements for node in self._parse_tree: @@ -313,9 +319,11 @@ def get_fortran_lines(self) -> list[str]: class TreeSitterCodeBlock(CodeBlock): ''' The treesitter implementation of CodeBlock. ''' - def get_fortran_lines(self): + def get_fortran_lines(self) -> list[str]: ''' :returns: a list of each line of fortran represented by this node. 
''' - return [str(ast_node.text, encoding="utf8") for ast_node - in self.get_ast_nodes()] + output = [] + for node in self._parse_tree: + output.extend(str(node.text, encoding="utf8").split("\n")) + return output diff --git a/src/psyclone/tests/psyir/nodes/codeblock_test.py b/src/psyclone/tests/psyir/nodes/codeblock_test.py index 6655a3ec24..70e5205925 100644 --- a/src/psyclone/tests/psyir/nodes/codeblock_test.py +++ b/src/psyclone/tests/psyir/nodes/codeblock_test.py @@ -41,7 +41,15 @@ import pytest from fparser.common.readfortran import FortranStringReader from psyclone.psyir.frontend.fortran import FortranReader -from psyclone.psyir.nodes.codeblock import CodeBlock, Fparser2CodeBlock + +from psyclone.psyir.frontend.fparser2 import Fparser2Reader +from psyclone.psyir.frontend.fortran_treesitter_reader import \ + FortranTreeSitterReader + + +from psyclone.psyir.nodes.codeblock import ( + CodeBlock, Fparser2CodeBlock, TreeSitterCodeBlock +) from psyclone.psyir.nodes.node import colored from psyclone.errors import GenerationError @@ -126,11 +134,32 @@ def test_abstract_methods(): assert "Use appropriate CodeBlock subclass" in str(err.value) -def test_codeblock_get_symbol_names(parser): +def test_codeblock_get_fortran_lines(): + ''' + Test the get_fortran_lines method for fparser and treesiteer codeblocks. 
+ + (These should be the same to guarantee identical outcomes with both + frontends) + ''' + code = "\nsubroutine mytest\nend subroutine" + tree = Fparser2Reader().generate_parse_tree(code) + block = Fparser2CodeBlock(tree.children, CodeBlock.Structure.STATEMENT) + assert isinstance(block.get_fortran_lines(), list) + assert "subroutine mytest" in block.get_fortran_lines() + assert "end subroutine" in block.get_fortran_lines() + + tree = FortranTreeSitterReader().generate_parse_tree(code) + block = TreeSitterCodeBlock([tree], CodeBlock.Structure.STATEMENT) + assert isinstance(block.get_fortran_lines(), list) + assert "subroutine mytest" in block.get_fortran_lines() + assert "end subroutine" in block.get_fortran_lines() + + +def test_codeblock_get_symbol_names(): '''Test that the get_symbol_names methods returns the names of the symbols used inside the CodeBlock. This is slightly subtle as we have to avoid any labels on loop and branching statements.''' - reader = FortranStringReader(''' + prog = Fparser2Reader().generate_parse_tree(''' subroutine mytest myloop: DO i = 1, 10 a = b + sqrt(c) @@ -143,7 +172,6 @@ def test_codeblock_get_symbol_names(parser): END IF myifblock END DO myloop end subroutine mytest''') - prog = parser(reader) block = Fparser2CodeBlock(prog.children, CodeBlock.Structure.STATEMENT) sym_names = block.get_symbol_names() assert "a" in sym_names From aba4bb6c17d8369104e43a9a289f54f2cf7d06c1 Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: Thu, 19 Mar 2026 13:56:05 +0000 Subject: [PATCH 21/37] #3083 Improve treesitter test code coverage --- .../fortran_treesitter_reader/ftr_test.py | 25 ++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/src/psyclone/tests/psyir/frontend/fortran_treesitter_reader/ftr_test.py b/src/psyclone/tests/psyir/frontend/fortran_treesitter_reader/ftr_test.py index 62adfcac10..163adff265 100644 --- a/src/psyclone/tests/psyir/frontend/fortran_treesitter_reader/ftr_test.py +++ 
b/src/psyclone/tests/psyir/frontend/fortran_treesitter_reader/ftr_test.py @@ -134,15 +134,14 @@ def test_codeblock_generation_and_messages(): ''' processor = FortranTreeSitterReader() - # Valid code returns a treesitter Node - valid_code = """ + unsupported_code = """ module test contains subroutine mysub() end subroutine end module test """ - ptree = processor.generate_parse_tree(valid_code) + ptree = processor.generate_parse_tree(unsupported_code) psyir = processor.generate_psyir(ptree) assert isinstance(psyir, FileContainer) @@ -152,3 +151,23 @@ def test_codeblock_generation_and_messages(): "- Modules that allow implicit variables are not supported" ) assert psyir.children[0].preceding_comment == expected + + unsupported_code = """ + module test + implicit none + integer :: a + contains + subroutine mysub() + end subroutine + end module test + """ + ptree = processor.generate_parse_tree(unsupported_code) + psyir = processor.generate_psyir(ptree) + + assert isinstance(psyir, FileContainer) + assert isinstance(psyir.children[0], CodeBlock) + expected = ( + "PSyclone CodeBlock (unsupported code) reason:\n" + "- Module has an unsupported 'variable_declaration' node" + ) + assert psyir.children[0].preceding_comment == expected From 2c437dd35218bce3b3d1c95cdb43eeb1987f0d64 Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: Tue, 24 Mar 2026 09:23:16 +0000 Subject: [PATCH 22/37] #3083 Let Codeblock accept one or more nodes as argument --- .../transformations/gocean_opencl_trans.py | 2 +- src/psyclone/psyad/adjoint_visitor.py | 2 +- .../frontend/fortran_treesitter_reader.py | 2 +- src/psyclone/psyir/nodes/codeblock.py | 27 ++++++++++++------- .../kernel_module_inline_trans_test.py | 4 +-- .../tests/psyir/nodes/codeblock_test.py | 11 +++++--- .../tests/psyir/symbols/datasymbol_test.py | 2 +- 7 files changed, 32 insertions(+), 18 deletions(-) diff --git a/src/psyclone/domain/gocean/transformations/gocean_opencl_trans.py 
b/src/psyclone/domain/gocean/transformations/gocean_opencl_trans.py index 133df3603f..3fe700a20e 100644 --- a/src/psyclone/domain/gocean/transformations/gocean_opencl_trans.py +++ b/src/psyclone/domain/gocean/transformations/gocean_opencl_trans.py @@ -371,7 +371,7 @@ def apply(self, node, options=None): # Set up cmd_queues pointer ptree = Fortran2003.Pointer_Assignment_Stmt( f"{qlist.name} => {get_cmd_queues.name}()") - cblock = Fparser2CodeBlock([ptree], CodeBlock.Structure.STATEMENT) + cblock = Fparser2CodeBlock(ptree, CodeBlock.Structure.STATEMENT) setup_block.if_body.addchild(cblock) # Declare and assign kernel pointers diff --git a/src/psyclone/psyad/adjoint_visitor.py b/src/psyclone/psyad/adjoint_visitor.py index c7917ddbef..de4abd861d 100644 --- a/src/psyclone/psyad/adjoint_visitor.py +++ b/src/psyclone/psyad/adjoint_visitor.py @@ -346,7 +346,7 @@ def loop_node(self, node): # TODO: use language independent PSyIR, see issue #1345 ptree = Fortran2003.Intrinsic_Function_Reference( f"mod({hi_str}-{lo_str},{step_str})") - offset = Fparser2CodeBlock([ptree], CodeBlock.Structure.EXPRESSION) + offset = Fparser2CodeBlock(ptree, CodeBlock.Structure.EXPRESSION) # We only need to copy this node and its bounds. Issue #1440 # will address this. diff --git a/src/psyclone/psyir/frontend/fortran_treesitter_reader.py b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py index 5b4a3c683f..c49588b8a3 100644 --- a/src/psyclone/psyir/frontend/fortran_treesitter_reader.py +++ b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py @@ -172,7 +172,7 @@ def process_nodes(self, tsnodes: Union["TSNode", Iterable["TSNode"]]): # TODO #3038: Add support expression codeblocks and aggregating # contiguous codeblocks into a single one. 
structure = CodeBlock.Structure.STATEMENT - code_block = TreeSitterCodeBlock([tsnode], structure) + code_block = TreeSitterCodeBlock(tsnode, structure) code_block.append_preceding_comment( f"PSyclone CodeBlock (unsupported code) reason:\n" f"- {err}" diff --git a/src/psyclone/psyir/nodes/codeblock.py b/src/psyclone/psyir/nodes/codeblock.py index 80469fe6f0..e642bdba2a 100644 --- a/src/psyclone/psyir/nodes/codeblock.py +++ b/src/psyclone/psyir/nodes/codeblock.py @@ -40,11 +40,12 @@ import re from enum import Enum -from typing import List +from typing import Optional, Any, Union from psyclone.core import AccessType, Signature, VariablesAccessMap from psyclone.psyir.nodes.statement import Statement from psyclone.psyir.nodes.datanode import DataNode +from psyclone.psyir.nodes.node import Node class CodeBlock(Statement, DataNode): @@ -57,7 +58,6 @@ class CodeBlock(Statement, DataNode): :param parse_tree: the fparser2 parse-tree nodes representing the Fortran code constituting the code block. - :type parse_tree: list[:py:class:`fparser.two.utils.Base`] :param structure: argument indicating whether this code block is a statement or an expression. :type structure: :py:class:`psyclone.psyir.nodes.CodeBlock.Structure` @@ -84,19 +84,28 @@ class Structure(Enum): may be required when processing. ''' - # The Code Block comprises one or more Fortran statements - # (which themselves may contain expressions). + #: The Code Block comprises one or more Fortran statements + #: (which themselves may contain expressions). STATEMENT = 1 - # The Code Block comprises one or more Fortran expressions. + #: The Code Block comprises one or more Fortran expressions. 
EXPRESSION = 2 - def __init__(self, parse_tree, structure, parent=None, annotations=None): + def __init__( + self, + parse_tree: Union[Any, list[Any]], + structure: "CodeBlock.Structure", + parent: Optional[Node] = None, + annotations: Optional[list[str]] = None + ): super().__init__(parent=parent, annotations=annotations) # Store a list of the parser objects holding the code associated # with this block. We make a copy of the list container because # the list itself is often a temporary product of the process of # converting from the the parse tree to the PSyIR. - self._parse_tree = parse_tree[:] + if isinstance(parse_tree, list): + self._parse_tree = parse_tree[:] + else: + self._parse_tree = [parse_tree] # Store the structure of the code block. self._structure = structure @@ -177,7 +186,7 @@ def reference_accesses(self) -> VariablesAccessMap: def __str__(self): return f"CodeBlock[{len(self._parse_tree)} nodes]" - def get_symbol_names(self) -> List[str]: + def get_symbol_names(self) -> list[str]: ''' :returns: the name of all symbols accessed in the CodeBlock. ''' @@ -205,7 +214,7 @@ def get_fortran_lines(self) -> list[str]: class Fparser2CodeBlock(CodeBlock): ''' The fparser2 implementation of CodeBlock. ''' - def get_symbol_names(self) -> List[str]: + def get_symbol_names(self) -> list[str]: ''' Analyses the fparser2 parse tree associated with this CodeBlock and returns the names of all symbols accessed within it. 
Since, by diff --git a/src/psyclone/tests/domain/common/transformations/kernel_module_inline_trans_test.py b/src/psyclone/tests/domain/common/transformations/kernel_module_inline_trans_test.py index 22f66d3109..99a974473a 100644 --- a/src/psyclone/tests/domain/common/transformations/kernel_module_inline_trans_test.py +++ b/src/psyclone/tests/domain/common/transformations/kernel_module_inline_trans_test.py @@ -166,7 +166,7 @@ def test_validate_no_inline_global_var(parser): alpha = alpha + 1 end subroutine mytest''') stmt = parser(reader).children[0].children[1] - block = Fparser2CodeBlock([stmt], CodeBlock.Structure.STATEMENT) + block = Fparser2CodeBlock(stmt, CodeBlock.Structure.STATEMENT) kschedules = kernels[0].get_callees() ksched = kschedules[0] ksched.pop_all_children() @@ -183,7 +183,7 @@ def test_validate_no_inline_global_var(parser): unknown = unknown + 1 end subroutine mytest''') stmt = parser(reader).children[0].children[1] - block = Fparser2CodeBlock([stmt], CodeBlock.Structure.STATEMENT) + block = Fparser2CodeBlock(stmt, CodeBlock.Structure.STATEMENT) kschedules = kernels[0].get_callees() ksched = kschedules[0] ksched.pop_all_children() diff --git a/src/psyclone/tests/psyir/nodes/codeblock_test.py b/src/psyclone/tests/psyir/nodes/codeblock_test.py index 70e5205925..98cdf56359 100644 --- a/src/psyclone/tests/psyir/nodes/codeblock_test.py +++ b/src/psyclone/tests/psyir/nodes/codeblock_test.py @@ -56,7 +56,7 @@ def test_codeblock_node_str(): ''' Check the node_str method of the Code Block class.''' - cblock = CodeBlock([], "dummy") + cblock = CodeBlock([], CodeBlock.Structure.EXPRESSION) coloredtext = colored("CodeBlock", CodeBlock._colour) output = cblock.node_str() assert coloredtext+"[" in output @@ -71,7 +71,7 @@ def test_codeblock_can_be_printed(): assert "]" in str(cblock) -def test_codeblock_getastnodes(): +def test_codeblock_constructor_and_getastnodes(): '''Test that the get_ast_nodes method of a CodeBlock instance returns a copy of the list of 
nodes from the original AST that are associated with this code block. @@ -86,6 +86,11 @@ def test_codeblock_getastnodes(): # Check that the list is a copy not a reference. assert result is not original + # If only one element is provided, this is added to a list + original = 3 + cblock = CodeBlock(original, CodeBlock.Structure.EXPRESSION) + assert cblock.get_ast_nodes() == [3] + @pytest.mark.parametrize("structure", [CodeBlock.Structure.STATEMENT, CodeBlock.Structure.EXPRESSION]) @@ -149,7 +154,7 @@ def test_codeblock_get_fortran_lines(): assert "end subroutine" in block.get_fortran_lines() tree = FortranTreeSitterReader().generate_parse_tree(code) - block = TreeSitterCodeBlock([tree], CodeBlock.Structure.STATEMENT) + block = TreeSitterCodeBlock(tree, CodeBlock.Structure.STATEMENT) assert isinstance(block.get_fortran_lines(), list) assert "subroutine mytest" in block.get_fortran_lines() assert "end subroutine" in block.get_fortran_lines() diff --git a/src/psyclone/tests/psyir/symbols/datasymbol_test.py b/src/psyclone/tests/psyir/symbols/datasymbol_test.py index 9cbc291629..97a4eb18a9 100644 --- a/src/psyclone/tests/psyir/symbols/datasymbol_test.py +++ b/src/psyclone/tests/psyir/symbols/datasymbol_test.py @@ -359,7 +359,7 @@ def test_datasymbol_initial_value_codeblock(): # We want the first child of the Initialization node in the parse tree as # the basis for our CodeBlock inits = Fortran2003.walk(fparser2spec, Fortran2003.Initialization) - cblock = Fparser2CodeBlock([inits[0].children[1]], + cblock = Fparser2CodeBlock(inits[0].children[1], CodeBlock.Structure.EXPRESSION) assert sym.initial_value is None sym.initial_value = cblock From ef504ff696462e1591ecdf45a6a9ffd290f07e89 Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: Tue, 24 Mar 2026 14:07:42 +0000 Subject: [PATCH 23/37] #3083 Split generate_parse_tree method into two methods: one for source and one for files --- src/psyclone/psyir/frontend/fortran.py | 40 +++---- .../frontend/fortran_treesitter_reader.py | 
101 ++++++++++-------- src/psyclone/psyir/frontend/fparser2.py | 92 ++++++++++------ .../fortran_treesitter_reader/ftr_test.py | 14 ++- .../tests/psyir/nodes/codeblock_test.py | 6 +- 5 files changed, 142 insertions(+), 111 deletions(-) diff --git a/src/psyclone/psyir/frontend/fortran.py b/src/psyclone/psyir/frontend/fortran.py index 31887c715b..c33ad60b4b 100644 --- a/src/psyclone/psyir/frontend/fortran.py +++ b/src/psyclone/psyir/frontend/fortran.py @@ -83,10 +83,6 @@ def __init__(self, "will only have an effect if ignore_comments is also set " "to False." ) - self._ignore_comments = ignore_comments - self._ignore_directives = ignore_directives - self._conditional_openmp_statements = conditional_openmp_statements - self._free_form = free_form # The frontend reader imports are intentionally inside this condition # to lazily import them only when they are needed @@ -94,12 +90,16 @@ def __init__(self, if Config.get().frontend == 'treesitter': from psyclone.psyir.frontend.fortran_treesitter_reader import ( FortranTreeSitterReader) - self._processor = FortranTreeSitterReader() + factory = FortranTreeSitterReader else: from psyclone.psyir.frontend.fparser2 import Fparser2Reader - self._processor = Fparser2Reader(ignore_directives, - last_comments_as_codeblocks, - resolve_modules) + factory = Fparser2Reader + + # Instantiate processor + self._processor = factory( + ignore_directives, last_comments_as_codeblocks, resolve_modules, + ignore_comments, free_form, conditional_openmp_statements + ) @staticmethod def validate_name(name: str): @@ -131,9 +131,7 @@ def psyir_from_source(self, source_code: str) -> Node: :raises ValueError: if the supplied Fortran cannot be parsed. 
''' - tree = self._processor.generate_parse_tree( - source_code, None, self._ignore_comments, self._free_form, - self._conditional_openmp_statements) + tree = self._processor.generate_parse_tree_from_source(source_code) psyir = self._processor.generate_psyir(tree) return psyir @@ -161,10 +159,8 @@ def psyir_from_expression(self, source_code: str, raise TypeError(f"Must be supplied with a valid SymbolTable but " f"got '{type(symbol_table).__name__}'") - tree = self._processor.generate_parse_tree( - source_code, None, self._ignore_comments, self._free_form, - self._conditional_openmp_statements, - partial_code="expression") + tree = self._processor.generate_parse_tree_from_source( + source_code, partial_code="expression") # Create a fake sub-tree connected to the supplied symbol table so # that we can process the expression and lookup any symbols that it @@ -203,10 +199,8 @@ def psyir_from_statement(self, source_code: str, raise TypeError(f"Must be supplied with a valid SymbolTable but " f"got '{type(symbol_table).__name__}'") - tree = self._processor.generate_parse_tree( - source_code, None, self._ignore_comments, self._free_form, - self._conditional_openmp_statements, - partial_code="statement") + tree = self._processor.generate_parse_tree_from_source( + source_code, partial_code="statement") # Create a fake sub-tree connected to the supplied symbol table so # that we can process the statement and lookup any symbols that it # references. @@ -233,13 +227,7 @@ def psyir_from_file(self, file_path): :raises ValueError: if the parser fails to parse the contents of the supplied file. 
''' - tree = self._processor.generate_parse_tree( - None, - file_path, - self._ignore_comments, - self._free_form, - self._conditional_openmp_statements - ) + tree = self._processor.generate_parse_tree_from_file(file_path) psyir = self._processor.generate_psyir(tree) psyir.name = str(file_path).rsplit('/', maxsplit=1)[-1] return psyir diff --git a/src/psyclone/psyir/frontend/fortran_treesitter_reader.py b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py index c49588b8a3..19008f6032 100644 --- a/src/psyclone/psyir/frontend/fortran_treesitter_reader.py +++ b/src/psyclone/psyir/frontend/fortran_treesitter_reader.py @@ -35,7 +35,7 @@ ''' PSyIR TreeSitter Fortran reader ''' -from typing import Optional, TYPE_CHECKING, Iterable, Union +from typing import TYPE_CHECKING, Iterable, Union, Callable from psyclone.psyir import nodes from psyclone.psyir.nodes.codeblock import TreeSitterCodeBlock, CodeBlock @@ -75,19 +75,32 @@ class FortranTreeSitterReader(): :param resolve_modules: Whether to resolve modules while parsing a file, for more precise control it also accepts a list of module names. Defaults to False. Currently ignored. + :param ignore_comments: whether to let the parser ignore comments. + :param free_form: whether to parse using Fortran free_form syntax. + :param ignore_directives: whether to ignore directives while parsing. + :param conditional_openmp: whether to parse conditional OpenMP statements. :raises TypeError: if the constructor argument is not of the expected type. 
''' - def __init__(self, ignore_directives: bool = True, - last_comments_as_codeblocks: bool = False, - resolve_modules: bool = False): + def __init__( + self, + ignore_directives: bool = True, + last_comments_as_codeblocks: bool = False, + resolve_modules: bool = False, + ignore_comments: bool = True, + free_form: bool = True, + conditional_openmp: bool = True, + ): # TODO #3038 Arguments are currently not used nor typechecked, but if # we decide this is the common reader interface, this can be done in a # super class instead of duplicate it here. self._ignore_directives = ignore_directives self._resolve_modules = resolve_modules self._last_comments_as_codeblocks = last_comments_as_codeblocks + self._ignore_comments = ignore_comments + self._free_form = free_form + self._conditional_openmp = conditional_openmp # TODO #3038: Currently this reader uses a cursor pointer instead of # passing around a parent argument all the time (like fparser's), but # this can be re-evaluated if necessary. @@ -97,27 +110,29 @@ def __init__(self, ignore_directives: bool = True, 'module': self._module_handler, } - def generate_parse_tree( - self, - source_code: Optional[str] = None, - file_path: Optional[str] = None, - ignore_comments: bool = True, - free_form: bool = True, - conditional_openmp: bool = True, - partial_code: str = "" - ): - ''' Use the provided source code and frontend options to generate - a treesitter parsetree. + def generate_parse_tree_from_file(self, file_path) -> 'TSNode': + ''' + Use the provided file to generate a treesitter parsetree. + + :param file_path: a given file. + + :returns: the treesitter parsetree of the given file. + ''' + with open(file_path, encoding="utf-8") as fortran_file: + source_code = fortran_file.read() + return self.generate_parse_tree_from_source(source_code) + + def generate_parse_tree_from_source( + self, source_code: str, partial_code: str = "" + ) -> 'TSNode': + ''' Use the provided source code to generate a treesitter parsetree. 
:param source_code: the given source code. - :param ignore_comments: whether to let the parser ignore comments. - :param free_form: whether to parse using Fortran free_form syntax. - :param ignore_directives: whether to ignore directives while parsing. - :param conditional_openmp: :param partial_code: if the provided source_code is not a full unit this indicates the starting parsing point. It currently supports "expression" or "statement". + :returns: the treesitter parsetree of the given source code. ''' # pylint: disable=unused-argument # Purposely inlined to lazily load this modules only when needed @@ -125,8 +140,13 @@ def generate_parse_tree( import tree_sitter_fortran from tree_sitter import Language, Parser - def report_errors(node): - ''' Recursively find and report errors ''' + def report_errors(node: 'TSNode'): + ''' Recursively find and report errors. + + :param node: the given treesitter node + + :raises ValueError: if the given node has a parsing error. + ''' if node.type == 'ERROR': raise ValueError( f"Syntax Error found at line {node.start_point[0] + 1}: " @@ -134,10 +154,6 @@ def report_errors(node): for child in node.children: report_errors(child) - if file_path: - with open(file_path, encoding="utf-8") as fortran_file: - source_code = fortran_file.read() - language = Language(tree_sitter_fortran.language()) parser = Parser(language) parse_tree = parser.parse(bytes(source_code, "utf8")) @@ -145,22 +161,20 @@ def report_errors(node): return parse_tree.root_node def generate_psyir(self, parse_tree: 'TSNode') -> nodes.Node: - '''Translate the supplied treesitter node to PSyIR. + '''Translate the supplied treesitter node into PSyIR. :param parse_tree: the supplied treesitter parse tree. - - :returns: PSyIR of the supplied treesitter parse_tree. + :returns: the equivalent PSyIR Node. 
''' - result = self.get_handler(parse_tree)(parse_tree) - return result + return self.process_nodes(parse_tree)[0] def process_nodes(self, tsnodes: Union["TSNode", Iterable["TSNode"]]): ''' - Create the PSyIR of the supplied list of treesitter nodes. - - :param nodes: the list of nodes to process, for conveninece it also - accepts a single node without a list. + Create the PSyIR that represents the supplied treesitter nodes. + :param nodes: the list of nodes to process, for convenience it accepts + a single node or a list of them. + :returns: the equivalent PSyIR Node. ''' list_of_nodes = tsnodes if isinstance(tsnodes, Iterable) else [tsnodes] children = [] @@ -180,10 +194,9 @@ def process_nodes(self, tsnodes: Union["TSNode", Iterable["TSNode"]]): children.append(code_block) return children - def get_handler(self, tsnode): + def get_handler(self, tsnode: 'TSNode') -> Callable: ''' :param tsnode: a given treesitter node. - :returns: the method that handles the given node type. ''' handler = self.handlers.get(tsnode.type) @@ -192,22 +205,22 @@ def get_handler(self, tsnode): f"Unsupported '{tsnode.type}' tree-sitter node.") return handler - def _translation_unit(self, tsnode) -> nodes.Node: - ''' Handle translation_unit treesitter node. + def _translation_unit(self, tsnode: 'TSNode') -> nodes.Node: + ''' Handle treesitter 'translation_unit' node. - :param tsnode: the node the process. - :returns: the equivatent PSyIR Node. + :param tsnode: the treesitter node to process. + :returns: the equivalent PSyIR Node. ''' file_container = nodes.FileContainer("") self._psyir_cursor = file_container file_container.children.extend(self.process_nodes(tsnode.children)) return file_container - def _module_handler(self, tsnode) -> nodes.Node: - ''' Handle module treesitter node. + def _module_handler(self, tsnode: 'TSNode') -> nodes.Node: + ''' Handle a treesitter 'module' node. - :param tsnode: the node the process. - :returns: the equivatent PSyIR Node.
+ :param tsnode: the treesitter node to process. + :returns: the equivalent PSyIR Node. ''' module_name = None internal_proc = None diff --git a/src/psyclone/psyir/frontend/fparser2.py b/src/psyclone/psyir/frontend/fparser2.py index df5a97f881..ded5b3311b 100644 --- a/src/psyclone/psyir/frontend/fparser2.py +++ b/src/psyclone/psyir/frontend/fparser2.py @@ -47,7 +47,8 @@ import sys from typing import Iterable, Optional, Union -from fparser.common.readfortran import FortranStringReader, FortranFileReader +from fparser.common.readfortran import ( + FortranStringReader, FortranFileReader, FortranReaderBase) from fparser.two import C99Preprocessor, Fortran2003, utils from fparser.two.parser import ParserFactory from fparser.two.utils import walk, BlockBase, StmtBase, Base @@ -871,6 +872,10 @@ class Fparser2Reader(): :param resolve_modules: Whether to resolve modules while parsing a file, for more precise control it also accepts a list of module names. Defaults to False. + :param ignore_comments: whether to let the parser ignore comments. + :param free_form: whether to parse using Fortran free_form syntax. + :param ignore_directives: whether to ignore directives while parsing. + :param conditional_openmp: whether to parse conditional OpenMP statements. :raises TypeError: if the constructor argument is not of the expected type.
@@ -960,8 +965,14 @@ def __init__( self, ignore_directives: bool = True, last_comments_as_codeblocks: bool = False, - resolve_modules: Union[bool, list[str]] = False + resolve_modules: Union[bool, list[str]] = False, + ignore_comments: bool = False, + free_form: bool = False, + conditional_openmp: bool = False, ): + self._ignore_comments = ignore_comments + self._free_form = free_form + self._conditional_openmp = conditional_openmp if isinstance(resolve_modules, bool): self._resolve_all_modules = resolve_modules self._modules_to_resolve = [] @@ -1026,46 +1037,67 @@ def __init__( # Whether to keep the last comments in a given block as CodeBlocks self._last_comments_as_codeblocks = last_comments_as_codeblocks - def generate_parse_tree( + def generate_parse_tree_from_file(self, file_path: str = ""): + ''' + Use the provided file to generate a fparser2 parsetree. + + :param file_path: a given file. + + :returns: the fparser2 parsetree of the given file. + ''' + reader = FortranFileReader( + file_path, + include_dirs=Config.get().include_paths, + ignore_comments=self._ignore_comments, + process_directives=not self._ignore_directives, + include_omp_conditional_lines=self._conditional_openmp, + ) + return self._fparser2_tree_from_fparser2_reader(reader) + + def generate_parse_tree_from_source( self, source_code: str = "", - file_path: str = "", - ignore_comments: bool = False, - free_form: bool = False, - conditional_openmp: bool = False, partial_code: str = "" ): - ''' Use the provided source code and frontend options to generate - a fparser2 parsetree. + ''' Use the provided source code to generate a fparser2 parsetree. :param source_code: the given source code. - :param ignore_comments: whether to let the parser ignore comments. - :param free_form: whether to parse using Fortran free_form syntax. - :param ignore_directives: whether to ignore directives while parsing. 
- :param conditional_openmp: :param partial_code: if the provided source_code is not a full unit this indicates the starting parsing point. It currently supports "expression" or "statement". + :returns: the fparser2 parsetree of the given source code. + ''' + reader = FortranStringReader( + source_code, + include_dirs=Config.get().include_paths, + ignore_comments=self._ignore_comments, + process_directives=not self._ignore_directives, + include_omp_conditional_lines=self._conditional_openmp, + ) + return self._fparser2_tree_from_fparser2_reader(reader, source_code, + partial_code) + + def _fparser2_tree_from_fparser2_reader( + self, reader: FortranReaderBase, source_code: str = "", + partial_code: str = "" + ): + ''' Common functionality to use the readers generated by + 'generate_parse_tree_from_*' methods. + + :param reader: the generated fparser2 reader. + :param source_code: the source code is sometimes needed in + addition to the reader when partial expressions are provided. + :param partial_code: if the provided source_code is not a full unit + this indicates the starting parsing point. It currently supports + "expression" or "statement". + + :returns: the fparser2 parsetree of the given source code. + + :raises ValueError: if the given Fortran had a syntax error. ''' - if file_path: - reader = FortranFileReader( - file_path, - include_dirs=Config.get().include_paths, - ignore_comments=ignore_comments, - process_directives=not self._ignore_directives, - include_omp_conditional_lines=conditional_openmp, - ) - else: - reader = FortranStringReader( - source_code, - include_dirs=Config.get().include_paths, - ignore_comments=ignore_comments, - process_directives=not self._ignore_directives, - include_omp_conditional_lines=conditional_openmp, - ) # Set reader to free format.
- reader.set_format(FortranFormat(free_form, False)) + reader.set_format(FortranFormat(self._free_form, False)) SYMBOL_TABLES.clear() if partial_code == "expression": diff --git a/src/psyclone/tests/psyir/frontend/fortran_treesitter_reader/ftr_test.py b/src/psyclone/tests/psyir/frontend/fortran_treesitter_reader/ftr_test.py index 163adff265..33d629bffd 100644 --- a/src/psyclone/tests/psyir/frontend/fortran_treesitter_reader/ftr_test.py +++ b/src/psyclone/tests/psyir/frontend/fortran_treesitter_reader/ftr_test.py @@ -80,7 +80,7 @@ def test_generate_parse_tree(tmpdir_factory): program test end program test """ - ptree = processor.generate_parse_tree(valid_code) + ptree = processor.generate_parse_tree_from_source(valid_code) assert isinstance(ptree, TSNode) # Invalid code raises a Value error with a relevant error message @@ -90,18 +90,16 @@ def test_generate_parse_tree(tmpdir_factory): end program test """ with pytest.raises(ValueError) as err: - _ = processor.generate_parse_tree(invalid_code) + _ = processor.generate_parse_tree_from_source(invalid_code) assert "Syntax Error found at line 2" in str(err.value) # Test providing a source file filename = str(tmpdir_factory.mktemp('ts_test').join("testfile.f90")) with open(filename, "w", encoding='utf-8') as wfile: wfile.write(valid_code) - ptree = processor.generate_parse_tree(file_path=filename) + ptree = processor.generate_parse_tree_from_file(filename) assert isinstance(ptree, TSNode) - # TODO #3038 All arguments are currently ignored - def test_generate_psyir(): ''' @@ -119,7 +117,7 @@ def test_generate_psyir(): end subroutine end module test """ - ptree = processor.generate_parse_tree(valid_code) + ptree = processor.generate_parse_tree_from_source(valid_code) psyir = processor.generate_psyir(ptree) assert isinstance(psyir, FileContainer) @@ -141,7 +139,7 @@ def test_codeblock_generation_and_messages(): end subroutine end module test """ - ptree = processor.generate_parse_tree(unsupported_code) + ptree = 
processor.generate_parse_tree_from_source(unsupported_code) psyir = processor.generate_psyir(ptree) assert isinstance(psyir, FileContainer) @@ -161,7 +159,7 @@ def test_codeblock_generation_and_messages(): end subroutine end module test """ - ptree = processor.generate_parse_tree(unsupported_code) + ptree = processor.generate_parse_tree_from_source(unsupported_code) psyir = processor.generate_psyir(ptree) assert isinstance(psyir, FileContainer) diff --git a/src/psyclone/tests/psyir/nodes/codeblock_test.py b/src/psyclone/tests/psyir/nodes/codeblock_test.py index 98cdf56359..e1ca6761da 100644 --- a/src/psyclone/tests/psyir/nodes/codeblock_test.py +++ b/src/psyclone/tests/psyir/nodes/codeblock_test.py @@ -147,13 +147,13 @@ def test_codeblock_get_fortran_lines(): frontends) ''' code = "\nsubroutine mytest\nend subroutine" - tree = Fparser2Reader().generate_parse_tree(code) + tree = Fparser2Reader().generate_parse_tree_from_source(code) block = Fparser2CodeBlock(tree.children, CodeBlock.Structure.STATEMENT) assert isinstance(block.get_fortran_lines(), list) assert "subroutine mytest" in block.get_fortran_lines() assert "end subroutine" in block.get_fortran_lines() - tree = FortranTreeSitterReader().generate_parse_tree(code) + tree = FortranTreeSitterReader().generate_parse_tree_from_source(code) block = TreeSitterCodeBlock(tree, CodeBlock.Structure.STATEMENT) assert isinstance(block.get_fortran_lines(), list) assert "subroutine mytest" in block.get_fortran_lines() @@ -164,7 +164,7 @@ def test_codeblock_get_symbol_names(): '''Test that the get_symbol_names methods returns the names of the symbols used inside the CodeBlock. 
This is slightly subtle as we have to avoid any labels on loop and branching statements.''' - prog = Fparser2Reader().generate_parse_tree(''' + prog = Fparser2Reader().generate_parse_tree_from_source(''' subroutine mytest myloop: DO i = 1, 10 a = b + sqrt(c) From 69c0039defb1eef4248f5e70153cf67810bfff93 Mon Sep 17 00:00:00 2001 From: Sergi Siso Date: Wed, 25 Mar 2026 15:39:08 +0000 Subject: [PATCH 24/37] #3351 Update copyright, docstrings and setup.py --- examples/psyir/custom_directives/Makefile | 2 +- examples/psyir/custom_directives/README.md | 2 +- .../custom_directives/directive_filtering.F90 | 2 +- .../identify_custom_directives.py | 2 +- external/lfric_infrastructure/Makefile | 2 +- .../apps/flux_direction_mod.f90 | 2 +- .../lfric_include_flags.mk | 2 +- .../src/apps/flux_direction_mod.f90 | 2 +- .../src/lfric_include_flags.mk | 2 +- setup.py | 7 +++-- src/psyclone/configuration.py | 4 +-- .../domain/lfric/lfric_scalar_array_args.py | 2 +- src/psyclone/psyir/frontend/fortran.py | 29 ++++++++----------- .../frontend/fortran_treesitter_reader.py | 2 +- src/psyclone/psyir/nodes/unknown_directive.py | 2 +- .../maximal_omp_parallel_region_trans.py | 2 +- .../psyir/nodes/unknown_directive_test.py | 2 +- .../lfric/15.10.9_min_max_X_builtin.f90 | 2 +- ...ltikernel_invokes_scalar_array_invalid.f90 | 2 +- .../lfric/28.scalar_array_invoke.f90 | 2 +- .../lfric/testkern_scalar_array_mod.f90 | 2 +- 21 files changed, 37 insertions(+), 39 deletions(-) diff --git a/examples/psyir/custom_directives/Makefile b/examples/psyir/custom_directives/Makefile index 95f2d28aa0..bd058aeca8 100644 --- a/examples/psyir/custom_directives/Makefile +++ b/examples/psyir/custom_directives/Makefile @@ -1,7 +1,7 @@ # ----------------------------------------------------------------------------- # BSD 3-Clause License # -# Copyright (c) 2025, Science and Technology Facilities Council +# Copyright (c) 2025-2026, Science and Technology Facilities Council # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without diff --git a/examples/psyir/custom_directives/README.md b/examples/psyir/custom_directives/README.md index 0770b4f6ad..bc0fdea5cd 100644 --- a/examples/psyir/custom_directives/README.md +++ b/examples/psyir/custom_directives/README.md @@ -1,7 +1,7 @@