diff --git a/.github/workflows/compilation.yml b/.github/workflows/compilation.yml index 41828a7ffe..851c2aaed5 100644 --- a/.github/workflows/compilation.yml +++ b/.github/workflows/compilation.yml @@ -88,7 +88,7 @@ jobs: # Uncomment the below to use the submodule version of fparser rather # than the latest release from pypi. pip install external/fparser - pip install .[test,psydata,doc] + pip install .[test,psydata,doc,treesitter] - name: Unit tests with compilation - gfortran run: | . .runner_venv/bin/activate @@ -177,5 +177,6 @@ jobs: - name: Test reasonable psycloning times for complex files run: | . .runner_venv/bin/activate - # Fail if it takes more than 15s - timeout -s INT 15s psyclone /archive/psyclone-tests/latest-run/slow_files/ukca_aero_ctl.F90 + # Fail if it takes more time than expected + timeout -s INT 12s psyclone /archive/psyclone-tests/latest-run/slow_files/ukca_aero_ctl.F90 + timeout -s INT 0.5s psyclone --frontend treesitter /archive/psyclone-tests/latest-run/slow_files/ukca_aero_ctl.F90 diff --git a/.github/workflows/lfric_test.yml b/.github/workflows/lfric_test.yml index f9dc85c593..2b5ff1cbf9 100644 --- a/.github/workflows/lfric_test.yml +++ b/.github/workflows/lfric_test.yml @@ -42,7 +42,7 @@ on: push env: - LFRIC_APPS_HASH: 948b9ec1a7ae6ef110b97ed45ced8d4b71079f34 + LFRIC_APPS_HASH: d8cab8417d873eb686e7fe74707da03742eaa9d9 PYTHON_VERSION: "3.14" GNU_TOOLCHAIN: gnu14_openmpi NVHPC_TOOLCHAIN: nvhpc26_3_openmpi diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index ba78905488..bc4c65ab2a 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -107,7 +107,7 @@ jobs: # than the latest release from pypi. # pip install external/fparser pip install .[doc] - pip install .[test] + pip install .[test,treesitter] - name: Lint with flake8 run: | # Stop the build if there are Python syntax errors or undefined names. 
diff --git a/changelog b/changelog index 0c4a7c01ca..135d730230 100644 --- a/changelog +++ b/changelog @@ -1,3 +1,6 @@ + 3) PR #3351 towards #3083. Adds scaffolding for new Treesitter-based + frontend. + 2) PR #3401 for #3400. Fixes a frontend issue causing issues with NEMO master branch. diff --git a/doc/developer_guide/APIs.rst b/doc/developer_guide/APIs.rst index 864b4fb1e2..9ee5b55167 100644 --- a/doc/developer_guide/APIs.rst +++ b/doc/developer_guide/APIs.rst @@ -1025,7 +1025,7 @@ coarse mesh. Lowering -------- -As described in :ref:`psy_layer_backends`, the use of a PSyIR backend to +As described in :ref:`uplifting-lowering`, the use of a PSyIR backend to generate code for the LFRic PSy layer requires that each LFRic-specific node be lowered to 'language-level' PSyIR. This requires that each node have the ``lower_to_language_level()`` method implemented. diff --git a/doc/developer_guide/index.rst b/doc/developer_guide/index.rst index 3e68696edf..1078f64a06 100644 --- a/doc/developer_guide/index.rst +++ b/doc/developer_guide/index.rst @@ -17,7 +17,7 @@ Developer Guide psyir psyir_symbols - psyir_backends + psyir_frontends_backends psykal module_manager APIs diff --git a/doc/developer_guide/psyir_backends.rst b/doc/developer_guide/psyir_frontends_backends.rst similarity index 72% rename from doc/developer_guide/psyir_backends.rst rename to doc/developer_guide/psyir_frontends_backends.rst index faf3216408..9f27035a78 100644 --- a/doc/developer_guide/psyir_backends.rst +++ b/doc/developer_guide/psyir_frontends_backends.rst @@ -34,49 +34,72 @@ Authors: R. W. Ford, A. R. Porter, S. Siso and N. Nobre, STFC Daresbury Lab +PSyIR Frontends and Backends +############################ + +Instead of creating PSyIR nodes manually, PSyclone provides +:ref:`psyir-frontends` and :ref:`psyir-backends` to translate from/to +other representations (such as languages like Fortran or C, or other +Intermediate Representations like SIR). 
+ +The set of PSyIR nodes that the frontends and backends recognise and translate +to/from are known as the language-level nodes. PSyclone also provides +:ref:`uplifting-lowering` to support higher-level or domain-specific abstractions. + + +.. _psyir-frontends: + +PSyIR Frontends +=============== + +Currently two Fortran frontends are available: + +fparser2: + This is the main Fortran frontend based on + `fparser2 <https://fparser.readthedocs.io>`_ and it is currently the only + recommended stable option. + +treesitter: + This is a highly experimental frontend based on + `the treesitter parser <https://tree-sitter.github.io/tree-sitter/>`_ which has the + potential to be faster, but is currently severely incomplete and untested. + +The frontend is selected with the ``psyclone --frontend <name>`` flag. + .. _psyir-backends: PSyIR Back-ends -############### - -PSyIR back-ends translate PSyIR into another form (such as Fortran, C -or OpenCL). Until recently this back-end support has been implemented -within the PSyIR `Node` classes themselves via various `gen*` -methods. However, this approach is getting a little unwieldy. - -Therefore PSyclone is transitioning into a `Visitor` pattern approach. -Visitor backends are already being used in the back-end implementations -that translate PSyIR kernel code. This approach separates the code to -traverse a tree from the tree being visited. It is expected that the -existing back-ends (used in the PSy-layer) will migrate to this new -approach over time (more information about the PSy-layer migration -can be found in :ref:`psy_layer_backends`). The back-end visitor code -is stored in `psyclone/psyir/backend`. +=============== + +PSyIR back-ends translate PSyIR into another form (such as Fortran, C or +OpenCL) using a ``Visitor`` pattern approach. This approach separates the code to +traverse a tree from the tree being visited. The back-end visitor code +is stored in ``psyclone/psyir/backend``. 
Visitor Base code -================= +----------------- -`visitor.py` in `psyclone/psyir/backend` provides a base class - +``visitor.py`` in ``psyclone/psyir/backend`` provides a base class - `PSyIRVisitor` - that implements the visitor pattern and is designed to be subclassed by each back-end. -`PSyIRVisitor` is implemented in such a way that the PSyIR classes do +``PSyIRVisitor`` is implemented in such a way that the PSyIR classes do not need to be modified. This is achieved by translating the class name of the object being visited in the PSyIR tree into the method -name that the visitor attempts to call (using the Python `eval` -function). `_node` is postfixed to the method name to avoid name +name that the visitor attempts to call (using the Python ``eval`` +function). ``_node`` is postfixed to the method name to avoid name clashes with Python keywords. -For example, an instance of the `Loop` PSyIR class would result in -`PSyIRVisitor` attempting to call a `loop_node` method with the PSyIR +For example, an instance of the ``Loop`` PSyIR class would result in +``PSyIRVisitor`` attempting to call a ``loop_node`` method with the PSyIR instance as an argument. Note the names are always translated to lower case. Therefore, a particular back-end needs to subclass -`PSyIRVisitor`, provide a `loop_node` method (in this particular example) and -this method would then be called when the visitor finds an instance of -`Loop`. For example:: +``PSyIRVisitor``, provide a ``loop_node`` method (in this particular example) +and this method would then be called when the visitor finds an instance of +``Loop``. For example: +.. code-block:: python - from __future__ import print_function from psyclone.psyir.visitor import PSyIRVisitor class TestVisitor(PSyIRVisitor): ''' Example implementation of a back-end visitor. ''' @@ -90,10 +113,11 @@ this method would then be called when the visitor finds an instance of It is up to the sub-class to call any children of the particular node. 
This approach was chosen as it allows the sub-class to control -when and how to call children. For example:: +when and how to call children. For example: + +.. code-block:: python - from __future__ import print_function from psyclone.psyir.visitor import PSyIRVisitor class TestVisitor(PSyIRVisitor): ''' Example implementation of a back-end visitor. ''' @@ -107,53 +131,53 @@ when and how to call children. For example:: test_visitor = TestVisitor() test_visitor._visit(psyir_tree) -If a `node` is called that does not have an associated method defined -then `PSyIRVisitor` will raise a `VisitorError` exception. This -behaviour can be changed by setting the `skip_nodes` option to `True` -when initialising the visitor i.e. +If a ``node`` is called that does not have an associated method defined +then ``PSyIRVisitor`` will raise a ``VisitorError`` exception. This +behaviour can be changed by setting the ``skip_nodes`` option to ``True`` +when initialising the visitor i.e.: -:: +.. code-block:: python test_visitor = TestVisitor(skip_nodes=True) Any unsupported nodes will then be ignored and their children will be called in the order that they appear in the tree. -PSyIR nodes might not be direct subclasses of `Node`. For example, -`GOKernelSchedule` subclasses `KernelSchedule` which subclasses -`Routine` which subclasses `Schedule` which subclasses `Node`. This can +PSyIR nodes might not be direct subclasses of ``Node``. For example, +``GOKernelSchedule`` subclasses ``KernelSchedule`` which subclasses +``Routine`` which subclasses ``Schedule`` which subclasses ``Node``. This can cause a problem as a back-end would need to have a different method for each class e.g. both -a `gokernelschedule_node` and a `kernelschedule_node` method, even if the +a ``gokernelschedule_node`` and a ``kernelschedule_node`` method, even if the required behaviour is the same. 
Even worse, expecting someone to have to implement a new method in all back-ends when they subclass a node (if they don't require the back-end output to change) is overly restrictive. To get round the above problem, if the attempt to call a method with -the name of the PSyIR class (with `_node` appended) fails, then the -`PSyIRVisitor` will subsequently call the method name of its parent -(with `_node` appended). This will continue with the `PSyIRVisitor` +the name of the PSyIR class (with ``_node`` appended) fails, then the +``PSyIRVisitor`` will subsequently call the method name of its parent +(with ``_node`` appended). This will continue with the ``PSyIRVisitor`` working its way through the class hierarchy in method resolution order until it is successful (or fails for all names and raises an exception). This implementation gives the behaviour one would expect from standard -inheritance rules. For example, if a `kernelschedule_node` method is -implemented in the back-end and a `GOKernelSchedule` is found then a -`gokernelschedule_node` method is first tried which fails, then a -`kernelschedule_node` method is called which succeeds. Therefore all -subclasses of `KernelSchedule` will call the `kernelschedule_node` +inheritance rules. For example, if a ``kernelschedule_node`` method is +implemented in the back-end and a ``GOKernelSchedule`` is found then a +``gokernelschedule_node`` method is first tried which fails, then a +``kernelschedule_node`` method is called which succeeds. Therefore all +subclasses of ``KernelSchedule`` will call the ``kernelschedule_node`` method (if their particular specialisation has not been added). One example of the power of this approach makes use of the fact that -all PSyIR nodes have `Node` as a parent class. Therefore, some base +all PSyIR nodes have ``Node`` as a parent class. Therefore, some base functionality can be added there and all nodes that do not have a specific method implemented will call this. 
To see the -class hierarchy, the following code can be written:: +class hierarchy, the following code can be written: +.. code-block:: python - from __future__ import print_function class PrintHierarchy(PSyIRVisitor): ''' Example of a visitor that prints the PSyIR node hierarchy. ''' @@ -170,12 +194,12 @@ class hierarchy, the following code can be written:: In the examples presented up to now, the information from a back-end has been printed. However, a back-end will generally not want to use -print statements. Output from a `PSyIRVisitor` is supported by +print statements. Output from a ``PSyIRVisitor`` is supported by allowing each method call to return a string. Reimplementing the -previous example using strings would give the following:: +previous example using strings would give the following: + +.. code-block:: python - - from __future__ import print_function class class PrintHierarchy(PSyIRVisitor): ''' Example of a visitor that prints the PSyIR node hierarchy''' @@ -192,23 +216,25 @@ previous example using strings would give the following:: print(result) As most back-ends are expected to indent their output based in some -way on the PSyIR node hierarchy, the `PSyIRVisitor` provides support -for this. The `self._nindent` variable contains the current +way on the PSyIR node hierarchy, the ``PSyIRVisitor`` provides support +for this. The ``self._nindent`` variable contains the current indentation as a string and the indentation can be increased by -increasing the value of the `self._depth` variable. The initial depth +increasing the value of the ``self._depth`` variable. The initial depth defaults to 0 and the initial indentation defaults to two spaces. These defaults can be changed when creating the back-end -instance. For example:: +instance. For example: +.. 
code-block:: python print_hierarchy = PrintHierarchy(initial_indent_depth=2, indent_string="***") -The `PrintHierarchy` example can be modified to support indenting by -writing the following:: +The ``PrintHierarchy`` example can be modified to support indenting by +writing the following: + +.. code-block:: python - from __future__ import print_function class PrintHierarchy(PSyIRVisitor): ''' Example of a visitor that prints the PSyIR node hierarchy with indentation''' @@ -227,11 +253,12 @@ writing the following:: result = print_hierarchy._visit(psyir_tree) print(result) -As a visitor instance always calls the `_visit` method, an alternative -(functor) implementation is provided via the `__call__` method in the +As a visitor instance always calls the ``_visit`` method, an alternative +(functor) implementation is provided via the ``__call__`` method in the base class. This allows the above example to be called in the -following simplified way (as if it were a function):: +following simplified way (as if it were a function): +.. code-block:: python print_hierarchy = PrintHierarchy() result = print_hierarchy(psyir_tree) @@ -239,18 +266,20 @@ following simplified way (as if it were a function):: The primary reason for providing the above (functor) interface is to hide users from the use of the visitor pattern. This is the interface -to expose to users (which is why `_visit` is used for the visitor -method, rather than `visit`). An important characteristic of the `__call__` +to expose to users (which is why ``_visit`` is used for the visitor +method, rather than ``visit``). An important characteristic of the ``__call__`` method is that it will manage the lowering of DSL-concepts because the backends should not provide specific visitors for concepts that do not relate directly to the language domain (more information about the lowering step is -provided in the :ref:`psy_layer_backends` section below). 
This step is done +provided in the :ref:`uplifting-lowering` section below). This step is done internally without exposing side effects (e.g. modifications to the provided tree). This is important because it permits the generation of backend code without altering the existing PSyIR tree, thus simplifying debugging and development. For instance the walk statement in the following example will return the same nodes, regardless of whether or not the print statement -is commented out:: +is commented out: + +.. code-block:: python print_hierarchy = PrintHierarchy() # print(print_hierarchy(psyir_tree)) @@ -268,32 +297,31 @@ is commented out:: PSyIR Validation -================ - -Although the validity of parent-child relationships is checked during the -construction of a PSyIR tree (see e.g. :ref:`nodesinfo-label`), there are -often constraints that can only be checked once the tree is complete i.e. -at the point that a backend is used to generate code. One such example -is that an OpenMP `do` directive must appear within an OpenMP `parallel` -region. - -The base PSyVisitor class provides support for this validation by -calling the `validate_global_constraints()` method of each Node that -it visits. The `Node` base class contains an empty implementation of -this method. Therefore, if a subclass of `Node` is subject to certain +---------------- + +Although some validation is performed during the Node creation and when +applying transformations, there are often constraints that can only be checked +once the tree is complete, i.e. at the point that a backend is used to generate +code. One such example is that an OpenMP ``do`` directive must appear within an +OpenMP ``parallel`` region. + +For this reason, the base ``PSyIRVisitor`` class provides support for this global +validation by calling the ``validate_global_constraints()`` method of each +Node that it visits. The ``Node`` base class contains an empty implementation +of this method. 
Therefore, if a subclass of ``Node`` is subject to certain global constraints then it must override this method and implement the required checks. If those checks fail then the method should raise a -`GenerationError`. +``GenerationError``. Note that, if required, this validation may be disabled by passing -`check_global_constraints=False` when constructing the PSyIRVisitor +``check_global_constraints=False`` when constructing the PSyIRVisitor instance:: print_hierarchy = PrintHierarchy(check_global_constraints=False) Available back-ends -=================== +------------------- Currently, there are two back-ends capable of generating Kernel code (a KernelSchedule with all its children), these are: @@ -309,7 +337,7 @@ Additionally, there are two partially-implemented back-ends valid SIR from simple Fortran code conforming to the NEMO API. SIR back-end -============ +++++++++++++ The SIR back-end is limited in a number of ways: @@ -368,10 +396,10 @@ are assumed to be global, which may not be the case. This limitation is captured in issue #521. -.. _psy_layer_backends: +.. _uplifting-lowering: -Back-ends for the PSy-layer -=========================== +Uplifting and lowering mechanism +================================ The additional complexity of the PSy-layer comes from the fact that it contains multiple domain-specific concepts and parallel concepts that are not @@ -395,8 +423,3 @@ The language-level PSyIR is still the same IR but restricted to the subset of Nodes that have a direct translation into target language concepts. .. image:: 2level_psyir.png - -.. note:: Using the language backends to generate the PSy-layer code - is supported by the Nemo and GOcean APIs. For the GOcean API the - algorithm-layer is also generated using the language - backends. LFRic support is still under development, see #1010. 
diff --git a/examples/nemo/scripts/utils.py b/examples/nemo/scripts/utils.py index 6ebe7ffbdd..3cab05f169 100755 --- a/examples/nemo/scripts/utils.py +++ b/examples/nemo/scripts/utils.py @@ -480,7 +480,7 @@ class MaximalProfilingOutsideDirectivesTrans(MaximalRegionTrans): :param routine_name: The name of the Routine being profiled. ''' - # We purposely don't encompase Directive, or Return statements + # We purposely don't encompass Directive, or Return statements # (which would create unclosed hooks). _allowed_contiguous_statements = (Assignment, Call, CodeBlock) _transformation = ProfileTrans @@ -496,7 +496,7 @@ def _satisfies_minimum_region_rules(self, region: list[Node]) -> bool: ''' if len(region) == 1: if (isinstance(region[0], CodeBlock) and - len(region[0].get_ast_nodes) == 1): + len(region[0].parse_tree_nodes) == 1): # Don't create profiling regions for CodeBlocks consisting # of a single statement. return False diff --git a/examples/psyir/custom_directives/Makefile b/examples/psyir/custom_directives/Makefile index 95f2d28aa0..bd058aeca8 100644 --- a/examples/psyir/custom_directives/Makefile +++ b/examples/psyir/custom_directives/Makefile @@ -1,7 +1,7 @@ # ----------------------------------------------------------------------------- # BSD 3-Clause License # -# Copyright (c) 2025, Science and Technology Facilities Council +# Copyright (c) 2025-2026, Science and Technology Facilities Council # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/examples/psyir/custom_directives/README.md b/examples/psyir/custom_directives/README.md index 0770b4f6ad..bc0fdea5cd 100644 --- a/examples/psyir/custom_directives/README.md +++ b/examples/psyir/custom_directives/README.md @@ -1,7 +1,7 @@