diff --git a/doc/user_guide/transformations.rst b/doc/user_guide/transformations.rst index c5d22923ed..08665efeab 100644 --- a/doc/user_guide/transformations.rst +++ b/doc/user_guide/transformations.rst @@ -376,7 +376,7 @@ can be found in the API-specific sections). #### -.. autoclass:: psyclone.transformations.OMPDeclareTargetTrans +.. autoclass:: psyclone.psyir.transformations.OMPDeclareTargetTrans :members: apply :no-index: diff --git a/examples/lfric/scripts/gpu_offloading.py b/examples/lfric/scripts/gpu_offloading.py index 2e693015af..d67b292f16 100644 --- a/examples/lfric/scripts/gpu_offloading.py +++ b/examples/lfric/scripts/gpu_offloading.py @@ -48,12 +48,13 @@ from psyclone.psyir.nodes import ( Call, Directive, IntrinsicCall, Loop, Routine, Schedule) from psyclone.psyir.transformations import ( - ACCKernelsTrans, Matmul2CodeTrans, OMPTargetTrans, TransformationError) + ACCKernelsTrans, Matmul2CodeTrans, OMPTargetTrans, TransformationError, + OMPDeclareTargetTrans) from psyclone.transformations import ( LFRicColourTrans, LFRicOMPLoopTrans, LFRicRedundantComputationTrans, OMPParallelTrans, ACCParallelTrans, ACCLoopTrans, ACCRoutineTrans, - OMPDeclareTargetTrans, OMPLoopTrans) + OMPLoopTrans) # Names of any invoke that we won't add any GPU offloading diff --git a/examples/nemo/scripts/omp_gpu_trans.py b/examples/nemo/scripts/omp_gpu_trans.py index 89740cad9c..6b23b3c2ba 100755 --- a/examples/nemo/scripts/omp_gpu_trans.py +++ b/examples/nemo/scripts/omp_gpu_trans.py @@ -43,9 +43,10 @@ normalise_loops, enhance_tree_information, PARALLELISATION_ISSUES, NEMO_MODULES_TO_IMPORT, PRIVATISATION_ISSUES) from psyclone.psyir.nodes import Routine -from psyclone.psyir.transformations import OMPTargetTrans +from psyclone.psyir.transformations import ( + OMPTargetTrans, OMPDeclareTargetTrans) from psyclone.transformations import ( - OMPLoopTrans, OMPDeclareTargetTrans, TransformationError) + OMPLoopTrans, TransformationError) # This environment variable informs if profiling 
hooks have to be inserted. diff --git a/src/psyclone/psyir/transformations/__init__.py b/src/psyclone/psyir/transformations/__init__.py index c61eb449ce..458d073823 100644 --- a/src/psyclone/psyir/transformations/__init__.py +++ b/src/psyclone/psyir/transformations/__init__.py @@ -116,6 +116,12 @@ ParallelRegionTrans from psyclone.psyir.transformations.omp_taskloop_trans import \ OMPTaskloopTrans +from psyclone.psyir.transformations.omp_declare_target_trans import \ + OMPDeclareTargetTrans +from psyclone.psyir.transformations.acc_loop_trans import \ + ACCLoopTrans +from psyclone.psyir.transformations.omp_parallel_loop_trans import \ + OMPParallelLoopTrans # For AutoAPI documentation generation __all__ = [ @@ -162,4 +168,7 @@ "ValueRangeCheckTrans", "ParallelRegionTrans", "OMPTaskloopTrans", + "OMPDeclareTargetTrans", + "ACCLoopTrans", + "OMPParallelLoopTrans", ] diff --git a/src/psyclone/psyir/transformations/acc_loop_trans.py b/src/psyclone/psyir/transformations/acc_loop_trans.py new file mode 100644 index 0000000000..9bba2a8471 --- /dev/null +++ b/src/psyclone/psyir/transformations/acc_loop_trans.py @@ -0,0 +1,158 @@ +# ----------------------------------------------------------------------------- +# BSD 3-Clause License +# +# Copyright (c) 2017-2025, Science and Technology Facilities Council. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
class ACCLoopTrans(ParallelLoopTrans):
    '''
    Transformation that decorates a loop with an OpenACC loop directive.
    By code-generation time the resulting directive must be located inside
    an OpenACC Parallel region.

    For example:

    >>> from psyclone.parse.algorithm import parse
    >>> from psyclone.psyGen import PSyFactory
    >>> from psyclone.psyir.transformations import ACCLoopTrans
    >>> from psyclone.transformations import ACCParallelTrans
    >>> api = "gocean"
    >>> ast, invokeInfo = parse(GOCEAN_SOURCE_FILE, api=api)
    >>> psy = PSyFactory(api).create(invokeInfo)
    >>>
    >>> ltrans = ACCLoopTrans()
    >>> rtrans = ACCParallelTrans()
    >>>
    >>> schedule = psy.invokes.get('invoke_0').schedule
    >>> # Uncomment the following line to see a text view of the schedule
    >>> # print(schedule.view())
    >>>
    >>> # Apply the OpenACC Loop transformation to *every* loop in the schedule
    >>> for child in schedule.children[:]:
    ...     ltrans.apply(child)
    >>>
    >>> # Enclose all of these loops within a single OpenACC parallel region
    >>> rtrans.apply(schedule)
    >>>

    '''
    # Node types that are not allowed anywhere inside the region of PSyIR
    # that this transformation is applied to.
    excluded_node_types = (PSyDataNode,)

    def __init__(self):
        # Clause settings used when the directive is created. These are
        # only defaults: apply() overwrites them from its options.
        self._independent = True
        self._sequential = self._gang = self._vector = False
        super().__init__()

    def __str__(self):
        return "Adds an 'OpenACC loop' directive to a loop"

    def _directive(self, children, collapse=None):
        '''
        Builds the ACCLoopDirective that this sub-class of transformation
        inserts, using the clause settings currently stored on the
        instance.

        :param children: list of child nodes of the new directive Node.
        :type children: list of :py:class:`psyclone.psyir.nodes.Node`
        :param int collapse: number of nested loops to collapse or None if
                             no collapse attribute is required.

        :returns: the newly created directive.
        :rtype: :py:class:`psyclone.psyir.nodes.ACCLoopDirective`

        '''
        return ACCLoopDirective(children=children,
                                collapse=collapse,
                                independent=self._independent,
                                sequential=self._sequential,
                                gang=self._gang,
                                vector=self._vector)

    def apply(self, node, options=None):
        '''
        Apply the ACCLoop transformation to the specified node. The node
        must be a Loop because the transformation amounts to placing a
        directive immediately before a loop, e.g.:

        .. code-block:: fortran

          !$ACC LOOP
          do ...
             ...
          end do

        At code-generation time (when lowering is called), this node must
        be within (i.e. a child of) a PARALLEL region.

        :param node: the supplied node to which we will apply the
                     Loop transformation.
        :type node: :py:class:`psyclone.psyir.nodes.Loop`
        :param options: a dictionary with options for transformations.
        :type options: Optional[Dict[str, Any]]
        :param int options["collapse"]: number of nested loops to collapse.
        :param bool options["independent"]: whether to add the "independent"
                clause to the directive (not strictly necessary within
                PARALLEL regions).
        :param bool options["sequential"]: whether to add the "seq" clause to
                the directive.
        :param bool options["gang"]: whether to add the "gang" clause to the
                directive.
        :param bool options["vector"]: whether to add the "vector" clause to
                the directive.

        '''
        # A missing or empty options argument means "use the defaults".
        options = options or {}

        # Record the clause choices on the instance so that _directive()
        # (invoked by the base-class apply) can pick them up.
        clause_defaults = (("independent", True),
                           ("sequential", False),
                           ("gang", False),
                           ("vector", False))
        for clause, default in clause_defaults:
            setattr(self, f"_{clause}", options.get(clause, default))

        # The base class performs the validation and the tree surgery.
        super().apply(node, options)
class MarkRoutineForGPUMixin:
    ''' Mixin providing the "validate_it_can_run_on_gpu" method which, given
    a routine or kernel node, checks that the callee code is valid to run
    on a GPU. It is implemented as a Mixin because transformations from
    multiple programming models, e.g. OpenMP and OpenACC, can reuse the
    same logic.

    '''
    def validate_it_can_run_on_gpu(self, node, options):
        '''
        Check that the supplied node can be marked as available to be
        called on GPU. The checks are performed in the order listed under
        "raises" below and the first one that fails raises the error.

        :param node: the kernel or routine to validate.
        :type node: :py:class:`psyclone.psyGen.Kern` |
                    :py:class:`psyclone.psyir.nodes.Routine`
        :param options: a dictionary with options for transformations. May
            be None or empty, in which case the defaults below are used.
        :type options: Optional[Dict[str, Any]]
        :param bool options["force"]: whether to allow routines with
            CodeBlocks to run on the GPU (default False).
        :param str options["device_string"]: provide a compiler-platform
            identifier (default is the empty string).

        :raises TransformationError: if the node is not a kernel or a routine.
        :raises TransformationError: if the target is a built-in kernel.
        :raises TransformationError: if it is a kernel but without an
                                     associated PSyIR.
        :raises TransformationError: if any of the symbols in the kernel are
                                     accessed via a module use statement (and
                                     are not compile-time constants).
        :raises TransformationError: if the routine contains any CodeBlocks
                                     and options["force"] is not set.
        :raises TransformationError: if the kernel calls an intrinsic that
                                     is not available on the target device.
        :raises TransformationError: if the kernel contains any calls to other
                                     routines that are not available on the
                                     device.
        '''
        # A None/empty 'options' falls back to the defaults for both settings.
        force = options.get("force", False) if options else False
        device_string = options.get("device_string", "") if options else ""

        if not isinstance(node, (Kern, Routine)):
            raise TransformationError(
                f"The {type(self).__name__} must be applied to a sub-class of "
                f"Kern or Routine but got '{type(node).__name__}'.")

        # If it is a kernel call it must have an accessible implementation
        if isinstance(node, BuiltIn):
            raise TransformationError(
                f"Applying {type(self).__name__} to a built-in kernel is not "
                f"yet supported and kernel '{node.name}' is of type "
                f"'{type(node).__name__}'")

        if isinstance(node, Kern):
            # Get the PSyIR routine from the associated kernel. If there is an
            # exception (this could mean that there is no associated tree
            # or that the frontend failed to convert it into PSyIR) reraise it
            # as a TransformationError
            try:
                kernel_schedules = node.get_callees()
            except Exception as error:
                raise TransformationError(
                    f"Failed to create PSyIR for kernel '{node.name}'. "
                    f"Cannot transform such a kernel.") from error

            # Label used in the error messages below.
            k_or_r = "Kernel"
        else:
            # Supplied node is a PSyIR Routine which *is* a Schedule.
            kernel_schedules = [node]
            k_or_r = "routine"

        # Check that the routine(s) do(oes) not access any data that is
        # imported via a 'use' statement.
        for sched in kernel_schedules:
            vam = sched.reference_accesses()
            ktable = sched.symbol_table
            for sig in vam.all_signatures:
                name = sig.var_name
                # Only the first recorded access is used to choose the
                # symbol table in which the name is looked up.
                first = vam[sig].all_accesses[0].node
                if isinstance(first, (Symbol, DataType)):
                    # Symbols and DataTypes are looked up in the routine's
                    # own table.
                    table = ktable
                else:
                    try:
                        table = first.scope.symbol_table
                    except SymbolError:
                        # The node associated with this access is not within a
                        # scoping region.
                        table = ktable
                symbol = table.lookup(name)
                if symbol.is_import:
                    # resolve_type does nothing if the Symbol type is known.
                    try:
                        symbol.resolve_type()
                    except (SymbolError, FileNotFoundError):
                        # TODO #11 - log that we failed to resolve this Symbol.
                        # Best effort only: an unresolved symbol is rejected
                        # by the raise below (unless it is a constant
                        # DataSymbol).
                        pass
                    if (isinstance(symbol, DataSymbol) and symbol.is_constant):
                        # An import of a compile-time constant is fine.
                        continue
                    raise TransformationError(
                        f"{k_or_r} '{node.name}' accesses the symbol "
                        f"'{symbol}' which is imported. If this symbol "
                        f"represents data then it must first be converted to a"
                        f" {k_or_r} argument using the "
                        f"KernelImportsToArguments transformation.")

            # We forbid CodeBlocks because we can't be certain that what they
            # contain can be executed on a GPU. However, we do permit the user
            # to override this check.
            cblocks = sched.walk(CodeBlock)
            if not force:
                if cblocks:
                    # Only the first CodeBlock is quoted in the message. The
                    # 'node' name in the generator below is local to that
                    # generator and does not rebind the 'node' argument used
                    # in the f-string that follows.
                    cblock_txt = ("\n " + "\n ".join(
                        str(node) for node in cblocks[0].get_ast_nodes)
                        + "\n")
                    option_txt = "options={'force': True}"
                    raise TransformationError(
                        f"Cannot safely apply {type(self).__name__} to "
                        f"{k_or_r} '{node.name}' because its PSyIR contains "
                        f"one or more CodeBlocks:{cblock_txt}You may use "
                        f"'{option_txt}' to override this check.")

            # Every call (including intrinsics) in the routine must itself be
            # available on the requested device.
            for call in sched.walk(Call):
                if not call.is_available_on_device(device_string):
                    if isinstance(call, IntrinsicCall):
                        # Intrinsics get a dedicated message that names the
                        # device and points to the 'device_string' option.
                        if device_string:
                            device_str = (f"on the '{device_string}' "
                                          f"accelerator device")
                        else:
                            device_str = "on the default accelerator device"
                        raise TransformationError(
                            f"{k_or_r} '{node.name}' calls intrinsic "
                            f"'{call.intrinsic.name}' which is not available "
                            f"{device_str}. Use the 'device_string' option to "
                            f"specify a different device."
                        )
                    # Any other unavailable call is reported using its
                    # debug string, stripped of trailing newlines.
                    call_str = call.debug_string().rstrip("\n")
                    raise TransformationError(
                        f"{k_or_r} '{node.name}' calls another routine "
                        f"'{call_str}' which is not available on the "
                        f"accelerator device and therefore cannot have "
                        f"{type(self).__name__} applied to it (TODO #342).")
class OMPDeclareTargetTrans(Transformation, MarkRoutineForGPUMixin):
    '''
    Adds an OpenMP declare target directive to the specified routine.

    For example:

    >>> from psyclone.psyir.frontend.fortran import FortranReader
    >>> from psyclone.psyir.nodes import Routine
    >>> from psyclone.psyir.transformations import OMPDeclareTargetTrans
    >>>
    >>> tree = FortranReader().psyir_from_source("""
    ...     subroutine my_subroutine(A)
    ...         integer, dimension(10, 10), intent(inout) :: A
    ...         integer :: i
    ...         integer :: j
    ...         do i = 1, 10
    ...             do j = 1, 10
    ...                 A(i, j) = 0
    ...             end do
    ...         end do
    ...     end subroutine
    ...     """)
    >>> omptargettrans = OMPDeclareTargetTrans()
    >>> omptargettrans.apply(tree.walk(Routine)[0])

    will generate:

    .. code-block:: fortran

        subroutine my_subroutine(A)
            integer, dimension(10, 10), intent(inout) :: A
            integer :: i
            integer :: j
            !$omp declare target
            do i = 1, 10
                do j = 1, 10
                    A(i, j) = 0
                end do
            end do
        end subroutine

    '''
    def apply(self, node, options=None):
        ''' Insert an OMPDeclareTargetDirective inside the provided routine or
        associated PSyKAl kernel. A routine that already has such a
        directive as a direct child is left untouched.

        :param node: the kernel or routine which is the target of this
            transformation.
        :type node: :py:class:`psyclone.psyir.nodes.Routine` |
                    :py:class:`psyclone.psyGen.Kern`
        :param options: a dictionary with options for transformations.
        :type options: Optional[Dict[str, Any]]
        :param bool options["force"]: whether to allow routines with
            CodeBlocks to run on the GPU.
        :param str options["device_string"]: provide a compiler-platform
            identifier.

        '''
        self.validate(node, options)

        if isinstance(node, Kern):
            # Flag that the kernel has been modified
            node.modified = True

            # Get the schedule(s) representing the kernel subroutine
            routines = node.get_callees()
        else:
            routines = [node]

        for routine in routines:
            # Only add the directive if the routine does not already have
            # one among its direct children.
            if not any(isinstance(child, OMPDeclareTargetDirective) for
                       child in routine.children):
                routine.children.insert(0, OMPDeclareTargetDirective())

    def validate(self, node, options=None):
        ''' Check that an OMPDeclareTargetDirective can be inserted.

        :param node: the kernel or routine which is the target of this
            transformation.
        :type node: :py:class:`psyclone.psyGen.Kern` |
                    :py:class:`psyclone.psyir.nodes.Routine`
        :param options: a dictionary with options for transformations.
        :type options: Optional[Dict[str, Any]]
        :param bool options["force"]: whether to allow routines with
            CodeBlocks to run on the GPU.
        :param str options["device_string"]: provide a compiler-platform
            identifier.

        :raises TransformationError: if the node is not a kernel or a routine.
        :raises TransformationError: if the target is a built-in kernel.
        :raises TransformationError: if it is a kernel but without an
                                     associated PSyIR.
        :raises TransformationError: if any of the symbols in the kernel are
                                     accessed via a module use statement (and
                                     are not compile-time constants).
        :raises TransformationError: if the routine contains any CodeBlocks
                                     and options["force"] is not set.
        :raises TransformationError: if the kernel calls an intrinsic or
                                     another routine that is not available
                                     on the device.

        '''
        super().validate(node, options=options)

        # All of the GPU-suitability checks live in the shared mixin.
        self.validate_it_can_run_on_gpu(node, options)
class OMPParallelLoopTrans(OMPLoopTrans):

    ''' Transformation that encloses a loop within an OpenMP PARALLEL DO
    directive.

    For example:

    >>> from psyclone.parse.algorithm import parse
    >>> from psyclone.psyGen import PSyFactory
    >>> ast, invokeInfo = parse("lfric.F90")
    >>> psy = PSyFactory("lfric").create(invokeInfo)
    >>> schedule = psy.invokes.get('invoke_v3_kernel_type').schedule
    >>> # Uncomment the following line to see a text view of the schedule
    >>> # print(schedule.view())
    >>>
    >>> from psyclone.psyir.transformations import OMPParallelLoopTrans
    >>> trans = OMPParallelLoopTrans()
    >>> trans.apply(schedule.children[0])
    >>> # Uncomment the following line to see a text view of the schedule
    >>> # print(schedule.view())

    '''
    def __str__(self):
        return "Add an 'OpenMP PARALLEL DO' directive"

    def apply(self, node, options=None):
        ''' Apply an OMPParallelLoop Transformation to the supplied node
        (which must be a Loop). In the generated code the Loop ends up
        wrapped by the directives:

        .. code-block:: fortran

          !$OMP PARALLEL DO ...
          do ...
            ...
          end do
          !$OMP END PARALLEL DO

        :param node: the node (loop) to which to apply the transformation.
        :type node: :py:class:`psyclone.psyir.nodes.Loop`
        :param options: a dictionary with options for transformations
                        and validation.
        :type options: Optional[Dict[str, Any]]

        '''
        self.validate(node, options=options)

        # Record where the loop currently lives: both values are needed
        # below and are lost once the loop is detached from the tree.
        old_parent = node.parent
        old_index = node.position

        # Take the loop out of the tree and make it the only child of a
        # new PARALLEL DO directive.
        loop = node.detach()
        directive = OMPParallelDoDirective(children=[loop],
                                           omp_schedule=self.omp_schedule)

        # Put the directive in the place that the loop used to occupy.
        old_parent.addchild(directive, index=old_index)
b/src/psyclone/tests/domain/gocean/transformations/gocean1p0_transformations_test.py @@ -49,10 +49,10 @@ from psyclone.psyGen import Kern from psyclone.psyir.nodes import Loop from psyclone.psyir.transformations import ( - LoopFuseTrans, LoopTrans, TransformationError) -from psyclone.transformations import ACCRoutineTrans, \ - OMPParallelTrans, GOceanOMPParallelLoopTrans, GOceanOMPLoopTrans, \ - OMPLoopTrans, ACCParallelTrans, ACCEnterDataTrans, ACCLoopTrans + LoopFuseTrans, LoopTrans, TransformationError, ACCLoopTrans) +from psyclone.transformations import ( + ACCRoutineTrans, OMPParallelTrans, GOceanOMPParallelLoopTrans, + GOceanOMPLoopTrans, OMPLoopTrans, ACCParallelTrans, ACCEnterDataTrans) from psyclone.domain.gocean.transformations import GOConstLoopBoundsTrans from psyclone.tests.gocean_build import GOceanBuild from psyclone.tests.utilities import count_lines, get_invoke, get_base_path diff --git a/src/psyclone/tests/domain/gocean/transformations/gocean_extract_test.py b/src/psyclone/tests/domain/gocean/transformations/gocean_extract_test.py index 40d32e18df..77c185e73d 100644 --- a/src/psyclone/tests/domain/gocean/transformations/gocean_extract_test.py +++ b/src/psyclone/tests/domain/gocean/transformations/gocean_extract_test.py @@ -47,9 +47,10 @@ from psyclone.configuration import Config from psyclone.domain.gocean.transformations import GOceanExtractTrans from psyclone.psyir.nodes import ExtractNode, Loop, OMPDirective -from psyclone.psyir.transformations import PSyDataTrans, TransformationError +from psyclone.psyir.transformations import (PSyDataTrans, TransformationError, + ACCLoopTrans) from psyclone.transformations import (ACCParallelTrans, ACCEnterDataTrans, - ACCLoopTrans, GOceanOMPLoopTrans, + GOceanOMPLoopTrans, GOceanOMPParallelLoopTrans, OMPParallelTrans) from psyclone.domain.gocean.transformations import GOConstLoopBoundsTrans diff --git a/src/psyclone/tests/domain/lfric/transformations/lfric_transformations_test.py 
b/src/psyclone/tests/domain/lfric/transformations/lfric_transformations_test.py index ed7ccf6cd0..08516b9554 100644 --- a/src/psyclone/tests/domain/lfric/transformations/lfric_transformations_test.py +++ b/src/psyclone/tests/domain/lfric/transformations/lfric_transformations_test.py @@ -61,14 +61,14 @@ REAL_TYPE, INTEGER_TYPE) from psyclone.psyir.transformations import ( ACCKernelsTrans, LoopFuseTrans, LoopTrans, OMPLoopTrans, - TransformationError) + TransformationError, ACCLoopTrans) from psyclone.tests.lfric_build import LFRicBuild from psyclone.tests.utilities import get_invoke from psyclone.transformations import ( OMPParallelTrans, LFRicColourTrans, LFRicOMPLoopTrans, LFRicOMPParallelLoopTrans, MoveTrans, LFRicRedundantComputationTrans, LFRicAsyncHaloExchangeTrans, LFRicKernelConstTrans, - ACCLoopTrans, ACCParallelTrans, ACCEnterDataTrans) + ACCParallelTrans, ACCEnterDataTrans) # The version of the API that the tests in this file diff --git a/src/psyclone/tests/nemo/transformations/openacc/loop_directive_test.py b/src/psyclone/tests/nemo/transformations/openacc/loop_directive_test.py index 967f2b7c27..c7a0148847 100644 --- a/src/psyclone/tests/nemo/transformations/openacc/loop_directive_test.py +++ b/src/psyclone/tests/nemo/transformations/openacc/loop_directive_test.py @@ -41,7 +41,8 @@ import pytest from psyclone.psyGen import TransInfo -from psyclone.psyir.transformations import ACCKernelsTrans +from psyclone.psyir.transformations import ( + ACCKernelsTrans, ACCLoopTrans) from psyclone.psyir.nodes import Loop from psyclone.errors import GenerationError @@ -60,7 +61,7 @@ def test_missing_enclosing_region(fortran_reader): "end do\n" "end program do_loop\n") schedule = psyir.children[0] - acc_trans = TransInfo().get_trans_name('ACCLoopTrans') + acc_trans = ACCLoopTrans() acc_trans.apply(schedule[0]) with pytest.raises(GenerationError) as err: schedule[0].validate_global_constraints() @@ -85,7 +86,7 @@ def test_explicit_loop(fortran_reader, fortran_writer): 
"end do\n" "end program do_loop\n") schedule = psyir.children[0] - acc_trans = TransInfo().get_trans_name('ACCLoopTrans') + acc_trans = ACCLoopTrans() para_trans = TransInfo().get_trans_name('ACCParallelTrans') data_trans = TransInfo().get_trans_name('ACCDataTrans') para_trans.apply(schedule.children) @@ -144,7 +145,7 @@ def test_seq_loop(fortran_reader, fortran_writer): clause. ''' psyir = fortran_reader.psyir_from_source(SINGLE_LOOP) schedule = psyir.children[0] - acc_trans = TransInfo().get_trans_name('ACCLoopTrans') + acc_trans = ACCLoopTrans() # An ACC Loop must be within a KERNELS or PARALLEL region kernels_trans = ACCKernelsTrans() kernels_trans.apply(schedule.children) @@ -164,7 +165,7 @@ def test_loop_clauses(fortran_reader, fortran_writer, clause): clauses for independent loops. ''' psyir = fortran_reader.psyir_from_source(SINGLE_LOOP) schedule = psyir.children[0] - acc_trans = TransInfo().get_trans_name('ACCLoopTrans') + acc_trans = ACCLoopTrans() # An ACC Loop must be within a KERNELS or PARALLEL region kernels_trans = ACCKernelsTrans() kernels_trans.apply(schedule.children) @@ -183,7 +184,7 @@ def test_collapse(fortran_reader, fortran_writer): clause. ''' psyir = fortran_reader.psyir_from_source(DOUBLE_LOOP) schedule = psyir.children[0] - acc_trans = TransInfo().get_trans_name('ACCLoopTrans') + acc_trans = ACCLoopTrans() # An ACC Loop must be within a KERNELS or PARALLEL region kernels_trans = ACCKernelsTrans() kernels_trans.apply(schedule.children) diff --git a/src/psyclone/tests/psyir/backend/psyir_openacc_test.py b/src/psyclone/tests/psyir/backend/psyir_openacc_test.py index 3f89d7beb7..fcfab9d6ad 100644 --- a/src/psyclone/tests/psyir/backend/psyir_openacc_test.py +++ b/src/psyclone/tests/psyir/backend/psyir_openacc_test.py @@ -40,7 +40,6 @@ psyclone.psyir.backend.fortran and c modules. 
''' import pytest -from psyclone.psyGen import TransInfo from psyclone.psyir.backend.c import CWriter from psyclone.psyir.backend.fortran import FortranWriter from psyclone.psyir.nodes import (Assignment, Reference, Loop, Directive, @@ -49,6 +48,7 @@ from psyclone.psyir.transformations import ACCKernelsTrans from psyclone.transformations import (ACCDataTrans, ACCParallelTrans) from psyclone.tests.utilities import get_invoke +from psyclone.psyir.transformations.acc_loop_trans import ACCLoopTrans NEMO_TEST_CODE = ''' @@ -202,7 +202,7 @@ def test_acc_loop(fortran_reader, fortran_writer): ''' Tests that an OpenACC loop directive is handled correctly. ''' psyir = fortran_reader.psyir_from_source(DOUBLE_LOOP) schedule = psyir.children[0] - acc_trans = TransInfo().get_trans_name('ACCLoopTrans') + acc_trans = ACCLoopTrans() # An ACC Loop must be within a KERNELS or PARALLEL region kernels_trans = ACCKernelsTrans() kernels_trans.apply(schedule.children) diff --git a/src/psyclone/tests/psyir/nodes/acc_directives_test.py b/src/psyclone/tests/psyir/nodes/acc_directives_test.py index ab7e3b5447..fc560cfd38 100644 --- a/src/psyclone/tests/psyir/nodes/acc_directives_test.py +++ b/src/psyclone/tests/psyir/nodes/acc_directives_test.py @@ -58,9 +58,9 @@ from psyclone.psyir.nodes.loop import Loop from psyclone.psyir.symbols import ( Symbol, SymbolTable, DataSymbol, INTEGER_TYPE) -from psyclone.psyir.transformations import ACCKernelsTrans +from psyclone.psyir.transformations import ACCKernelsTrans, ACCLoopTrans from psyclone.transformations import ( - ACCDataTrans, ACCEnterDataTrans, ACCLoopTrans, + ACCDataTrans, ACCEnterDataTrans, ACCParallelTrans, ACCRoutineTrans) BASE_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname( diff --git a/src/psyclone/tests/psyir/transformations/acc_kernels_trans_test.py b/src/psyclone/tests/psyir/transformations/acc_kernels_trans_test.py index f620dcc8e3..80f66f9bc0 100644 --- 
a/src/psyclone/tests/psyir/transformations/acc_kernels_trans_test.py +++ b/src/psyclone/tests/psyir/transformations/acc_kernels_trans_test.py @@ -45,8 +45,8 @@ from psyclone.errors import GenerationError from psyclone.psyir.nodes import Assignment, ACCKernelsDirective, Loop, Routine from psyclone.psyir.transformations import ( - ACCKernelsTrans, TransformationError, ProfileTrans) -from psyclone.transformations import ACCEnterDataTrans, ACCLoopTrans + ACCKernelsTrans, TransformationError, ProfileTrans, ACCLoopTrans) +from psyclone.transformations import ACCEnterDataTrans from psyclone.tests.utilities import get_invoke EXPLICIT_LOOP = ("program do_loop\n" diff --git a/src/psyclone/tests/psyir/transformations/extract_trans_test.py b/src/psyclone/tests/psyir/transformations/extract_trans_test.py index c30581e225..7b7bcbd1b7 100644 --- a/src/psyclone/tests/psyir/transformations/extract_trans_test.py +++ b/src/psyclone/tests/psyir/transformations/extract_trans_test.py @@ -47,8 +47,9 @@ from psyclone.psyir.tools import ReadWriteInfo from psyclone.psyir.transformations import ExtractTrans, TransformationError from psyclone.tests.utilities import get_invoke -from psyclone.transformations import (ACCParallelTrans, ACCLoopTrans, +from psyclone.transformations import (ACCParallelTrans, LFRicOMPParallelLoopTrans) +from psyclone.psyir.transformations import ACCLoopTrans # --------------------------------------------------------------------------- # diff --git a/src/psyclone/tests/psyir/transformations/kernel_transformation_test.py b/src/psyclone/tests/psyir/transformations/kernel_transformation_test.py index 50cf0101f2..a512c16e25 100644 --- a/src/psyclone/tests/psyir/transformations/kernel_transformation_test.py +++ b/src/psyclone/tests/psyir/transformations/kernel_transformation_test.py @@ -49,10 +49,9 @@ from psyclone.psyGen import Kern from psyclone.psyir.nodes import Routine, FileContainer, IntrinsicCall, Call from psyclone.psyir.symbols import DataSymbol, INTEGER_TYPE 
-from psyclone.psyir.transformations import TransformationError -from psyclone.transformations import ( - ACCRoutineTrans, OMPDeclareTargetTrans, LFRicKernelConstTrans) - +from psyclone.psyir.transformations import TransformationError, \ + OMPDeclareTargetTrans +from psyclone.transformations import ACCRoutineTrans, LFRicKernelConstTrans from psyclone.tests.gocean_build import GOceanBuild from psyclone.tests.lfric_build import LFRicBuild from psyclone.tests.utilities import get_invoke diff --git a/src/psyclone/tests/psyir/transformations/transformations_test.py b/src/psyclone/tests/psyir/transformations/transformations_test.py index 22b7d6454a..56a73af401 100644 --- a/src/psyclone/tests/psyir/transformations/transformations_test.py +++ b/src/psyclone/tests/psyir/transformations/transformations_test.py @@ -49,15 +49,16 @@ OMPDoDirective, OMPLoopDirective, Routine) from psyclone.psyir.symbols import (DataSymbol, INTEGER_TYPE, ImportInterface, ContainerSymbol) -from psyclone.psyir.transformations import ProfileTrans, RegionTrans, \ - TransformationError +from psyclone.psyir.transformations import ( + ProfileTrans, RegionTrans, TransformationError, OMPTaskloopTrans, + OMPDeclareTargetTrans, ACCLoopTrans) from psyclone.tests.utilities import get_invoke, Compile -from psyclone.transformations import ACCEnterDataTrans, ACCLoopTrans, \ - ACCParallelTrans, OMPLoopTrans, OMPParallelLoopTrans, OMPParallelTrans, \ - OMPSingleTrans, OMPMasterTrans, OMPDeclareTargetTrans +from psyclone.transformations import ( + ACCEnterDataTrans, ACCParallelTrans, OMPLoopTrans, + OMPParallelLoopTrans, OMPParallelTrans, OMPSingleTrans, + OMPMasterTrans) from psyclone.parse.algorithm import parse from psyclone.psyGen import PSyFactory -from psyclone.psyir.transformations.omp_taskloop_trans import OMPTaskloopTrans GOCEAN_BASE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, os.pardir, "test_files", diff --git a/src/psyclone/transformations.py 
b/src/psyclone/transformations.py index ac3e935482..808fa210e6 100644 --- a/src/psyclone/transformations.py +++ b/src/psyclone/transformations.py @@ -56,15 +56,14 @@ from psyclone.lfric import LFRicHaloExchangeEnd, LFRicHaloExchangeStart from psyclone.errors import InternalError from psyclone.gocean1p0 import GOInvokeSchedule -from psyclone.psyGen import (Transformation, CodedKern, Kern, InvokeSchedule, - BuiltIn) +from psyclone.psyGen import (Transformation, CodedKern, Kern, InvokeSchedule) from psyclone.psyir.nodes import ( ACCDataDirective, ACCDirective, ACCEnterDataDirective, ACCKernelsDirective, - ACCLoopDirective, ACCParallelDirective, ACCRoutineDirective, + ACCParallelDirective, ACCRoutineDirective, Call, CodeBlock, Directive, Literal, Loop, Node, - OMPDeclareTargetDirective, OMPDirective, OMPMasterDirective, - OMPParallelDirective, OMPParallelDoDirective, OMPSerialDirective, - Return, Routine, Schedule, + OMPDirective, OMPMasterDirective, + OMPParallelDirective, OMPSerialDirective, + Return, Schedule, OMPSingleDirective, PSyDataNode, IntrinsicCall) from psyclone.psyir.nodes.acc_mixins import ACCAsyncMixin from psyclone.psyir.nodes.array_mixin import ArrayMixin @@ -72,16 +71,18 @@ from psyclone.psyir.nodes.structure_reference import StructureReference from psyclone.psyir.symbols import ( ArgumentInterface, DataSymbol, INTEGER_TYPE, ScalarType, Symbol, - SymbolError, UnresolvedType, DataType) + SymbolError, UnresolvedType) from psyclone.psyir.transformations.loop_trans import LoopTrans from psyclone.psyir.transformations.omp_loop_trans import OMPLoopTrans -from psyclone.psyir.transformations.parallel_loop_trans import ( - ParallelLoopTrans) from psyclone.psyir.transformations.region_trans import RegionTrans from psyclone.psyir.transformations.transformation_error import ( TransformationError) from psyclone.psyir.transformations import ParallelRegionTrans from psyclone.utils import transformation_documentation_wrapper +from 
psyclone.psyir.transformations.mark_routine_for_gpu_mixin import ( + MarkRoutineForGPUMixin) +from psyclone.psyir.transformations.omp_parallel_loop_trans import ( + OMPParallelLoopTrans) def check_intergrid(node): @@ -111,421 +112,6 @@ def check_intergrid(node): f" is such a kernel.") -class MarkRoutineForGPUMixin: - ''' This Mixin provides the "validate_it_can_run_on_gpu" method that - given a routine or kernel node, it checks that the callee code is valid - to run on a GPU. It is implemented as a Mixin because transformations - from multiple programming models, e.g. OpenMP and OpenACC, can reuse - the same logic. - - ''' - def validate_it_can_run_on_gpu(self, node, options): - ''' - Check that the supplied node can be marked as available to be - called on GPU. - - :param node: the kernel or routine to validate. - :type node: :py:class:`psyclone.psyGen.Kern` | - :py:class:`psyclone.psyir.nodes.Routine` - :param options: a dictionary with options for transformations. - :type options: Optional[Dict[str, Any]] - :param bool options["force"]: whether to allow routines with - CodeBlocks to run on the GPU. - :param str options["device_string"]: provide a compiler-platform - identifier. - - :raises TransformationError: if the node is not a kernel or a routine. - :raises TransformationError: if the target is a built-in kernel. - :raises TransformationError: if it is a kernel but without an - associated PSyIR. - :raises TransformationError: if any of the symbols in the kernel are - accessed via a module use statement (and - are not compile-time constants). - :raises TransformationError: if the routine contains any CodeBlocks. - :raises TransformationError: if the kernel contains any calls to other - routines. 
- ''' - force = options.get("force", False) if options else False - device_string = options.get("device_string", "") if options else "" - - if not isinstance(node, (Kern, Routine)): - raise TransformationError( - f"The {type(self).__name__} must be applied to a sub-class of " - f"Kern or Routine but got '{type(node).__name__}'.") - - # If it is a kernel call it must have an accessible implementation - if isinstance(node, BuiltIn): - raise TransformationError( - f"Applying {type(self).__name__} to a built-in kernel is not " - f"yet supported and kernel '{node.name}' is of type " - f"'{type(node).__name__}'") - - if isinstance(node, Kern): - # Get the PSyIR routine from the associated kernel. If there is an - # exception (this could mean that there is no associated tree - # or that the frontend failed to convert it into PSyIR) reraise it - # as a TransformationError - try: - kernel_schedules = node.get_callees() - except Exception as error: - raise TransformationError( - f"Failed to create PSyIR for kernel '{node.name}'. " - f"Cannot transform such a kernel.") from error - - k_or_r = "Kernel" - else: - # Supplied node is a PSyIR Routine which *is* a Schedule. - kernel_schedules = [node] - k_or_r = "routine" - - # Check that the routine(s) do(oes) not access any data that is - # imported via a 'use' statement. - for sched in kernel_schedules: - vam = sched.reference_accesses() - ktable = sched.symbol_table - for sig in vam.all_signatures: - name = sig.var_name - first = vam[sig].all_accesses[0].node - if isinstance(first, (Symbol, DataType)): - table = ktable - else: - try: - table = first.scope.symbol_table - except SymbolError: - # The node associated with this access is not within a - # scoping region. - table = ktable - symbol = table.lookup(name) - if symbol.is_import: - # resolve_type does nothing if the Symbol type is known. - try: - symbol.resolve_type() - except (SymbolError, FileNotFoundError): - # TODO #11 - log that we failed to resolve this Symbol. 
- pass - if (isinstance(symbol, DataSymbol) and symbol.is_constant): - # An import of a compile-time constant is fine. - continue - raise TransformationError( - f"{k_or_r} '{node.name}' accesses the symbol " - f"'{symbol}' which is imported. If this symbol " - f"represents data then it must first be converted to a" - f" {k_or_r} argument using the " - f"KernelImportsToArguments transformation.") - - # We forbid CodeBlocks because we can't be certain that what they - # contain can be executed on a GPU. However, we do permit the user - # to override this check. - cblocks = sched.walk(CodeBlock) - if not force: - if cblocks: - cblock_txt = ("\n " + "\n ".join( - str(node) for node in cblocks[0].get_ast_nodes) - + "\n") - option_txt = "options={'force': True}" - raise TransformationError( - f"Cannot safely apply {type(self).__name__} to " - f"{k_or_r} '{node.name}' because its PSyIR contains " - f"one or more CodeBlocks:{cblock_txt}You may use " - f"'{option_txt}' to override this check.") - - for call in sched.walk(Call): - if not call.is_available_on_device(device_string): - if isinstance(call, IntrinsicCall): - if device_string: - device_str = (f"on the '{device_string}' " - f"accelerator device") - else: - device_str = "on the default accelerator device" - raise TransformationError( - f"{k_or_r} '{node.name}' calls intrinsic " - f"'{call.intrinsic.name}' which is not available " - f"{device_str}. Use the 'device_string' option to " - f"specify a different device." - ) - call_str = call.debug_string().rstrip("\n") - raise TransformationError( - f"{k_or_r} '{node.name}' calls another routine " - f"'{call_str}' which is not available on the " - f"accelerator device and therefore cannot have " - f"{type(self).__name__} applied to it (TODO #342).") - - -class OMPDeclareTargetTrans(Transformation, MarkRoutineForGPUMixin): - ''' - Adds an OpenMP declare target directive to the specified routine. 
- - For example: - - >>> from psyclone.psyir.frontend.fortran import FortranReader - >>> from psyclone.psyir.nodes import Loop - >>> from psyclone.transformations import OMPDeclareTargetTrans - >>> - >>> tree = FortranReader().psyir_from_source(""" - ... subroutine my_subroutine(A) - ... integer, dimension(10, 10), intent(inout) :: A - ... integer :: i - ... integer :: j - ... do i = 1, 10 - ... do j = 1, 10 - ... A(i, j) = 0 - ... end do - ... end do - ... end subroutine - ... """ - >>> omptargettrans = OMPDeclareTargetTrans() - >>> omptargettrans.apply(tree.walk(Routine)[0]) - - will generate: - - .. code-block:: fortran - - subroutine my_subroutine(A) - integer, dimension(10, 10), intent(inout) :: A - integer :: i - integer :: j - !$omp declare target - do i = 1, 10 - do j = 1, 10 - A(i, j) = 0 - end do - end do - end subroutine - - ''' - def apply(self, node, options=None): - ''' Insert an OMPDeclareTargetDirective inside the provided routine or - associated PSyKAl kernel. - - :param node: the kernel or routine which is the target of this - transformation. - :type node: :py:class:`psyclone.psyir.nodes.Routine` | - :py:class:`psyclone.psyGen.Kern` - :param options: a dictionary with options for transformations. - :type options: Optional[Dict[str, Any]] - :param bool options["force"]: whether to allow routines with - CodeBlocks to run on the GPU. - :param str options["device_string"]: provide a compiler-platform - identifier. - - ''' - self.validate(node, options) - - if isinstance(node, Kern): - # Flag that the kernel has been modified - node.modified = True - - # Get the schedule representing the kernel subroutine - routines = node.get_callees() - else: - routines = [node] - - for routine in routines: - if not any(isinstance(child, OMPDeclareTargetDirective) for - child in routine.children): - routine.children.insert(0, OMPDeclareTargetDirective()) - - def validate(self, node, options=None): - ''' Check that an OMPDeclareTargetDirective can be inserted. 
- - :param node: the kernel or routine which is the target of this - transformation. - :type node: :py:class:`psyclone.psyGen.Kern` | - :py:class:`psyclone.psyir.nodes.Routine` - :param options: a dictionary with options for transformations. - :type options: Optional[Dict[str, Any]] - :param bool options["force"]: whether to allow routines with - CodeBlocks to run on the GPU. - :param str options["device_string"]: provide a compiler-platform - identifier. - - :raises TransformationError: if the node is not a kernel or a routine. - :raises TransformationError: if the target is a built-in kernel. - :raises TransformationError: if it is a kernel but without an - associated PSyIR. - :raises TransformationError: if any of the symbols in the kernel are - accessed via a module use statement. - :raises TransformationError: if the kernel contains any calls to other - routines. - - ''' - super().validate(node, options=options) - - self.validate_it_can_run_on_gpu(node, options) - - -class ACCLoopTrans(ParallelLoopTrans): - ''' - Adds an OpenACC loop directive to a loop. This directive must be within - the scope of some OpenACC Parallel region (at code-generation time). - - For example: - - >>> from psyclone.parse.algorithm import parse - >>> from psyclone.parse.utils import ParseError - >>> from psyclone.psyGen import PSyFactory - >>> from psyclone.errors import GenerationError - >>> api = "gocean" - >>> ast, invokeInfo = parse(GOCEAN_SOURCE_FILE, api=api) - >>> psy = PSyFactory(api).create(invokeInfo) - >>> - >>> from psyclone.psyGen import TransInfo - >>> t = TransInfo() - >>> ltrans = t.get_trans_name('ACCLoopTrans') - >>> rtrans = t.get_trans_name('ACCParallelTrans') - >>> - >>> schedule = psy.invokes.get('invoke_0').schedule - >>> # Uncomment the following line to see a text view of the schedule - >>> # print(schedule.view()) - >>> - >>> # Apply the OpenACC Loop transformation to *every* loop in the schedule - >>> for child in schedule.children[:]: - ... 
ltrans.apply(child) - >>> - >>> # Enclose all of these loops within a single OpenACC parallel region - >>> rtrans.apply(schedule) - >>> - - ''' - # The types of node that must be excluded from the section of PSyIR - # being transformed. - excluded_node_types = (PSyDataNode,) - - def __init__(self): - # Whether to add the "independent" clause - # to the loop directive. - self._independent = True - self._sequential = False - self._gang = False - self._vector = False - super().__init__() - - def __str__(self): - return "Adds an 'OpenACC loop' directive to a loop" - - def _directive(self, children, collapse=None): - ''' - Creates the ACCLoopDirective needed by this sub-class of - transformation. - - :param children: list of child nodes of the new directive Node. - :type children: list of :py:class:`psyclone.psyir.nodes.Node` - :param int collapse: number of nested loops to collapse or None if - no collapse attribute is required. - ''' - directive = ACCLoopDirective(children=children, - collapse=collapse, - independent=self._independent, - sequential=self._sequential, - gang=self._gang, - vector=self._vector) - return directive - - def apply(self, node, options=None): - ''' - Apply the ACCLoop transformation to the specified node. This node - must be a Loop since this transformation corresponds to - inserting a directive immediately before a loop, e.g.: - - .. code-block:: fortran - - !$ACC LOOP - do ... - ... - end do - - At code-generation time (when lowering is called), - this node must be within (i.e. a child of) a PARALLEL region. - - :param node: the supplied node to which we will apply the - Loop transformation. - :type node: :py:class:`psyclone.psyir.nodes.Loop` - :param options: a dictionary with options for transformations. - :type options: Optional[Dict[str, Any]] - :param int options["collapse"]: number of nested loops to collapse. 
- :param bool options["independent"]: whether to add the "independent" - clause to the directive (not strictly necessary within - PARALLEL regions). - :param bool options["sequential"]: whether to add the "seq" clause to - the directive. - :param bool options["gang"]: whether to add the "gang" clause to the - directive. - :param bool options["vector"]: whether to add the "vector" clause to - the directive. - - ''' - # Store sub-class specific options. These are used when - # creating the directive (in the _directive() method). - if not options: - options = {} - self._independent = options.get("independent", True) - self._sequential = options.get("sequential", False) - self._gang = options.get("gang", False) - self._vector = options.get("vector", False) - - # Call the apply() method of the base class - super().apply(node, options) - - -class OMPParallelLoopTrans(OMPLoopTrans): - - ''' Adds an OpenMP PARALLEL DO directive to a loop. - - For example: - - >>> from psyclone.parse.algorithm import parse - >>> from psyclone.psyGen import PSyFactory - >>> ast, invokeInfo = parse("lfric.F90") - >>> psy = PSyFactory("lfric").create(invokeInfo) - >>> schedule = psy.invokes.get('invoke_v3_kernel_type').schedule - >>> # Uncomment the following line to see a text view of the schedule - >>> # print(schedule.view()) - >>> - >>> from psyclone.transformations import OMPParallelLoopTrans - >>> trans = OMPParallelLoopTrans() - >>> trans.apply(schedule.children[0]) - >>> # Uncomment the following line to see a text view of the schedule - >>> # print(schedule.view()) - - ''' - def __str__(self): - return "Add an 'OpenMP PARALLEL DO' directive" - - def apply(self, node, options=None): - ''' Apply an OMPParallelLoop Transformation to the supplied node - (which must be a Loop). In the generated code this corresponds to - wrapping the Loop with directives: - - .. code-block:: fortran - - !$OMP PARALLEL DO ... - do ... - ... 
- end do - !$OMP END PARALLEL DO - - :param node: the node (loop) to which to apply the transformation. - :type node: :py:class:`psyclone.psyir.nodes.Loop` - :param options: a dictionary with options for transformations - and validation. - :type options: Optional[Dict[str, Any]] - ''' - self.validate(node, options=options) - - # keep a reference to the node's original parent and its index as these - # are required and will change when we change the node's location - node_parent = node.parent - node_position = node.position - - # add our OpenMP loop directive setting its parent to the node's - # parent and its children to the node - directive = OMPParallelDoDirective(children=[node.detach()], - omp_schedule=self.omp_schedule) - - # add the OpenMP loop directive as a child of the node's parent - node_parent.addchild(directive, index=node_position) - - class LFRicOMPParallelLoopTrans(OMPParallelLoopTrans): ''' LFRic-specific OpenMP loop transformation. Adds LFRic specific @@ -2840,7 +2426,6 @@ def __new__(cls): __all__ = [ "ACCEnterDataTrans", "ACCDataTrans", - "ACCLoopTrans", "ACCParallelTrans", "ACCRoutineTrans", "ColourTrans",