
Commit 2552ccb

[WARNING]: Should be available anywhere but local branches; explicit transfers work
1 parent 6b97b14 commit 2552ccb

File tree

5 files changed: +89 −64 lines changed


pyop2/configuration.py

Lines changed: 5 additions & 0 deletions
@@ -71,6 +71,10 @@ class Configuration(dict):
     cdim > 1 be built as block sparsities, or dof sparsities. The
     former saves memory but changes which preconditioners are
     available for the resulting matrices. (Default yes)
+    :param only_explicit_host_device_data_transfers: Flag selecting the
+        host<->device transfer mode. If *True*, the user must invoke every
+        host<->device transfer explicitly. If *False* (the default), Firedrake
+        works out the data transfers automatically, which may be sub-optimal.
     """
     # name, env variable, type, default, write once
     DEFAULTS = {
@@ -112,6 +116,7 @@ class Configuration(dict):
         "print_summary": ("PYOP2_PRINT_SUMMARY", bool, False),
         "matnest": ("PYOP2_MATNEST", bool, True),
         "block_sparsity": ("PYOP2_BLOCK_SPARSITY", bool, True),
+        "only_explicit_host_device_data_transfers": ("EXPLICIT_TRNSFRS", bool, False)
     }
     """Default values for PyOP2 configuration parameters"""

pyop2/gpu/cuda.py

Lines changed: 7 additions & 56 deletions
@@ -119,19 +119,6 @@ class Dat(petsc_Dat):
     """
     Dat for GPU.
     """
-    @validate_type(('dataset', (base.DataCarrier, DataSet, Set), DataSetTypeError),
-                   ('name', str, NameTypeError))
-    @validate_dtype(('dtype', None, DataTypeError))
-    def __init__(self, dataset, data=None, dtype=None, name=None, uid=None):
-
-        if isinstance(dataset, petsc_Dat) and not isinstance(dataset, Dat):
-            self.__init__(dataset.dataset, None, dtype=dataset.dtype,
-                          name="copy_of_%s" % dataset.name)
-            self._data[...] = dataset.data
-            return
-
-        super(Dat, self).__init__(dataset, data, dtype, name, uid)
-
     @cached_property
     def _vec(self):
         assert self.dtype == PETSc.ScalarType, \
@@ -149,47 +136,6 @@ def _vec(self):
 
         return cuda_vec
 
-    @cached_property
-    def device_handle(self):
-        if self.dtype == PETSc.ScalarType:
-            with self.vec as v:
-                return v.getCUDAHandle()
-        elif self.dtype == PETSc.IntType:
-            m_gpu = cuda.mem_alloc(int(self._data.nbytes))
-            cuda.memcpy_htod(m_gpu, self._data)
-            return m_gpu
-        else:
-            raise NotImplementedError("Unknown type: %s." % self.dtype)
-
-    @cached_property
-    def _kernel_args_(self):
-        return (self.device_handle, )
-
-    @collective
-    @property
-    def data(self):
-
-        with self.vec as v:
-            v.restoreCUDAHandle(self.device_handle)
-            return v.array
-
-    # TODO: fail when trying to access elems from data_ro
-    @collective
-    @property
-    def data_ro(self):
-        with self.vec_ro as v:
-            v.restoreCUDAHandle(self.device_handle)
-            return v.array
-
-
-    def move_to_host(self):
-        with self.vec_ro as v:
-            v.restoreCUDAHandle(self.device_handle)
-            self._data = v.array
-
-        return petsc_Dat(self._dataset, self._data, self.dtype, 'copy_of_%s' %
-                         self.name)
-
 
 class Global(petsc_Global):
 
@@ -226,6 +172,7 @@ def __init__(self, kernel, iterset, *args, **kwargs):
         otherwise they (and the :class:`~.Dat`\s, :class:`~.Map`\s
         and :class:`~.Mat`\s they reference) will never be collected.
         """
+
         # Return early if we were in the cache.
         if self._initialized:
             return
@@ -326,6 +273,12 @@ def ith_added_global_arg_i(self, i):
 
     @collective
     def __call__(self, *args):
+        # FIXME: Should probably get rid of this once the implementation is
+        # finalized.
+        from pyop2.op2 import device
+        import pyop2.gpu.cuda
+        assert device.target == pyop2.gpu.cuda
+
         if self._initialized:
             grid, block = self.grid_size(args[0], args[1])
             extra_global_args = self.get_args_marked_for_globals
@@ -417,13 +370,11 @@ def argtypes(self):
         argtypes = (index_type, index_type)
         argtypes += self._iterset._argtypes_
         for arg in self._args:
-            assert isinstance(arg.data, Dat)
             argtypes += arg._argtypes_
         seen = set()
         for arg in self._args:
             maps = arg.map_tuple
             for map_ in maps:
-                assert isinstance(map_, Map)
                 for k, t in zip(map_._kernel_args_, map_._argtypes_):
                     if k in seen:
                         continue

pyop2/op2.py

Lines changed: 3 additions & 0 deletions
@@ -135,6 +135,9 @@ def __getattr__(self, attr):
     def set(self, target):
         self.target = target
 
+    def copy(self):
+        return Target(self.target)
+
     def __repr__(self):
         return "Target(%r)" % self.target
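
Note: a sketch of how Target.copy might be used to save and restore the
active target (illustrative; it assumes the op2-level `device` Target object
imported elsewhere in this commit):

    from pyop2.op2 import device
    import pyop2.gpu.cuda

    snapshot = device.copy()      # detached copy: later set() calls on
    device.set(pyop2.gpu.cuda)    # `device` do not mutate `snapshot`
    # ... launch GPU parloops ...
    device.set(snapshot.target)   # restore the previous target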

pyop2/petsc_base.py

Lines changed: 68 additions & 7 deletions
@@ -45,6 +45,7 @@
 from pyop2.base import _make_object, Subset
 from pyop2.mpi import collective
 from pyop2.profiling import timed_region
+from pyop2.configuration import configuration
 
 
 class DataSet(base.DataSet):
@@ -333,6 +334,18 @@ def vec_ro(self):
 
 
 class Dat(base.Dat, VecAccessMixin):
+
+    def __init__(self, *args, **kwargs):
+        super(Dat, self).__init__(*args, **kwargs)
+
+        import pyop2.sequential
+        import pyop2.gpu.cuda
+
+        self._PETSC_VEC_TYPE_TO_TARGET_ = {
+            'seq': pyop2.sequential,
+            'seqcuda': pyop2.gpu.cuda,
+        }
+
     @utils.cached_property
     def _vec(self):
         print(75*'=')
@@ -363,17 +376,65 @@ def vec_context(self, access):
             self.halo_valid = False
 
     def move_to_device(self, device):
-        """
-        Returns an instance of :class:`pyop2.base.Dat`, which is located on
-        device.
-        """
-        from pyop2.op2 import Target
+        from pyop2 import gpu
+        from pyop2.op2 import Target, host
         if isinstance(device, Target):
             device = device.target
-        return device.Dat(self)
+        with self.vec as petsc_vec:
+
+            if self._PETSC_VEC_TYPE_TO_TARGET_[petsc_vec.type] == device:
+                return
+
+            print("We want {} to {}.".format(petsc_vec.type, device))
+
+            if device == host.target:
+                self.move_to_host()
+            elif device == gpu.cuda:
+                host_data = petsc_vec.array.copy()
+                petsc_vec.setType('seqcuda')
+                petsc_vec.setArray(host_data)
+            elif device == gpu.opencl:
+                raise NotImplementedError("OpenCL target not yet implemented")
+            else:
+                raise NotImplementedError("Unknown target '%s'." % device)
 
     def move_to_host(self):
-        raise ValueError("Cannot transfer Dat to host which is already on host.")
+        with self.vec as petsc_vec:
+            size = self.dataset.layout_vec.getSizes()
+            # FIXME: This is probably more involved
+            self._data = self.data.copy()
+            petsc_vec.setType('seq')
+            petsc_vec.setArray(self._data[:size[0]])
+
+    @property
+    def _kernel_args_(self):
+        # FIXME: This should be a cached_property, but because of the in-place
+        # updates this is no longer possible; improve the caching mechanism.
+        from pyop2.op2 import device
+        with self.vec as petsc_vec:
+            if self._PETSC_VEC_TYPE_TO_TARGET_[petsc_vec.type] != device.target:
+                if configuration['only_explicit_host_device_data_transfers']:
+                    raise RuntimeError("Memory location mismatch.")
+                else:
+                    self.move_to_device(device)
+            if petsc_vec.type == 'seq':
+                return (self._data.ctypes.data, )
+            elif petsc_vec.type == 'seqcuda':
+                return (petsc_vec.getCUDAHandle(), )
+            else:
+                raise NotImplementedError()
+
+    @collective
+    @property
+    def data(self):
+        with self.vec as v:
+            if v.type == 'seq':
+                return v.array
+            elif v.type == 'seqcuda':
+                v.restoreCUDAHandle(v.getCUDAHandle())
+                return v.array
+            else:
+                raise NotImplementedError("Unknown vec type %s." % v.type)
 
 
 class MixedDat(base.MixedDat, VecAccessMixin):
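
Note: the move_to_device()/move_to_host() pair above boils down to retyping
the underlying PETSc Vec. A standalone sketch of that pattern (illustrative;
assumes petsc4py against a CUDA-enabled PETSc build):

    import numpy as np
    from petsc4py import PETSc

    vec = PETSc.Vec().createSeq(4)
    vec.setArray(np.arange(4, dtype=PETSc.ScalarType))

    # Host -> device: stash the host data, retype, write it back.
    # setArray on a 'seqcuda' Vec uploads the values; getCUDAHandle()
    # then exposes the device pointer that _kernel_args_ passes to the
    # generated CUDA wrapper.
    host_data = vec.array.copy()
    vec.setType('seqcuda')
    vec.setArray(host_data)
    handle = vec.getCUDAHandle()
    vec.restoreCUDAHandle(handle)

    # Device -> host: the same dance in reverse, mirroring move_to_host().
    device_data = vec.array.copy()  # accessing .array syncs device -> host
    vec.setType('seq')
    vec.setArray(device_data)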

pyop2/sequential.py

Lines changed: 6 additions & 1 deletion
@@ -84,7 +84,6 @@ def __init__(self, kernel, iterset, *args, **kwargs):
         and :class:`~.Mat`\s they reference) will never be collected.
         """
         # Return early if we were in the cache.
-
         if self._initialized:
             return
         self.comm = iterset.comm
@@ -104,6 +103,10 @@ def __init__(self, kernel, iterset, *args, **kwargs):
 
     @collective
     def __call__(self, *args):
+        # FIXME: Should probably get rid of this once the implementation is
+        # finalized.
+        from pyop2.op2 import device, host
+        assert host == device
         return self._fun(*args)
 
     @cached_property
@@ -112,6 +115,8 @@ def _wrapper_name(self):
 
     @cached_property
     def code_to_compile(self):
+        if self._wrapper_name == 'wrap_copy':
+            import pudb; pu.db
         print(75*"=")
         print("pyop2.sequential:115: Compiling %s on Host." % self._wrapper_name)
         print(75*"=")
