Skip to content

Commit f8b58a1

Browse files
committed
updates for running benchmarks with HIP on AMD
1 parent 7498543 commit f8b58a1

File tree

4 files changed

+67
-58
lines changed

4 files changed

+67
-58
lines changed

kernel_tuner/backends/hip.py

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def __init__(self, device=0, iterations=7, compiler_options=None, observers=None
5858
self.name = self.hipProps._name.decode('utf-8')
5959
self.max_threads = self.hipProps.maxThreadsPerBlock
6060
self.device = device
61-
self.compiler_options = compiler_options
61+
self.compiler_options = compiler_options or []
6262
self.iterations = iterations
6363

6464
env = dict()
@@ -122,7 +122,7 @@ def ready_argument_list(self, arguments):
122122
class ArgListStructure(ctypes.Structure):
123123
_fields_ = [(f'field{i}', t) for i, t in enumerate(field_types)]
124124
def __getitem__(self, key):
125-
return self._fields_[key]
125+
return getattr(self, self._fields_[key][0])
126126

127127
return ArgListStructure(*ctype_args)
128128

@@ -146,19 +146,28 @@ def compile(self, kernel_instance):
146146
kernel_string = 'extern "C" {\n' + kernel_string + "\n}"
147147
kernel_ptr = hiprtc.hiprtcCreateProgram(kernel_string, kernel_name, [], [])
148148

149-
#Compile based on device (Not yet tested for non-AMD devices)
150-
plat = hip.hipGetPlatformName()
151-
if plat == "amd":
152-
hiprtc.hiprtcCompileProgram(
153-
kernel_ptr, [f'--offload-arch={self.hipProps.gcnArchName}'])
154-
else:
155-
hiprtc.hiprtcCompileProgram(kernel_ptr, [])
156-
157-
#Get module and kernel from compiled kernel string
158-
code = hiprtc.hiprtcGetCode(kernel_ptr)
159-
module = hip.hipModuleLoadData(code)
160-
self.current_module = module
161-
kernel = hip.hipModuleGetFunction(module, kernel_name)
149+
try:
150+
#Compile based on device (Not yet tested for non-AMD devices)
151+
plat = hip.hipGetPlatformName()
152+
if plat == "amd":
153+
options_list = [f'--offload-arch={self.hipProps.gcnArchName}']
154+
options_list.extend(self.compiler_options)
155+
hiprtc.hiprtcCompileProgram(kernel_ptr, options_list)
156+
else:
157+
options_list = []
158+
options_list.extend(self.compiler_options)
159+
hiprtc.hiprtcCompileProgram(kernel_ptr, options_list)
160+
161+
#Get module and kernel from compiled kernel string
162+
code = hiprtc.hiprtcGetCode(kernel_ptr)
163+
module = hip.hipModuleLoadData(code)
164+
self.current_module = module
165+
kernel = hip.hipModuleGetFunction(module, kernel_name)
166+
167+
except Exception as e:
168+
log = hiprtc.hiprtcGetProgramLog(kernel_ptr)
169+
print(log)
170+
raise e
162171

163172
return kernel
164173

kernel_tuner/core.py

Lines changed: 41 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -425,48 +425,48 @@ def compile_and_benchmark(self, kernel_source, gpu_args, params, kernel_options,
425425

426426
instance = self.create_kernel_instance(kernel_source, kernel_options, params, verbose)
427427
if isinstance(instance, util.ErrorConfig):
428-
return instance
429-
430-
try:
431-
# compile the kernel
432-
start_compilation = time.perf_counter()
433-
func = self.compile_kernel(instance, verbose)
434-
if not func:
435-
result[to.objective] = util.CompilationFailedConfig()
436-
else:
437-
# add shared memory arguments to compiled module
438-
if kernel_options.smem_args is not None:
439-
self.dev.copy_shared_memory_args(util.get_smem_args(kernel_options.smem_args, params))
440-
# add constant memory arguments to compiled module
441-
if kernel_options.cmem_args is not None:
442-
self.dev.copy_constant_memory_args(kernel_options.cmem_args)
443-
# add texture memory arguments to compiled module
444-
if kernel_options.texmem_args is not None:
445-
self.dev.copy_texture_memory_args(kernel_options.texmem_args)
446-
447-
# stop compilation stopwatch and convert to milliseconds
448-
last_compilation_time = 1000 * (time.perf_counter() - start_compilation)
449-
450-
# test kernel for correctness
451-
if func and (to.answer or to.verify):
452-
start_verification = time.perf_counter()
453-
self.check_kernel_output(func, gpu_args, instance, to.answer, to.atol, to.verify, verbose)
454-
last_verification_time = 1000 * (time.perf_counter() - start_verification)
455-
456-
# benchmark
457-
if func:
458-
start_benchmark = time.perf_counter()
459-
result.update(self.benchmark(func, gpu_args, instance, verbose, to.objective))
460-
last_benchmark_time = 1000 * (time.perf_counter() - start_benchmark)
461-
462-
except Exception as e:
463-
# dump kernel sources to temp file
464-
temp_filenames = instance.prepare_temp_files_for_error_msg()
465-
print("Error while compiling or benchmarking, see source files: " + " ".join(temp_filenames))
466-
raise e
428+
result[to.objective] = util.InvalidConfig()
429+
else:
430+
try:
431+
# compile the kernel
432+
start_compilation = time.perf_counter()
433+
func = self.compile_kernel(instance, verbose)
434+
if not func:
435+
result[to.objective] = util.CompilationFailedConfig()
436+
else:
437+
# add shared memory arguments to compiled module
438+
if kernel_options.smem_args is not None:
439+
self.dev.copy_shared_memory_args(util.get_smem_args(kernel_options.smem_args, params))
440+
# add constant memory arguments to compiled module
441+
if kernel_options.cmem_args is not None:
442+
self.dev.copy_constant_memory_args(kernel_options.cmem_args)
443+
# add texture memory arguments to compiled module
444+
if kernel_options.texmem_args is not None:
445+
self.dev.copy_texture_memory_args(kernel_options.texmem_args)
446+
447+
# stop compilation stopwatch and convert to milliseconds
448+
last_compilation_time = 1000 * (time.perf_counter() - start_compilation)
449+
450+
# test kernel for correctness
451+
if func and (to.answer or to.verify):
452+
start_verification = time.perf_counter()
453+
self.check_kernel_output(func, gpu_args, instance, to.answer, to.atol, to.verify, verbose)
454+
last_verification_time = 1000 * (time.perf_counter() - start_verification)
455+
456+
# benchmark
457+
if func:
458+
start_benchmark = time.perf_counter()
459+
result.update(self.benchmark(func, gpu_args, instance, verbose, to.objective))
460+
last_benchmark_time = 1000 * (time.perf_counter() - start_benchmark)
461+
462+
except Exception as e:
463+
# dump kernel sources to temp file
464+
temp_filenames = instance.prepare_temp_files_for_error_msg()
465+
print("Error while compiling or benchmarking, see source files: " + " ".join(temp_filenames))
466+
raise e
467467

468-
#clean up any temporary files, if no error occurred
469-
instance.delete_temp_files()
468+
#clean up any temporary files, if no error occurred
469+
instance.delete_temp_files()
470470

471471
result['compile_time'] = last_compilation_time or 0
472472
result['verification_time'] = last_verification_time or 0

test/test_c_functions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ def test_compile(npct, subprocess):
143143
print(npct.mock_calls)
144144
print(f)
145145

146-
assert len(subprocess.mock_calls) == 6
146+
assert len(subprocess.mock_calls) == 8
147147
assert npct.load_library.called == 1
148148

149149
args, _ = npct.load_library.call_args_list[0]

test/test_hip_functions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ class ArgListStructure(ctypes.Structure):
5555
("field2", ctypes.POINTER(ctypes.c_float)),
5656
("field3", ctypes.c_bool)]
5757
def __getitem__(self, key):
58-
return self._fields_[key]
58+
return getattr(self, self._fields_[key][0])
5959

6060
dev = kt_hip.HipFunctions(0)
6161
gpu_args = dev.ready_argument_list(arguments)

0 commit comments

Comments
 (0)