Skip to content

Commit e161efb

Browse files
Small changes to the reduction example
1 parent 09827b4 commit e161efb

File tree

1 file changed

+6
-2
lines changed

1 file changed

+6
-2
lines changed

examples/cuda/reduction.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,10 @@ def tune():
1313
tune_params["block_size_x"] = [2**i for i in range(5,11)]
1414
tune_params["use_shuffle"] = [0, 1]
1515
tune_params["vector"] = [2**i for i in range(3)]
16-
tune_params["num_blocks"] = [2**i for i in range(5,11)]
16+
tune_params["num_blocks"] = [2**i for i in range(5,16)]
1717

1818
problem_size = "num_blocks"
19-
size = 80000000
19+
size = 800000000
2020
max_blocks = max(tune_params["num_blocks"])
2121

2222
x = numpy.random.rand(size).astype(numpy.float32)
@@ -43,6 +43,7 @@ def verify_partial_reduce(cpu_result, gpu_result, atol=None):
4343
tune_params["num_blocks"] = [1]
4444
second_kernel = dict()
4545
for nblocks in num_blocks:
46+
print('nblocks:', nblocks)
4647
#change the input size to nblocks
4748
args = [sum_x, x, numpy.int32(nblocks)]
4849
#tune the second kernel with n=nblocks
@@ -58,8 +59,11 @@ def verify_partial_reduce(cpu_result, gpu_result, atol=None):
5859
for i, instance in enumerate(first_kernel):
5960
first_kernel[i]["total"] = instance["time"] + second_kernel[instance["num_blocks"]]["time"]
6061

62+
first_config = min(first_kernel, key=lambda x:x['time'])
6163
best_config = min(first_kernel, key=lambda x:x['total'])
6264

65+
print("Best performing config first kernel only: \n" + get_config_string(first_config))
66+
6367
print("Best performing config: \n" + get_config_string(best_config))
6468
print("uses the following config for the secondary kernel:")
6569
print(get_config_string(second_kernel[best_config["num_blocks"]]))

0 commit comments

Comments
 (0)