Skip to content

Commit b2ee7e7

Browse files
authored
Cooperative groups: add a boundscheck to avoid confusing inexact errors. (#2631)
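Selecting an inactive lane results in fns(mask, lane) returning -1, which previously surfaced as a confusing inexact error; the shuffle functions now perform a bounds check and throw a BoundsError instead.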
1 parent 159345f commit b2ee7e7

File tree

1 file changed

+3
-0
lines changed

1 file changed

+3
-0
lines changed

src/device/intrinsics/cooperative_groups.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -429,6 +429,7 @@ function shfl(cg::coalesced_group, elem, src_rank)
429429
else
430430
CUDA.fns(cg.mask, 0, src_rank) + 1i32
431431
end
432+
@boundscheck lane > 0 || throw(BoundsError(cg, src_rank))
432433

433434
shfl_sync(cg.mask, elem, lane)
434435
end
@@ -439,6 +440,7 @@ function shfl_down(cg::coalesced_group, elem, delta)
439440
end
440441

441442
lane = CUDA.fns(cg.mask, laneid() - 1i32, delta + 1i32) + 1i32
443+
@boundscheck lane > 0 || throw(BoundsError(cg, laneid()+delta))
442444
if lane > 32
443445
lane = laneid()
444446
end
@@ -452,6 +454,7 @@ function shfl_up(cg::coalesced_group, elem, delta)
452454
end
453455

454456
lane = CUDA.fns(cg.mask, laneid() - 1i32, -(delta + 1i32)) + 1i32
457+
@boundscheck lane > 0 || throw(BoundsError(cg, laneid()-delta))
455458
if lane > 32
456459
lane = laneid()
457460
end

0 commit comments

Comments
 (0)