Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,15 @@
// Group async copy from the local address space into the global address
// space, instantiated once per __CLC_GENTYPE.
//
//   scope        - execution scope; not consulted by this implementation.
//   dst          - destination buffer (global address space).
//   src          - source buffer (local address space).
//   num_gentypes - element count; not referenced directly in this body,
//                  presumably consumed inside STRIDED_COPY (TODO confirm
//                  against the macro definition).
//   stride       - forwarded as the third STRIDED_COPY argument while the
//                  fourth is fixed at 1; presumably these are the destination
//                  and source strides respectively (TODO confirm).
//   event        - handed back to the caller unchanged.
_CLC_OVERLOAD _CLC_DEF event_t __spirv_GroupAsyncCopy(
    int scope, global __CLC_GENTYPE *dst, const local __CLC_GENTYPE *src,
    size_t num_gentypes, size_t stride, event_t event) {
  // The parameter is part of the builtin's signature but is not needed here;
  // the cast silences the unused-parameter warning.
  (void)scope;
  STRIDED_COPY(global, local, stride, 1);
  return event;
}

// Group async copy from the global address space into the local address
// space, instantiated once per __CLC_GENTYPE.
//
// `scope` is not consulted. `stride` is forwarded as the fourth STRIDED_COPY
// argument while the third is fixed at 1; presumably these are the destination
// and source strides respectively (TODO confirm against the macro definition).
// `num_gentypes` is not referenced directly here and is presumably consumed
// inside STRIDED_COPY. The incoming `event` is returned unchanged.
_CLC_OVERLOAD _CLC_DEF event_t __spirv_GroupAsyncCopy(
int scope, local __CLC_GENTYPE *dst, const global __CLC_GENTYPE *src,
size_t num_gentypes, size_t stride, event_t event) {
// Silences the unused-parameter warning for `scope`.
(void)scope;
STRIDED_COPY(local, global, 1, stride);
return event;
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ int __clc_nvvm_reflect_arch();
int scope, __attribute__((address_space(1))) TYPE *dst, \
const __attribute__((address_space(3))) TYPE *src, size_t num_gentypes, \
size_t stride, event_t event) { \
(void)scope; \
STRIDED_COPY(__attribute__((address_space(1))), \
__attribute__((address_space(3))), stride, 1); \
return event; \
Expand Down Expand Up @@ -64,6 +65,7 @@ __CLC_GROUP_CP_ASYNC_DST_GLOBAL(uchar16);
int scope, __attribute__((address_space(3))) TYPE *dst, \
const __attribute__((address_space(1))) TYPE *src, size_t num_gentypes, \
size_t stride, event_t event) { \
(void)scope; \
if (__clc_nvvm_reflect_arch() >= 800) { \
size_t id, size; \
SET_GROUP_SIZE_AND_ID(size, id); \
Expand Down Expand Up @@ -94,6 +96,7 @@ __CLC_GROUP_CP_ASYNC_4(uchar4);
int scope, __attribute__((address_space(3))) TYPE *dst, \
const __attribute__((address_space(1))) TYPE *src, size_t num_gentypes, \
size_t stride, event_t event) { \
(void)scope; \
if (__clc_nvvm_reflect_arch() >= 800) { \
size_t id, size; \
SET_GROUP_SIZE_AND_ID(size, id); \
Expand Down Expand Up @@ -127,6 +130,7 @@ __CLC_GROUP_CP_ASYNC_8(uchar8);
int scope, __attribute__((address_space(3))) TYPE *dst, \
const __attribute__((address_space(1))) TYPE *src, size_t num_gentypes, \
size_t stride, event_t event) { \
(void)scope; \
if (__clc_nvvm_reflect_arch() >= 800) { \
size_t id, size; \
SET_GROUP_SIZE_AND_ID(size, id); \
Expand Down
2 changes: 2 additions & 0 deletions libclc/libspirv/lib/ptx-nvidiacl/async/wait_group_events.cl
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ int __clc_nvvm_reflect_arch();
_CLC_OVERLOAD _CLC_DEF void __spirv_GroupWaitEvents(int scope,
int num_events,
event_t *event_list) {
(void)num_events;
(void)event_list;
if (__clc_nvvm_reflect_arch() >= 800) {
__nvvm_cp_async_wait_all();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ _CLC_OVERLOAD _CLC_DECL void __spirv_MemoryBarrier(int, int);
_CLC_OVERLOAD _CLC_DEF TYPE __spirv_Atomic##OP_MANGLED( \
ADDR_SPACE TYPE *pointer, int scope, int semantics1, int semantics2, \
TYPE cmp, TYPE value) { \
(void)semantics2; \
/* Semantics mask may include memory order, storage class and other info \
Memory order is stored in the lowest 5 bits */ \
unsigned int order = semantics1 & 0x1F; \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,15 @@ _CLC_DEF static bool __clc_nvvm_is_global(generic void *ptr) {
// Generates the two overloads of __spirv_GenericCastToPtrExplicit_To##NAME
// for the address space ADDRSPACE.
//
// - The non-const overload asks __clc_nvvm_is_##ADDRSPACE(ptr) whether the
//   generic pointer belongs to ADDRSPACE. If so it returns the pointer cast
//   to `ADDRSPACE void *`; otherwise it returns 0 (a null pointer).
// - The const overload drops the const qualifier, forwards `ptr` and `unused`
//   to the non-const overload, and returns its result as
//   `ADDRSPACE const void *`.
//
// The second parameter is not consulted by the non-const overload; the
// (void) casts silence unused-parameter warnings.
#define GenericCastToPtrExplicit_To(ADDRSPACE, NAME) \
_CLC_OVERLOAD _CLC_DEF ADDRSPACE void * \
__spirv_GenericCastToPtrExplicit_To##NAME(generic void *ptr, int unused) { \
(void)unused; \
if (__clc_nvvm_is_##ADDRSPACE(ptr)) \
return (ADDRSPACE void *)ptr; \
return 0; \
} \
_CLC_OVERLOAD _CLC_DEF ADDRSPACE const void * \
__spirv_GenericCastToPtrExplicit_To##NAME(generic const void *ptr, \
int unused) { \
(void)unused; \
return __spirv_GenericCastToPtrExplicit_To##NAME((generic void *)ptr, \
unused); \
}
Expand Down
14 changes: 10 additions & 4 deletions libclc/libspirv/lib/ptx-nvidiacl/group/collectives.cl
Original file line number Diff line number Diff line change
Expand Up @@ -205,12 +205,14 @@ __clc__SubgroupShuffleUp(complex_double x, uint delta) {
// Currently only Reduce is required (for GroupAny and GroupAll)
// Sub-group vote built on __nvvm_vote_any_sync over the mask returned by
// __clc__membermask(). The vote outcome is written through `carry` and also
// returned. `op` is accepted for signature compatibility and is not consulted.
_CLC_DEF _CLC_OVERLOAD _CLC_CONVERGENT bool
__clc__SubgroupBitwiseOr(int op, bool predicate, bool *carry) {
  (void)op;
  const bool any_set = __nvvm_vote_any_sync(__clc__membermask(), predicate);
  return (*carry = any_set);
}
_CLC_DEF _CLC_OVERLOAD _CLC_CONVERGENT bool
__clc__SubgroupBitwiseAny(int op, bool predicate, bool *carry) {
(void)op;
bool result = __nvvm_vote_all_sync(__clc__membermask(), predicate);
*carry = result;
return result;
Expand Down Expand Up @@ -300,7 +302,7 @@ complex_double __muldc3(double a, double b, double c, double d) {
#define __CLC_SUBGROUP_COLLECTIVE_BODY(OP, TYPE, IDENTITY) \
uint sg_lid = __spirv_BuiltInSubgroupLocalInvocationId(); \
/* Can't use XOR/butterfly shuffles; some lanes may be inactive */ \
for (int o = 1; o < __spirv_BuiltInSubgroupMaxSize(); o *= 2) { \
for (uint o = 1; o < __spirv_BuiltInSubgroupMaxSize(); o *= 2) { \
TYPE contribution = __clc__SubgroupShuffleUp(x, o); \
bool inactive = (sg_lid < o); \
contribution = (inactive) ? IDENTITY : contribution; \
Expand All @@ -322,6 +324,9 @@ complex_double __muldc3(double a, double b, double c, double d) {
if (sg_lid == 0) { \
result = IDENTITY; \
} \
} else { \
/* TODO: Not implemented yet */ \
result = (TYPE){0}; \
} \
return result;

Expand Down Expand Up @@ -453,7 +458,7 @@ __CLC_SUBGROUP_COLLECTIVE(LogicalAndKHR, __CLC_LOGICAL_AND, bool, true)
/* Perform InclusiveScan over sub-group results */ \
TYPE sg_prefix; \
TYPE sg_aggregate = scratch[0]; \
for (int s = 1; s < num_sg; ++s) { \
for (uint s = 1; s < num_sg; ++s) { \
if (sg_id == s) { \
sg_prefix = sg_aggregate; \
} \
Expand All @@ -471,6 +476,9 @@ __CLC_SUBGROUP_COLLECTIVE(LogicalAndKHR, __CLC_LOGICAL_AND, bool, true)
} else { \
result = OP(sg_x, sg_prefix); \
} \
} else { \
Comment thread
Maetveis marked this conversation as resolved.
/* TODO: Not implemented yet */ \
result = (TYPE){0}; \
} \
__spirv_ControlBarrier(Workgroup, 0, 0); \
return result;
Expand Down Expand Up @@ -614,14 +622,12 @@ __CLC_GROUP_COLLECTIVE(LogicalAndKHR, __CLC_LOGICAL_AND, bool, true)

// Flattens a 2D local id into a linear index with x varying fastest:
//   id.y * size_x + id.x
// where size_x is __spirv_BuiltInWorkgroupSize(0). Only the x extent is
// needed for a 2D linearisation, so the y extent is not queried.
long __clc__2d_to_linear_local_id(ulong2 id) {
  size_t size_x = __spirv_BuiltInWorkgroupSize(0);
  return (id.y * size_x + id.x);
}

// Flattens a 3D local id into a linear index with x varying fastest:
//   id.z * size_y * size_x + id.y * size_x + id.x
// where size_x and size_y are __spirv_BuiltInWorkgroupSize(0) and (1). The z
// extent does not appear in the formula, so it is not queried.
long __clc__3d_to_linear_local_id(ulong3 id) {
  size_t size_x = __spirv_BuiltInWorkgroupSize(0);
  size_t size_y = __spirv_BuiltInWorkgroupSize(1);
  return (id.z * size_y * size_x + id.y * size_x + id.x);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ __spirv_GroupNonUniformBallot(int flag, bool predicate) {
_CLC_OVERLOAD _CLC_DEF _CLC_CONVERGENT uint
__spirv_GroupNonUniformBallotBitCount(
int scope, int flag, __clc_vec4_uint32_t mask) {
(void)scope;
// here we assume scope == __spv::Scope::Subgroup
// flag == InclusiveScan is not yet implemented
if (flag == Reduce) {
Expand Down
13 changes: 11 additions & 2 deletions libclc/libspirv/lib/ptx-nvidiacl/images/image.cl
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ void __nvvm_sust_3d_v4i32_clamp(write_only image3d_t, int, int, int, int, int,
int __nvvm_suq_width(long) __asm("llvm.nvvm.suq.width");
int __nvvm_suq_height(long) __asm("llvm.nvvm.suq.height");
// Stand-in for the surface depth query. Unlike the width/height queries it is
// not bound to an intrinsic: it ignores its argument and always reports -1.
__attribute__((always_inline)) int __nvvm_suq_depth(long arg) {
  // suq.depth generates runtime errors in CUDA
  const int depth_unavailable = -1;
  (void)arg;
  return depth_unavailable;
}
Expand All @@ -158,7 +159,10 @@ int __nvvm_suq_width_2i(read_only image2d_t) __asm("llvm.nvvm.suq.width");
int __nvvm_suq_width_3i(read_only image3d_t) __asm("llvm.nvvm.suq.width");
int __nvvm_suq_height_2i(read_only image2d_t) __asm("llvm.nvvm.suq.height");
int __nvvm_suq_height_3i(read_only image3d_t) __asm("llvm.nvvm.suq.height");
// Depth query stub for read-only 3D images: the image argument is ignored and
// -1 is always returned. Defined exactly once; the (void) cast silences the
// unused-parameter warning.
int __nvvm_suq_depth_3i(read_only image3d_t arg) {
  (void)arg;
  return -1;
}

// Helpers

Expand Down Expand Up @@ -960,6 +964,8 @@ _DEFINE_SAMPLED_LOADS(half, 16)
elem_t##4 _Z30__spirv_ImageSampleExplicitLodI32__spirv_SampledImage__image##dims##d_roDv4_##elem_t_mangled##input_coord_t_mangled##ET0_T_T1_if( \
__ocl_sampled_image##dims##d_ro_t sampled_image, \
input_coord_t input_coord, int operands, float lod) { \
(void)operands; \
(void)lod; \
long image = __clc__sampled_image##dims##d_unpack_image(sampled_image); \
int sampler = __clc__sampled_image##dims##d_unpack_sampler(sampled_image); \
/* Sampling algorithms are implemented assuming an \
Expand Down Expand Up @@ -2576,7 +2582,7 @@ __nvvm_tex_2d_v4f16_f32(unsigned long imageHandle, float x, float y) {

// 3D texture fetch returning half4: performs the float4 3D fetch through
// __nvvm_tex_3d_v4f32_f32 using all three coordinates, then narrows the result
// with cast_float4_to_half4.
__attribute__((always_inline)) half4
__nvvm_tex_3d_v4f16_f32(unsigned long imageHandle, float x, float y, float z) {
  // The 3D variant must be used here; a 1D fetch would ignore y and z.
  float4 a = __nvvm_tex_3d_v4f32_f32(imageHandle, x, y, z);
  return cast_float4_to_half4(a);
}

Expand Down Expand Up @@ -3633,6 +3639,7 @@ _CLC_DEFINE_MIPMAP_BINDLESS_THUNK_READS_BUILTIN(half, 3, f16, v4f32,
30, __spirv_ImageSampleExplicitLod, I, \
elem_t_mangled##coord_mangled##ET0_T_T1_if)( \
ulong imageHandle, coord_input, int type, float level) { \
(void)type; \
return __nvvm_tex_##dimension##d_level_##vec_size##_f32( \
imageHandle, coord_parameter, level); \
} \
Expand All @@ -3641,6 +3648,7 @@ _CLC_DEFINE_MIPMAP_BINDLESS_THUNK_READS_BUILTIN(half, 3, f16, v4f32,
elem_t_mangled##coord_mangled##ET0_T_T1_i##grad_mangled)( \
ulong imageHandle, coord_input, int type, float##grad_input dX, \
float##grad_input dY) { \
(void)type; \
return __nvvm_tex_##dimension##d_grad_##vec_size##_f32( \
imageHandle, coord_parameter, __VA_ARGS__); \
}
Expand Down Expand Up @@ -3875,6 +3883,7 @@ _CLC_DEFINE_MIPMAP_BINDLESS_READS_BUILTIN(half4, 3, Dv4_Dh, v4f16, Dv3_f,
// Gradient-sampled 3D image read returning half4, defined under a hand-written
// mangled symbol name (presumably the Itanium mangling of
// __spirv_ImageSampleExplicitLod for this signature — TODO confirm).
//
// `type` is not consulted. The coordinate is passed via COORD_PARAMS_3D
// (presumably expanding to the x/y/z components of `coord` — TODO confirm
// against the macro), followed by the three components of dX and then dY,
// to __nvvm_tex_3d_grad_v4f16_f32.
_CLC_DEF half4
_Z30__spirv_ImageSampleExplicitLodImDv4_DF16_Dv3_fET0_T_T1_iS4_S4_(
ulong imageHandle, float3 coord, int type, float3 dX, float3 dY) {
// Silences the unused-parameter warning for `type`.
(void)type;
return __nvvm_tex_3d_grad_v4f16_f32(imageHandle, COORD_PARAMS_3D, dX.x, dX.y,
dX.z, dY.x, dY.y, dY.z);
}
Expand Down
Loading