Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 66 additions & 0 deletions unified-runtime/include/unified-runtime/ur_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,8 @@ typedef enum ur_function_t {
UR_FUNCTION_ENQUEUE_HOST_TASK_EXP = 309,
/// Enumerator for ::urCommandBufferAppendKernelLaunchWithArgsExp
UR_FUNCTION_COMMAND_BUFFER_APPEND_KERNEL_LAUNCH_WITH_ARGS_EXP = 310,
/// Enumerator for ::urKernelGetSuggestedLocalWorkSizeWithArgs
UR_FUNCTION_KERNEL_GET_SUGGESTED_LOCAL_WORK_SIZE_WITH_ARGS = 311,
/// @cond
UR_FUNCTION_FORCE_UINT32 = 0x7fffffff
/// @endcond
Expand Down Expand Up @@ -9501,6 +9503,55 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize(
/// suggested local work size that will contain the result of the query
size_t *pSuggestedLocalWorkSize);

///////////////////////////////////////////////////////////////////////////////
/// @brief Set kernel args and get the suggested local work size for a kernel.
///
/// @details
/// - Query a suggested local work size for a kernel given a global size for
/// each dimension.
/// - The kernel arguments are described by the `numArgs` entries of `pArgs`;
/// `pArgs` may be NULL only when `numArgs` is 0.
/// - The application may call this function from simultaneous threads for
/// the same context.
///
/// @returns
/// - ::UR_RESULT_SUCCESS
/// - ::UR_RESULT_ERROR_UNINITIALIZED
/// - ::UR_RESULT_ERROR_DEVICE_LOST
/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
/// + `NULL == hKernel`
/// + `NULL == hQueue`
/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
/// + `NULL == pGlobalWorkOffset`
/// + `NULL == pGlobalWorkSize`
/// + `NULL == pSuggestedLocalWorkSize`
/// + `pArgs == NULL && numArgs > 0`
/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION
/// + `NULL != pArgs && ::UR_EXP_KERNEL_ARG_TYPE_SAMPLER < pArgs->type`
/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE
UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSizeWithArgs(
/// [in] handle of the kernel
ur_kernel_handle_t hKernel,
/// [in] handle of the queue object
ur_queue_handle_t hQueue,
/// [in] number of dimensions, from 1 to 3, to specify the global
/// and work-group work-items
uint32_t numWorkDim,
/// [in] pointer to an array of numWorkDim unsigned values that specify
/// the offset used to calculate the global ID of a work-item
const size_t *pGlobalWorkOffset,
/// [in] pointer to an array of numWorkDim unsigned values that specify
/// the number of global work-items in each of the numWorkDim dimensions
/// that will execute the kernel function
const size_t *pGlobalWorkSize,
/// [in] Number of entries in pArgs
uint32_t numArgs,
/// [in][optional][range(0, numArgs)] pointer to a list of kernel arg
/// properties.
const ur_exp_kernel_arg_properties_t *pArgs,
/// [out] pointer to an array of numWorkDim unsigned values that specify
/// suggested local work size that will contain the result of the query
size_t *pSuggestedLocalWorkSize);

///////////////////////////////////////////////////////////////////////////////
/// @brief Query the maximum number of work groups for a cooperative kernel
///
Expand Down Expand Up @@ -14580,6 +14631,21 @@ typedef struct ur_kernel_get_suggested_local_work_size_params_t {
size_t **ppSuggestedLocalWorkSize;
} ur_kernel_get_suggested_local_work_size_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Function parameters for urKernelGetSuggestedLocalWorkSizeWithArgs
/// @details Each entry is a pointer to the parameter passed to the function;
/// allowing the callback the ability to modify the parameter's value.
/// Fields appear in the same order as the function's parameters.
typedef struct ur_kernel_get_suggested_local_work_size_with_args_params_t {
/// Pointer to the hKernel parameter
ur_kernel_handle_t *phKernel;
/// Pointer to the hQueue parameter
ur_queue_handle_t *phQueue;
/// Pointer to the numWorkDim parameter
uint32_t *pnumWorkDim;
/// Pointer to the pGlobalWorkOffset parameter
const size_t **ppGlobalWorkOffset;
/// Pointer to the pGlobalWorkSize parameter
const size_t **ppGlobalWorkSize;
/// Pointer to the numArgs parameter
uint32_t *pnumArgs;
/// Pointer to the pArgs parameter
const ur_exp_kernel_arg_properties_t **ppArgs;
/// Pointer to the pSuggestedLocalWorkSize parameter
size_t **ppSuggestedLocalWorkSize;
} ur_kernel_get_suggested_local_work_size_with_args_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Function parameters for urKernelSetArgValue
/// @details Each entry is a pointer to the parameter passed to the function;
Expand Down
1 change: 1 addition & 0 deletions unified-runtime/include/unified-runtime/ur_api_funcs.def
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ _UR_API(urKernelRelease)
_UR_API(urKernelGetNativeHandle)
_UR_API(urKernelCreateWithNativeHandle)
_UR_API(urKernelGetSuggestedLocalWorkSize)
_UR_API(urKernelGetSuggestedLocalWorkSizeWithArgs)
_UR_API(urKernelSetArgValue)
_UR_API(urKernelSetArgLocal)
_UR_API(urKernelSetArgPointer)
Expand Down
9 changes: 9 additions & 0 deletions unified-runtime/include/unified-runtime/ur_ddi.h
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,13 @@ typedef ur_result_t(UR_APICALL *ur_pfnKernelGetSuggestedLocalWorkSize_t)(
ur_kernel_handle_t, ur_queue_handle_t, uint32_t, const size_t *,
const size_t *, size_t *);

///////////////////////////////////////////////////////////////////////////////
/// @brief Function-pointer for urKernelGetSuggestedLocalWorkSizeWithArgs
/// @details Positional parameters, in order: hKernel, hQueue, numWorkDim,
/// pGlobalWorkOffset, pGlobalWorkSize, numArgs, pArgs,
/// pSuggestedLocalWorkSize.
typedef ur_result_t(
UR_APICALL *ur_pfnKernelGetSuggestedLocalWorkSizeWithArgs_t)(
ur_kernel_handle_t, ur_queue_handle_t, uint32_t, const size_t *,
const size_t *, uint32_t, const ur_exp_kernel_arg_properties_t *, size_t *);

///////////////////////////////////////////////////////////////////////////////
/// @brief Function-pointer for urKernelSetArgValue
typedef ur_result_t(UR_APICALL *ur_pfnKernelSetArgValue_t)(
Expand Down Expand Up @@ -580,6 +587,8 @@ typedef struct ur_kernel_dditable_t {
ur_pfnKernelGetNativeHandle_t pfnGetNativeHandle;
ur_pfnKernelCreateWithNativeHandle_t pfnCreateWithNativeHandle;
ur_pfnKernelGetSuggestedLocalWorkSize_t pfnGetSuggestedLocalWorkSize;
ur_pfnKernelGetSuggestedLocalWorkSizeWithArgs_t
pfnGetSuggestedLocalWorkSizeWithArgs;
ur_pfnKernelSetArgValue_t pfnSetArgValue;
ur_pfnKernelSetArgLocal_t pfnSetArgLocal;
ur_pfnKernelSetArgPointer_t pfnSetArgPointer;
Expand Down
13 changes: 13 additions & 0 deletions unified-runtime/include/unified-runtime/ur_print.h
Original file line number Diff line number Diff line change
Expand Up @@ -2169,6 +2169,19 @@ urPrintKernelGetSuggestedLocalWorkSizeParams(
const struct ur_kernel_get_suggested_local_work_size_params_t *params,
char *buffer, const size_t buff_size, size_t *out_size);

///////////////////////////////////////////////////////////////////////////////
/// @brief Print ur_kernel_get_suggested_local_work_size_with_args_params_t
/// struct
/// @details NOTE(review): the implementation is not visible here. Presumably
/// the text is written into `buffer` (capacity `buff_size`) and the
/// required size is reported through `out_size` - confirm against the
/// implementation.
/// @returns
/// - ::UR_RESULT_SUCCESS
/// - ::UR_RESULT_ERROR_INVALID_SIZE
/// - `buff_size < out_size`
UR_APIEXPORT ur_result_t UR_APICALL
urPrintKernelGetSuggestedLocalWorkSizeWithArgsParams(
const struct ur_kernel_get_suggested_local_work_size_with_args_params_t
*params,
char *buffer, const size_t buff_size, size_t *out_size);

///////////////////////////////////////////////////////////////////////////////
/// @brief Print ur_kernel_set_arg_value_params_t struct
/// @returns
Expand Down
68 changes: 68 additions & 0 deletions unified-runtime/include/unified-runtime/ur_print.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1373,6 +1373,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_function_t value) {
case UR_FUNCTION_COMMAND_BUFFER_APPEND_KERNEL_LAUNCH_WITH_ARGS_EXP:
os << "UR_FUNCTION_COMMAND_BUFFER_APPEND_KERNEL_LAUNCH_WITH_ARGS_EXP";
break;
case UR_FUNCTION_KERNEL_GET_SUGGESTED_LOCAL_WORK_SIZE_WITH_ARGS:
os << "UR_FUNCTION_KERNEL_GET_SUGGESTED_LOCAL_WORK_SIZE_WITH_ARGS";
break;
default:
os << "unknown enumerator";
break;
Expand Down Expand Up @@ -14923,6 +14926,67 @@ operator<<(std::ostream &os, [[maybe_unused]] const struct
return os;
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Print operator for the
/// ur_kernel_get_suggested_local_work_size_with_args_params_t type
/// @returns
/// std::ostream &
inline std::ostream &
operator<<(std::ostream &os, [[maybe_unused]] const struct
ur_kernel_get_suggested_local_work_size_with_args_params_t *params) {

os << ".hKernel = ";

ur::details::printPtr(os, *(params->phKernel));

os << ", ";
os << ".hQueue = ";

ur::details::printPtr(os, *(params->phQueue));

os << ", ";
os << ".numWorkDim = ";

os << *(params->pnumWorkDim);

os << ", ";
os << ".pGlobalWorkOffset = ";

ur::details::printPtr(os, *(params->ppGlobalWorkOffset));

os << ", ";
os << ".pGlobalWorkSize = ";

ur::details::printPtr(os, *(params->ppGlobalWorkSize));

os << ", ";
os << ".numArgs = ";

os << *(params->pnumArgs);

os << ", ";
os << ".pArgs = ";
ur::details::printPtr(os, reinterpret_cast<const void *>(*(params->ppArgs)));
if (*(params->ppArgs) != NULL) {
os << " {";
for (size_t i = 0; i < *params->pnumArgs; ++i) {
if (i != 0) {
os << ", ";
}

os << (*(params->ppArgs))[i];
}
os << "}";
}

os << ", ";
os << ".pSuggestedLocalWorkSize = ";

ur::details::printPtr(os, *(params->ppSuggestedLocalWorkSize));

return os;
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Print operator for the ur_kernel_set_arg_value_params_t type
/// @returns
Expand Down Expand Up @@ -22582,6 +22646,10 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os,
os << (const struct ur_kernel_get_suggested_local_work_size_params_t *)
params;
} break;
case UR_FUNCTION_KERNEL_GET_SUGGESTED_LOCAL_WORK_SIZE_WITH_ARGS: {
os << (const struct
ur_kernel_get_suggested_local_work_size_with_args_params_t *)params;
} break;
case UR_FUNCTION_KERNEL_SET_ARG_VALUE: {
os << (const struct ur_kernel_set_arg_value_params_t *)params;
} break;
Expand Down
49 changes: 49 additions & 0 deletions unified-runtime/scripts/core/kernel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,55 @@ returns:
- $X_RESULT_ERROR_UNSUPPORTED_FEATURE
--- #--------------------------------------------------------------------------
# Spec entry for $xKernelGetSuggestedLocalWorkSizeWithArgs.
# The parameter list is the work-size query (hKernel, hQueue, numWorkDim,
# pGlobalWorkOffset, pGlobalWorkSize, pSuggestedLocalWorkSize) with numArgs and
# pArgs inserted before the output pointer.
type: function
desc: "Set kernel args and get the suggested local work size for a kernel."
class: $xKernel
name: GetSuggestedLocalWorkSizeWithArgs
ordinal: "0"
details:
- "Query a suggested local work size for a kernel given a global size for each dimension."
- "The application may call this function from simultaneous threads for the same context."
params:
- type: $x_kernel_handle_t
name: hKernel
desc: |
[in] handle of the kernel
- type: $x_queue_handle_t
name: hQueue
desc: |
[in] handle of the queue object
- type: uint32_t
name: numWorkDim
desc: |
[in] number of dimensions, from 1 to 3, to specify the global
and work-group work-items
- type: const size_t*
name: pGlobalWorkOffset
desc: |
[in] pointer to an array of numWorkDim unsigned values that specify
the offset used to calculate the global ID of a work-item
- type: const size_t*
name: pGlobalWorkSize
desc: |
[in] pointer to an array of numWorkDim unsigned values that specify
the number of global work-items in workDim that will execute the
kernel function
- type: uint32_t
name: numArgs
desc: "[in] Number of entries in pArgs"
- type: "const $x_exp_kernel_arg_properties_t*"
name: pArgs
desc: "[in][optional][range(0, numArgs)] pointer to a list of kernel arg properties."
- type: size_t*
name: pSuggestedLocalWorkSize
desc: |
[out] pointer to an array of numWorkDim unsigned values that specify
suggested local work size that will contain the result of the query
returns:
- $X_RESULT_ERROR_INVALID_NULL_POINTER:
- "`pArgs == NULL && numArgs > 0`"
- $X_RESULT_ERROR_UNSUPPORTED_FEATURE
--- #--------------------------------------------------------------------------
type: function
desc: "Query the maximum number of work groups for a cooperative kernel"
class: $xKernel
name: SuggestMaxCooperativeGroupCount
Expand Down
5 changes: 4 additions & 1 deletion unified-runtime/scripts/core/registry.yml
Original file line number Diff line number Diff line change
Expand Up @@ -730,7 +730,10 @@ etors:
- name: COMMAND_BUFFER_APPEND_KERNEL_LAUNCH_WITH_ARGS_EXP
desc: Enumerator for $xCommandBufferAppendKernelLaunchWithArgsExp
value: '310'
max_id: '310'
- name: KERNEL_GET_SUGGESTED_LOCAL_WORK_SIZE_WITH_ARGS
desc: Enumerator for $xKernelGetSuggestedLocalWorkSizeWithArgs
value: '311'
max_id: '311'
---
type: enum
desc: Defines structure types
Expand Down
11 changes: 11 additions & 0 deletions unified-runtime/source/adapters/cuda/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize(
return UR_RESULT_SUCCESS;
}

/// Variant of urKernelGetSuggestedLocalWorkSize that also accepts a list of
/// kernel arguments. numArgs and pArgs are not used here; the query is
/// forwarded unchanged to urKernelGetSuggestedLocalWorkSize and its result is
/// returned as-is.
UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSizeWithArgs(
    ur_kernel_handle_t hKernel, ur_queue_handle_t hQueue, uint32_t workDim,
    const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
    [[maybe_unused]] uint32_t numArgs,
    [[maybe_unused]] const ur_exp_kernel_arg_properties_t *pArgs,
    size_t *pSuggestedLocalWorkSize) {
  const ur_result_t Result = urKernelGetSuggestedLocalWorkSize(
      hKernel, hQueue, workDim, pGlobalWorkOffset, pGlobalWorkSize,
      pSuggestedLocalWorkSize);
  return Result;
}

UR_APIEXPORT ur_result_t UR_APICALL urKernelSetSpecializationConstants(
ur_kernel_handle_t, uint32_t, const ur_specialization_constant_info_t *) {
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable(
pDdiTable->pfnSetExecInfo = urKernelSetExecInfo;
pDdiTable->pfnSetSpecializationConstants = urKernelSetSpecializationConstants;
pDdiTable->pfnGetSuggestedLocalWorkSize = urKernelGetSuggestedLocalWorkSize;
pDdiTable->pfnGetSuggestedLocalWorkSizeWithArgs =
urKernelGetSuggestedLocalWorkSizeWithArgs;
pDdiTable->pfnSuggestMaxCooperativeGroupCount =
urKernelSuggestMaxCooperativeGroupCount;
return UR_RESULT_SUCCESS;
Expand Down
11 changes: 11 additions & 0 deletions unified-runtime/source/adapters/hip/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -373,3 +373,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSize(
pSuggestedLocalWorkSize);
return UR_RESULT_SUCCESS;
}

/// Variant of urKernelGetSuggestedLocalWorkSize that also accepts a list of
/// kernel arguments. numArgs and pArgs are not used here; the query is
/// forwarded unchanged to urKernelGetSuggestedLocalWorkSize and its result is
/// returned as-is.
UR_APIEXPORT ur_result_t UR_APICALL urKernelGetSuggestedLocalWorkSizeWithArgs(
    ur_kernel_handle_t hKernel, ur_queue_handle_t hQueue, uint32_t workDim,
    const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
    [[maybe_unused]] uint32_t numArgs,
    [[maybe_unused]] const ur_exp_kernel_arg_properties_t *pArgs,
    size_t *pSuggestedLocalWorkSize) {
  const ur_result_t Result = urKernelGetSuggestedLocalWorkSize(
      hKernel, hQueue, workDim, pGlobalWorkOffset, pGlobalWorkSize,
      pSuggestedLocalWorkSize);
  return Result;
}
2 changes: 2 additions & 0 deletions unified-runtime/source/adapters/hip/ur_interface_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetKernelProcAddrTable(
pDdiTable->pfnSetExecInfo = urKernelSetExecInfo;
pDdiTable->pfnSetSpecializationConstants = urKernelSetSpecializationConstants;
pDdiTable->pfnGetSuggestedLocalWorkSize = urKernelGetSuggestedLocalWorkSize;
pDdiTable->pfnGetSuggestedLocalWorkSizeWithArgs =
urKernelGetSuggestedLocalWorkSizeWithArgs;
pDdiTable->pfnSuggestMaxCooperativeGroupCount =
urKernelSuggestMaxCooperativeGroupCount;
return UR_RESULT_SUCCESS;
Expand Down
11 changes: 11 additions & 0 deletions unified-runtime/source/adapters/level_zero/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,17 @@ ur_result_t urKernelGetSuggestedLocalWorkSize(
return UR_RESULT_SUCCESS;
}

/// Variant of urKernelGetSuggestedLocalWorkSize that also accepts a list of
/// kernel arguments. numArgs and pArgs are not used here; the query is
/// forwarded unchanged to ur::level_zero::urKernelGetSuggestedLocalWorkSize
/// and its result is returned as-is.
ur_result_t urKernelGetSuggestedLocalWorkSizeWithArgs(
    ur_kernel_handle_t hKernel, ur_queue_handle_t hQueue, uint32_t workDim,
    const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize,
    [[maybe_unused]] uint32_t numArgs,
    [[maybe_unused]] const ur_exp_kernel_arg_properties_t *pArgs,
    size_t *pSuggestedLocalWorkSize) {
  const ur_result_t Result = ur::level_zero::urKernelGetSuggestedLocalWorkSize(
      hKernel, hQueue, workDim, pGlobalWorkOffset, pGlobalWorkSize,
      pSuggestedLocalWorkSize);
  return Result;
}

ur_result_t urKernelSetArgValueHelper(
ur_kernel_handle_t Kernel,
/// [in] argument index in range [0, num args - 1]
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading