diff --git a/cmake/Modules/CUDA-QX.cmake b/cmake/Modules/CUDA-QX.cmake index c3011b35..6e206ada 100644 --- a/cmake/Modules/CUDA-QX.cmake +++ b/cmake/Modules/CUDA-QX.cmake @@ -187,6 +187,16 @@ function(cudaqx_import_cudaq_targets) find_package(CUDAQPlatformDefault REQUIRED CONFIG) find_package(CUDAQPythonInterop CONFIG) + # Realtime-enabled CUDA-Q installs export device_call targets that reference + # cudaq::cudaq-realtime. Since CUDA-QX intentionally imports CUDA-Q targets + # without loading CUDAQConfig.cmake, preload the sibling realtime package when + # it is present so CUDAQTargets.cmake does not fail on the missing target. + if(NOT TARGET cudaq::cudaq-realtime) + find_package(cudaq-realtime CONFIG QUIET + PATHS "${_cudaq_parent_dir}/cudaq-realtime" + NO_DEFAULT_PATH) + endif() + # Import the CUDA-Q library target without loading CUDAQConfig.cmake, which # enables the CUDAQ CMake language in current CUDA-Q installs. if(NOT TARGET cudaq::cudaq) diff --git a/libs/qec/lib/realtime/CMakeLists.txt b/libs/qec/lib/realtime/CMakeLists.txt index 6bc8c4b1..5d3bec48 100644 --- a/libs/qec/lib/realtime/CMakeLists.txt +++ b/libs/qec/lib/realtime/CMakeLists.txt @@ -22,6 +22,9 @@ if(CMAKE_CUDA_COMPILER) if(CUDAQ_INSTALL_PREFIX) list(APPEND _cudaq_realtime_prefixes "${CUDAQ_INSTALL_PREFIX}") endif() + if(CUDAQ_INSTALL_DIR) + list(APPEND _cudaq_realtime_prefixes "${CUDAQ_INSTALL_DIR}") + endif() find_path(CUDAQ_REALTIME_INCLUDE_DIR NAMES cudaq/realtime/daemon/dispatcher/cudaq_realtime.h @@ -246,3 +249,81 @@ endif() add_subdirectory(quantinuum) add_subdirectory(simulation) + +set(_cudaq_device_call_prefixes "") +if(CUDAQ_REALTIME_ROOT) + list(APPEND _cudaq_device_call_prefixes "${CUDAQ_REALTIME_ROOT}") + list(APPEND _cudaq_device_call_prefixes "${CUDAQ_REALTIME_ROOT}/build") +endif() +if(CUDAQ_INSTALL_PREFIX) + list(APPEND _cudaq_device_call_prefixes "${CUDAQ_INSTALL_PREFIX}") +endif() +if(CUDAQ_INSTALL_DIR) + list(APPEND _cudaq_device_call_prefixes "${CUDAQ_INSTALL_DIR}") +endif() + +if(CUDAQ_REALTIME_INCLUDE_DIR AND NOT CUDAQ_DEVICE_CALL_INCLUDE_DIR) + find_path(CUDAQ_DEVICE_CALL_INCLUDE_DIR + NAMES cudaq_internal/device_call/DeviceCallService.h + HINTS ${_cudaq_device_call_prefixes} + PATH_SUFFIXES include runtime/internal/device_call/include + ) + if(CUDAQ_DEVICE_CALL_INCLUDE_DIR) + message(STATUS "Found cuda-quantum device_call headers at ${CUDAQ_DEVICE_CALL_INCLUDE_DIR}") + endif() +endif() + +if(CUDAQ_REALTIME_INCLUDE_DIR AND NOT CUDAQ_DEVICE_CALL_INCLUDE_DIR) + foreach(_device_call_headers_target IN ITEMS + cudaq::cudaq-device-call-runtime-headers + cudaq-device-call-runtime-headers + cudaq::cudaq-device-call-runtime + cudaq-device-call-runtime) + if(TARGET ${_device_call_headers_target}) + get_target_property(_device_call_include_dirs + ${_device_call_headers_target} INTERFACE_INCLUDE_DIRECTORIES) + foreach(_device_call_include_dir IN LISTS _device_call_include_dirs) + if(_device_call_include_dir MATCHES "^\\$$") + set(_device_call_include_dir "${CMAKE_MATCH_1}") + elseif(_device_call_include_dir MATCHES "^\\$$" + AND CUDAQ_INSTALL_DIR) + set(_device_call_include_dir + "${CUDAQ_INSTALL_DIR}/${CMAKE_MATCH_1}") + endif() + if(EXISTS "${_device_call_include_dir}/cudaq_internal/device_call/DeviceCallService.h") + set(CUDAQ_DEVICE_CALL_INCLUDE_DIR "${_device_call_include_dir}") + message(STATUS "Found cuda-quantum device_call headers from target ${_device_call_headers_target} at ${CUDAQ_DEVICE_CALL_INCLUDE_DIR}") + break() + endif() + endforeach() + endif() + if(CUDAQ_DEVICE_CALL_INCLUDE_DIR) + break() + endif() + endforeach() +endif() + +set(CUDAQ_DEVICE_CALL_RUNTIME_LINK "") +if(TARGET cudaq::cudaq-device-call-runtime) + set(CUDAQ_DEVICE_CALL_RUNTIME_LINK cudaq::cudaq-device-call-runtime) + message(STATUS "Found cuda-quantum device_call runtime target cudaq::cudaq-device-call-runtime") +elseif(TARGET cudaq-device-call-runtime) + set(CUDAQ_DEVICE_CALL_RUNTIME_LINK cudaq-device-call-runtime) + message(STATUS "Found cuda-quantum device_call runtime target cudaq-device-call-runtime") +else() + if(CUDAQ_REALTIME_INCLUDE_DIR AND NOT CUDAQ_DEVICE_CALL_RUNTIME_LIBRARY) + find_library(CUDAQ_DEVICE_CALL_RUNTIME_LIBRARY + NAMES cudaq-device-call-runtime + PATHS ${_cudaq_device_call_prefixes} + PATH_SUFFIXES lib + ) + if(CUDAQ_DEVICE_CALL_RUNTIME_LIBRARY) + message(STATUS "Found cuda-quantum device_call runtime library at ${CUDAQ_DEVICE_CALL_RUNTIME_LIBRARY}") + endif() + endif() + if(CUDAQ_DEVICE_CALL_RUNTIME_LIBRARY) + set(CUDAQ_DEVICE_CALL_RUNTIME_LINK ${CUDAQ_DEVICE_CALL_RUNTIME_LIBRARY}) + endif() +endif() + +add_subdirectory(host_dispatch) diff --git a/libs/qec/lib/realtime/host_dispatch/CMakeLists.txt b/libs/qec/lib/realtime/host_dispatch/CMakeLists.txt new file mode 100644 index 00000000..68223e3d --- /dev/null +++ b/libs/qec/lib/realtime/host_dispatch/CMakeLists.txt @@ -0,0 +1,69 @@ +# ============================================================================ # +# Copyright (c) 2026 NVIDIA Corporation & Affiliates. # +# All rights reserved. # +# # +# This source code and the accompanying materials are made available under # +# the terms of the Apache License 2.0 which accompanies this distribution. # +# ============================================================================ # + +if(NOT CUDAQ_REALTIME_INCLUDE_DIR OR NOT CUDAQ_DEVICE_CALL_INCLUDE_DIR + OR NOT CUDAQ_DEVICE_CALL_RUNTIME_LINK) + message(WARNING "cuda-quantum realtime/device_call headers or runtime not found. " + "Skipping cudaq-qec-realtime-decoding-host-dispatch.") + return() +endif() + +add_library(cudaq-qec-realtime-decoding-host-dispatch SHARED + host_dispatch_service.cpp +) + +target_compile_options(cudaq-qec-realtime-decoding-host-dispatch + PRIVATE -fvisibility=hidden -fvisibility-inlines-hidden) + +cudaqx_add_device_code(cudaq-qec-realtime-decoding-host-dispatch + SOURCES + host_dispatch_device.cpp + COMPILER_FLAGS + -frealtime-lowering) + +target_include_directories(cudaq-qec-realtime-decoding-host-dispatch + PUBLIC + $ + $ + $ + PRIVATE + ${CUDAQ_REALTIME_INCLUDE_DIR} + ${CUDAQ_DEVICE_CALL_INCLUDE_DIR} +) + +target_link_options(cudaq-qec-realtime-decoding-host-dispatch PUBLIC + $<$:-Wl,--exclude-libs,ALL> +) + +target_link_libraries(cudaq-qec-realtime-decoding-host-dispatch + PUBLIC + cudaqx-core + cudaq-qec + cudaq-qec-realtime-decoding + PRIVATE + cudaq::cudaq-common + CUDA::cudart + ${CUDAQ_DEVICE_CALL_RUNTIME_LINK} + $<$:${CUDAQ_REALTIME_LIBRARY}> + $<$:${CUDAQ_REALTIME_HOST_DISPATCH_LIBRARY}> +) + +set_target_properties(cudaq-qec-realtime-decoding-host-dispatch PROPERTIES + LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) + +if(_CUDAQ_REALTIME_LIB_DIR) + set_target_properties(cudaq-qec-realtime-decoding-host-dispatch PROPERTIES + BUILD_RPATH "${_CUDAQ_REALTIME_LIB_DIR};${CMAKE_BINARY_DIR}/lib" + INSTALL_RPATH "${_CUDAQ_REALTIME_LIB_DIR};${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}" + ) +endif() + +install(TARGETS cudaq-qec-realtime-decoding-host-dispatch + COMPONENT qec-lib + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} +) diff --git a/libs/qec/lib/realtime/host_dispatch/host_dispatch_device.cpp b/libs/qec/lib/realtime/host_dispatch/host_dispatch_device.cpp new file mode 100644 index 00000000..e8089894 --- /dev/null +++ b/libs/qec/lib/realtime/host_dispatch/host_dispatch_device.cpp @@ -0,0 +1,77 @@ +/****************************************************************-*- C++ -*-**** + * Copyright (c) 2026 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +#include "cudaq.h" +#include "cudaq/qec/realtime/decoding.h" + +#include +#include +#include +#include + +extern "C" std::uint64_t +qec_enqueue_syndromes_ui64([[maybe_unused]] std::uint64_t decoder_id, + [[maybe_unused]] std::uint64_t syndrome_size, + [[maybe_unused]] std::uint64_t syndrome, + [[maybe_unused]] std::uint64_t tag) { + std::abort(); +} + +extern "C" std::uint64_t +qec_get_corrections_ui64([[maybe_unused]] std::uint64_t decoder_id, + [[maybe_unused]] std::uint64_t return_size, + [[maybe_unused]] std::uint64_t reset) { + std::abort(); +} + +extern "C" std::uint64_t +qec_reset_decoder_ui64([[maybe_unused]] std::uint64_t decoder_id) { + std::abort(); +} + +namespace cudaq::qec::decoding { + +__qpu__ void +enqueue_syndromes(std::uint64_t decoder_id, + const std::vector &syndromes, + std::uint64_t tag) { + const std::uint64_t syndrome_size = syndromes.size(); + const std::uint64_t syndrome = cudaq::to_integer(cudaq::to_bools(syndromes)); + // The ignored ack keeps host-dispatch from treating this as fire-and-forget. + (void)cudaq::device_call(0, qec_enqueue_syndromes_ui64, decoder_id, + syndrome_size, syndrome, tag); +} + +__qpu__ void enqueue_syndromes_test(std::uint64_t decoder_id, + const std::vector &syndromes, + std::uint64_t tag) { + const std::uint64_t syndrome_size = syndromes.size(); + const std::uint64_t syndrome = cudaq::to_integer(syndromes); + // The ignored ack keeps host-dispatch from treating this as fire-and-forget. + (void)cudaq::device_call(0, qec_enqueue_syndromes_ui64, decoder_id, + syndrome_size, syndrome, tag); +} + +__qpu__ std::vector get_corrections(std::uint64_t decoder_id, + std::uint64_t return_size, + bool reset) { + std::vector result(return_size); + const auto packed = + cudaq::device_call(0, qec_get_corrections_ui64, decoder_id, return_size, + static_cast(reset)); + for (std::size_t i = 0; i < return_size; ++i) + result[i] = (packed >> i) & 1; + return result; +} + +__qpu__ void reset_decoder(std::uint64_t decoder_id) { + // The ignored ack keeps host-dispatch from treating this as fire-and-forget. + (void)cudaq::device_call(0, qec_reset_decoder_ui64, decoder_id); +} + +} // namespace cudaq::qec::decoding diff --git a/libs/qec/lib/realtime/host_dispatch/host_dispatch_service.cpp b/libs/qec/lib/realtime/host_dispatch/host_dispatch_service.cpp new file mode 100644 index 00000000..790e0d91 --- /dev/null +++ b/libs/qec/lib/realtime/host_dispatch/host_dispatch_service.cpp @@ -0,0 +1,231 @@ +/******************************************************************************* + * Copyright (c) 2026 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +#include "../realtime_decoding.h" + +#include "cudaq_internal/device_call/DeviceCallError.h" +#include "cudaq_internal/device_call/DeviceCallService.h" +#include "cudaq/realtime/daemon/dispatcher/cudaq_realtime.h" +#include "cudaq/realtime/daemon/dispatcher/dispatch_kernel_launch.h" + +#include +#include +#include +#include +#include + +namespace { + +using cudaq::qec::decoding::host::enqueue_syndromes; +using cudaq::qec::decoding::host::get_corrections; +using cudaq::qec::decoding::host::reset_decoder; + +constexpr std::uint32_t kEnqueueSyndromesUi64FunctionId = + cudaq::realtime::fnv1a_hash("qec_enqueue_syndromes_ui64"); +constexpr std::uint32_t kGetCorrectionsUi64FunctionId = + cudaq::realtime::fnv1a_hash("qec_get_corrections_ui64"); +constexpr std::uint32_t kResetDecoderUi64FunctionId = + cudaq::realtime::fnv1a_hash("qec_reset_decoder_ui64"); +constexpr std::int32_t kInvalidArgumentStatus = + cudaq_internal::device_call::toAbiStatus( + cudaq_internal::device_call::DeviceCallStatus::InvalidArgument); +constexpr std::int32_t kRemoteErrorStatus = + cudaq_internal::device_call::toAbiStatus( + cudaq_internal::device_call::DeviceCallStatus::RemoteError); + +struct EnqueueSyndromesUi64Payload { + std::uint64_t decoder_id; + std::uint64_t syndrome_size; + std::uint64_t syndrome; + std::uint64_t tag; +}; + +struct GetCorrectionsUi64Payload { + std::uint64_t decoder_id; + std::uint64_t return_size; + std::uint64_t reset; +}; + +struct ResetDecoderUi64Payload { + std::uint64_t decoder_id; +}; + +static_assert(sizeof(EnqueueSyndromesUi64Payload) == 32); +static_assert(sizeof(GetCorrectionsUi64Payload) == 24); +static_assert(sizeof(ResetDecoderUi64Payload) == 8); + +cudaq::realtime::RPCResponse * +start_response(const cudaq::realtime::RPCHeader *request, void *tx_slot) { + auto *response = static_cast(tx_slot); + std::memset(response, 0, sizeof(cudaq::realtime::RPCResponse)); + response->request_id = request ? request->request_id : 0; + response->ptp_timestamp = request ? request->ptp_timestamp : 0; + return response; +} + +void finish_response(cudaq::realtime::RPCResponse *response, + std::int32_t status, std::uint32_t result_len = 0) { + response->status = status; + response->result_len = status == 0 ? result_len : 0; + __atomic_store_n(&response->magic, cudaq::realtime::RPC_MAGIC_RESPONSE, + __ATOMIC_RELEASE); +} + +void write_uint64_result(void *tx_slot, std::uint64_t result) { + std::memcpy(static_cast(tx_slot) + + sizeof(cudaq::realtime::RPCResponse), + &result, sizeof(result)); +} + +bool has_uint64_result_capacity(std::size_t slot_size) { + return slot_size >= + sizeof(cudaq::realtime::RPCResponse) + sizeof(std::uint64_t); +} + +template +const Payload *get_payload(const cudaq::realtime::RPCHeader *request) { + if (!request || request->magic != cudaq::realtime::RPC_MAGIC_REQUEST || + request->arg_len != sizeof(Payload)) + return nullptr; + return reinterpret_cast(request + 1); +} + +void qec_enqueue_syndromes_ui64_host(const void *rx_slot, void *tx_slot, + std::size_t slot_size) { + const auto *request = + static_cast(rx_slot); + auto *response = start_response(request, tx_slot); + const auto *payload = get_payload(request); + if (!payload || payload->syndrome_size > 64 || + !has_uint64_result_capacity(slot_size)) { + finish_response(response, kInvalidArgumentStatus); + return; + } + + try { + std::vector syndromes(payload->syndrome_size); + for (std::uint64_t bit = 0; bit < payload->syndrome_size; ++bit) + syndromes[bit] = (payload->syndrome >> bit) & 1u; + enqueue_syndromes(static_cast(payload->decoder_id), + syndromes.data(), payload->syndrome_size, payload->tag); + write_uint64_result(tx_slot, 0); + finish_response(response, 0, sizeof(std::uint64_t)); + } catch (...) { + finish_response(response, kRemoteErrorStatus); + } +} + +void qec_get_corrections_ui64_host(const void *rx_slot, void *tx_slot, + std::size_t slot_size) { + const auto *request = + static_cast(rx_slot); + auto *response = start_response(request, tx_slot); + const auto *payload = get_payload(request); + if (!payload || payload->return_size > 64 || + !has_uint64_result_capacity(slot_size)) { + finish_response(response, kInvalidArgumentStatus); + return; + } + + try { + std::vector corrections(payload->return_size); + get_corrections(static_cast(payload->decoder_id), + corrections.data(), payload->return_size, + payload->reset != 0); + + std::uint64_t packed = 0; + for (std::uint64_t bit = 0; bit < payload->return_size; ++bit) + if (corrections[bit] & 1u) + packed |= std::uint64_t{1} << bit; + write_uint64_result(tx_slot, packed); + finish_response(response, 0, sizeof(packed)); + } catch (...) { + finish_response(response, kRemoteErrorStatus); + } +} + +void qec_reset_decoder_ui64_host(const void *rx_slot, void *tx_slot, + std::size_t slot_size) { + const auto *request = + static_cast(rx_slot); + auto *response = start_response(request, tx_slot); + const auto *payload = get_payload(request); + if (!payload || !has_uint64_result_capacity(slot_size)) { + finish_response(response, kInvalidArgumentStatus); + return; + } + + try { + reset_decoder(static_cast(payload->decoder_id)); + write_uint64_result(tx_slot, 0); + finish_response(response, 0, sizeof(std::uint64_t)); + } catch (...) { + finish_response(response, kRemoteErrorStatus); + } +} + +void fill_int64_arg(cudaq_type_desc_t &arg) { + arg.type_id = CUDAQ_TYPE_INT64; + arg.size_bytes = sizeof(std::uint64_t); + arg.num_elements = 1; +} + +void fill_entry(cudaq_function_entry_t &entry, cudaq_host_rpc_fn_t handler, + std::uint32_t function_id, std::uint8_t num_args, + std::uint8_t num_results) { + entry = {}; + entry.handler.host_fn = handler; + entry.function_id = function_id; + entry.dispatch_mode = CUDAQ_DISPATCH_HOST_CALL; + entry.schema.num_args = num_args; + entry.schema.num_results = num_results; + for (std::uint8_t i = 0; i < num_args; ++i) + fill_int64_arg(entry.schema.args[i]); + for (std::uint8_t i = 0; i < num_results; ++i) + fill_int64_arg(entry.schema.results[i]); +} + +class QecHostDispatchService + : public cudaq_internal::device_call::DeviceCallService { +public: + int getHostDispatchTable( + cudaq_internal::device_call::DeviceCallHostDispatchTable &table) + override { + fill_entry(entries[0], qec_enqueue_syndromes_ui64_host, + kEnqueueSyndromesUi64FunctionId, /*num_args=*/4, + /*num_results=*/1); + fill_entry(entries[1], qec_get_corrections_ui64_host, + kGetCorrectionsUi64FunctionId, /*num_args=*/3, + /*num_results=*/1); + fill_entry(entries[2], qec_reset_decoder_ui64_host, + kResetDecoderUi64FunctionId, /*num_args=*/1, + /*num_results=*/1); + table.entries = entries.data(); + table.count = static_cast(entries.size()); + table.deviceId = 0; + table.mailbox = nullptr; + return 0; + } + +private: + std::array entries{}; +}; + +cudaq_internal::device_call::DeviceCallService * +get_qec_host_dispatch_service() { + static QecHostDispatchService service; + return &service; +} + +} // namespace + +extern "C" __attribute__((visibility("default"))) +cudaq_internal::device_call::DeviceCallServicePluginInfo +cudaqGetDeviceCallServicePluginInfo() { + return {"cudaq-qec-host-dispatch", &get_qec_host_dispatch_service}; +} diff --git a/libs/qec/unittests/decoders/pymatching/CMakeLists.txt b/libs/qec/unittests/decoders/pymatching/CMakeLists.txt index 4943f823..23d0c8fd 100644 --- a/libs/qec/unittests/decoders/pymatching/CMakeLists.txt +++ b/libs/qec/unittests/decoders/pymatching/CMakeLists.txt @@ -29,7 +29,9 @@ target_link_libraries(test_pymatching PRIVATE GTest::gtest_main cudaq-qec cudaq: add_dependencies(CUDAQXQECUnitTests test_pymatching) gtest_discover_tests(test_pymatching) -if(CUDAQ_REALTIME_ROOT AND CUDAQ_REALTIME_INCLUDE_DIR) +if(CUDAQ_REALTIME_INCLUDE_DIR + AND TARGET cudaq-qec-realtime-decoding + AND TARGET cudaq-qec-pymatching) add_executable(test_pymatching_realtime test_pymatching_realtime.cpp) target_include_directories(test_pymatching_realtime PRIVATE ${CUDAQ_REALTIME_INCLUDE_DIR} @@ -50,4 +52,112 @@ if(CUDAQ_REALTIME_ROOT AND CUDAQ_REALTIME_INCLUDE_DIR) add_dependencies(test_pymatching_realtime cudaq-qec-pymatching) add_dependencies(CUDAQXQECUnitTests test_pymatching_realtime) gtest_discover_tests(test_pymatching_realtime) +else() + message(STATUS "Skipping test_pymatching_realtime " + "(requires realtime headers, " + "cudaq-qec-realtime-decoding, and cudaq-qec-pymatching).") +endif() + +set(_pymatching_cudaq_realtime_prefixes "") +if(CUDAQ_REALTIME_ROOT) + list(APPEND _pymatching_cudaq_realtime_prefixes "${CUDAQ_REALTIME_ROOT}") + list(APPEND _pymatching_cudaq_realtime_prefixes "${CUDAQ_REALTIME_ROOT}/build") +endif() +if(CUDAQ_INSTALL_PREFIX) + list(APPEND _pymatching_cudaq_realtime_prefixes "${CUDAQ_INSTALL_PREFIX}") +endif() +if(CUDAQ_INSTALL_DIR) + list(APPEND _pymatching_cudaq_realtime_prefixes "${CUDAQ_INSTALL_DIR}") +endif() + +if(NOT CUDAQ_DEVICE_CALL_INCLUDE_DIR) + find_path(CUDAQ_DEVICE_CALL_INCLUDE_DIR + NAMES cudaq_internal/device_call/DeviceCallService.h + HINTS ${_pymatching_cudaq_realtime_prefixes} + PATH_SUFFIXES include runtime/internal/device_call/include + ) +endif() + +set(_pymatching_device_call_runtime_link "") +if(TARGET cudaq::cudaq-device-call-runtime) + set(_pymatching_device_call_runtime_link cudaq::cudaq-device-call-runtime) +elseif(TARGET cudaq-device-call-runtime) + set(_pymatching_device_call_runtime_link cudaq-device-call-runtime) +else() + if(NOT CUDAQ_DEVICE_CALL_RUNTIME_LIBRARY) + find_library(CUDAQ_DEVICE_CALL_RUNTIME_LIBRARY + NAMES cudaq-device-call-runtime + PATHS ${_pymatching_cudaq_realtime_prefixes} + PATH_SUFFIXES lib + ) + endif() + if(CUDAQ_DEVICE_CALL_RUNTIME_LIBRARY) + set(_pymatching_device_call_runtime_link ${CUDAQ_DEVICE_CALL_RUNTIME_LIBRARY}) + endif() +endif() + +if(CUDAQ_REALTIME_INCLUDE_DIR + AND _pymatching_device_call_runtime_link + AND TARGET cudaq-qec-realtime-decoding-host-dispatch + AND TARGET cudaq-qec-pymatching) + add_executable(test_pymatching_qpu_device_call) + set_target_properties(test_pymatching_qpu_device_call PROPERTIES + LINKER_LANGUAGE CXX + ) + + cudaqx_add_device_code(test_pymatching_qpu_device_call + SOURCES + test_pymatching_qpu_device_call.cpp + COMPILER_FLAGS + --target qpp-cpu + -frealtime-lowering + ) + + target_link_directories(test_pymatching_qpu_device_call + PRIVATE ${CUDAQ_INSTALL_DIR}/lib) + + if(UNIX AND NOT APPLE) + target_link_options(test_pymatching_qpu_device_call PRIVATE + "LINKER:--no-as-needed") + endif() + + target_link_libraries(test_pymatching_qpu_device_call PRIVATE + GTest::gtest_main + cudaq-qec + cudaq-qec-pymatching + cudaq-qec-realtime-decoding + cudaq-qec-realtime-decoding-host-dispatch + ${_pymatching_device_call_runtime_link} + cudaq::cudaq cudaq::cudaq-common cudaq-mlir-runtime + nvqir nvqir-qpp) + + set(_pymatching_qpu_device_call_rpath + "${CMAKE_BINARY_DIR}/lib" + "${CMAKE_BINARY_DIR}/lib/decoder-plugins" + ) + if(CUDAQ_REALTIME_LIBRARY) + get_filename_component(_pymatching_qpu_realtime_lib_dir + "${CUDAQ_REALTIME_LIBRARY}" DIRECTORY) + list(APPEND _pymatching_qpu_device_call_rpath + "${_pymatching_qpu_realtime_lib_dir}") + endif() + if(CUDAQ_INSTALL_DIR) + list(APPEND _pymatching_qpu_device_call_rpath + "${CUDAQ_INSTALL_DIR}/lib") + endif() + + set_target_properties(test_pymatching_qpu_device_call PROPERTIES + BUILD_RPATH "${_pymatching_qpu_device_call_rpath}" + ) + + add_dependencies(test_pymatching_qpu_device_call + cudaq-qec-pymatching + cudaq-qec-realtime-decoding-host-dispatch) + add_dependencies(CUDAQXQECUnitTests test_pymatching_qpu_device_call) + gtest_discover_tests(test_pymatching_qpu_device_call) +else() + message(STATUS "Skipping test_pymatching_qpu_device_call " + "(requires realtime headers, cudaq-device-call-runtime, " + "cudaq-qec-realtime-decoding-host-dispatch, and " + "cudaq-qec-pymatching).") endif() diff --git a/libs/qec/unittests/decoders/pymatching/test_pymatching_qpu_device_call.cpp b/libs/qec/unittests/decoders/pymatching/test_pymatching_qpu_device_call.cpp new file mode 100644 index 00000000..9c8b7d90 --- /dev/null +++ b/libs/qec/unittests/decoders/pymatching/test_pymatching_qpu_device_call.cpp @@ -0,0 +1,88 @@ +/****************************************************************-*- C++ -*-**** + * Copyright (c) 2024 - 2026 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +#include "cudaq.h" +#include "cudaq/qec/realtime/decoding.h" +#include "cudaq/qec/realtime/decoding_config.h" + +#include + +#include +#include +#include + +namespace { + +__qpu__ std::vector +decode_syndrome_via_qpu_device_call(std::vector syndrome) { + cudaq::qec::decoding::enqueue_syndromes_test(/*decoder_id=*/0, syndrome, + /*tag=*/1); + return cudaq::qec::decoding::get_corrections(/*decoder_id=*/0, + /*return_size=*/3, + /*reset=*/true); +} + +__qpu__ std::vector read_corrections_via_qpu_device_call() { + return cudaq::qec::decoding::get_corrections(/*decoder_id=*/0, + /*return_size=*/3, + /*reset=*/false); +} + +cudaq::qec::decoding::config::multi_decoder_config make_pymatching_config() { + namespace config = cudaq::qec::decoding::config; + + config::decoder_config decoder_config; + decoder_config.id = 0; + decoder_config.type = "pymatching"; + decoder_config.block_size = 3; + decoder_config.syndrome_size = 3; + decoder_config.H_sparse = {0, -1, 1, -1, 2, -1}; + decoder_config.O_sparse = {0, -1, 1, -1, 2, -1}; + decoder_config.D_sparse = {0, -1, 1, -1, 2, -1}; + + decoder_config.decoder_custom_args = config::pymatching_config(); + auto &pymatching_config = + std::get(decoder_config.decoder_custom_args); + pymatching_config.error_rate_vec = std::vector{0.1, 0.1, 0.1}; + pymatching_config.merge_strategy = "smallest_weight"; + + config::multi_decoder_config multi_config; + multi_config.decoders.push_back(decoder_config); + return multi_config; +} + +} // namespace + +TEST(PyMatchingQpuDeviceCall, DecodesViaHostDispatchSharedMemory) { + namespace config = cudaq::qec::decoding::config; + + auto decoder_config = make_pymatching_config(); + ASSERT_EQ(config::configure_decoders(decoder_config), 0); + struct DecoderConfigGuard { + ~DecoderConfigGuard() { cudaq::qec::decoding::config::finalize_decoders(); } + } decoder_guard; + + char program[] = "test_pymatching_qpu_device_call"; + char channel[] = "--cudaq-device-call=host-dispatch"; + char slots[] = "--cudaq-device-call-slots=4"; + char slot_size[] = "--cudaq-device-call-slot-size=256"; + char *argv[] = {program, channel, slots, slot_size}; + cudaq::realtime::initialize(4, argv); + struct RealtimeGuard { + ~RealtimeGuard() { cudaq::realtime::finalize(); } + } realtime_guard; + + const auto decoded_runs = cudaq::run(1, decode_syndrome_via_qpu_device_call, + std::vector{false, true, false}); + ASSERT_EQ(decoded_runs.size(), 1); + EXPECT_EQ(decoded_runs.front(), (std::vector{false, true, false})); + + const auto reset_runs = cudaq::run(1, read_corrections_via_qpu_device_call); + ASSERT_EQ(reset_runs.size(), 1); + EXPECT_EQ(reset_runs.front(), (std::vector{false, false, false})); +} diff --git a/libs/qec/unittests/realtime/app_examples/CMakeLists.txt b/libs/qec/unittests/realtime/app_examples/CMakeLists.txt index af784ccd..aa2a161b 100644 --- a/libs/qec/unittests/realtime/app_examples/CMakeLists.txt +++ b/libs/qec/unittests/realtime/app_examples/CMakeLists.txt @@ -25,10 +25,27 @@ endif() if(CUDAQ_INSTALL_PREFIX) list(APPEND _app_cudaq_realtime_prefixes "${CUDAQ_INSTALL_PREFIX}") endif() +if(CUDAQ_INSTALL_DIR) + list(APPEND _app_cudaq_realtime_prefixes "${CUDAQ_INSTALL_DIR}") +endif() find_library(_APP_CUDAQ_REALTIME_DISPATCH_LIB cudaq-realtime-dispatch PATHS ${_app_cudaq_realtime_prefixes} PATH_SUFFIXES lib) +set(_app_device_call_runtime_link "") +if(NOT CUDAQ_DEVICE_CALL_RUNTIME_LIBRARY) + find_library(CUDAQ_DEVICE_CALL_RUNTIME_LIBRARY + NAMES cudaq-device-call-runtime + PATHS ${_app_cudaq_realtime_prefixes} + PATH_SUFFIXES lib) +endif() +if(CUDAQ_DEVICE_CALL_RUNTIME_LIBRARY) + set(_app_device_call_runtime_link ${CUDAQ_DEVICE_CALL_RUNTIME_LIBRARY}) +elseif(TARGET cudaq::cudaq-device-call-runtime) + set(_app_device_call_runtime_link cudaq::cudaq-device-call-runtime) +elseif(TARGET cudaq-device-call-runtime) + set(_app_device_call_runtime_link cudaq-device-call-runtime) +endif() -function(qec_realtime_app_link_options _tgt) +function(qec_realtime_app_common_link_options _tgt) target_link_options(${_tgt} PRIVATE "LINKER:--allow-shlib-undefined" "LINKER:--export-dynamic" @@ -37,6 +54,10 @@ function(qec_realtime_app_link_options _tgt) target_link_libraries(${_tgt} PRIVATE ${_APP_CUDAQ_REALTIME_DISPATCH_LIB}) endif() +endfunction() + +function(qec_realtime_app_link_options _tgt) + qec_realtime_app_common_link_options(${_tgt}) if(TARGET cudaq-qec-realtime-cudevice-proprietary) target_link_libraries(${_tgt} PRIVATE CUDA::cudart @@ -115,6 +136,86 @@ add_test( WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) +if(_app_device_call_runtime_link + AND TARGET cudaq-qec-realtime-decoding-host-dispatch + AND TARGET cudaq-qec-pymatching) + add_executable(surface_code-1-host-dispatch) + set_target_properties(surface_code-1-host-dispatch PROPERTIES + LINKER_LANGUAGE CXX) + + add_dependencies(surface_code-1-host-dispatch surface_code-1-local) + + cudaqx_add_device_code(surface_code-1-host-dispatch + SOURCES + surface_code-1.cpp + COMPILER_FLAGS + --target stim + -frealtime-lowering + -DCUDAQ_QEC_ENABLE_HOST_DISPATCH_DEVICE_CALLS=1 + -DCUDAQ_QEC_ENABLE_PYMATCHING_DECODER_TYPE=1 + DEPENDS_ON + # Must wait for the local executable to be built because it has the same + # surface_code-1.cpp file, and the nvq++ compiler will use the same + # filename prefix (surface_code-1) for intermediate files. + surface_code-1-local + ) + + target_link_directories(surface_code-1-host-dispatch + PRIVATE ${CUDAQ_INSTALL_DIR}/lib) + + if(UNIX AND NOT APPLE) + target_link_options(surface_code-1-host-dispatch PRIVATE + "LINKER:--no-as-needed") + endif() + + target_link_libraries(surface_code-1-host-dispatch + PRIVATE + cudaq-qec + cudaq-qec-pymatching + cudaq-qec-realtime-decoding + cudaq-qec-realtime-decoding-host-dispatch + ${_app_device_call_runtime_link} + cudaq::cudaq cudaq::cudaq-common cudaq-mlir-runtime + nvqir nvqir-stim) + target_link_options(surface_code-1-host-dispatch PRIVATE + "LINKER:--allow-shlib-undefined" + "LINKER:--export-dynamic" + ) + + set(_surface_code_host_dispatch_rpath + "${CMAKE_BINARY_DIR}/lib" + "${CMAKE_BINARY_DIR}/lib/decoder-plugins") + if(_APP_CUDAQ_REALTIME_DISPATCH_LIB) + get_filename_component(_app_cudaq_realtime_dispatch_lib_dir + "${_APP_CUDAQ_REALTIME_DISPATCH_LIB}" DIRECTORY) + list(APPEND _surface_code_host_dispatch_rpath + "${_app_cudaq_realtime_dispatch_lib_dir}") + endif() + if(CUDAQ_INSTALL_DIR) + list(APPEND _surface_code_host_dispatch_rpath "${CUDAQ_INSTALL_DIR}/lib") + endif() + set_target_properties(surface_code-1-host-dispatch PROPERTIES + BUILD_RPATH "${_surface_code_host_dispatch_rpath}") + + add_dependencies(surface_code-1-host-dispatch + cudaq-qec-pymatching + cudaq-qec-realtime-decoding-host-dispatch) + + add_test( + NAME app_examples.surface_code-1-host-dispatch-pymatching-smoke + COMMAND + ${CMAKE_CURRENT_BINARY_DIR}/surface_code-1-host-dispatch + --pymatching-host-dispatch-smoke + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + ) + +else() + message(STATUS "Skipping surface_code-1-host-dispatch PyMatching app test " + "(requires cudaq-device-call-runtime, " + "cudaq-qec-realtime-decoding-host-dispatch, and " + "cudaq-qec-pymatching).") +endif() + # This must be disabled for now because the multi_error_lut decoder is not # powerful enough to pass this test. The nv-qldpc-decoder can pass this test, # but that is not available on the GitHub repo. @@ -427,5 +528,3 @@ add_surface_code_test( # End of surface code 3, --target quantinuum --emulate #------------------------------------------------------------------------------# - - diff --git a/libs/qec/unittests/realtime/app_examples/surface_code-1.cpp b/libs/qec/unittests/realtime/app_examples/surface_code-1.cpp index 9c29e6de..84ca047d 100644 --- a/libs/qec/unittests/realtime/app_examples/surface_code-1.cpp +++ b/libs/qec/unittests/realtime/app_examples/surface_code-1.cpp @@ -1,5 +1,5 @@ /******************************************************************************* - * Copyright (c) 2025 NVIDIA Corporation & Affiliates. * + * Copyright (c) 2025 - 2026 NVIDIA Corporation & Affiliates. * * All rights reserved. * * * * This source code and the accompanying materials are made available under * @@ -40,9 +40,106 @@ static int g_syndromes_per_shot = 0; #define PER_SHOT_DEBUG 0 #endif +#ifndef CUDAQ_QEC_ENABLE_HOST_DISPATCH_DEVICE_CALLS +#define CUDAQ_QEC_ENABLE_HOST_DISPATCH_DEVICE_CALLS 0 +#endif + +#ifndef CUDAQ_QEC_ENABLE_PYMATCHING_DECODER_TYPE +#define CUDAQ_QEC_ENABLE_PYMATCHING_DECODER_TYPE 0 +#endif + // Uncomment this to manually inject errors. // #define MANUALLY_INJECT_ERRORS +#if CUDAQ_QEC_ENABLE_HOST_DISPATCH_DEVICE_CALLS && \ + CUDAQ_QEC_ENABLE_PYMATCHING_DECODER_TYPE +namespace { + +__qpu__ std::vector +pymatching_host_dispatch_decode_syndrome(std::vector syndrome) { + cudaq::qec::decoding::enqueue_syndromes_test(/*decoder_id=*/0, syndrome, + /*tag=*/1); + return cudaq::qec::decoding::get_corrections(/*decoder_id=*/0, + /*return_size=*/3, + /*reset=*/true); +} + +__qpu__ std::vector pymatching_host_dispatch_read_corrections() { + return cudaq::qec::decoding::get_corrections(/*decoder_id=*/0, + /*return_size=*/3, + /*reset=*/false); +} + +cudaq::qec::decoding::config::multi_decoder_config +make_pymatching_host_dispatch_smoke_config() { + namespace config = cudaq::qec::decoding::config; + + config::decoder_config decoder_config; + decoder_config.id = 0; + decoder_config.type = "pymatching"; + decoder_config.block_size = 3; + decoder_config.syndrome_size = 3; + decoder_config.H_sparse = {0, -1, 1, -1, 2, -1}; + decoder_config.O_sparse = {0, -1, 1, -1, 2, -1}; + decoder_config.D_sparse = {0, -1, 1, -1, 2, -1}; + + decoder_config.decoder_custom_args = config::pymatching_config(); + auto &pymatching_config = + std::get(decoder_config.decoder_custom_args); + pymatching_config.error_rate_vec = std::vector{0.1, 0.1, 0.1}; + pymatching_config.merge_strategy = "smallest_weight"; + + config::multi_decoder_config multi_config; + multi_config.decoders.push_back(decoder_config); + return multi_config; +} + +int run_pymatching_host_dispatch_smoke() { + namespace config = cudaq::qec::decoding::config; + + auto decoder_config = make_pymatching_host_dispatch_smoke_config(); + if (const auto ret = config::configure_decoders(decoder_config); ret != 0) { + std::printf("configure_decoders failed: %d\n", ret); + return ret; + } + struct DecoderConfigGuard { + ~DecoderConfigGuard() { cudaq::qec::decoding::config::finalize_decoders(); } + } decoder_guard; + + char program[] = "surface_code-1-host-dispatch"; + char channel[] = "--cudaq-device-call=host-dispatch"; + char slots[] = "--cudaq-device-call-slots=4"; + char slot_size[] = "--cudaq-device-call-slot-size=256"; + char *argv[] = {program, channel, slots, slot_size}; + cudaq::realtime::initialize(4, argv); + struct RealtimeGuard { + ~RealtimeGuard() { cudaq::realtime::finalize(); } + } realtime_guard; + + const auto decoded_runs = + cudaq::run(1, pymatching_host_dispatch_decode_syndrome, + std::vector{false, true, false}); + const std::vector expected_decoded{false, true, false}; + if (decoded_runs.size() != 1 || decoded_runs.front() != expected_decoded) { + std::printf("decoded correction mismatch\n"); + return 1; + } + + const auto reset_runs = + cudaq::run(1, pymatching_host_dispatch_read_corrections); + const std::vector expected_reset{false, false, false}; + if (reset_runs.size() != 1 || reset_runs.front() != expected_reset) { + std::printf("reset correction mismatch\n"); + return 1; + } + + std::printf("PyMatching host-dispatch device_call smoke test passed\n"); + return 0; +} + +} // namespace +#endif + void save_dem_to_file(const cudaq::qec::detector_error_model &dem, std::string dem_filename, uint64_t numSyndromesPerRound, uint64_t numLogical, const std::string &decoder_type, @@ -114,6 +211,13 @@ void save_dem_to_file(const cudaq::qec::detector_error_model &dem, lut_config.lut_error_depth = 2; sw_config.multi_error_lut_params = lut_config; config.decoder_custom_args = sw_config; +#if CUDAQ_QEC_ENABLE_PYMATCHING_DECODER_TYPE + } else if (decoder_type == "pymatching") { + cudaq::qec::decoding::config::pymatching_config pm_config; + pm_config.error_rate_vec = dem.error_rates; + pm_config.merge_strategy = "smallest_weight"; + config.decoder_custom_args = pm_config; +#endif } multi_config.decoders.push_back(config); @@ -911,8 +1015,14 @@ void show_help() { "distance\n"); printf(" --decoder_window Number of rounds to use for the decoder " "window. Default: distance\n"); +#if CUDAQ_QEC_ENABLE_PYMATCHING_DECODER_TYPE + printf(" --decoder_type Decoder type: 'multi_error_lut', " + "'nv-qldpc-decoder', 'sliding_window', or 'pymatching'. Default: " + "multi_error_lut\n"); +#else printf(" --decoder_type Decoder type: 'multi_error_lut', " "'nv-qldpc-decoder', or 'sliding_window'. Default: multi_error_lut\n"); +#endif printf(" --sw_window_size Sliding window size (only for " "sliding_window decoder). Default: decoder_window\n"); printf(" --sw_step_size Sliding window step size. Default: 1\n"); @@ -925,6 +1035,14 @@ void show_help() { "file.\n"); printf(" --use-relay-bp For --decoder_type nv-qldpc-decoder: select " "Relay BP instead of the default BP + OSD block.\n"); +#if CUDAQ_QEC_ENABLE_HOST_DISPATCH_DEVICE_CALLS + printf(" --use-host-dispatch-device-calls Route QPU device_call through " + "the CUDA-Q host-dispatch channel.\n"); +#if CUDAQ_QEC_ENABLE_PYMATCHING_DECODER_TYPE + printf(" --pymatching-host-dispatch-smoke Run a graphlike PyMatching " + "host-dispatch smoke test.\n"); +#endif +#endif printf(" --help Show this help message\n"); } @@ -949,6 +1067,12 @@ int main(int argc, char **argv) { bool load_syndrome = false; std::string syndrome_filename; bool use_relay_bp = false; +#if CUDAQ_QEC_ENABLE_HOST_DISPATCH_DEVICE_CALLS + bool use_host_dispatch_device_calls = false; +#if CUDAQ_QEC_ENABLE_PYMATCHING_DECODER_TYPE + bool run_pymatching_host_dispatch_smoke_test = false; +#endif +#endif // Parse the command line arguments for (int i = 1; i < argc; i++) { @@ -1001,6 +1125,14 @@ int main(int argc, char **argv) { i++; } else if (arg == "--use-relay-bp") { use_relay_bp = true; +#if CUDAQ_QEC_ENABLE_HOST_DISPATCH_DEVICE_CALLS + } else if (arg == "--use-host-dispatch-device-calls") { + use_host_dispatch_device_calls = true; +#if CUDAQ_QEC_ENABLE_PYMATCHING_DECODER_TYPE + } else if (arg == "--pymatching-host-dispatch-smoke") { + run_pymatching_host_dispatch_smoke_test = true; +#endif +#endif } else { printf("Unknown argument: %s\n", arg.c_str()); show_help(); @@ -1008,6 +1140,13 @@ int main(int argc, char **argv) { } } +#if CUDAQ_QEC_ENABLE_HOST_DISPATCH_DEVICE_CALLS && \ + CUDAQ_QEC_ENABLE_PYMATCHING_DECODER_TYPE + if (run_pymatching_host_dispatch_smoke_test) { + return run_pymatching_host_dispatch_smoke(); + } +#endif + if (!load_dem && !save_dem && !load_syndrome) { printf("Neither --save_dem nor --load_dem nor --load_syndrome was " "specified. This is not a valid use case for this program.\n"); @@ -1037,9 +1176,18 @@ int main(int argc, char **argv) { // Validate decoder type if (decoder_type != "multi_error_lut" && decoder_type != "sliding_window" && - decoder_type != "nv-qldpc-decoder") { + decoder_type != "nv-qldpc-decoder" +#if CUDAQ_QEC_ENABLE_PYMATCHING_DECODER_TYPE + && decoder_type != "pymatching" +#endif + ) { +#if CUDAQ_QEC_ENABLE_PYMATCHING_DECODER_TYPE + printf("Error: --decoder_type must be 'multi_error_lut', " + "'nv-qldpc-decoder', 'sliding_window', or 'pymatching'\n"); +#else printf("Error: --decoder_type must be 'multi_error_lut', " "'nv-qldpc-decoder', or 'sliding_window'\n"); +#endif return 1; } @@ -1087,6 +1235,28 @@ int main(int argc, char **argv) { printf("Running with p_spam = %f, distance = %d, num_shots = %d, num_rounds " "= %d, decoder_window = %d\n", p_spam, distance, num_shots, num_rounds, decoder_window); + +#if CUDAQ_QEC_ENABLE_HOST_DISPATCH_DEVICE_CALLS + struct RealtimeGuard { + bool initialized = false; + ~RealtimeGuard() { + if (initialized) + cudaq::realtime::finalize(); + } + } realtime_guard; + + char realtime_program[] = "surface_code-1-host-dispatch"; + char realtime_channel[] = "--cudaq-device-call=host-dispatch"; + char realtime_slots[] = "--cudaq-device-call-slots=8"; + char realtime_slot_size[] = "--cudaq-device-call-slot-size=256"; + char *realtime_argv[] = {realtime_program, realtime_channel, realtime_slots, + realtime_slot_size}; + if (use_host_dispatch_device_calls) { + cudaq::realtime::initialize(4, realtime_argv); + realtime_guard.initialized = true; + } +#endif + auto code = cudaq::qec::get_code( "surface_code", cudaqx::heterogeneous_map{{"distance", distance}});