diff --git a/sycl/source/detail/device_kernel_info.cpp b/sycl/source/detail/device_kernel_info.cpp index a256870a1058d..30d3d03126ff4 100644 --- a/sycl/source/detail/device_kernel_info.cpp +++ b/sycl/source/detail/device_kernel_info.cpp @@ -12,8 +12,9 @@ namespace sycl { inline namespace _V1 { namespace detail { -DeviceKernelInfo::DeviceKernelInfo(const CompileTimeKernelInfoTy &Info) - : CompileTimeKernelInfoTy(Info) {} +DeviceKernelInfo::DeviceKernelInfo(const CompileTimeKernelInfoTy &Info, + std::optional KernelID) + : CompileTimeKernelInfoTy{Info}, MKernelID{std::move(KernelID)} {} template inline constexpr bool operator==(const CompileTimeKernelInfoTy &LHS, diff --git a/sycl/source/detail/device_kernel_info.hpp b/sycl/source/detail/device_kernel_info.hpp index f06d03c126d11..c2574d1704d11 100644 --- a/sycl/source/detail/device_kernel_info.hpp +++ b/sycl/source/detail/device_kernel_info.hpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -84,12 +85,10 @@ struct FastKernelSubcacheT { // information that is uniform between different submissions of the same // kernel). Pointers to instances of this class are stored in header function // templates as a static variable to avoid repeated runtime lookup overhead. -// TODO Currently this class duplicates information fetched from the program -// manager. Instead, we should merge all of this information -// into this structure and get rid of the other KernelName -> * maps. class DeviceKernelInfo : public CompileTimeKernelInfoTy { public: - DeviceKernelInfo(const CompileTimeKernelInfoTy &Info); + DeviceKernelInfo(const CompileTimeKernelInfoTy &Info, + std::optional KernelID = std::nullopt); void init(std::string_view KernelName); void setCompileTimeInfoIfNeeded(const CompileTimeKernelInfoTy &Info); @@ -100,6 +99,14 @@ class DeviceKernelInfo : public CompileTimeKernelInfoTy { return MImplicitLocalArgPos; } + const sycl::kernel_id &getKernelID() const { + // Expected to be called only for DeviceKernelInfo instances created by + // program manager (as opposed to allocated by sycl::kernel with + // origins other than SYCL offline compilation). + assert(MKernelID); + return *MKernelID; + } + // Implicit local argument position is used only for some backends, so this // function allows setting it as more images are added. void setImplicitLocalArgPos(int Pos); @@ -109,6 +116,7 @@ class DeviceKernelInfo : public CompileTimeKernelInfoTy { FastKernelSubcacheT MFastKernelSubcache; std::optional MImplicitLocalArgPos; + const std::optional MKernelID; }; } // namespace detail diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 6ab55167c2c9d..d8c25d539d5ce 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -667,8 +667,8 @@ ProgramManager::collectDeviceImageDepsForImportedSymbols( "Cannot resolve external symbols, linking is unsupported " "for the backend"); - // Access to m_ExportedSymbolImages must be guarded by m_KernelIDsMutex. - std::lock_guard KernelIDsGuard(m_KernelIDsMutex); + // Access to m_ExportedSymbolImages must be guarded by m_ImgMapsMutex. + std::lock_guard ImgMapsGuard(m_ImgMapsMutex); while (!WorkList.empty()) { std::string Symbol = WorkList.front(); @@ -748,8 +748,8 @@ ProgramManager::collectDependentDeviceImagesForVirtualFunctions( if (!WorkList.empty()) { // Guard read access to m_VFSet2BinImage: // TODO: a better solution should be sought in the future, i.e. a different - // mutex than m_KernelIDsMutex, check lock check pattern, etc. - std::lock_guard KernelIDsGuard(m_KernelIDsMutex); + // mutex than m_ImgMapsMutex, check lock check pattern, etc. + std::lock_guard ImgMapsGuard(m_ImgMapsMutex); while (!WorkList.empty()) { std::string SetName = WorkList.front(); @@ -1311,11 +1311,12 @@ ProgramManager::getDeviceImage(std::string_view KernelName, const RTDeviceBinaryImage *Img = nullptr; { - std::lock_guard KernelIDsGuard(m_KernelIDsMutex); - if (auto KernelId = m_KernelName2KernelIDs.find(KernelName); - KernelId != m_KernelName2KernelIDs.end()) { - Img = getBinImageFromMultiMap(m_KernelIDs2BinImage, KernelId->second, - ContextImpl, DeviceImpl); + std::lock_guard Guard(m_DeviceKernelInfoMapMutex); + if (auto It = m_DeviceKernelInfoMap.find(KernelName); + It != m_DeviceKernelInfoMap.end()) { + Img = getBinImageFromMultiMap(m_KernelIDs2BinImage, + It->second.getKernelID(), ContextImpl, + DeviceImpl); } } @@ -1347,7 +1348,7 @@ const RTDeviceBinaryImage &ProgramManager::getDeviceImage( debugPrintBinaryImages(); } - std::lock_guard KernelIDsGuard(m_KernelIDsMutex); + std::lock_guard ImgMapsGuard(m_ImgMapsMutex); std::vector RawImgs(ImageSet.size()); auto ImageIterator = ImageSet.begin(); for (size_t i = 0; i < ImageSet.size(); i++, ImageIterator++) @@ -1620,7 +1621,7 @@ void ProgramManager::addImage(sycl_device_binary RawImg, } // Fill maps for kernel bundles - std::lock_guard KernelIDsGuard(m_KernelIDsMutex); + std::lock_guard ImgMapsGuard(m_ImgMapsMutex); // For bfloat16 device library image, it doesn't include any kernel, device // global, virtual function, so just skip adding it to any related maps. @@ -1694,31 +1695,31 @@ void ProgramManager::addImage(sycl_device_binary RawImg, m_BinImg2KernelIDs[Img.get()]; KernelIDs.reset(new std::vector); + std::lock_guard DKIGuard(m_DeviceKernelInfoMapMutex); + for (sycl_offload_entry EntriesIt = EntriesB; EntriesIt != EntriesE; EntriesIt = EntriesIt->Increment()) { auto name = EntriesIt->GetName(); - // Skip creating unique kernel ID if it is an exported device + // Skip creating device kernel information if it is an exported device // function. Exported device functions appear in the offload entries // among kernels, but are identifiable by being listed in properties. if (m_ExportedSymbolImages.find(name) != m_ExportedSymbolImages.end()) continue; - // ... and create a unique kernel ID for the entry - auto It = m_KernelName2KernelIDs.find(name); - if (It == m_KernelName2KernelIDs.end()) { + auto It = m_DeviceKernelInfoMap.find(std::string_view(name)); + if (It == m_DeviceKernelInfoMap.end()) { sycl::kernel_id KernelID = detail::createSyclObjFromImpl( std::make_shared(name)); - - It = m_KernelName2KernelIDs.emplace_hint(It, name, KernelID); + CompileTimeKernelInfoTy DefaultCompileTimeInfo{std::string_view(name)}; + It = m_DeviceKernelInfoMap.emplace_hint( + It, std::piecewise_construct, std::forward_as_tuple(name), + std::forward_as_tuple(DefaultCompileTimeInfo, KernelID)); } - m_KernelIDs2BinImage.insert(std::make_pair(It->second, Img.get())); - KernelIDs->push_back(It->second); - - CompileTimeKernelInfoTy DefaultCompileTimeInfo{std::string_view(name)}; - m_DeviceKernelInfoMap.try_emplace(std::string_view(name), - DefaultCompileTimeInfo); + m_KernelIDs2BinImage.insert( + std::make_pair(It->second.getKernelID(), Img.get())); + KernelIDs->push_back(It->second.getKernelID()); // Keep track of image to kernel name reference count for cleanup. m_KernelNameRefCount[name]++; @@ -1777,7 +1778,7 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { if (DeviceBinary->NumDeviceBinaries == 0) return; // Acquire lock to read and modify maps for kernel bundles - std::lock_guard KernelIDsGuard(m_KernelIDsMutex); + std::lock_guard ImgMapsGuard(m_ImgMapsMutex); // Acquire lock to erase DeviceKernelInfoMap std::lock_guard Guard(m_DeviceKernelInfoMapMutex); @@ -1846,9 +1847,10 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { continue; } - auto Name2IDIt = m_KernelName2KernelIDs.find(Name); - if (Name2IDIt != m_KernelName2KernelIDs.end()) - removeFromMultimapByVal(m_KernelIDs2BinImage, Name2IDIt->second, Img); + auto DKIIt = m_DeviceKernelInfoMap.find(Name); + assert(DKIIt != m_DeviceKernelInfoMap.end()); + removeFromMultimapByVal(m_KernelIDs2BinImage, DKIIt->second.getKernelID(), + Img); auto RefCountIt = m_KernelNameRefCount.find(Name); assert(RefCountIt != m_KernelNameRefCount.end()); @@ -1860,10 +1862,8 @@ void ProgramManager::removeImages(sycl_device_binaries DeviceBinary) { if (--RefCount == 0) { // TODO aggregate all these maps into a single one since their entries // share lifetime. - m_DeviceKernelInfoMap.erase(Name); + m_DeviceKernelInfoMap.erase(DKIIt); m_KernelNameRefCount.erase(RefCountIt); - if (Name2IDIt != m_KernelName2KernelIDs.end()) - m_KernelName2KernelIDs.erase(Name2IDIt); } } @@ -1971,7 +1971,7 @@ ProgramManager::getBinImageState(const RTDeviceBinaryImage *BinImage) { } bool ProgramManager::hasCompatibleImage(const device_impl &DeviceImpl) { - std::lock_guard Guard(m_KernelIDsMutex); + std::lock_guard Guard(m_ImgMapsMutex); return std::any_of( m_BinImg2KernelIDs.cbegin(), m_BinImg2KernelIDs.cend(), @@ -1981,19 +1981,19 @@ bool ProgramManager::hasCompatibleImage(const device_impl &DeviceImpl) { } std::vector ProgramManager::getAllSYCLKernelIDs() { - std::lock_guard KernelIDsGuard(m_KernelIDsMutex); + std::lock_guard DKIGuard(m_DeviceKernelInfoMapMutex); std::vector AllKernelIDs; - AllKernelIDs.reserve(m_KernelName2KernelIDs.size()); - for (std::pair KernelID : - m_KernelName2KernelIDs) { - AllKernelIDs.push_back(KernelID.second); + AllKernelIDs.reserve(m_DeviceKernelInfoMap.size()); + for (const std::pair &Pair : + m_DeviceKernelInfoMap) { + AllKernelIDs.push_back(Pair.second.getKernelID()); } return AllKernelIDs; } kernel_id ProgramManager::getBuiltInKernelID(std::string_view KernelName) { - std::lock_guard BuiltInKernelIDsGuard(m_BuiltInKernelIDsMutex); + std::lock_guard BuiltInImgMapsGuard(m_BuiltInKernelIDsMutex); auto KernelID = m_BuiltInKernelIDs.find(KernelName); if (KernelID == m_BuiltInKernelIDs.end()) { @@ -2044,7 +2044,7 @@ ProgramManager::getKernelGlobalInfoDesc(const char *UniqueId) { std::set ProgramManager::getRawDeviceImages(const std::vector &KernelIDs) { std::set BinImages; - std::lock_guard KernelIDsGuard(m_KernelIDsMutex); + std::lock_guard ImgMapsGuard(m_ImgMapsMutex); for (const kernel_id &KID : KernelIDs) { auto Range = m_KernelIDs2BinImage.equal_range(KID); for (auto It = Range.first, End = Range.second; It != End; ++It) @@ -2099,7 +2099,7 @@ device_image_plain ProgramManager::getDeviceImageFromBinaryImage( std::shared_ptr> KernelIDs; // Collect kernel names for the image. { - std::lock_guard KernelIDsGuard(m_KernelIDsMutex); + std::lock_guard ImgMapsGuard(m_ImgMapsMutex); KernelIDs = m_BinImg2KernelIDs[BinImage]; } @@ -2129,7 +2129,7 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( } BinImages = getRawDeviceImages(KernelIDs); } else { - std::lock_guard KernelIDsGuard(m_KernelIDsMutex); + std::lock_guard ImgMapsGuard(m_ImgMapsMutex); for (auto &ImageUPtr : m_BinImg2KernelIDs) { BinImages.insert(ImageUPtr.first); } @@ -2188,7 +2188,7 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( ImgInfo.State = getBinImageState(BinImage); // Collect kernel names for the image { - std::lock_guard KernelIDsGuard(m_KernelIDsMutex); + std::lock_guard ImgMapsGuard(m_ImgMapsMutex); ImgInfo.KernelIDs = m_BinImg2KernelIDs[BinImage]; } ImgInfo.Deps = collectDeviceImageDeps(*BinImage, Dev); @@ -2285,7 +2285,7 @@ ProgramManager::createDependencyImage(const context &Ctx, devices_range Devs, bundle_state DepState) { std::shared_ptr> DepKernelIDs; { - std::lock_guard KernelIDsGuard(m_KernelIDsMutex); + std::lock_guard ImgMapsGuard(m_ImgMapsMutex); // For device library images, they are not in m_BinImg2KernelIDs since // no kernel is included. auto DepIt = m_BinImg2KernelIDs.find(DepImage); @@ -2408,7 +2408,7 @@ ProgramManager::getSYCLDeviceImages(const context &Ctx, devices_range Devs, return {}; { - std::lock_guard BuiltInKernelIDsGuard(m_BuiltInKernelIDsMutex); + std::lock_guard BuiltInImgMapsGuard(m_BuiltInKernelIDsMutex); for (auto &It : m_BuiltInKernelIDs) { if (std::find(KernelIDs.begin(), KernelIDs.end(), It.second) != @@ -2838,7 +2838,7 @@ ur_kernel_handle_t ProgramManager::getCachedMaterializedKernel( << "KernelName: " << KernelName << "\n"; { - std::lock_guard KernelIDsGuard(m_KernelIDsMutex); + std::lock_guard ImgMapsGuard(m_ImgMapsMutex); if (auto KnownMaterializations = m_MaterializedKernels.find(KernelName); KnownMaterializations != m_MaterializedKernels.end()) { if constexpr (DbgProgMgr > 0) @@ -2895,7 +2895,7 @@ ur_kernel_handle_t ProgramManager::getOrCreateMaterializedKernel( BuildProgram, KernelName.data(), &UrKernel); ur_kernel_handle_t RawUrKernel = UrKernel; { - std::lock_guard KernelIDsGuard(m_KernelIDsMutex); + std::lock_guard ImgMapsGuard(m_ImgMapsMutex); m_MaterializedKernels[KernelName][SpecializationConsts] = std::move(UrKernel); } diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index b5419091961da..c9d972c7db9b7 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -219,19 +219,20 @@ class ProgramManager { // The function returns the unique SYCL kernel identifier associated with a // kernel name or nullopt if there is no such ID. - std::optional tryGetSYCLKernelID(std::string_view KernelName) { - std::lock_guard KernelIDsGuard(m_KernelIDsMutex); + std::optional + tryGetSYCLKernelID(std::string_view KernelName) const { + std::lock_guard Guard(m_DeviceKernelInfoMapMutex); - auto KernelID = m_KernelName2KernelIDs.find(KernelName); - if (KernelID == m_KernelName2KernelIDs.end()) + auto It = m_DeviceKernelInfoMap.find(KernelName); + if (It == m_DeviceKernelInfoMap.end()) return std::nullopt; - return KernelID->second; + return It->second.getKernelID(); } // The function returns the unique SYCL kernel identifier associated with a // kernel name or throws a sycl exception if there is no such ID. - kernel_id getSYCLKernelID(std::string_view KernelName) { + kernel_id getSYCLKernelID(std::string_view KernelName) const { if (std::optional MaybeKernelID = tryGetSYCLKernelID(KernelName)) return *MaybeKernelID; throw exception(make_error_code(errc::runtime), @@ -409,20 +410,22 @@ class ProgramManager { const device_impl &DeviceImpl); protected: - /// The three maps below are used during kernel resolution. Any kernel is - /// identified by its name. using RTDeviceBinaryImageUPtr = std::unique_ptr; using DynRTDeviceBinaryImageUPtr = std::unique_ptr; - /// Maps names of kernels to their unique kernel IDs. - /// TODO: Use std::unordered_set with transparent hash and equality functions - /// when C++20 is enabled for the runtime library. - /// Access must be guarded by the m_KernelIDsMutex mutex. - // - std::unordered_map m_KernelName2KernelIDs; + /// Protects kernel ID based maps. + /// NOTE: This may be acquired while \ref Sync::getGlobalLock() is held so to + /// avoid deadlocks care must be taken not to acquire + /// \ref Sync::getGlobalLock() while holding this mutex. + // TODO This currently serves as the mutex for multiple maps that are not + // always accessed together, probably should be split into multiple mutexes. + std::mutex m_ImgMapsMutex; + + /// The two maps below are used during kernel resolution. Any kernel is + /// identified by its name, its kernel id is stored in m_DeviceKernelInfoMap. // Maps KernelIDs to device binary images. There can be more than one image // in case of SPIRV + AOT. - /// Access must be guarded by the m_KernelIDsMutex mutex. + /// Access must be guarded by the m_ImgMapsMutex mutex. std::unordered_multimap m_KernelIDs2BinImage; @@ -430,26 +433,20 @@ class ProgramManager { // Using shared_ptr to avoid expensive copy of the vector. // The vector is initialized in addImages function and is supposed to be // immutable afterwards. - /// Access must be guarded by the m_KernelIDsMutex mutex. + /// Access must be guarded by the m_ImgMapsMutex mutex. std::unordered_map>> m_BinImg2KernelIDs; - /// Protects kernel ID cache. - /// NOTE: This may be acquired while \ref Sync::getGlobalLock() is held so to - /// avoid deadlocks care must be taken not to acquire - /// \ref Sync::getGlobalLock() while holding this mutex. - std::mutex m_KernelIDsMutex; - /// Keeps track of binary image to kernel name reference count. /// Used for checking if the last image referencing the kernel name /// is removed in order to trigger cleanup of kernel specific information. - /// Access must be guarded by the m_KernelIDsMutex mutex. + /// Access must be guarded by the m_ImgMapsMutex mutex. std::unordered_map m_KernelNameRefCount; /// Caches all exported symbols to allow faster lookup when excluding these /// from kernel bundles. - /// Access must be guarded by the m_KernelIDsMutex mutex. + /// Access must be guarded by the m_ImgMapsMutex mutex. /// Owns its keys to support the bfloat16 use case with dynamic images, /// where the symbol is taken from another image (that might be unloaded). std::unordered_multimap @@ -457,7 +454,7 @@ class ProgramManager { /// Keeps all device images we are refering to during program lifetime. Used /// for proper cleanup. - /// Access must be guarded by the m_KernelIDsMutex mutex. + /// Access must be guarded by the m_ImgMapsMutex mutex. std::unordered_map m_DeviceImages; @@ -467,7 +464,7 @@ class ProgramManager { /// Caches list of device images that use or provide virtual functions from /// the same set. Used to simplify access. - /// Access must be guarded by the m_KernelIDsMutex mutex. + /// Access must be guarded by the m_ImgMapsMutex mutex. std::unordered_map> m_VFSet2BinImage; @@ -511,7 +508,7 @@ class ProgramManager { std::unordered_map m_DeviceKernelInfoMap; // Protects m_DeviceKernelInfoMap. - std::mutex m_DeviceKernelInfoMapMutex; + mutable std::mutex m_DeviceKernelInfoMapMutex; // Sanitizer type used in device image SanitizerType m_SanitizerFoundInImage; diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index b03f7f0cfd855..5a1cc09cd55b8 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -505,8 +505,7 @@ detail::EventImplPtr handler::finalize() { !KernelBundleImpPtr->tryGetKernel(impl->getKernelName())) { detail::device_impl &Dev = impl->get_device(); kernel_id KernelID = - detail::ProgramManager::getInstance().getSYCLKernelID( - impl->getKernelName()); + impl->MKernelData.getDeviceKernelInfoPtr()->getKernelID(); bool KernelInserted = KernelBundleImpPtr->add_kernel( KernelID, detail::createSyclObjFromImpl(Dev)); // If kernel was not inserted and the bundle is in input mode we try diff --git a/sycl/unittests/program_manager/Cleanup.cpp b/sycl/unittests/program_manager/Cleanup.cpp index 339a154e2db8b..02feacd45bb42 100644 --- a/sycl/unittests/program_manager/Cleanup.cpp +++ b/sycl/unittests/program_manager/Cleanup.cpp @@ -18,11 +18,6 @@ class ProgramManagerExposed : public sycl::detail::ProgramManager { return m_KernelIDs2BinImage; } - std::unordered_map & - getKernelName2KernelID() { - return m_KernelName2KernelIDs; - } - std::unordered_map>> & getBinImage2KernelId() { @@ -255,9 +250,6 @@ void checkAllInvolvedContainers(ProgramManagerExposed &PM, bool MultipleImgsPerEntryTestCase = false) { EXPECT_EQ(PM.getKernelID2BinImage().size(), ExpectedImgCount) << "KernelID2BinImg " + CommentPostfix; - checkContainer(PM.getKernelName2KernelID(), ExpectedEntryCount, - generateRefNames(ImgIds, "Kernel"), - "KernelName2KernelID " + CommentPostfix); EXPECT_EQ(PM.getBinImage2KernelId().size(), ExpectedImgCount) << CommentPostfix; checkContainer(PM.getExportedSymbolImages(), ExpectedImgCount,