ModelEngine-Group
diff --git a/‎CMakeLists.txt‎
Lines changed: 2 additions & 1 deletion b/‎CMakeLists.txt‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎ucm/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions b/‎ucm/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎ucm/shared/CMakeLists.txt‎
Lines changed: 4 additions & 0 deletions b/‎ucm/shared/CMakeLists.txt‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎ucm/shared/__init__.py‎ b/‎ucm/shared/__init__.py‎
diff --git a/‎ucm/shared/test/CMakeLists.txt‎
Lines changed: 11 additions & 0 deletions b/‎ucm/shared/test/CMakeLists.txt‎
Lines changed: 11 additions & 0 deletions
diff --git a/‎ucm/shared/test/case/trans/trans_test.cc‎
Lines changed: 93 additions & 0 deletions b/‎ucm/shared/test/case/trans/trans_test.cc‎
Lines changed: 93 additions & 0 deletions
diff --git a/‎ucm/shared/test/example/trans/trans_on_cuda_example.py‎
Lines changed: 149 additions & 0 deletions b/‎ucm/shared/test/example/trans/trans_on_cuda_example.py‎
Lines changed: 149 additions & 0 deletions
diff --git a/‎ucm/shared/trans/CMakeLists.txt‎
Lines changed: 14 additions & 0 deletions b/‎ucm/shared/trans/CMakeLists.txt‎
Lines changed: 14 additions & 0 deletions
diff --git a/‎ucm/shared/trans/__init__.py‎ b/‎ucm/shared/trans/__init__.py‎
diff --git a/‎ucm/shared/trans/buffer.h‎
Lines changed: 51 additions & 0 deletions b/‎ucm/shared/trans/buffer.h‎
Lines changed: 51 additions & 0 deletions
@@ -9,7 +9,8 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
 option(BUILD_UCM_STORE "build ucm store module." ON)
 option(BUILD_UCM_SPARSE "build ucm sparse module." ON)
 option(BUILD_UNIT_TESTS "build all unit test suits." OFF)
-option(BUILD_NUMA "build numactl library" OFF)
+option(BUILD_NUMA "build numactl library." OFF)
+option(DOWNLOAD_DEPENDENCE "download dependence by cmake." ON)
 set(RUNTIME_ENVIRONMENT "simu" CACHE STRING "runtime: simu, ascend, musa or cuda.")
 
 execute_process(COMMAND git rev-parse HEAD OUTPUT_VARIABLE UCM_COMMIT_ID OUTPUT_STRIP_TRAILING_WHITESPACE)
 
@@ -1,3 +1,4 @@
+add_subdirectory(shared)
 if(BUILD_UCM_STORE)
     add_subdirectory(store)
 endif()
 
@@ -0,0 +1,4 @@
+# Expose this directory as an include root for everything configured below it,
+# so sources can include headers by module-relative path (e.g. "trans/device.h").
+include_directories(${CMAKE_CURRENT_SOURCE_DIR})
+# Sub-modules are added in this order: vendor, trans, then test.
+# NOTE(review): presumably vendor provides third-party targets that trans/test rely on - confirm.
+add_subdirectory(vendor)
+add_subdirectory(trans)
+add_subdirectory(test)
@@ -0,0 +1,11 @@
+# Unit-test executable for the shared module; configured only when BUILD_UNIT_TESTS is ON.
+if(BUILD_UNIT_TESTS)
+    include(GoogleTest)
+    # Pick up every .cc file below ./case (recursively).
+    # The glob has no CONFIGURE_DEPENDS, so it is evaluated at configure time only;
+    # re-run CMake after adding or removing test sources.
+    file(GLOB_RECURSE UCMSHARED_TEST_SOURCE_FILES "./case/*.cc")
+    add_executable(ucmshared.test ${UCMSHARED_TEST_SOURCE_FILES})
+    target_include_directories(ucmshared.test PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/case)
+    # Links the transfer library under test plus the GoogleTest and mockcpp libraries.
+    target_link_libraries(ucmshared.test PRIVATE
+        trans
+        gtest_main gtest mockcpp
+    )
+    # Register each discovered gtest case with CTest.
+    gtest_discover_tests(ucmshared.test)
+endif()
@@ -0,0 +1,93 @@
+/**
+ * MIT License
+ *
+ * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ * */
+#include <gtest/gtest.h>
+#include "trans/device.h"
+
+// Empty fixture that groups the UC::Trans copy tests; it adds no set-up or tear-down of its own.
+class UCTransUnitTest : public ::testing::Test {};
+
+// Round-trips `number` chunks of `size` bytes: one contiguous host buffer is scattered to
+// `number` separate device buffers and gathered back into a second host buffer, using the
+// stream returned by Device::MakeStream(). The first size_t of every chunk carries the chunk
+// index so that ordering and content can be verified after the round trip.
+// The table of device pointers is passed from host memory in this variant.
+TEST_F(UCTransUnitTest, CopyDataWithCE)
+{
+    const auto ok = UC::Trans::Status::OK();
+    constexpr int32_t deviceId = 0;
+    constexpr size_t size = 36 * 1024;
+    constexpr size_t number = 64 * 61;
+    UC::Trans::Device device;
+    ASSERT_EQ(device.Setup(deviceId), ok);
+    auto buffer = device.MakeBuffer();
+    auto stream = device.MakeStream();
+    auto hPtr1 = buffer->MakeHostBuffer(size * number);
+    ASSERT_NE(hPtr1, nullptr);
+    ASSERT_EQ(buffer->MakeDeviceBuffers(size, number), ok);
+    // ptrHolder keeps every device buffer alive while dPtrArr holds the raw addresses.
+    std::vector<std::shared_ptr<void>> ptrHolder;
+    ptrHolder.reserve(number);
+    void* dPtrArr[number];
+    auto* const src = static_cast<char*>(hPtr1.get());
+    for (size_t i = 0; i < number; i++) {
+        // Tag the start of chunk i with its own index.
+        *reinterpret_cast<size_t*>(src + size * i) = i;
+        auto ptr = buffer->GetDeviceBuffer(size);
+        // Stop here if no device buffer was handed out, instead of copying to a null address.
+        ASSERT_NE(ptr, nullptr);
+        dPtrArr[i] = ptr.get();
+        ptrHolder.emplace_back(ptr);
+    }
+    auto hPtr2 = buffer->MakeHostBuffer(size * number);
+    ASSERT_NE(hPtr2, nullptr);
+    ASSERT_EQ(stream->HostToDeviceAsync(hPtr1.get(), dPtrArr, size, number), ok);
+    ASSERT_EQ(stream->DeviceToHostAsync(dPtrArr, hPtr2.get(), size, number), ok);
+    ASSERT_EQ(stream->Synchronized(), ok);
+    const auto* const dst = static_cast<const char*>(hPtr2.get());
+    for (size_t i = 0; i < number; i++) {
+        ASSERT_EQ(*reinterpret_cast<const size_t*>(dst + size * i), i);
+    }
+}
+
+// Same round trip as the CE test, but on the stream returned by Device::MakeSMStream().
+// In this variant the table of device pointers is first copied into a device buffer, and the
+// scatter/gather calls receive the device address of that table.
+TEST_F(UCTransUnitTest, CopyDataWithSM)
+{
+    const auto ok = UC::Trans::Status::OK();
+    constexpr int32_t deviceId = 0;
+    constexpr size_t size = 36 * 1024;
+    constexpr size_t number = 64 * 61;
+    UC::Trans::Device device;
+    ASSERT_EQ(device.Setup(deviceId), ok);
+    auto buffer = device.MakeBuffer();
+    auto stream = device.MakeSMStream();
+    auto hPtr1 = buffer->MakeHostBuffer(size * number);
+    ASSERT_NE(hPtr1, nullptr);
+    ASSERT_EQ(buffer->MakeDeviceBuffers(size, number), ok);
+    // ptrHolder keeps every device buffer alive while dPtrArr holds the raw addresses.
+    std::vector<std::shared_ptr<void>> ptrHolder;
+    ptrHolder.reserve(number);
+    void* dPtrArr[number];
+    auto* const src = static_cast<char*>(hPtr1.get());
+    for (size_t i = 0; i < number; i++) {
+        // Tag the start of chunk i with its own index.
+        *reinterpret_cast<size_t*>(src + size * i) = i;
+        auto ptr = buffer->GetDeviceBuffer(size);
+        // Stop here if no device buffer was handed out, instead of copying to a null address.
+        ASSERT_NE(ptr, nullptr);
+        dPtrArr[i] = ptr.get();
+        ptrHolder.emplace_back(ptr);
+    }
+    // Device-resident copy of the pointer table.
+    auto dPtrArrOnDev = buffer->MakeDeviceBuffer(sizeof(dPtrArr));
+    ASSERT_NE(dPtrArrOnDev, nullptr);
+    ASSERT_EQ(stream->HostToDevice(static_cast<void*>(dPtrArr), dPtrArrOnDev.get(), sizeof(dPtrArr)),
+              ok);
+    auto hPtr2 = buffer->MakeHostBuffer(size * number);
+    ASSERT_NE(hPtr2, nullptr);
+    auto* const dPtrTable = static_cast<void**>(dPtrArrOnDev.get());
+    ASSERT_EQ(stream->HostToDeviceAsync(hPtr1.get(), dPtrTable, size, number), ok);
+    ASSERT_EQ(stream->DeviceToHostAsync(dPtrTable, hPtr2.get(), size, number), ok);
+    ASSERT_EQ(stream->Synchronized(), ok);
+    const auto* const dst = static_cast<const char*>(hPtr2.get());
+    for (size_t i = 0; i < number; i++) {
+        ASSERT_EQ(*reinterpret_cast<const size_t*>(dst + size * i), i);
+    }
+}
@@ -0,0 +1,149 @@
+# -*- coding: utf-8 -*-
+#
+# MIT License
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+import time
+from functools import wraps
+
+import cupy
+import numpy as np
+
+from ucm.shared.trans import ucmtrans
+
+
+def test_wrap(func):
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        print(f"========>> Running in {func.__name__}:")
+        result = func(*args, **kwargs)
+        print()
+        return result
+
+    return wrapper
+
+
+def make_host_memory(size, number, dtype, fill=False):
+    host = cupy.cuda.alloc_pinned_memory(size * number)
+    host_np = np.frombuffer(host, dtype=dtype)
+    if fill:
+        fixed_len = min(1024, number)
+        host_np[:fixed_len] = np.arange(fixed_len, dtype=dtype)
+    print("make:", host_np.shape, host_np.itemsize, host_np)
+    return host
+
+
+def compare(host1, host2, dtype):
+    host1_np = np.frombuffer(host1, dtype=dtype)
+    host2_np = np.frombuffer(host2, dtype=dtype)
+    print("compare[1]:", host1_np.shape, host1_np.itemsize, host1_np)
+    print("compare[2]:", host2_np.shape, host2_np.itemsize, host2_np)
+    return np.array_equal(host1_np, host2_np)
+
+
+@test_wrap
+def trans_with_ce(d, size, number, dtype):
+    s = d.MakeStream()
+    host1 = make_host_memory(size, number, dtype, True)
+    device = [cupy.empty(size, dtype=np.uint8) for _ in range(number)]
+    device_ptr = np.array([d.data.ptr for d in device], dtype=np.uint64)
+    host2 = make_host_memory(size, number, dtype)
+    tp = time.perf_counter()
+    s.HostToDeviceScatter(host1.ptr, device_ptr, size, number)
+    s.DeviceToHostGather(device_ptr, host2.ptr, size, number)
+    cost = time.perf_counter() - tp
+    print(f"cost: {cost}s")
+    print(f"bandwidth: {size * number / cost / 1e9}GB/s")
+    assert compare(host1, host2, dtype)
+
+
+@test_wrap
+def trans_with_sm(d, size, number, dtype):
+    s = d.MakeSMStream()
+    host1 = make_host_memory(size, number, dtype, True)
+    device = [cupy.empty(size, dtype=np.uint8) for _ in range(number)]
+    device_ptr = np.array([d.data.ptr for d in device], dtype=np.uint64)
+    device_ptr_cupy = cupy.empty(number, dtype=np.uint64)
+    device_ptr_cupy.set(device_ptr)
+    host2 = make_host_memory(size, number, dtype)
+    tp = time.perf_counter()
+    s.HostToDeviceScatter(host1.ptr, device_ptr_cupy.data.ptr, size, number)
+    s.DeviceToHostGather(device_ptr_cupy.data.ptr, host2.ptr, size, number)
+    cost = time.perf_counter() - tp
+    print(f"cost: {cost}s")
+    print(f"bandwidth: {size * number / cost / 1e9}GB/s")
+    assert compare(host1, host2, dtype)
+
+
+@test_wrap
+def trans_with_ce_async(d, size, number, dtype):
+    s = d.MakeStream()
+    host1 = make_host_memory(size, number, dtype, True)
+    device = [cupy.empty(size, dtype=np.uint8) for _ in range(number)]
+    device_ptr = np.array([d.data.ptr for d in device], dtype=np.uint64)
+    host2 = make_host_memory(size, number, dtype)
+    tp = time.perf_counter()
+    s.HostToDeviceScatterAsync(host1.ptr, device_ptr, size, number)
+    s.DeviceToHostGatherAsync(device_ptr, host2.ptr, size, number)
+    s.Synchronized()
+    cost = time.perf_counter() - tp
+    print(f"cost: {cost}s")
+    print(f"bandwidth: {size * number / cost / 1e9}GB/s")
+    assert compare(host1, host2, dtype)
+
+
+@test_wrap
+def trans_with_sm_async(d, size, number, dtype):
+    s = d.MakeSMStream()
+    host1 = make_host_memory(size, number, dtype, True)
+    device = [cupy.empty(size, dtype=np.uint8) for _ in range(number)]
+    device_ptr = np.array([d.data.ptr for d in device], dtype=np.uint64)
+    device_ptr_cupy = cupy.empty(number, dtype=np.uint64)
+    device_ptr_cupy.set(device_ptr)
+    host2 = make_host_memory(size, number, dtype)
+    tp = time.perf_counter()
+    s.HostToDeviceScatterAsync(host1.ptr, device_ptr_cupy.data.ptr, size, number)
+    s.DeviceToHostGatherAsync(device_ptr_cupy.data.ptr, host2.ptr, size, number)
+    s.Synchronized()
+    cost = time.perf_counter() - tp
+    print(f"cost: {cost}s")
+    print(f"bandwidth: {size * number / cost / 1e9}GB/s")
+    assert compare(host1, host2, dtype)
+
+
+def main():
+    device_id = 0
+    size = 36 * 1024
+    number = 61 * 64
+    dtype = np.float16
+    print(f"ucmtrans: {ucmtrans.commit_id}-{ucmtrans.build_type}")
+    cupy.cuda.Device(device_id).use()
+    d = ucmtrans.Device()
+    d.Setup(device_id)
+    trans_with_ce(d, size, number, dtype)
+    trans_with_sm(d, size, number, dtype)
+    trans_with_ce_async(d, size, number, dtype)
+    trans_with_sm_async(d, size, number, dtype)
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,14 @@
+# Select the backend directory that matches RUNTIME_ENVIRONMENT.
+# NOTE(review): presumably each backend directory defines the `trans` target linked below - confirm.
+# NOTE(review): the top-level option text also lists "musa", which selects no backend here;
+# in that configuration `trans` would not be defined by this file - confirm intended behaviour.
+if(RUNTIME_ENVIRONMENT STREQUAL "ascend")
+    add_subdirectory(ascend)
+endif()
+if(RUNTIME_ENVIRONMENT STREQUAL "cuda")
+    add_subdirectory(cuda)
+endif()
+if(RUNTIME_ENVIRONMENT STREQUAL "simu")
+    add_subdirectory(simu)
+endif()
+
+# Python extension module `ucmtrans`, built from every .cc file under ./cpy (recursively).
+file(GLOB_RECURSE UCMTRANS_CPY_SOURCE_FILES "./cpy/*.cc")
+pybind11_add_module(ucmtrans ${UCMTRANS_CPY_SOURCE_FILES})
+target_link_libraries(ucmtrans PRIVATE trans)
+# The built module is written into this source directory (next to __init__.py),
+# which is where the example script's `from ucm.shared.trans import ucmtrans` looks for it.
+set_target_properties(ucmtrans PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
@@ -0,0 +1,51 @@
+/**
+ * MIT License
+ *
+ * Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ * */
+#ifndef UNIFIEDCACHE_TRANS_BUFFER_H
+#define UNIFIEDCACHE_TRANS_BUFFER_H
+
+#include <memory>
+#include "status.h"
+
+namespace UC::Trans {
+
+/**
+ * Abstract interface for obtaining host-side and device-side transfer buffers.
+ *
+ * Every operation is pure virtual; the concrete behaviour is supplied by implementations
+ * that are not part of this header. The per-member notes below are inferred from the
+ * signatures only and should be confirmed against those implementations.
+ */
+class Buffer {
+public:
+    virtual ~Buffer() = default;
+
+    // Device-side buffers.
+    // NOTE(review): presumably allocates a single device buffer of `size` bytes - confirm.
+    virtual std::shared_ptr<void> MakeDeviceBuffer(size_t size) = 0;
+    // NOTE(review): presumably prepares `number` device buffers of `size` bytes each that are
+    // later handed out by GetDeviceBuffer() - confirm.
+    virtual Status MakeDeviceBuffers(size_t size, size_t number) = 0;
+    // NOTE(review): presumably returns one device buffer of at least `size` bytes; whether it
+    // can return null when none is available is not visible here - confirm.
+    virtual std::shared_ptr<void> GetDeviceBuffer(size_t size) = 0;
+
+    // Host-side counterparts of the three functions above.
+    virtual std::shared_ptr<void> MakeHostBuffer(size_t size) = 0;
+    virtual Status MakeHostBuffers(size_t size, size_t number) = 0;
+    virtual std::shared_ptr<void> GetHostBuffer(size_t size) = 0;
+
+    // Externally owned host memory.
+    // NOTE(review): presumably registers `ptr`/`size` with the device runtime (e.g. pinning) so
+    // it can be used for transfers, with UnregisterHostBuffer() undoing that - confirm.
+    virtual Status RegisterHostBuffer(void* ptr, size_t size) = 0;
+    virtual void UnregisterHostBuffer(void* ptr) = 0;
+    // NOTE(review): presumably maps a host pointer to the address the device uses to reach the
+    // same memory - confirm.
+    virtual void* GetHostPtrOnDevice(void* ptr) = 0;
+};
+
+} // namespace UC::Trans
+
+#endif
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,4 @@`
	`1`	`+add_subdirectory(shared)`
`1`	`2`	`if(BUILD_UCM_STORE)`
`2`	`3`	`add_subdirectory(store)`
`3`	`4`	`endif()`