From 67ccff7c798d9be37af244ed4f909d459851cb50 Mon Sep 17 00:00:00 2001
From: Yaxing Cai
Date: Sat, 18 Oct 2025 05:51:48 +0000
Subject: [PATCH 01/47] kernel library guide

---
 docs/guides/kernel_library_guide.rst | 129 +++++++++++++++++++++++++++
 1 file changed, 129 insertions(+)
 create mode 100644 docs/guides/kernel_library_guide.rst

diff --git a/docs/guides/kernel_library_guide.rst b/docs/guides/kernel_library_guide.rst
new file mode 100644
index 00000000..78b72374
--- /dev/null
+++ b/docs/guides/kernel_library_guide.rst
@@ -0,0 +1,129 @@
+====================
+Kernel Library Guide
+====================
+
+This guide serves as a quick start for composing a kernel from scratch, or migrating a kernel from external frameworks. It covers the core concepts in TVM FFI, such as tensors and streams.
+
+Tensor
+======
+
+Tensor is the most important input for a kernel library. In PyTorch C++ extensions, a kernel library usually takes ``at::Tensor`` as tensor input. In TVM FFI, we introduce two tensor types, ``ffi::Tensor`` and ``ffi::TensorView``.
+
+Tensor and TensorView
+---------------------
+
+Both ``ffi::Tensor`` and ``ffi::TensorView`` are designed to represent tensors in the TVM FFI ecosystem. The main difference is whether it is an owning tensor pointer.
+
+:ffi::Tensor:
+  ``ffi::Tensor`` is a fully owning tensor pointer, pointing to a TVM FFI tensor object. TVM FFI handles the lifetime of ``ffi::Tensor`` by retaining a strong reference.
+
+:ffi::TensorView:
+  ``ffi::TensorView`` is a lightweight non-owning tensor pointer, pointing to a TVM FFI tensor or an external tensor object. TVM FFI does not retain its reference, so users are responsible for ensuring the lifetime of the tensor object to which the ``ffi::TensorView`` points.
+
+TVM FFI can automatically convert an input tensor on the Python side, e.g. ``torch.Tensor``, to either ``ffi::Tensor`` or ``ffi::TensorView`` on the C++ side, depending on the C++ function arguments. However, for more flexibility and better compatibility, we **recommend** using ``ffi::TensorView`` in practice, since some frameworks, like JAX, cannot provide the strongly referenced tensor that ``ffi::Tensor`` expects.
+
+Tensor as Argument
+------------------
+
+Typically, we expect all tensors, including the output tensor, to be pre-allocated on the Python side and passed in via TVM FFI, and TVM FFI will convert them into ``ffi::TensorView`` at runtime. For optional arguments, ``ffi::Optional`` is the best practice. Here is an example of a kernel definition on the C++ side and the corresponding call on the Python side.
+
+.. code-block:: c++
+
+   // Kernel definition
+   void func(ffi::TensorView input, ffi::Optional<ffi::TensorView> optional_input, ffi::TensorView output, ffi::TensorView workspace);
+
+.. code-block:: python
+
+   # Kernel calling
+   input = torch.tensor(...)
+   output = torch.empty(...)
+   workspace = torch.empty(...)
+   func(input, None, output, workspace)
+
+Ideally, we expect the kernel function to have ``void`` as its return type. However, if it is necessary to return an ``ffi::Tensor``, remember to convert the returned ``ffi::Tensor`` back to the original tensor type on the Python side, e.g. with ``torch.from_dlpack``.
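+
+For instance, a minimal sketch of this conversion on the Python side (the library path and the ``alloc_output`` kernel name here are hypothetical) could look like:
+
+.. code-block:: python
+
+   import torch
+   import tvm_ffi
+
+   mod = tvm_ffi.load_module("my_kernels.so")         # hypothetical kernel library
+   alloc_output = mod.get_function("alloc_output")    # hypothetical kernel that returns an ffi::Tensor
+   out_torch = torch.from_dlpack(alloc_output(...))   # convert back to torch.Tensor via DLPack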
+
+Tensor Attributes
+-----------------
+
+For the sake of convenience, ``ffi::TensorView`` and ``ffi::Tensor`` align the following attribute retrieval methods with the ``at::Tensor`` interface:
+
+``dim``, ``sizes``, ``size``, ``strides``, ``stride``, ``numel``, ``data_ptr``, ``device``, ``is_contiguous``
+
+:DLDataType:
+  In TVM FFI, tensor data types are stored as ``DLDataType``, which is defined by the DLPack protocol.
+
+:DLDevice:
+  In TVM FFI, tensor device information is stored as ``DLDevice``, which is defined by the DLPack protocol.
+
+:ShapeView:
+  In TVM FFI, tensor shape and stride queries return a ``ShapeView``. It is an iterable data structure storing the shape or stride data as an ``int64_t`` array.
+
+Tensor Allocation
+-----------------
+
+Sometimes we have to allocate tensors within the kernel. TVM FFI provides several methods to allocate tensors.
+
+:FromNDAlloc:
+  ``FromNDAlloc`` is the most basic tensor allocator. Besides the basic attributes like shape, data type and device, it requires a custom allocator struct to handle allocation and freeing. The allocator must provide ``void AllocData(DLTensor*)`` and ``void FreeData(DLTensor*)`` methods. Here are examples of CPU, CUDA and NVSHMEM allocation:
+
+  .. code-block:: c++
+
+    // CPU Allocator
+    struct CPUNDAlloc {
+      void AllocData(DLTensor* tensor) { tensor->data = malloc(ffi::GetDataSize(*tensor)); }
+      void FreeData(DLTensor* tensor) { free(tensor->data); }
+    };
+
+    // CUDA Allocator
+    struct CUDANDAlloc {
+      void AllocData(DLTensor* tensor) {
+        size_t data_size = ffi::GetDataSize(*tensor);
+        void* ptr = nullptr;
+        cudaError_t err = cudaMalloc(&ptr, data_size);
+        TVM_FFI_ICHECK_EQ(err, cudaSuccess) << "cudaMalloc failed: " << cudaGetErrorString(err);
+        tensor->data = ptr;
+      }
+      void FreeData(DLTensor* tensor) {
+        if (tensor->data != nullptr) {
+          cudaError_t err = cudaFree(tensor->data);
+          TVM_FFI_ICHECK_EQ(err, cudaSuccess) << "cudaFree failed: " << cudaGetErrorString(err);
+          tensor->data = nullptr;
+        }
+      }
+    };
+
+    // NVSHMEM Allocator
+    struct NVSHMEMNDAlloc {
+      void AllocData(DLTensor* tensor) {
+        size_t size = tvm::ffi::GetDataSize(*tensor);
+        tensor->data = nvshmem_malloc(size);
+        TVM_FFI_ICHECK_NE(tensor->data, nullptr) << "nvshmem_malloc failed. size: " << size;
+      }
+      void FreeData(DLTensor* tensor) { nvshmem_free(tensor->data); }
+    };
+
+    // Allocator usage
+    ffi::Tensor cpu_tensor = ffi::Tensor::FromNDAlloc(CPUNDAlloc(), ...);
+    ffi::Tensor cuda_tensor = ffi::Tensor::FromNDAlloc(CUDANDAlloc(), ...);
+    ffi::Tensor nvshmem_tensor = ffi::Tensor::FromNDAlloc(NVSHMEMNDAlloc(), ...);
+
+:FromEnvAlloc:
+  For the case of using an external tensor allocator, like ``at::empty`` in PyTorch C++ extensions, ``FromEnvAlloc`` is the better choice. Besides the basic attributes like shape, data type and device, it requires the thread-local environment allocator ``TVMFFIEnvTensorAlloc``, which gets the global tensor allocator in the current context. The context can be switched based on the arguments of the kernel.
+
+:FromDLPack:
+  ``FromDLPack`` enables creating ``ffi::Tensor`` from ``DLManagedTensor*``.
+
+:FromDLPackVersioned:
+  ``FromDLPackVersioned`` enables creating ``ffi::Tensor`` from ``DLManagedTensorVersioned*``.
+
+Stream
+======
+
+TVM FFI maintains the stream context per device type and index. Use ``TVMFFIEnvGetStream`` to get the current stream on a device:
+
+.. 
code-block:: c++ + + ffi::DLDevice device = input.device(); + cudaStream_t stream = reinterpret_cast(TVMFFIEnvGetStream(device.device_type, device.device_id)); + +Similar to ``TVMFFIEnvTensorAlloc``, TVM FFI updates the context stream based on the arguments of the kernel, by calling ``TVMFFIEnvSetStream``. \ No newline at end of file From 57d6c03cb1dc47f140fd1adb5f858802d28b1547 Mon Sep 17 00:00:00 2001 From: Yaxing Cai Date: Sat, 18 Oct 2025 05:55:38 +0000 Subject: [PATCH 02/47] fix lint --- docs/guides/kernel_library_guide.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/guides/kernel_library_guide.rst b/docs/guides/kernel_library_guide.rst index 78b72374..4815b271 100644 --- a/docs/guides/kernel_library_guide.rst +++ b/docs/guides/kernel_library_guide.rst @@ -15,10 +15,10 @@ Tensor and TensorView Both ``ffi::Tensor`` and ``ffi::TensorView`` are designed to represent tensors in TVM FFI eco-system. The main difference is whether it is an owning tensor pointer. :ffi::Tensor: - ``ffi::Tensor`` is a completely onwing tensor pointer, pointing to TVM FFI tensor object. TVM FFI handles the lifetime of ``ffi::Tensor`` by retaining a strong reference. + ``ffi::Tensor`` is a completely onwing tensor pointer, pointing to TVM FFI tensor object. TVM FFI handles the lifetime of ``ffi::Tensor`` by retaining a strong reference. :ffi::TensorView: - ``ffi::TensorView`` is a light weight non-owning tnesor pointer, pointeing to a TVM FFI tensor or external tensor object. TVM FFI does not retain its reference. So users are responsible for ensuring the lifetime of tensor object to which the ``ffi::TensorView`` points. + ``ffi::TensorView`` is a light weight non-owning tnesor pointer, pointeing to a TVM FFI tensor or external tensor object. TVM FFI does not retain its reference. So users are responsible for ensuring the lifetime of tensor object to which the ``ffi::TensorView`` points. TVM FFI can automatically convert the input tensor at Python side, e.g. ``torch.Tensor``, to both ``ffi::Tensor`` or ``ffi::TensorView`` at C++ side, depends on the C++ function arguments. However, for more flexibility and better compatibility, we **recommand** to use ``ffi::TensorView`` in practice. Since some frameworks, like JAX, cannot provide strong referenced tensor, as ``ffi::Tensor`` expected. @@ -126,4 +126,4 @@ TVM FFI maintains the stream context per device type and index. Use ``TVMFFIEnvG ffi::DLDevice device = input.device(); cudaStream_t stream = reinterpret_cast(TVMFFIEnvGetStream(device.device_type, device.device_id)); -Similar to ``TVMFFIEnvTensorAlloc``, TVM FFI updates the context stream based on the arguments of the kernel, by calling ``TVMFFIEnvSetStream``. \ No newline at end of file +Similar to ``TVMFFIEnvTensorAlloc``, TVM FFI updates the context stream based on the arguments of the kernel, by calling ``TVMFFIEnvSetStream``. From 58ce14c3bbc4b888587d93675a236f4e88f89c4d Mon Sep 17 00:00:00 2001 From: Yaxing Cai Date: Sat, 18 Oct 2025 06:06:20 +0000 Subject: [PATCH 03/47] add licence --- docs/guides/kernel_library_guide.rst | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/docs/guides/kernel_library_guide.rst b/docs/guides/kernel_library_guide.rst index 4815b271..35555cc1 100644 --- a/docs/guides/kernel_library_guide.rst +++ b/docs/guides/kernel_library_guide.rst @@ -1,3 +1,20 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. 
distributed with this work for additional information
+.. regarding copyright ownership. The ASF licenses this file
+.. to you under the Apache License, Version 2.0 (the
+.. "License"); you may not use this file except in compliance
+.. with the License. You may obtain a copy of the License at
+..
+..   http://www.apache.org/licenses/LICENSE-2.0
+..
+.. Unless required by applicable law or agreed to in writing,
+.. software distributed under the License is distributed on an
+.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+.. KIND, either express or implied. See the License for the
+.. specific language governing permissions and limitations
+.. under the License.
+
 ====================
 Kernel Library Guide
 ====================

From f18fec29a793b005b197edbe2abadc16753aa212 Mon Sep 17 00:00:00 2001
From: Yaxing Cai
Date: Sat, 18 Oct 2025 06:17:40 +0000
Subject: [PATCH 04/47] add index

---
 docs/index.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/index.rst b/docs/index.rst
index 5e8fd0cb..81a1a953 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -51,6 +51,7 @@ Table of Contents
    guides/stable_c_abi.md
    guides/compiler_integration.md
    guides/build_from_source.md
+   guides/kernel_library_guide.rst

 .. toctree::

From c2a5a17843b52e05c2042d6f86f69bbb470063e1 Mon Sep 17 00:00:00 2001
From: Yaxing Cai
Date: Mon, 20 Oct 2025 08:06:44 +0000
Subject: [PATCH 05/47] upd

---
 docs/guides/kernel_library_guide.rst | 104 +++++++++++++++------------
 1 file changed, 60 insertions(+), 44 deletions(-)

diff --git a/docs/guides/kernel_library_guide.rst b/docs/guides/kernel_library_guide.rst
index 35555cc1..90559c7a 100644
--- a/docs/guides/kernel_library_guide.rst
+++ b/docs/guides/kernel_library_guide.rst
@@ -19,45 +19,25 @@ Kernel Library Guide
 ====================

-This guide serves as a quick start for composing a kernel from scratch, or migrating a kernel from external frameworks. It covers the core concepts in TVM FFI, such as tensors and streams.
+This guide serves as a quick start for shipping Python-version- and framework-agnostic kernel libraries with TVM FFI.

 Tensor
 ======

-Tensor is the most important input for a kernel library. In PyTorch C++ extensions, a kernel library usually takes ``at::Tensor`` as tensor input. In TVM FFI, we introduce two tensor types, ``ffi::Tensor`` and ``ffi::TensorView``.
+In TVM FFI, we support two tensor constructs, ``ffi::Tensor`` and ``ffi::TensorView``, that can be used to represent a tensor from machine learning frameworks such as PyTorch, XLA, and JAX.

 Tensor and TensorView
 ---------------------

-Both ``ffi::Tensor`` and ``ffi::TensorView`` are designed to represent tensors in the TVM FFI ecosystem. The main difference is whether it is an owning tensor pointer.
+Both ``ffi::Tensor`` and ``ffi::TensorView`` are designed to represent tensors from ML frameworks that interact with the TVM FFI ABI. The main difference is whether it is an owning tensor structure.

-:ffi::Tensor:
-  ``ffi::Tensor`` is a fully owning tensor pointer, pointing to a TVM FFI tensor object. TVM FFI handles the lifetime of ``ffi::Tensor`` by retaining a strong reference.
+ffi::Tensor
+  ``ffi::Tensor`` is a fully owning tensor pointer, pointing to a TVM FFI tensor object. TVM FFI handles the lifetime of ``ffi::Tensor`` by retaining a strong reference.

-:ffi::TensorView:
-  ``ffi::TensorView`` is a lightweight non-owning tensor pointer, pointing to a TVM FFI tensor or an external tensor object. TVM FFI does not retain its reference,
so users are responsible for ensuring the lifetime of the tensor object to which the ``ffi::TensorView`` points.
+ffi::TensorView
+  ``ffi::TensorView`` is a non-owning view of an existing tensor. It is backed by the ``DLTensor`` structure in DLPack. Since it is a non-owning view, it is the user's responsibility to ensure the lifetime of the underlying tensor data and attributes of the viewed tensor object.

-TVM FFI can automatically convert an input tensor on the Python side, e.g. ``torch.Tensor``, to either ``ffi::Tensor`` or ``ffi::TensorView`` on the C++ side, depending on the C++ function arguments. However, for more flexibility and better compatibility, we **recommend** using ``ffi::TensorView`` in practice, since some frameworks, like JAX, cannot provide the strongly referenced tensor that ``ffi::Tensor`` expects.
-
-Tensor as Argument
-------------------
-
-Typically, we expect all tensors, including the output tensor, to be pre-allocated on the Python side and passed in via TVM FFI, and TVM FFI will convert them into ``ffi::TensorView`` at runtime. For optional arguments, ``ffi::Optional`` is the best practice. Here is an example of a kernel definition on the C++ side and the corresponding call on the Python side.
-
-.. code-block:: c++
-
-   // Kernel definition
-   void func(ffi::TensorView input, ffi::Optional<ffi::TensorView> optional_input, ffi::TensorView output, ffi::TensorView workspace);
-
-.. code-block:: python
-
-   # Kernel calling
-   input = torch.tensor(...)
-   output = torch.empty(...)
-   workspace = torch.empty(...)
-   func(input, None, output, workspace)
-
-Ideally, we expect the kernel function to have ``void`` as its return type. However, if it is necessary to return an ``ffi::Tensor``, remember to convert the returned ``ffi::Tensor`` back to the original tensor type on the Python side, e.g. with ``torch.from_dlpack``.
+We **recommend** using ``ffi::TensorView`` when possible, as it supports more cases, including cases where only a view rather than a strong reference is passed, such as an XLA buffer. It is also more lightweight.

 Tensor Attributes
 -----------------
@@ -66,21 +46,37 @@ For the sake of convenience, ``ffi::TensorView`` and ``ffi::Tensor`` align the f
 ``dim``, ``sizes``, ``size``, ``strides``, ``stride``, ``numel``, ``data_ptr``, ``device``, ``is_contiguous``

-:DLDataType:
+DLDataType
   In TVM FFI, tensor data types are stored as ``DLDataType``, which is defined by the DLPack protocol.

-:DLDevice:
+DLDevice
   In TVM FFI, tensor device information is stored as ``DLDevice``, which is defined by the DLPack protocol.

-:ShapeView:
+ShapeView
   In TVM FFI, tensor shape and stride queries return a ``ShapeView``. It is an iterable data structure storing the shape or stride data as an ``int64_t`` array.

 Tensor Allocation
 -----------------

-Sometimes we have to allocate tensors within the kernel. TVM FFI provides several methods to allocate tensors.
+TVM FFI provides several methods to allocate tensors when dynamic tensor allocation is necessary.
+
+FromEnvAlloc
+  Usually TVM FFI works together with an ML framework that has its own tensor allocator. ``FromEnvAlloc`` is tailor-made for this case, so that the framework tensor allocator can be used when allocating an ``ffi::Tensor``. TVM FFI automatically sets the framework tensor allocator when a corresponding framework tensor exists in the FFI arguments. For example, when calling TVM FFI packed kernels, if there are any input arguments of type ``torch.Tensor`` on the Python side, TVM FFI will bind ``at::empty`` as the global framework tensor allocator - ``TVMFFIEnvTensorAlloc``.
Here is an example:
+
+  .. code-block::
+
+    void func(ffi::TensorView arg0, ffi::TensorView arg1, ...) {
+      ffi::Tensor tensor0 = ffi::Tensor::FromEnvAlloc(TVMFFIEnvTensorAlloc, ...);
+      ffi::Tensor tensor1 = ffi::Tensor::FromDLPackVersioned(at::toDLPackImpl(at::empty(...)));
+      // tensor0 and tensor1 are equivalent once some arg{i} on the Python side has type torch.Tensor.
+    }
-:FromNDAlloc:
+
+  We **recommend** using ``FromEnvAlloc`` when possible, since the framework tensor allocator has advantages:
+
+  * It benefits from the framework's native caching allocator or related mechanisms.
+  * It helps the framework track memory usage and plan globally.
+
+FromNDAlloc
   ``FromNDAlloc`` is the most basic tensor allocator. Besides the basic attributes like shape, data type and device, it requires a custom allocator struct to handle allocation and freeing. The allocator must provide ``void AllocData(DLTensor*)`` and ``void FreeData(DLTensor*)`` methods. Here are examples of CPU, CUDA and NVSHMEM allocation:

   .. code-block:: c++

     // CPU Allocator
     struct CPUNDAlloc {
       void AllocData(DLTensor* tensor) { tensor->data = malloc(ffi::GetDataSize(*tensor)); }
       void FreeData(DLTensor* tensor) { free(tensor->data); }
     };

     // CUDA Allocator
     struct CUDANDAlloc {
       void AllocData(DLTensor* tensor) {
         size_t data_size = ffi::GetDataSize(*tensor);
         void* ptr = nullptr;
         cudaError_t err = cudaMalloc(&ptr, data_size);
         TVM_FFI_ICHECK_EQ(err, cudaSuccess) << "cudaMalloc failed: " << cudaGetErrorString(err);
         tensor->data = ptr;
       }
       void FreeData(DLTensor* tensor) {
         if (tensor->data != nullptr) {
           cudaError_t err = cudaFree(tensor->data);
           TVM_FFI_ICHECK_EQ(err, cudaSuccess) << "cudaFree failed: " << cudaGetErrorString(err);
           tensor->data = nullptr;
         }
       }
     };

     // NVSHMEM Allocator
     struct NVSHMEMNDAlloc {
       void AllocData(DLTensor* tensor) {
         size_t size = tvm::ffi::GetDataSize(*tensor);
         tensor->data = nvshmem_malloc(size);
         TVM_FFI_ICHECK_NE(tensor->data, nullptr) << "nvshmem_malloc failed. size: " << size;
       }
       void FreeData(DLTensor* tensor) { nvshmem_free(tensor->data); }
     };

     // Allocator usage
     ffi::Tensor cpu_tensor = ffi::Tensor::FromNDAlloc(CPUNDAlloc(), ...);
     ffi::Tensor cuda_tensor = ffi::Tensor::FromNDAlloc(CUDANDAlloc(), ...);
     ffi::Tensor nvshmem_tensor = ffi::Tensor::FromNDAlloc(NVSHMEMNDAlloc(), ...);

-:FromEnvAlloc:
-  For the case of using an external tensor allocator, like ``at::empty`` in PyTorch C++ extensions, ``FromEnvAlloc`` is the better choice. Besides the basic attributes like shape, data type and device, it requires the thread-local environment allocator ``TVMFFIEnvTensorAlloc``, which gets the global tensor allocator in the current context. The context can be switched based on the arguments of the kernel.
+FromDLPack
+  ``FromDLPack`` enables creating ``ffi::Tensor`` from ``DLManagedTensor*``, working with ``ToDLPack`` for the DLPack C tensor object (``DLTensor``) exchange protocol. Both are used for the pre-v1.0 DLPack API.

-:FromDLPack:
-  ``FromDLPack`` enables creating ``ffi::Tensor`` from ``DLManagedTensor*``.
+FromDLPackVersioned
+  ``FromDLPackVersioned`` enables creating ``ffi::Tensor`` from ``DLManagedTensorVersioned*``, working with ``ToDLPackVersioned`` for the DLPack C tensor object (``DLTensor``) exchange protocol. Both are used for the post-v1.0 DLPack API.

-:FromDLPackVersioned:
-  ``FromDLPackVersioned`` enables creating ``ffi::Tensor`` from ``DLManagedTensorVersioned*``.
+Tensor Passing FFI
+------------------
+
+TVM FFI does two conversions when calling the compiled kernels to pass tensors across the FFI. It first converts the framework tensor on the Python side to ``ffi::Tensor`` or ``ffi::TensorView``. It then converts any output ``ffi::Tensor`` back to a framework tensor on the Python side. When converting back, TVM FFI converts to the same framework as the arguments. If no framework tensors are provided in the arguments, TVM FFI still outputs tensors of type ``tvm_ffi.core.Tensor``.
+
+In practice, we **recommend** that all input and output tensors are pre-allocated on the Python side by the framework. For optional arguments, use ``ffi::Optional`` as a wrapper. The kernel function then returns nothing, with a ``void`` return type. Here is a typical pattern for TVM FFI interacting with PyTorch:
+
+.. code-block:: c++
+
+   // Kernel definition
+   void func(ffi::TensorView input, ffi::Optional<ffi::TensorView> optional_input, ffi::TensorView output, ffi::TensorView workspace);

-.. code-block:: python
+.. code-block:: python

-   # Kernel calling
-   input = torch.tensor(...)
-   output = torch.empty(...)
-   workspace = torch.empty(...)
-   func(input, None, output, workspace)
+   # Kernel calling
+   input: torch.Tensor = ...
+   output: torch.Tensor = ...
+   workspace: torch.Tensor = ...
+   func(input, None, output, workspace)

 Stream
 ======

-TVM FFI maintains the stream context per device type and index. Use ``TVMFFIEnvGetStream`` to get the current stream on a device:
+TVM FFI maintains the stream context per device type and index, and automatically updates the context stream when handling the arguments. For example, if there is an argument ``torch.Tensor(device="cuda:3")``, TVM FFI will set the current stream of CUDA device 3 from the current torch context stream. Then, on the C++ side, use ``TVMFFIEnvGetStream`` to get the current stream on the specific device. Here is an example:

 .. code-block:: c++

-   ffi::DLDevice device = input.device();
-   cudaStream_t stream = reinterpret_cast(TVMFFIEnvGetStream(device.device_type, device.device_id));
-
-Similar to ``TVMFFIEnvTensorAlloc``, TVM FFI updates the context stream based on the arguments of the kernel, by calling ``TVMFFIEnvSetStream``.
+   void func(ffi::TensorView arg0, ...) {
+     ffi::DLDevice device = arg0.device();
+     cudaStream_t stream0 = reinterpret_cast<cudaStream_t>(TVMFFIEnvGetStream(device.device_type, device.device_id));
+     cudaStream_t stream1 = reinterpret_cast<cudaStream_t>(at::cuda::getCurrentCUDAStream(device.device_id).stream());
+     // stream0 and stream1 are the same CUDA stream handle as long as arg0 is a torch.Tensor on the Python side,
+     // or any other torch.Tensor argument on the Python side is on the same device as arg0.
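+     // A hypothetical next step would launch device work on this stream, e.g.
+     //   my_kernel<<<grid, block, 0, stream0>>>(static_cast<float*>(arg0.data_ptr()), arg0.numel());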
} - \ No newline at end of file From f8b8b921ec4f9b5227f72b9902e02b40a6bf8593 Mon Sep 17 00:00:00 2001 From: Yaxing Cai Date: Mon, 20 Oct 2025 08:09:02 +0000 Subject: [PATCH 07/47] fix --- docs/guides/kernel_library_guide.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guides/kernel_library_guide.rst b/docs/guides/kernel_library_guide.rst index 84238125..700fc6d8 100644 --- a/docs/guides/kernel_library_guide.rst +++ b/docs/guides/kernel_library_guide.rst @@ -63,7 +63,7 @@ TVM FFI provides several methods to allocate tensors, when dynamic tensor alloca FromEnvAlloc Usually TVM FFI works together with a ML framework with its own tensor allocator. ``FromEnvAlloc`` is tailor-made for this case, so that it is possible to use framework tensor allocator when allocating ``ffi::Tensor``. And TVM FFI automatically sets the framework tensor allocator when the corresponding framework tensor exists in FFI arguments. For example, when calling TVM FFI packed kernels, if there are any input arguments of type ``torch.Tensor`` at Python side, TVM FFI will bind the ``at::Empty`` as the global framework tensor allocator - ``TVMFFIEnvTensorAlloc``. Here is an example: - .. code-block::c++ + .. code-block:: c++ void func(ffi::TensorView arg0, ffi::TensorView arg1, ...) { ffi::Tensor tensor0 = ffi::Tensor::FromEnvAlloc(TVMFFIEnvTensorAlloc, ...); From 507d17ee3fb712c63d9794aed5e32d4b38520e00 Mon Sep 17 00:00:00 2001 From: Yaxing Cai Date: Mon, 20 Oct 2025 08:11:56 +0000 Subject: [PATCH 08/47] upd --- docs/guides/kernel_library_guide.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guides/kernel_library_guide.rst b/docs/guides/kernel_library_guide.rst index 700fc6d8..034adedd 100644 --- a/docs/guides/kernel_library_guide.rst +++ b/docs/guides/kernel_library_guide.rst @@ -24,7 +24,7 @@ This guide serves as a quick start for shipping python version and framework agn Tensor ====== -In TVM FFI, we support two types of tensor constructs, ``ffi::Tensor`` and ``ffi::TensorView`` that can be used to represent a tensor from machine learning frameworks, such as PyTorch, XLA, JAX, and so on. +TVM FFI provide minimal set of data structures to represent tensors from frameworks and allows us to build kernels for frameworks. In TVM FFI, we support two types of tensor constructs, ``ffi::Tensor`` and ``ffi::TensorView`` that can be used to represent a tensor from machine learning frameworks, such as PyTorch, XLA, JAX, and so on. 
Tensor and TensorView --------------------- From d87c16aaa30b7db3d91488aa39dfa50585a9645e Mon Sep 17 00:00:00 2001 From: Yaoyao Ding Date: Sat, 8 Nov 2025 20:23:39 -0500 Subject: [PATCH 09/47] add orcjit --- .github/workflows/orcjit-publish.yml | 164 +++++++++++++ .github/workflows/orcjit-tests.yml | 98 ++++++++ addons/tvm-ffi-orcjit/CMakeLists.txt | 105 ++++++++ addons/tvm-ffi-orcjit/MANIFEST.in | 7 + addons/tvm-ffi-orcjit/README.md | 230 ++++++++++++++++++ .../examples/quick-start/CMakeLists.txt | 54 ++++ .../examples/quick-start/README.md | 129 ++++++++++ .../examples/quick-start/add.cc | 44 ++++ .../examples/quick-start/compile.sh | 60 +++++ .../examples/quick-start/run.py | 93 +++++++ .../include/tvm/ffi/orcjit/orcjit_dylib.h | 115 +++++++++ .../include/tvm/ffi/orcjit/orcjit_session.h | 103 ++++++++ addons/tvm-ffi-orcjit/pyproject.toml | 55 +++++ .../python/tvm_ffi_orcjit/__init__.py | 54 ++++ .../python/tvm_ffi_orcjit/dylib.py | 133 ++++++++++ .../python/tvm_ffi_orcjit/session.py | 88 +++++++ addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc | 174 +++++++++++++ .../tvm-ffi-orcjit/src/ffi/orcjit_session.cc | 134 ++++++++++ addons/tvm-ffi-orcjit/tests/__init__.py | 17 ++ addons/tvm-ffi-orcjit/tests/test_basic.py | 155 ++++++++++++ 20 files changed, 2012 insertions(+) create mode 100644 .github/workflows/orcjit-publish.yml create mode 100644 .github/workflows/orcjit-tests.yml create mode 100644 addons/tvm-ffi-orcjit/CMakeLists.txt create mode 100644 addons/tvm-ffi-orcjit/MANIFEST.in create mode 100644 addons/tvm-ffi-orcjit/README.md create mode 100644 addons/tvm-ffi-orcjit/examples/quick-start/CMakeLists.txt create mode 100644 addons/tvm-ffi-orcjit/examples/quick-start/README.md create mode 100644 addons/tvm-ffi-orcjit/examples/quick-start/add.cc create mode 100755 addons/tvm-ffi-orcjit/examples/quick-start/compile.sh create mode 100755 addons/tvm-ffi-orcjit/examples/quick-start/run.py create mode 100644 addons/tvm-ffi-orcjit/include/tvm/ffi/orcjit/orcjit_dylib.h create mode 100644 addons/tvm-ffi-orcjit/include/tvm/ffi/orcjit/orcjit_session.h create mode 100644 addons/tvm-ffi-orcjit/pyproject.toml create mode 100644 addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/__init__.py create mode 100644 addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/dylib.py create mode 100644 addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/session.py create mode 100644 addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc create mode 100644 addons/tvm-ffi-orcjit/src/ffi/orcjit_session.cc create mode 100644 addons/tvm-ffi-orcjit/tests/__init__.py create mode 100644 addons/tvm-ffi-orcjit/tests/test_basic.py diff --git a/.github/workflows/orcjit-publish.yml b/.github/workflows/orcjit-publish.yml new file mode 100644 index 00000000..6ed0d4d6 --- /dev/null +++ b/.github/workflows/orcjit-publish.yml @@ -0,0 +1,164 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +name: Publish TVM-FFI-OrcJIT + +on: + workflow_dispatch: + inputs: + branch: + description: "Branch or tag to publish (manual run)" + required: true + default: "main" + pypi_repository: + description: "PyPI repository (pypi or testpypi)" + required: true + default: "testpypi" + type: choice + options: + - pypi + - testpypi + +jobs: + build_wheels: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + include: + - {os: ubuntu-latest, arch: x86_64, linux_image: manylinux2014} + - {os: ubuntu-latest, arch: x86_64, linux_image: manylinux_2_28} + - {os: ubuntu-24.04-arm, arch: aarch64, linux_image: manylinux2014} + - {os: ubuntu-24.04-arm, arch: aarch64, linux_image: manylinux_2_28} + - {os: macos-14, arch: arm64, linux_image: ""} + + steps: + # Special handling for macOS arm64 + python 3.8/3.9 + - uses: actions/setup-python@v5 + with: + python-version: 3.9 + if: runner.os == 'macOS' && runner.arch == 'ARM64' + + - uses: astral-sh/setup-uv@v6 + if: matrix.os != 'macos-14' + + - uses: actions/checkout@v5 + with: + ref: ${{ inputs.branch }} + submodules: recursive + fetch-depth: 0 + fetch-tags: true + + - name: Print current commit + run: git log -1 --oneline + + - name: Install LLVM (Linux) + if: runner.os == 'Linux' + run: | + wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + sudo ./llvm.sh 18 + sudo apt-get install -y llvm-18-dev + + - name: Install LLVM (macOS) + if: runner.os == 'macOS' + run: | + brew install llvm@18 + echo "LLVM_DIR=$(brew --prefix llvm@18)" >> $GITHUB_ENV + + - name: Build wheels + uses: pypa/cibuildwheel@v3.1.4 + env: + CIBW_ARCHS_MACOS: ${{ matrix.arch }} + CIBW_ARCHS_LINUX: ${{ matrix.arch }} + CIBW_MANYLINUX_X86_64_IMAGE: ${{ matrix.linux_image }} + CIBW_MANYLINUX_AARCH64_IMAGE: ${{ matrix.linux_image }} + CIBW_BUILD_VERBOSITY: 1 + CIBW_BUILD: cp39-* cp310-* cp311-* cp312-* + CIBW_SKIP: "*-musllinux_*" + # Install LLVM in the manylinux container + CIBW_BEFORE_ALL_LINUX: | + yum install -y wget + wget https://github.com/llvm/llvm-project/releases/download/llvmorg-18.1.8/clang+llvm-18.1.8-x86_64-linux-gnu-ubuntu-18.04.tar.xz + tar -xf clang+llvm-18.1.8-x86_64-linux-gnu-ubuntu-18.04.tar.xz + export PATH="$PWD/clang+llvm-18.1.8-x86_64-linux-gnu-ubuntu-18.04/bin:$PATH" + export LLVM_DIR="$PWD/clang+llvm-18.1.8-x86_64-linux-gnu-ubuntu-18.04" + # Placeholder: may need to install tvm-ffi first + CIBW_BEFORE_BUILD: | + pip install apache-tvm-ffi || echo "apache-tvm-ffi not yet published" + + with: + package-dir: addons/tvm-ffi-orcjit + output-dir: wheelhouse + + - uses: actions/upload-artifact@v4 + with: + name: cibw-wheels-${{ matrix.os }}-${{ matrix.arch }}-${{ strategy.job-index }} + path: ./wheelhouse/*.whl + + build_sdist: + name: Build source distribution + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v5 + with: + ref: ${{ inputs.branch }} + submodules: recursive + fetch-depth: 0 + fetch-tags: true + + - uses: astral-sh/setup-uv@v6 + + - name: Build sdist + working-directory: addons/tvm-ffi-orcjit + run: pipx run build --sdist --outdir dist . 
+ + - name: Check metadata + working-directory: addons/tvm-ffi-orcjit + run: pipx run twine check dist/* + + - uses: actions/upload-artifact@v4 + with: + name: cibw-sdist + path: addons/tvm-ffi-orcjit/dist/*.tar.gz + + upload_pypi: + needs: [build_wheels, build_sdist] + runs-on: ubuntu-latest + environment: pypi + permissions: + id-token: write + attestations: write + steps: + - uses: actions/download-artifact@v4 + with: + # unpacks all CIBW artifacts into dist/ + pattern: cibw-* + path: dist + merge-multiple: true + + - name: Generate artifact attestation for sdist and wheels + uses: actions/attest-build-provenance@v1 + with: + subject-path: dist/* + + - name: Publish package distributions to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + attestations: true + verbose: true + repository-url: ${{ inputs.pypi_repository == 'testpypi' && 'https://test.pypi.org/legacy/' || '' }} diff --git a/.github/workflows/orcjit-tests.yml b/.github/workflows/orcjit-tests.yml new file mode 100644 index 00000000..97e343ce --- /dev/null +++ b/.github/workflows/orcjit-tests.yml @@ -0,0 +1,98 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: TVM-FFI-OrcJIT CI Tests + +on: + push: + branches: + - main + - dev + - orcjit + paths: + - 'addons/tvm-ffi-orcjit/**' + - '.github/workflows/tvm-ffi-orcjit/ci_test.yml' + pull_request: + paths: + - 'addons/tvm-ffi-orcjit/**' + - '.github/workflows/tvm-ffi-orcjit/ci_test.yml' + workflow_dispatch: + +jobs: + test: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest] + python-version: ['3.9', '3.10', '3.11', '3.12'] + + steps: + - uses: actions/checkout@v5 + with: + submodules: recursive + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install LLVM (Ubuntu) + if: runner.os == 'Linux' + run: | + wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + sudo ./llvm.sh 18 + sudo apt-get install -y llvm-18-dev clang-18 + echo "CC=clang-18" >> $GITHUB_ENV + echo "CXX=clang++-18" >> $GITHUB_ENV + + - name: Install LLVM (macOS) + if: runner.os == 'macOS' + run: | + brew install llvm@18 + echo "LLVM_DIR=$(brew --prefix llvm@18)" >> $GITHUB_ENV + echo "CC=$(brew --prefix llvm@18)/bin/clang" >> $GITHUB_ENV + echo "CXX=$(brew --prefix llvm@18)/bin/clang++" >> $GITHUB_ENV + + - name: Install uv + uses: astral-sh/setup-uv@v6 + + - name: Install tvm-ffi (core package) + run: | + uv pip install -e . --system + + - name: Build and install tvm-ffi-orcjit + working-directory: addons/tvm-ffi-orcjit + run: | + uv pip install -e . 
--system + + - name: Install test dependencies + run: | + uv pip install pytest --system + + - name: Run tests + working-directory: addons/tvm-ffi-orcjit + run: | + pytest tests/ -v + + - name: Run example + working-directory: addons/tvm-ffi-orcjit/examples/quick-start + run: | + cmake -B build + cmake --build build + python run.py diff --git a/addons/tvm-ffi-orcjit/CMakeLists.txt b/addons/tvm-ffi-orcjit/CMakeLists.txt new file mode 100644 index 00000000..92abf22b --- /dev/null +++ b/addons/tvm-ffi-orcjit/CMakeLists.txt @@ -0,0 +1,105 @@ +cmake_minimum_required(VERSION 3.18) +project( + tvm_ffi_orcjit + VERSION 0.1.0 + LANGUAGES CXX +) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) +set(CMAKE_POSITION_INDEPENDENT_CODE ON) + +# Find dependencies +find_package(LLVM REQUIRED CONFIG) +message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") +message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") + +# Add LLVM definitions and include directories early +separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND ${LLVM_DEFINITIONS}) +add_definitions(${LLVM_DEFINITIONS_LIST}) +include_directories(${LLVM_INCLUDE_DIRS}) + +# Find tvm-ffi Look for tvm-ffi in the parent repository first, then system +set(TVM_FFI_ROOT + "${CMAKE_CURRENT_SOURCE_DIR}/../.." + CACHE PATH "Path to tvm-ffi" +) + +if (EXISTS "${TVM_FFI_ROOT}/include/tvm/ffi/c_api.h") + message(STATUS "Using tvm-ffi from: ${TVM_FFI_ROOT}") + + # Create imported target for tvm_ffi + add_library(tvm_ffi SHARED IMPORTED) + set_target_properties( + tvm_ffi + PROPERTIES IMPORTED_LOCATION "${TVM_FFI_ROOT}/build/lib/libtvm_ffi.so" + INTERFACE_INCLUDE_DIRECTORIES + "${TVM_FFI_ROOT}/include;${TVM_FFI_ROOT}/3rdparty/dlpack/include" + ) + + # Set include directories (including src for internal headers) + include_directories(${TVM_FFI_ROOT}/include) + include_directories(${TVM_FFI_ROOT}/src) + include_directories(${TVM_FFI_ROOT}/3rdparty/dlpack/include) +else () + message(STATUS "Looking for system tvm-ffi") + find_package(tvm-ffi REQUIRED CONFIG) +endif () + +# LLVM components needed for ORC JIT v2 +llvm_map_components_to_libnames(LLVM_LIBS Core OrcJIT Support native) + +# Filter out non-existent targets +set(LLVM_LIBS_FILTERED) +foreach (lib ${LLVM_LIBS}) + if (TARGET ${lib}) + list(APPEND LLVM_LIBS_FILTERED ${lib}) + else () + message(STATUS "Skipping non-existent LLVM target: ${lib}") + endif () +endforeach () +set(LLVM_LIBS ${LLVM_LIBS_FILTERED}) + +# Source files +set(SOURCES src/ffi/orcjit_session.cc src/ffi/orcjit_dylib.cc) + +# Build shared library +add_library(tvm_ffi_orcjit SHARED ${SOURCES}) + +target_include_directories( + tvm_ffi_orcjit + PUBLIC $ $ + PRIVATE ${LLVM_INCLUDE_DIRS} +) + +target_link_libraries( + tvm_ffi_orcjit + PUBLIC tvm_ffi + PRIVATE LLVM +) + +# Compile definitions +separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND ${LLVM_DEFINITIONS}) +target_compile_definitions(tvm_ffi_orcjit PRIVATE ${LLVM_DEFINITIONS_LIST}) + +# Installation rules +install( + TARGETS tvm_ffi_orcjit + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib + RUNTIME DESTINATION bin +) + +install( + DIRECTORY include/ + DESTINATION include + FILES_MATCHING + PATTERN "*.h" +) + +# For Python package building +if (SKBUILD) + # Install shared library alongside Python modules + install(TARGETS tvm_ffi_orcjit LIBRARY DESTINATION .) 
+endif () diff --git a/addons/tvm-ffi-orcjit/MANIFEST.in b/addons/tvm-ffi-orcjit/MANIFEST.in new file mode 100644 index 00000000..83b838e8 --- /dev/null +++ b/addons/tvm-ffi-orcjit/MANIFEST.in @@ -0,0 +1,7 @@ +include README.md +include LICENSE +include pyproject.toml +include CMakeLists.txt +recursive-include include *.h +recursive-include src *.cc *.cpp +recursive-include python *.py diff --git a/addons/tvm-ffi-orcjit/README.md b/addons/tvm-ffi-orcjit/README.md new file mode 100644 index 00000000..d94db69b --- /dev/null +++ b/addons/tvm-ffi-orcjit/README.md @@ -0,0 +1,230 @@ + + + + + + + + + + + + + + + + + +# TVM-FFI OrcJIT + +A Python package that enables dynamic loading of TVM-FFI exported object files (`.o`) using LLVM ORC JIT v2. + +## Features + +- **Dynamic Loading**: Load compiled object files at runtime using LLVM's ORC JIT v2 +- **Incremental Loading**: Add multiple object files to the same loader instance +- **TVM-FFI Integration**: Seamlessly works with TVM-FFI's stable C ABI +- **Python API**: Simple Pythonic interface for loading and calling compiled functions +- **Standalone Package**: Works alongside apache-tvm-ffi without conflicts + +## Installation + +### Prerequisites + +- Python 3.8+ +- CMake 3.18+ +- LLVM 14+ (with ORC JIT support) +- Ninja build system (recommended) + +### Build from Source + +1. Clone the repository with submodules: + +```bash +git clone --recursive https://github.com/apache/tvm-ffi.git +cd tvm-ffi/addons/tvm-ffi-orcjit +``` + +1. Build TVM-FFI dependency (from the root of tvm-ffi repository): + +```bash +cd ../.. # Go to tvm-ffi root +mkdir -p build && cd build +cmake .. -G Ninja +ninja +cd addons/tvm-ffi-orcjit +``` + +1. Create build directory and configure with CMake: + +```bash +mkdir -p build +cd build +cmake .. \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ + -G Ninja +``` + +1. Build the project: + +```bash +cmake --build . -j$(nproc) +cd .. +``` + +The shared library will be created at: `build/libtvm_ffi_orcjit.so` + +1. Install the Python package: + +```bash +# Using pip +pip install . + +# Or for development (editable install) +pip install -e . +``` + +## Usage + +### Basic Example + +```python +from tvm_ffi_orcjit import ObjectLoader + +# Create a loader instance +loader = ObjectLoader() + +# Load an object file +loader.load("example.o") + +# Get and call a function +add_func = loader.get_function("simple_add") +result = add_func(1, 2) +print(f"Result: {result}") # Output: Result: 3 +``` + +### Incremental Loading + +Load multiple object files and access functions from all of them: + +```python +from tvm_ffi_orcjit import ObjectLoader + +loader = ObjectLoader() + +# Load first object file +loader.load("math_ops.o") +add = loader.get_function("simple_add") + +# Load second object file - functions from first remain accessible +loader.load("string_ops.o") +concat = loader.get_function("string_concat") + +# Both functions work +print(add(10, 20)) # From math_ops.o +print(concat("Hello", "World")) # From string_ops.o +``` + +### Direct Module Access + +You can also use TVM-FFI's `load_module` directly (`.o` files are automatically handled): + +```python +import tvm_ffi + +# Load object file as a module +module = tvm_ffi.load_module("example.o") + +# Get function +func = module.get_function("my_function") +result = func(arg1, arg2) +``` + +## How It Works + +1. **C++ Backend**: The package implements a `Library` subclass using LLVM's ORC JIT v2 (`LLJIT`) +2. 
**Registration**: Registers with TVM-FFI as a loader for `.o` files via `ffi.Module.load_from_file.o` +3. **Symbol Resolution**: Uses LLJIT's `lookup()` to resolve TVM-FFI exported symbols +4. **Module Integration**: Wraps the ORC JIT library in `LibraryModuleObj` for seamless TVM-FFI integration + +## Development + +### Building with CMake + +```bash +mkdir build && cd build +cmake .. -DCMAKE_EXPORT_COMPILE_COMMANDS=ON +cmake --build . +``` + +### Project Structure + +```text +tvm-ffi-orcjit/ +├── CMakeLists.txt # CMake build configuration +├── pyproject.toml # Python package metadata +├── README.md # This file +├── example.py # Usage example +├── include/ +│ └── tvm/ffi/orcjit/ +│ └── orcjit_library.h # C++ header +├── src/ +│ └── ffi/ +│ └── orcjit_library.cc # C++ implementation +└── python/ + └── tvm_ffi_orcjit/ + ├── __init__.py # Module exports + └── loader.py # Python ObjectLoader class +``` + +## Examples + +See `example.py` for a complete demonstration of incremental loading. + +## Requirements + +The package depends on: + +- `apache-tvm-ffi>=0.1.0` - TVM-FFI library +- LLVM 14+ (linked at build time) - For ORC JIT functionality + +## License + +Apache License 2.0 + +## Contributing + +Contributions are welcome! Please ensure that: + +1. Code follows the existing style +2. New features include tests +3. Documentation is updated + +## Troubleshooting + +### Symbol not found errors + +Make sure your object file was compiled with TVM-FFI export macros: + +```cpp +#include + +TVM_FFI_DLL_EXPORT_TYPED_FUNC(my_function, [](int a, int b) { + return a + b; +}); +``` + +### LLVM version mismatch + +Ensure the LLVM version used to build this package matches your system's LLVM installation. + +### TVM-FFI not found + +Make sure TVM-FFI is built in the parent repository: + +```bash +cd ../../ # Go to tvm-ffi root +mkdir -p build && cd build +cmake .. -G Ninja && ninja +``` diff --git a/addons/tvm-ffi-orcjit/examples/quick-start/CMakeLists.txt b/addons/tvm-ffi-orcjit/examples/quick-start/CMakeLists.txt new file mode 100644 index 00000000..bb8daf78 --- /dev/null +++ b/addons/tvm-ffi-orcjit/examples/quick-start/CMakeLists.txt @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +cmake_minimum_required(VERSION 3.18) +project(tvm_ffi_orcjit_example) + +# Set C++ standard +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +# Run `python -m tvm_ffi.config --cmakedir` to find tvm-ffi package +find_package( + Python + COMPONENTS Interpreter + REQUIRED +) +execute_process( + COMMAND "${Python_EXECUTABLE}" -m tvm_ffi.config --cmakedir + OUTPUT_STRIP_TRAILING_WHITESPACE + OUTPUT_VARIABLE tvm_ffi_ROOT +) +find_package(tvm_ffi CONFIG REQUIRED) + +# Create object library (not a shared library, just object files) +add_library(add_obj OBJECT add.cc) +target_link_libraries(add_obj PRIVATE tvm_ffi_header) + +# Set compiler flags +target_compile_options(add_obj PRIVATE -fPIC -O2) + +# Custom target to copy the object file to the example directory +add_custom_target( + copy_obj_file ALL + COMMAND ${CMAKE_COMMAND} -E copy $ ${CMAKE_CURRENT_SOURCE_DIR}/add.o + COMMAND ${CMAKE_COMMAND} -E echo "Successfully compiled add.o" + COMMAND ${CMAKE_COMMAND} -E echo "" + COMMAND ${CMAKE_COMMAND} -E echo "You can now run: python run.py" + DEPENDS add_obj + COMMENT "Copying add.o to example directory" +) diff --git a/addons/tvm-ffi-orcjit/examples/quick-start/README.md b/addons/tvm-ffi-orcjit/examples/quick-start/README.md new file mode 100644 index 00000000..bdb6b6b5 --- /dev/null +++ b/addons/tvm-ffi-orcjit/examples/quick-start/README.md @@ -0,0 +1,129 @@ + + + + + + + + + + + + + + + + + +# Quick Start Example + +This example demonstrates the basic usage of tvm-ffi-orcjit to compile C++ functions and load them dynamically at runtime. + +## What's Included + +- `add.cc` - Simple C++ source file with math functions exported via TVM-FFI +- `run.py` - Python script that loads and calls the compiled functions +- `CMakeLists.txt` - CMake configuration to compile the C++ code into an object file +- `compile.sh` - Legacy shell script (CMake is recommended for cross-platform support) + +## Prerequisites + +- Python 3.8+ +- CMake 3.18+ +- C++ compiler (g++, clang++, or MSVC) +- TVM-FFI and tvm-ffi-orcjit packages + +## Installation + +First, install the required packages: + +```bash +# Navigate to the repository root +cd ../../.. + +# Install TVM-FFI in editable mode +pip install -e . + +# Install tvm-ffi-orcjit in editable mode +pip install -e addons/tvm-ffi-orcjit + +# Return to the example directory +cd addons/tvm-ffi-orcjit/examples/quick-start +``` + +After installation, `tvm-ffi-config` will be available in your PATH and used by the compile script to get the correct include directories and compiler flags. + +## Steps + +### 1. Compile the C++ code + +Using CMake (recommended for cross-platform): + +```bash +cmake -B build +cmake --build build +``` + +Or using the legacy shell script (Unix-like systems only): + +```bash +./compile.sh +``` + +Both methods will create `add.o` - a compiled object file with exported functions. + +### 2. Run the Python loader + +```bash +python run.py +``` + +This will: + +- Load the object file using tvm-ffi-orcjit +- Call the exported functions +- Print the results + +## Expected Output + +```text +Loading object file: add.o +✓ Object file loaded successfully + +=== Testing add function === +add(10, 20) = 30 + +=== Testing multiply function === +multiply(7, 6) = 42 + +=== Testing fibonacci function === +fibonacci(10) = 55 + +=== Testing concat function === +concat('Hello, ', 'World!') = 'Hello, World!' + +================================================== +✓ All tests passed successfully! 
+================================================== +``` + +## How It Works + +1. **C++ Side** (`add.cc`): + - Functions are exported using `TVM_FFI_DLL_EXPORT_TYPED_FUNC` macro + - The macro registers functions with TVM-FFI's global function registry + +2. **Python Side** (`run.py`): + - `create_session()` creates an ORC JIT execution session + - `session.create_library()` creates a dynamic library (JITDylib) + - `lib.add()` loads the `.o` file into the JIT + - `lib.get_function()` looks up symbols in the JIT-compiled code + - Functions are called like normal Python functions + +## Next Steps + +- Modify `add.cc` to add your own functions +- Recompile with CMake: `cmake --build build` +- Load and test in Python + +For more details on the API, see the main package documentation. diff --git a/addons/tvm-ffi-orcjit/examples/quick-start/add.cc b/addons/tvm-ffi-orcjit/examples/quick-start/add.cc new file mode 100644 index 00000000..11cb3b67 --- /dev/null +++ b/addons/tvm-ffi-orcjit/examples/quick-start/add.cc @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/* + * Quick Start Example - Simple Math Functions + * + * This file demonstrates how to export C++ functions using TVM-FFI + * so they can be loaded dynamically at runtime with tvm-ffi-orcjit. + */ + +#include + +// Simple addition function +TVM_FFI_DLL_EXPORT_TYPED_FUNC(add, [](int a, int b) { return a + b; }); + +// Multiplication function +TVM_FFI_DLL_EXPORT_TYPED_FUNC(multiply, [](int a, int b) { return a * b; }); + +// Fibonacci function (recursive) +int fib_impl(int n) { + if (n <= 1) return n; + return fib_impl(n - 1) + fib_impl(n - 2); +} + +TVM_FFI_DLL_EXPORT_TYPED_FUNC(fibonacci, [](int n) { return fib_impl(n); }); + +// String concatenation example +TVM_FFI_DLL_EXPORT_TYPED_FUNC(concat, [](std::string a, std::string b) { return a + b; }); diff --git a/addons/tvm-ffi-orcjit/examples/quick-start/compile.sh b/addons/tvm-ffi-orcjit/examples/quick-start/compile.sh new file mode 100755 index 00000000..e8c10042 --- /dev/null +++ b/addons/tvm-ffi-orcjit/examples/quick-start/compile.sh @@ -0,0 +1,60 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Compile script for quick-start example + +set -e + +# Colors for output +GREEN='\\033[0;32m' +RED='\\033[0;31m' +NC='\\033[0m' # No Color + +echo -e "${GREEN}Compiling add.cc to object file...${NC}" + +# Check if tvm-ffi-config is available +if ! command -v tvm-ffi-config &> /dev/null; then + echo -e "${RED}Error: tvm-ffi-config not found${NC}" + echo "Make sure apache-tvm-ffi is installed:" + echo " pip install -e ../../3rdparty/tvm-ffi" + exit 1 +fi + +# Get compilation flags from tvm-ffi-config +echo -e "${GREEN}Getting compilation flags from tvm-ffi-config...${NC}" +CXXFLAGS=$(tvm-ffi-config --cxxflags) +LDFLAGS=$(tvm-ffi-config --ldflags) + +# Override C++ standard to C++20 (needed for lambda in unevaluated context) +CXXFLAGS="${CXXFLAGS/-std=c++17/-std=c++20}" + +echo " CXXFLAGS: $CXXFLAGS" +echo " LDFLAGS: $LDFLAGS" + +# Compile to object file +echo -e "${GREEN}Compiling...${NC}" +# shellcheck disable=SC2086 +g++ -c add.cc \ + -o add.o \ + $CXXFLAGS \ + -fPIC \ + -O2 + +echo -e "${GREEN}Successfully compiled add.o${NC}" +echo "" +echo "You can now run: python run.py" diff --git a/addons/tvm-ffi-orcjit/examples/quick-start/run.py b/addons/tvm-ffi-orcjit/examples/quick-start/run.py new file mode 100755 index 00000000..28926190 --- /dev/null +++ b/addons/tvm-ffi-orcjit/examples/quick-start/run.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python3 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Quick Start Example - Load and call functions from add.o. + +This script demonstrates how to: +1. Create an ExecutionSession instance +2. Create a DynamicLibrary +3. Load a compiled object file +4. Get functions by name +5. 
Call them like regular Python functions +""" + +import sys +from pathlib import Path + +# Add the parent python directory to path for imports +sys.path.insert(0, str(Path(__file__).parent.parent.parent / "python")) + + +from tvm_ffi_orcjit import create_session + + +def main() -> int: + """Run the quick start example.""" + # Check if object file exists + obj_file = Path("add.o") + if not obj_file.exists(): + print(f"Error: {obj_file} not found!") + print("Please run ./compile.sh first to compile the C++ code.") + return 1 + + print(f"Loading object file: {obj_file}") + + # Create execution session and dynamic library + session = create_session() + lib = session.create_library() + lib.add(str(obj_file)) + + print("✓ Object file loaded successfully\n") + + # Get and call the 'add' function + print("=== Testing add function ===") + add = lib.get_function("add") + result = add(10, 20) + print(f"add(10, 20) = {result}") + assert result == 30, f"Expected 30, got {result}" + + # Get and call the 'multiply' function + print("\n=== Testing multiply function ===") + multiply = lib.get_function("multiply") + result = multiply(7, 6) + print(f"multiply(7, 6) = {result}") + assert result == 42, f"Expected 42, got {result}" + + # Get and call the 'fibonacci' function + print("\n=== Testing fibonacci function ===") + fibonacci = lib.get_function("fibonacci") + result = fibonacci(10) + print(f"fibonacci(10) = {result}") + assert result == 55, f"Expected 55, got {result}" + + # Get and call the 'concat' function + print("\n=== Testing concat function ===") + concat = lib.get_function("concat") + result = concat("Hello, ", "World!") + print(f"concat('Hello, ', 'World!') = '{result}'") + assert result == "Hello, World!", f"Expected 'Hello, World!', got '{result}'" + + print("\n" + "=" * 50) + print("✓ All tests passed successfully!") + print("=" * 50) + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/addons/tvm-ffi-orcjit/include/tvm/ffi/orcjit/orcjit_dylib.h b/addons/tvm-ffi-orcjit/include/tvm/ffi/orcjit/orcjit_dylib.h new file mode 100644 index 00000000..8280f80f --- /dev/null +++ b/addons/tvm-ffi-orcjit/include/tvm/ffi/orcjit/orcjit_dylib.h @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file orcjit_dylib.h + * \brief LLVM ORC JIT DynamicLibrary (JITDylib) wrapper + */ +#ifndef TVM_FFI_ORCJIT_ORCJIT_DYLIB_H_ +#define TVM_FFI_ORCJIT_ORCJIT_DYLIB_H_ + +#include +#include +#include +#include + +#include +#include + +namespace tvm { +namespace ffi { +namespace orcjit { + +class ORCJITExecutionSession; + +/*! 
+ * \brief DynamicLibrary wrapper for LLVM ORC JIT v2 JITDylib + * + * This class wraps an LLVM JITDylib and provides functionality to: + * - Load object files + * - Link against other dynamic libraries + * - Look up symbols + */ +class ORCJITDynamicLibrary : public Object { + public: + /*! + * \brief Add an object file to this library + * \param path Path to the object file to load + */ + void AddObjectFile(const String& path); + + /*! + * \brief Link this library against another library + * \param other The library to link against + * + * After this call, this library can resolve symbols from 'other'. + */ + void LinkAgainst(const ORCJITDynamicLibrary& other); + + /*! + * \brief Look up a symbol in this library + * \param name The symbol name to look up + * \return Pointer to the symbol, or nullptr if not found + */ + void* GetSymbol(const String& name); + + /*! + * \brief Get the underlying LLVM JITDylib + * \return Reference to the LLVM JITDylib + */ + llvm::orc::JITDylib& GetJITDylib(); + + /*! + * \brief Get the name of this library + * \return The library name + */ + String GetName() const { return name_; } + + static constexpr bool _type_mutable = true; + TVM_FFI_DECLARE_OBJECT_INFO_FINAL("orcjit.DynamicLibrary", ORCJITDynamicLibrary, Object); + + /*! + * \brief Constructor + * \param session The parent execution session + * \param dylib The LLVM JITDylib + * \param jit The LLJIT instance + * \param name The library name + */ + ORCJITDynamicLibrary(ObjectPtr session, llvm::orc::JITDylib* dylib, + llvm::orc::LLJIT* jit, String name); + + private: + /*! \brief Parent execution session (for lifetime management) */ + ObjectPtr session_; + + /*! \brief The LLVM JITDylib */ + llvm::orc::JITDylib* dylib_; + + /*! \brief The LLJIT instance (for addObjectFile API) */ + llvm::orc::LLJIT* jit_; + + /*! \brief Library name */ + String name_; +}; + +} // namespace orcjit +} // namespace ffi +} // namespace tvm + +#endif // TVM_FFI_ORCJIT_ORCJIT_DYLIB_H_ diff --git a/addons/tvm-ffi-orcjit/include/tvm/ffi/orcjit/orcjit_session.h b/addons/tvm-ffi-orcjit/include/tvm/ffi/orcjit/orcjit_session.h new file mode 100644 index 00000000..835b07c6 --- /dev/null +++ b/addons/tvm-ffi-orcjit/include/tvm/ffi/orcjit/orcjit_session.h @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file orcjit_session.h + * \brief LLVM ORC JIT ExecutionSession wrapper + */ +#ifndef TVM_FFI_ORCJIT_ORCJIT_SESSION_H_ +#define TVM_FFI_ORCJIT_ORCJIT_SESSION_H_ + +#include +#include +#include + +#include +#include +#include + +namespace tvm { +namespace ffi { +namespace orcjit { + +// Forward declaration +class ORCJITDynamicLibrary; + +/*! 
+ * \brief ExecutionSession wrapper for LLVM ORC JIT v2 + * + * This class manages the lifetime of an LLVM ExecutionSession and provides + * functionality to create and manage multiple JITDylibs (DynamicLibraries). + */ +class ORCJITExecutionSession : public Object { + public: + /*! + * \brief Create a new ExecutionSession + * \return The created execution session instance + */ + static ObjectPtr Create(); + + /*! + * \brief Create a new DynamicLibrary (JITDylib) in this session + * \param name Optional name for the library (for debugging) + * \return The created dynamic library instance + */ + ObjectPtr CreateDynamicLibrary(const String& name); + + /*! + * \brief Get the underlying LLVM ExecutionSession + * \return Reference to the LLVM ExecutionSession + */ + llvm::orc::ExecutionSession& GetLLVMExecutionSession(); + + /*! + * \brief Get the underlying LLJIT instance + * \return Reference to the LLJIT instance + */ + llvm::orc::LLJIT& GetLLJIT(); + + static constexpr bool _type_mutable = true; + TVM_FFI_DECLARE_OBJECT_INFO_FINAL("orcjit.ExecutionSession", ORCJITExecutionSession, Object); + + /*! + * \brief Default constructor (for make_object) + */ + ORCJITExecutionSession(); + + private: + /*! + * \brief Initialize the LLJIT instance + */ + void Initialize(); + + /*! \brief The LLVM ORC JIT instance */ + std::unique_ptr jit_; + + /*! \brief Counter for auto-generating library names */ + int dylib_counter_ = 0; + + /*! \brief Map of created dynamic libraries for lifetime management */ + std::unordered_map> dylibs_; +}; + +} // namespace orcjit +} // namespace ffi +} // namespace tvm + +#endif // TVM_FFI_ORCJIT_ORCJIT_SESSION_H_ diff --git a/addons/tvm-ffi-orcjit/pyproject.toml b/addons/tvm-ffi-orcjit/pyproject.toml new file mode 100644 index 00000000..3371b99d --- /dev/null +++ b/addons/tvm-ffi-orcjit/pyproject.toml @@ -0,0 +1,55 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
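+
+# The two headers above (orcjit_session.h / orcjit_dylib.h) define the C++ side
+# of this addon. A minimal usage sketch assembled from their declarations,
+# assuming the methods behave as their doc comments describe (illustrative
+# only, error handling omitted):
+#
+#   auto session = tvm::ffi::orcjit::ORCJITExecutionSession::Create();
+#   auto dylib   = session->CreateDynamicLibrary("main");
+#   dylib->AddObjectFile("add.o");
+#   void* sym = dylib->GetSymbol("__tvm_ffi_add");  // exports carry the __tvm_ffi_ prefix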
+ +[build-system] +requires = ["scikit-build-core>=0.3.3", "pybind11"] +build-backend = "scikit_build_core.build" + +[project] +name = "tvm-ffi-orcjit" +version = "0.1.0" +description = "Load TVM-FFI exported object files using LLVM ORC JIT v2" +readme = "README.md" +requires-python = ">=3.8" +license = { text = "Apache-2.0" } +authors = [{ name = "TVM-FFI OrcJIT Contributors" }] +keywords = ["tvm-ffi", "llvm", "jit", "orcjit"] +classifiers = [ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: C++", +] +dependencies = ["apache-tvm-ffi>=0.1.0"] + +[project.urls] +Homepage = "https://github.com/apache/tvm-ffi" +Repository = "https://github.com/apache/tvm-ffi" + +[tool.scikit-build] +cmake.minimum-version = "3.18" +cmake.build-type = "Release" +wheel.py-api = "py3" +# Don't set install-dir, let it use the default python/ directory + +[tool.scikit-build.cmake.define] +CMAKE_EXPORT_COMPILE_COMMANDS = "ON" diff --git a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/__init__.py b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/__init__.py new file mode 100644 index 00000000..435c9b04 --- /dev/null +++ b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/__init__.py @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""TVM-FFI OrcJIT. + +This module provides functionality to load object files (.o) compiled with TVM-FFI +exports using LLVM ORC JIT v2. 
+ +Example: + >>> from tvm_ffi_orcjit import create_session + >>> session = create_session() + >>> lib = session.create_library() + >>> lib.add("example.o") + >>> func = lib.get_function("my_function") + >>> result = func(arg1, arg2) + +""" + +import sys +from pathlib import Path + +from tvm_ffi import load_module + +# Load the orcjit extension library to register functions +_LIB_PATH = Path(__file__).parent.parent.parent / "libtvm_ffi_orcjit.so" +if _LIB_PATH.exists(): + load_module(str(_LIB_PATH)) +else: + # Fallback: search in site-packages (installed location) + for site_pkg in sys.path: + candidate = Path(site_pkg) / "libtvm_ffi_orcjit.so" + if candidate.exists(): + load_module(str(candidate)) + break + +from .dylib import DynamicLibrary +from .session import ExecutionSession, create_session + +__all__ = ["DynamicLibrary", "ExecutionSession", "create_session"] +__version__ = "0.1.0" diff --git a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/dylib.py b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/dylib.py new file mode 100644 index 00000000..83c9149f --- /dev/null +++ b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/dylib.py @@ -0,0 +1,133 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""ORC JIT Dynamic Library.""" + +from __future__ import annotations + +from pathlib import Path +from typing import TYPE_CHECKING, Any + +from tvm_ffi import Function, get_global_func +from tvm_ffi._ffi_api import ModuleGetFunction + +if TYPE_CHECKING: + from .session import ExecutionSession + + +class DynamicLibrary: + """ORC JIT Dynamic Library (JITDylib). + + Represents a collection of symbols that can be loaded from object files and linked + against other dynamic libraries. Supports JIT compilation and symbol resolution. + + Examples + -------- + >>> session = create_session() + >>> lib = session.create_library() + >>> lib.add("add.o") + >>> lib.add("multiply.o") + >>> add_func = lib.get_function("add") + >>> result = add_func(1, 2) + + """ + + def __init__(self, handle: Any, session: ExecutionSession) -> None: + """Initialize DynamicLibrary from a handle. + + Parameters + ---------- + handle : object + The underlying C++ ORCJITDynamicLibrary object. + session : ExecutionSession + The parent execution session (kept alive for the library's lifetime). + + """ + self._handle = handle + self._session = session # Keep session alive + self._add_func = get_global_func("orcjit.DynamicLibraryAdd") + self._link_func = get_global_func("orcjit.DynamicLibraryLinkAgainst") + self._to_module_func = get_global_func("orcjit.DynamicLibraryToModule") + + def add(self, object_file: str | Path) -> None: + """Add an object file to this dynamic library. + + Parameters + ---------- + object_file : str or Path + Path to the object file to load. 
+ + Examples + -------- + >>> lib.add("add.o") + >>> lib.add(Path("multiply.o")) + + """ + if isinstance(object_file, Path): + object_file = str(object_file) + self._add_func(self._handle, object_file) + + def link_against(self, *libraries: DynamicLibrary) -> None: + """Link this library against other dynamic libraries. + + Sets the search order for symbol resolution. Symbols not found in this library + will be searched in the linked libraries in the order specified. + + Parameters + ---------- + *libraries : DynamicLibrary + One or more dynamic libraries to link against. + + Examples + -------- + >>> session = create_session() + >>> lib_utils = session.create_library() + >>> lib_utils.add("utils.o") + >>> lib_main = session.create_library() + >>> lib_main.add("main.o") + >>> lib_main.link_against(lib_utils) # main can call utils symbols + + """ + handles = [lib._handle for lib in libraries] + self._link_func(self._handle, *handles) + + def get_function(self, name: str) -> Function: + """Get a function from this dynamic library. + + Parameters + ---------- + name : str + The name of the function to retrieve. + + Returns + ------- + callable + The function object that can be called from Python. + + Examples + -------- + >>> lib.add("add.o") + >>> add_func = lib.get_function("add") + >>> result = add_func(1, 2) + + """ + # Get the module handle and use ModuleGetFunction + module_handle = self._to_module_func(self._handle) + + func = ModuleGetFunction(module_handle, name, False) + if func is None: + raise AttributeError(f"Module has no function '{name}'") + return func diff --git a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/session.py b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/session.py new file mode 100644 index 00000000..96ed7146 --- /dev/null +++ b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/session.py @@ -0,0 +1,88 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""ORC JIT Execution Session.""" + +from __future__ import annotations + +from typing import Any + +from tvm_ffi import get_global_func + +from .dylib import DynamicLibrary + + +class ExecutionSession: + """ORC JIT Execution Session. + + Manages the LLVM ORC JIT execution environment and creates dynamic libraries (JITDylibs). + This is the top-level context for JIT compilation and symbol management. + + Examples + -------- + >>> session = create_session() + >>> lib = session.create_library(name="main") + >>> lib.add("add.o") + >>> add_func = lib.get_function("add") + + """ + + def __init__(self, handle: Any) -> None: + """Initialize ExecutionSession from a handle. + + Parameters + ---------- + handle : object + The underlying C++ ORCJITExecutionSession object. 
+ + """ + self._handle = handle + self._create_dylib_func = get_global_func("orcjit.SessionCreateDynamicLibrary") + + def create_library(self, name: str = "") -> DynamicLibrary: + """Create a new dynamic library associated with this execution session. + + Args: + name: Optional name for the library. If empty, a unique name will be generated. + + Returns: + A new DynamicLibrary instance. + + """ + handle = self._create_dylib_func(self._handle, name) + return DynamicLibrary(handle, self) + + +def create_session() -> ExecutionSession: + """Create a new ORC JIT execution session. + + This is the main entry point for using the ORC JIT system. The session + manages the LLVM ORC JIT infrastructure and allows creating dynamic libraries. + + Returns + ------- + ExecutionSession + A new execution session instance. + + Examples + -------- + >>> session = create_session() + >>> lib = session.create_library() + + """ + create_func = get_global_func("orcjit.CreateExecutionSession") + handle = create_func() + return ExecutionSession(handle) diff --git a/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc b/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc new file mode 100644 index 00000000..33d3a598 --- /dev/null +++ b/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * \file orcjit_dylib.cc + * \brief LLVM ORC JIT DynamicLibrary implementation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace tvm { +namespace ffi { +namespace orcjit { + +ORCJITDynamicLibrary::ORCJITDynamicLibrary(ObjectPtr session, + llvm::orc::JITDylib* dylib, llvm::orc::LLJIT* jit, + String name) + : session_(std::move(session)), dylib_(dylib), jit_(jit), name_(std::move(name)) { + TVM_FFI_CHECK(dylib_ != nullptr, ValueError) << "JITDylib cannot be null"; + TVM_FFI_CHECK(jit_ != nullptr, ValueError) << "LLJIT cannot be null"; +} + +void ORCJITDynamicLibrary::AddObjectFile(const String& path) { + // Read object file + auto buffer_or_err = llvm::MemoryBuffer::getFile(path.c_str()); + if (!buffer_or_err) { + TVM_FFI_THROW(IOError) << "Failed to read object file: " << path; + } + + // Add object file to this JITDylib + auto err = jit_->addObjectFile(*dylib_, std::move(*buffer_or_err)); + if (err) { + std::string err_msg; + llvm::handleAllErrors(std::move(err), + [&](const llvm::ErrorInfoBase& eib) { err_msg = eib.message(); }); + TVM_FFI_THROW(ValueError) << "Failed to add object file '" << path << "': " << err_msg; + } +} + +void ORCJITDynamicLibrary::LinkAgainst(const ORCJITDynamicLibrary& other) { + // Set up link order: this dylib should search in other dylib + llvm::orc::JITDylibSearchOrder search_order; + search_order.push_back({other.dylib_, llvm::orc::JITDylibLookupFlags::MatchAllSymbols}); + + dylib_->setLinkOrder(search_order, false); +} + +void* ORCJITDynamicLibrary::GetSymbol(const String& name) { + // Look up symbol + auto symbol_or_err = + jit_->getExecutionSession().lookup({dylib_}, jit_->mangleAndIntern(name.c_str())); + if (!symbol_or_err) { + auto err = symbol_or_err.takeError(); + std::string err_msg; + llvm::handleAllErrors(std::move(err), + [&](const llvm::ErrorInfoBase& eib) { err_msg = eib.message(); }); + TVM_FFI_THROW(ValueError) << "Failed to find symbol '" << name << "': " << err_msg; + } + + // Convert ExecutorAddr to pointer + return symbol_or_err->getAddress().toPtr(); +} + +llvm::orc::JITDylib& ORCJITDynamicLibrary::GetJITDylib() { + TVM_FFI_CHECK(dylib_ != nullptr, InternalError) << "JITDylib is null"; + return *dylib_; +} + +//------------------------------------- +// Module wrapper for DynamicLibrary +//------------------------------------- + +class DynamicLibraryModuleObj : public ModuleObj { + public: + explicit DynamicLibraryModuleObj(ObjectPtr dylib) + : dylib_(std::move(dylib)) {} + + const char* kind() const final { return "orcjit_dynamic_library"; } + + Optional GetFunction(const String& name) override { + // TVM-FFI exports have __tvm_ffi_ prefix + std::string symbol_name = "__tvm_ffi_" + std::string(name); + + // Try to get the symbol - return NullOpt if not found + void* symbol = nullptr; + try { + symbol = dylib_->GetSymbol(symbol_name); + } catch (const Error& e) { + // Symbol not found + return Optional(); + } + + // Wrap C function pointer as tvm-ffi Function + using TVMFFISafeCallType = + int (*)(void* handle, const TVMFFIAny* args, int32_t num_args, TVMFFIAny* rv); + auto c_func = reinterpret_cast(symbol); + + return Function::FromPacked([c_func, name](PackedArgs args, Any* rv) { + std::vector arg_views; + arg_views.reserve(args.size()); + for (int i = 0; i < args.size(); ++i) { + arg_views.push_back(args[i]); + } + + int ret_code = c_func(nullptr, reinterpret_cast(arg_views.data()), + static_cast(args.size()), reinterpret_cast(rv)); + + if (ret_code != 0) { 
+ TVM_FFI_THROW(RuntimeError) << "Function '" << name << "' returned error code " << ret_code; + } + }); + } + + private: + ObjectPtr dylib_; +}; + +//------------------------------------- +// Registration +//------------------------------------- + +TVM_FFI_STATIC_INIT_BLOCK() { + namespace refl = tvm::ffi::reflection; + + refl::GlobalDef() + .def("orcjit.CreateExecutionSession", + []() -> ObjectRef { return ObjectRef(ORCJITExecutionSession::Create()); }) + .def("orcjit.SessionCreateDynamicLibrary", + [](ORCJITExecutionSession* session, String name) -> ObjectRef { + return ObjectRef(session->CreateDynamicLibrary(name)); + }) + .def("orcjit.DynamicLibraryAdd", + [](ORCJITDynamicLibrary* dylib, String path) { dylib->AddObjectFile(path); }) + .def("orcjit.DynamicLibraryLinkAgainst", + [](ORCJITDynamicLibrary* dylib, ORCJITDynamicLibrary* other) { + dylib->LinkAgainst(*other); + }) + .def("orcjit.DynamicLibraryGetName", + [](ORCJITDynamicLibrary* dylib) -> String { return dylib->GetName(); }) + .def("orcjit.DynamicLibraryToModule", [](ORCJITDynamicLibrary* dylib) -> Module { + return Module( + make_object(GetObjectPtr(dylib))); + }); +} + +} // namespace orcjit +} // namespace ffi +} // namespace tvm diff --git a/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.cc b/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.cc new file mode 100644 index 00000000..d21e9a71 --- /dev/null +++ b/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.cc @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! 
+ * \file orcjit_session.cc + * \brief LLVM ORC JIT ExecutionSession implementation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace tvm { +namespace ffi { +namespace orcjit { + +// Initialize LLVM native target (only once) +struct LLVMInitializer { + LLVMInitializer() { + llvm::InitializeNativeTarget(); + llvm::InitializeNativeTargetAsmPrinter(); + llvm::InitializeNativeTargetAsmParser(); + } +}; + +static LLVMInitializer llvm_initializer; + +// Provide __dso_handle for C++ runtime +static char dso_handle_storage; + +ORCJITExecutionSession::ORCJITExecutionSession() : jit_(nullptr), dylib_counter_(0) {} + +void ORCJITExecutionSession::Initialize() { + // Create LLJIT instance + auto jit_or_err = llvm::orc::LLJITBuilder().create(); + if (!jit_or_err) { + auto err = jit_or_err.takeError(); + std::string err_msg; + llvm::handleAllErrors(std::move(err), + [&](const llvm::ErrorInfoBase& eib) { err_msg = eib.message(); }); + TVM_FFI_THROW(InternalError) << "Failed to create LLJIT: " << err_msg; + } + jit_ = std::move(*jit_or_err); +} + +ObjectPtr ORCJITExecutionSession::Create() { + auto session = make_object(); + session->Initialize(); + return session; +} + +ObjectPtr ORCJITExecutionSession::CreateDynamicLibrary(const String& name) { + TVM_FFI_CHECK(jit_ != nullptr, InternalError) << "ExecutionSession not initialized"; + + // Generate name if not provided + String lib_name = name; + if (lib_name.empty()) { + std::ostringstream oss; + oss << "dylib_" << dylib_counter_++; + lib_name = oss.str(); + } + + // Check if library with this name already exists + TVM_FFI_CHECK(dylibs_.find(lib_name) == dylibs_.end(), ValueError) + << "DynamicLibrary with name '" << lib_name << "' already exists"; + + // Create a new JITDylib + auto& jd = jit_->getExecutionSession().createBareJITDylib(lib_name.c_str()); + + // Add process symbol resolver to make C/C++ stdlib available + auto dlsg = llvm::orc::DynamicLibrarySearchGenerator::GetForCurrentProcess( + jit_->getDataLayout().getGlobalPrefix()); + if (!dlsg) { + TVM_FFI_THROW(InternalError) << "Failed to create process symbol resolver"; + } + jd.addGenerator(std::move(*dlsg)); + + // Add __dso_handle as a weak symbol (use static storage) + auto& es = jit_->getExecutionSession(); + auto dso_symbol = llvm::orc::ExecutorSymbolDef( + llvm::orc::ExecutorAddr::fromPtr(&dso_handle_storage), llvm::JITSymbolFlags::Exported); + llvm::orc::SymbolMap symbols; + symbols[es.intern("__dso_handle")] = dso_symbol; + if (auto err = jd.define(llvm::orc::absoluteSymbols(std::move(symbols)))) { + TVM_FFI_THROW(InternalError) << "Failed to define __dso_handle"; + } + + // Create the wrapper object + auto dylib = make_object(GetObjectPtr(this), &jd, + jit_.get(), lib_name); + + // Store for lifetime management + dylibs_[lib_name] = dylib; + + return dylib; +} + +llvm::orc::ExecutionSession& ORCJITExecutionSession::GetLLVMExecutionSession() { + TVM_FFI_CHECK(jit_ != nullptr, InternalError) << "ExecutionSession not initialized"; + return jit_->getExecutionSession(); +} + +llvm::orc::LLJIT& ORCJITExecutionSession::GetLLJIT() { + TVM_FFI_CHECK(jit_ != nullptr, InternalError) << "ExecutionSession not initialized"; + return *jit_; +} + +} // namespace orcjit +} // namespace ffi +} // namespace tvm diff --git a/addons/tvm-ffi-orcjit/tests/__init__.py b/addons/tvm-ffi-orcjit/tests/__init__.py new file mode 100644 index 00000000..4d4bdfa8 --- /dev/null +++ b/addons/tvm-ffi-orcjit/tests/__init__.py @@ -0,0 +1,17 @@ +# 
Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Tests for tvm-ffi-orcjit package.""" diff --git a/addons/tvm-ffi-orcjit/tests/test_basic.py b/addons/tvm-ffi-orcjit/tests/test_basic.py new file mode 100644 index 00000000..4ef4c4d1 --- /dev/null +++ b/addons/tvm-ffi-orcjit/tests/test_basic.py @@ -0,0 +1,155 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Basic tests for tvm-ffi-orcjit functionality.""" + +import subprocess +import tempfile +from pathlib import Path + +import pytest +from tvm_ffi_orcjit import create_session + + +def compile_simple_function() -> Path: + """Compile a simple C function with TVM-FFI exports for testing. + + Returns + ------- + Path + Path to the compiled object file. 
+ + """ + c_code = """ +#include + +TVM_FFI_EXPORT_FUNC("test_add") +int test_add(TVMFFIFunctionHandle handle, const TVMFFIAny* args, int32_t num_args, TVMFFIAny* rv) { + if (num_args != 2) return -1; + int a = args[0].v_int64; + int b = args[1].v_int64; + rv->v_int64 = a + b; + rv->type_code = kTVMFFIArgTypeInt; + return 0; +} + +TVM_FFI_EXPORT_FUNC("test_multiply") +int test_multiply(TVMFFIFunctionHandle handle, const TVMFFIAny* args, int32_t num_args, TVMFFIAny* rv) { + if (num_args != 2) return -1; + int a = args[0].v_int64; + int b = args[1].v_int64; + rv->v_int64 = a * b; + rv->type_code = kTVMFFIArgTypeInt; + return 0; +} +""" + # Create temporary directory + tmpdir = Path(tempfile.mkdtemp()) + src_file = tmpdir / "test_func.c" + obj_file = tmpdir / "test_func.o" + + # Write C code + src_file.write_text(c_code) + + # Compile with clang + subprocess.run( + [ + "clang", + "-c", + "-fPIC", + "-O2", + str(src_file), + "-o", + str(obj_file), + ], + check=True, + ) + + return obj_file + + +def test_create_session() -> None: + """Test creating an execution session.""" + session = create_session() + assert session is not None + + +def test_create_library() -> None: + """Test creating a dynamic library.""" + session = create_session() + lib = session.create_library() + assert lib is not None + + +def test_load_and_execute_function() -> None: + """Test loading an object file and executing a function.""" + # Compile test function + obj_file = compile_simple_function() + + try: + # Create session and library + session = create_session() + lib = session.create_library() + + # Load object file + lib.add(str(obj_file)) + + # Get and call test_add function + add_func = lib.get_function("test_add") + result = add_func(10, 20) + assert result == 30 + + # Get and call test_multiply function + mul_func = lib.get_function("test_multiply") + result = mul_func(7, 6) + assert result == 42 + + finally: + # Clean up + obj_file.unlink() + obj_file.parent.rmdir() + + +def test_multiple_libraries() -> None: + """Test creating and using multiple libraries.""" + session = create_session() + + lib1 = session.create_library("lib1") + lib2 = session.create_library("lib2") + + assert lib1 is not None + assert lib2 is not None + + +def test_function_not_found() -> None: + """Test that getting a non-existent function raises an error.""" + obj_file = compile_simple_function() + + try: + session = create_session() + lib = session.create_library() + lib.add(str(obj_file)) + + with pytest.raises(AttributeError, match="Module has no function"): + lib.get_function("nonexistent_function") + + finally: + obj_file.unlink() + obj_file.parent.rmdir() + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) From 501b63521a58dedcd2903a0577b9b8172dc342dc Mon Sep 17 00:00:00 2001 From: Yaoyao Ding Date: Mon, 10 Nov 2025 16:44:38 -0500 Subject: [PATCH 10/47] add more tests --- .github/workflows/tvm-ffi-orcjit/ci_test.yml | 104 +++++++ .../examples/quick-start/CMakeLists.txt | 2 +- .../examples/quick-start/add.cc | 12 +- addons/tvm-ffi-orcjit/tests/CMakeLists.txt | 65 ++++ addons/tvm-ffi-orcjit/tests/README.md | 58 ++++ .../tests/sources/test_funcs.cc | 26 ++ .../tests/sources/test_funcs2.cc | 26 ++ .../tests/sources/test_funcs_conflict.cc | 26 ++ addons/tvm-ffi-orcjit/tests/test_basic.py | 283 +++++++++++++----- 9 files changed, 516 insertions(+), 86 deletions(-) create mode 100644 .github/workflows/tvm-ffi-orcjit/ci_test.yml create mode 100644 addons/tvm-ffi-orcjit/tests/CMakeLists.txt create mode 100644 
addons/tvm-ffi-orcjit/tests/README.md create mode 100644 addons/tvm-ffi-orcjit/tests/sources/test_funcs.cc create mode 100644 addons/tvm-ffi-orcjit/tests/sources/test_funcs2.cc create mode 100644 addons/tvm-ffi-orcjit/tests/sources/test_funcs_conflict.cc diff --git a/.github/workflows/tvm-ffi-orcjit/ci_test.yml b/.github/workflows/tvm-ffi-orcjit/ci_test.yml new file mode 100644 index 00000000..df798038 --- /dev/null +++ b/.github/workflows/tvm-ffi-orcjit/ci_test.yml @@ -0,0 +1,104 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: TVM-FFI-OrcJIT CI Tests + +on: + push: + branches: + - main + - dev + - orcjit + paths: + - 'addons/tvm-ffi-orcjit/**' + - '.github/workflows/tvm-ffi-orcjit/ci_test.yml' + pull_request: + paths: + - 'addons/tvm-ffi-orcjit/**' + - '.github/workflows/tvm-ffi-orcjit/ci_test.yml' + workflow_dispatch: + +jobs: + test: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest] + python-version: ['3.9', '3.10', '3.11', '3.12'] + + steps: + - uses: actions/checkout@v5 + with: + submodules: recursive + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install LLVM (Ubuntu) + if: runner.os == 'Linux' + run: | + wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + sudo ./llvm.sh 18 + sudo apt-get install -y llvm-18-dev clang-18 + echo "CC=clang-18" >> $GITHUB_ENV + echo "CXX=clang++-18" >> $GITHUB_ENV + + - name: Install LLVM (macOS) + if: runner.os == 'macOS' + run: | + brew install llvm@18 + echo "LLVM_DIR=$(brew --prefix llvm@18)" >> $GITHUB_ENV + echo "CC=$(brew --prefix llvm@18)/bin/clang" >> $GITHUB_ENV + echo "CXX=$(brew --prefix llvm@18)/bin/clang++" >> $GITHUB_ENV + + - name: Install uv + uses: astral-sh/setup-uv@v6 + + - name: Install tvm-ffi (core package) + run: | + uv pip install -e . --system + + - name: Build and install tvm-ffi-orcjit + working-directory: addons/tvm-ffi-orcjit + run: | + uv pip install -e . 
--system + + - name: Install test dependencies + run: | + uv pip install pytest --system + + - name: Build test objects + working-directory: addons/tvm-ffi-orcjit/tests + run: | + cmake -B build + cmake --build build + + - name: Run tests + working-directory: addons/tvm-ffi-orcjit + run: | + pytest tests/ -v + + - name: Run example + working-directory: addons/tvm-ffi-orcjit/examples/quick-start + run: | + cmake -B build + cmake --build build + python run.py diff --git a/addons/tvm-ffi-orcjit/examples/quick-start/CMakeLists.txt b/addons/tvm-ffi-orcjit/examples/quick-start/CMakeLists.txt index bb8daf78..7fb1dfed 100644 --- a/addons/tvm-ffi-orcjit/examples/quick-start/CMakeLists.txt +++ b/addons/tvm-ffi-orcjit/examples/quick-start/CMakeLists.txt @@ -19,7 +19,7 @@ cmake_minimum_required(VERSION 3.18) project(tvm_ffi_orcjit_example) # Set C++ standard -set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) # Run `python -m tvm_ffi.config --cmakedir` to find tvm-ffi package diff --git a/addons/tvm-ffi-orcjit/examples/quick-start/add.cc b/addons/tvm-ffi-orcjit/examples/quick-start/add.cc index 11cb3b67..443638aa 100644 --- a/addons/tvm-ffi-orcjit/examples/quick-start/add.cc +++ b/addons/tvm-ffi-orcjit/examples/quick-start/add.cc @@ -27,18 +27,20 @@ #include // Simple addition function -TVM_FFI_DLL_EXPORT_TYPED_FUNC(add, [](int a, int b) { return a + b; }); +int add_impl(int a, int b) { return a + b; } +TVM_FFI_DLL_EXPORT_TYPED_FUNC(add, add_impl); // Multiplication function -TVM_FFI_DLL_EXPORT_TYPED_FUNC(multiply, [](int a, int b) { return a * b; }); +int multiply_impl(int a, int b) { return a * b; } +TVM_FFI_DLL_EXPORT_TYPED_FUNC(multiply, multiply_impl); // Fibonacci function (recursive) int fib_impl(int n) { if (n <= 1) return n; return fib_impl(n - 1) + fib_impl(n - 2); } - -TVM_FFI_DLL_EXPORT_TYPED_FUNC(fibonacci, [](int n) { return fib_impl(n); }); +TVM_FFI_DLL_EXPORT_TYPED_FUNC(fibonacci, fib_impl); // String concatenation example -TVM_FFI_DLL_EXPORT_TYPED_FUNC(concat, [](std::string a, std::string b) { return a + b; }); +std::string concat_impl(std::string a, std::string b) { return a + b; } +TVM_FFI_DLL_EXPORT_TYPED_FUNC(concat, concat_impl); diff --git a/addons/tvm-ffi-orcjit/tests/CMakeLists.txt b/addons/tvm-ffi-orcjit/tests/CMakeLists.txt new file mode 100644 index 00000000..1f53f7eb --- /dev/null +++ b/addons/tvm-ffi-orcjit/tests/CMakeLists.txt @@ -0,0 +1,65 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
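+
+# This file builds the object files that tests/test_basic.py loads. The CI job
+# above drives it with the standard two-step flow; the same commands are
+# expected to work locally (run from addons/tvm-ffi-orcjit/tests, with tvm_ffi
+# importable so `python -m tvm_ffi.config --cmakedir` resolves):
+#
+#   cmake -B build
+#   cmake --build build
+#   cd .. && pytest tests/ -v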
+ +cmake_minimum_required(VERSION 3.18) +project(tvm_ffi_orcjit_tests) + +# Set C++ standard +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +# Run `python -m tvm_ffi.config --cmakedir` to find tvm-ffi package +find_package( + Python + COMPONENTS Interpreter + REQUIRED +) +execute_process( + COMMAND "${Python_EXECUTABLE}" -m tvm_ffi.config --cmakedir + OUTPUT_STRIP_TRAILING_WHITESPACE + OUTPUT_VARIABLE tvm_ffi_ROOT +) +find_package(tvm_ffi CONFIG REQUIRED) + +# Create object library for test functions +add_library(test_funcs_obj OBJECT sources/test_funcs.cc) +target_link_libraries(test_funcs_obj PRIVATE tvm_ffi_header) +target_compile_options(test_funcs_obj PRIVATE -fPIC -O2) + +# Create object library for second set of test functions +add_library(test_funcs2_obj OBJECT sources/test_funcs2.cc) +target_link_libraries(test_funcs2_obj PRIVATE tvm_ffi_header) +target_compile_options(test_funcs2_obj PRIVATE -fPIC -O2) + +# Create object library for conflicting test functions +add_library(test_funcs_conflict_obj OBJECT sources/test_funcs_conflict.cc) +target_link_libraries(test_funcs_conflict_obj PRIVATE tvm_ffi_header) +target_compile_options(test_funcs_conflict_obj PRIVATE -fPIC -O2) + +# Custom target to copy all object files to the test directory +add_custom_target( + copy_test_obj_files ALL + COMMAND ${CMAKE_COMMAND} -E copy $ + ${CMAKE_CURRENT_SOURCE_DIR}/test_funcs.o + COMMAND ${CMAKE_COMMAND} -E copy $ + ${CMAKE_CURRENT_SOURCE_DIR}/test_funcs2.o + COMMAND ${CMAKE_COMMAND} -E copy $ + ${CMAKE_CURRENT_SOURCE_DIR}/test_funcs_conflict.o + COMMAND ${CMAKE_COMMAND} -E echo "Successfully compiled all test object files" + DEPENDS test_funcs_obj test_funcs2_obj test_funcs_conflict_obj + COMMENT "Copying all test object files to test directory" +) diff --git a/addons/tvm-ffi-orcjit/tests/README.md b/addons/tvm-ffi-orcjit/tests/README.md new file mode 100644 index 00000000..1a90808f --- /dev/null +++ b/addons/tvm-ffi-orcjit/tests/README.md @@ -0,0 +1,58 @@ + + + + + + + + + + + + + + + + + +# TVM-FFI-OrcJIT Tests + +This directory contains tests for the tvm-ffi-orcjit package. + +## Building Test Objects + +The tests require pre-built object files. To build them: + +```bash +cd tests +cmake -B build +cmake --build build +``` + +This will compile `sources/test_funcs.cc` and generate `test_funcs.o` in the tests directory. + +## Running Tests + +After building the test objects, run the tests with: + +```bash +pytest tests/ -v +``` + +Or from the repository root: + +```bash +cd addons/tvm-ffi-orcjit +pytest tests/ -v +``` + +## Test Structure + +- `sources/` - C++ source files for test functions +- `test_basic.py` - Python test cases +- `CMakeLists.txt` - Build configuration for test objects +- `test_funcs.o` - Generated object file (after building) + +## CI/CD + +The CI workflow automatically builds the test objects before running tests. See `.github/workflows/tvm-ffi-orcjit/ci_test.yml` for the full workflow. diff --git a/addons/tvm-ffi-orcjit/tests/sources/test_funcs.cc b/addons/tvm-ffi-orcjit/tests/sources/test_funcs.cc new file mode 100644 index 00000000..94aecfbc --- /dev/null +++ b/addons/tvm-ffi-orcjit/tests/sources/test_funcs.cc @@ -0,0 +1,26 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +// Simple addition function +int test_add_impl(int a, int b) { return a + b; } +TVM_FFI_DLL_EXPORT_TYPED_FUNC(test_add, test_add_impl); + +// Multiplication function +int test_multiply_impl(int a, int b) { return a * b; } +TVM_FFI_DLL_EXPORT_TYPED_FUNC(test_multiply, test_multiply_impl); diff --git a/addons/tvm-ffi-orcjit/tests/sources/test_funcs2.cc b/addons/tvm-ffi-orcjit/tests/sources/test_funcs2.cc new file mode 100644 index 00000000..d786bc36 --- /dev/null +++ b/addons/tvm-ffi-orcjit/tests/sources/test_funcs2.cc @@ -0,0 +1,26 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +// Subtraction function +int test_subtract_impl(int a, int b) { return a - b; } +TVM_FFI_DLL_EXPORT_TYPED_FUNC(test_subtract, test_subtract_impl); + +// Division function +int test_divide_impl(int a, int b) { return a / b; } +TVM_FFI_DLL_EXPORT_TYPED_FUNC(test_divide, test_divide_impl); diff --git a/addons/tvm-ffi-orcjit/tests/sources/test_funcs_conflict.cc b/addons/tvm-ffi-orcjit/tests/sources/test_funcs_conflict.cc new file mode 100644 index 00000000..09d35a84 --- /dev/null +++ b/addons/tvm-ffi-orcjit/tests/sources/test_funcs_conflict.cc @@ -0,0 +1,26 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
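+
+// This translation unit deliberately re-exports `test_add` and `test_multiply`
+// with different arithmetic so the Python tests can probe duplicate-symbol
+// handling: test_symbol_conflict_same_library expects adding it to a library
+// that already holds test_funcs.o to fail, while
+// test_symbol_conflict_different_libraries expects each JITDylib to keep its
+// own copy of the symbols (see tests/test_basic.py).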
+ +#include + +// Conflicting test_add function - different implementation +int test_add_conflict_impl(int a, int b) { return a + b + 1000; } +TVM_FFI_DLL_EXPORT_TYPED_FUNC(test_add, test_add_conflict_impl); + +// Conflicting test_multiply function - different implementation +int test_multiply_conflict_impl(int a, int b) { return a * b * 2; } +TVM_FFI_DLL_EXPORT_TYPED_FUNC(test_multiply, test_multiply_conflict_impl); diff --git a/addons/tvm-ffi-orcjit/tests/test_basic.py b/addons/tvm-ffi-orcjit/tests/test_basic.py index 4ef4c4d1..b3577030 100644 --- a/addons/tvm-ffi-orcjit/tests/test_basic.py +++ b/addons/tvm-ffi-orcjit/tests/test_basic.py @@ -16,67 +16,77 @@ # under the License. """Basic tests for tvm-ffi-orcjit functionality.""" -import subprocess -import tempfile +from __future__ import annotations + from pathlib import Path import pytest from tvm_ffi_orcjit import create_session -def compile_simple_function() -> Path: - """Compile a simple C function with TVM-FFI exports for testing. +def get_test_obj_file() -> Path: + """Get the path to the pre-built test object file. Returns ------- Path - Path to the compiled object file. + Path to the test_funcs.o object file. """ - c_code = """ -#include - -TVM_FFI_EXPORT_FUNC("test_add") -int test_add(TVMFFIFunctionHandle handle, const TVMFFIAny* args, int32_t num_args, TVMFFIAny* rv) { - if (num_args != 2) return -1; - int a = args[0].v_int64; - int b = args[1].v_int64; - rv->v_int64 = a + b; - rv->type_code = kTVMFFIArgTypeInt; - return 0; -} - -TVM_FFI_EXPORT_FUNC("test_multiply") -int test_multiply(TVMFFIFunctionHandle handle, const TVMFFIAny* args, int32_t num_args, TVMFFIAny* rv) { - if (num_args != 2) return -1; - int a = args[0].v_int64; - int b = args[1].v_int64; - rv->v_int64 = a * b; - rv->type_code = kTVMFFIArgTypeInt; - return 0; -} -""" - # Create temporary directory - tmpdir = Path(tempfile.mkdtemp()) - src_file = tmpdir / "test_func.c" - obj_file = tmpdir / "test_func.o" - - # Write C code - src_file.write_text(c_code) - - # Compile with clang - subprocess.run( - [ - "clang", - "-c", - "-fPIC", - "-O2", - str(src_file), - "-o", - str(obj_file), - ], - check=True, - ) + # The object file should be built by CMake and located in the tests directory + test_dir = Path(__file__).parent + obj_file = test_dir / "test_funcs.o" + + if not obj_file.exists(): + raise FileNotFoundError( + f"Test object file not found: {obj_file}\n" + "Please build the test object file first:\n" + " cd tests && cmake -B build && cmake --build build" + ) + + return obj_file + + +def get_test_obj_file2() -> Path: + """Get the path to the second pre-built test object file. + + Returns + ------- + Path + Path to the test_funcs2.o object file. + + """ + test_dir = Path(__file__).parent + obj_file = test_dir / "test_funcs2.o" + + if not obj_file.exists(): + raise FileNotFoundError( + f"Test object file not found: {obj_file}\n" + "Please build the test object file first:\n" + " cd tests && cmake -B build && cmake --build build" + ) + + return obj_file + + +def get_test_obj_file_conflict() -> Path: + """Get the path to the conflicting test object file. + + Returns + ------- + Path + Path to the test_funcs_conflict.o object file. 
+ + """ + test_dir = Path(__file__).parent + obj_file = test_dir / "test_funcs_conflict.o" + + if not obj_file.exists(): + raise FileNotFoundError( + f"Test object file not found: {obj_file}\n" + "Please build the test object file first:\n" + " cd tests && cmake -B build && cmake --build build" + ) return obj_file @@ -96,31 +106,25 @@ def test_create_library() -> None: def test_load_and_execute_function() -> None: """Test loading an object file and executing a function.""" - # Compile test function - obj_file = compile_simple_function() + # Get pre-built test object file + obj_file = get_test_obj_file() - try: - # Create session and library - session = create_session() - lib = session.create_library() - - # Load object file - lib.add(str(obj_file)) + # Create session and library + session = create_session() + lib = session.create_library() - # Get and call test_add function - add_func = lib.get_function("test_add") - result = add_func(10, 20) - assert result == 30 + # Load object file + lib.add(str(obj_file)) - # Get and call test_multiply function - mul_func = lib.get_function("test_multiply") - result = mul_func(7, 6) - assert result == 42 + # Get and call test_add function + add_func = lib.get_function("test_add") + result = add_func(10, 20) + assert result == 30 - finally: - # Clean up - obj_file.unlink() - obj_file.parent.rmdir() + # Get and call test_multiply function + mul_func = lib.get_function("test_multiply") + result = mul_func(7, 6) + assert result == 42 def test_multiple_libraries() -> None: @@ -136,19 +140,138 @@ def test_multiple_libraries() -> None: def test_function_not_found() -> None: """Test that getting a non-existent function raises an error.""" - obj_file = compile_simple_function() + # Get pre-built test object file + obj_file = get_test_obj_file() + + session = create_session() + lib = session.create_library() + lib.add(str(obj_file)) + + with pytest.raises(AttributeError, match="Module has no function"): + lib.get_function("nonexistent_function") + + +def test_gradually_add_objects_to_same_library() -> None: + """Test gradually adding multiple object files to the same library.""" + obj_file1 = get_test_obj_file() + obj_file2 = get_test_obj_file2() + + session = create_session() + lib = session.create_library() + + # Add first object file + lib.add(str(obj_file1)) + + # Test functions from first object + add_func = lib.get_function("test_add") + assert add_func(5, 3) == 8 + + mul_func = lib.get_function("test_multiply") + assert mul_func(4, 5) == 20 + + # Add second object file to the same library + lib.add(str(obj_file2)) + + # Test functions from second object + sub_func = lib.get_function("test_subtract") + assert sub_func(10, 3) == 7 + + div_func = lib.get_function("test_divide") + assert div_func(20, 4) == 5 + + # Verify first object's functions still work + assert add_func(10, 20) == 30 + assert mul_func(7, 6) == 42 + + +def test_two_separate_libraries() -> None: + """Test creating two separate libraries each with its own object file.""" + obj_file1 = get_test_obj_file() + obj_file2 = get_test_obj_file2() + + session = create_session() + + # Create first library with first object + lib1 = session.create_library("lib1") + lib1.add(str(obj_file1)) + + # Create second library with second object + lib2 = session.create_library("lib2") + lib2.add(str(obj_file2)) + + # Test functions from lib1 + add_func = lib1.get_function("test_add") + assert add_func(5, 3) == 8 + + mul_func = lib1.get_function("test_multiply") + assert mul_func(4, 5) == 20 + + # Test functions 
from lib2 + sub_func = lib2.get_function("test_subtract") + assert sub_func(10, 3) == 7 + + div_func = lib2.get_function("test_divide") + assert div_func(20, 4) == 5 + + # Verify lib1 doesn't have lib2's functions + with pytest.raises(AttributeError, match="Module has no function"): + lib1.get_function("test_subtract") + + # Verify lib2 doesn't have lib1's functions + with pytest.raises(AttributeError, match="Module has no function"): + lib2.get_function("test_add") + + +def test_symbol_conflict_same_library() -> None: + """Test that adding objects with conflicting symbols to same library fails.""" + obj_file1 = get_test_obj_file() + obj_file_conflict = get_test_obj_file_conflict() + + session = create_session() + lib = session.create_library() + + # Add first object file + lib.add(str(obj_file1)) + + # Verify first object's function works + add_func = lib.get_function("test_add") + assert add_func(10, 20) == 30 + + # Try to add conflicting object - should raise an error + with pytest.raises(Exception): # LLVM will throw an error for duplicate symbols + lib.add(str(obj_file_conflict)) + + +def test_symbol_conflict_different_libraries() -> None: + """Test that adding objects with conflicting symbols to different libraries works.""" + obj_file1 = get_test_obj_file() + obj_file_conflict = get_test_obj_file_conflict() + + session = create_session() + + # Create first library with first object + lib1 = session.create_library("lib1") + lib1.add(str(obj_file1)) + + # Create second library with conflicting object + lib2 = session.create_library("lib2") + lib2.add(str(obj_file_conflict)) + + # Test that both libraries work with their own versions + add_func1 = lib1.get_function("test_add") + result1 = add_func1(10, 20) + assert result1 == 30 # Original implementation - try: - session = create_session() - lib = session.create_library() - lib.add(str(obj_file)) + add_func2 = lib2.get_function("test_add") + result2 = add_func2(10, 20) + assert result2 == 1030 # Conflicting implementation adds 1000 - with pytest.raises(AttributeError, match="Module has no function"): - lib.get_function("nonexistent_function") + # Test multiply functions + mul_func1 = lib1.get_function("test_multiply") + assert mul_func1(5, 6) == 30 # Original: 5 * 6 - finally: - obj_file.unlink() - obj_file.parent.rmdir() + mul_func2 = lib2.get_function("test_multiply") + assert mul_func2(5, 6) == 60 # Conflict: (5 * 6) * 2 if __name__ == "__main__": From 6ce56a10495d6ae5ba37dbc3706799a7d3ec0287 Mon Sep 17 00:00:00 2001 From: Yaoyao Ding Date: Mon, 10 Nov 2025 17:25:28 -0500 Subject: [PATCH 11/47] fix tests --- .../tvm-ffi-orcjit/src/ffi/orcjit_session.cc | 41 ++++++++++++++----- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.cc b/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.cc index d21e9a71..0392c1f5 100644 --- a/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.cc +++ b/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.cc @@ -24,6 +24,9 @@ #include #include +#include +#include +#include #include #include #include @@ -49,9 +52,6 @@ struct LLVMInitializer { static LLVMInitializer llvm_initializer; -// Provide __dso_handle for C++ runtime -static char dso_handle_storage; - ORCJITExecutionSession::ORCJITExecutionSession() : jit_(nullptr), dylib_counter_(0) {} void ORCJITExecutionSession::Initialize() { @@ -99,14 +99,33 @@ ObjectPtr ORCJITExecutionSession::CreateDynamicLibrary(con } jd.addGenerator(std::move(*dlsg)); - // Add __dso_handle as a weak symbol (use static storage) - auto& es 
= jit_->getExecutionSession(); - auto dso_symbol = llvm::orc::ExecutorSymbolDef( - llvm::orc::ExecutorAddr::fromPtr(&dso_handle_storage), llvm::JITSymbolFlags::Exported); - llvm::orc::SymbolMap symbols; - symbols[es.intern("__dso_handle")] = dso_symbol; - if (auto err = jd.define(llvm::orc::absoluteSymbols(std::move(symbols)))) { - TVM_FFI_THROW(InternalError) << "Failed to define __dso_handle"; + // Add __dso_handle by compiling a minimal LLVM IR module containing it. + // This ensures __dso_handle is allocated in JIT memory (within 2GB of code), + // avoiding "relocation out of range" errors with optimized code. + // + // We create an IR module with a global variable for __dso_handle, then compile + // it through the normal IR compilation path. JITLink will allocate it properly. + auto Ctx = std::make_unique(); + auto M = std::make_unique("__dso_handle_module", *Ctx); + M->setDataLayout(jit_->getDataLayout()); + M->setTargetTriple(jit_->getTargetTriple().str()); + + // Create a global variable: i8 __dso_handle = 0 + auto* Int8Ty = llvm::Type::getInt8Ty(*Ctx); + auto* DsoHandle = new llvm::GlobalVariable( + *M, Int8Ty, + false, // not constant + llvm::GlobalValue::WeakAnyLinkage, // Use weak linkage so multiple dylibs can define it + llvm::ConstantInt::get(Int8Ty, 0), "__dso_handle"); + DsoHandle->setVisibility(llvm::GlobalValue::DefaultVisibility); + + // Add the module to THIS specific JITDylib using the IR layer + auto& CompileLayer = jit_->getIRCompileLayer(); + if (auto Err = CompileLayer.add(jd, llvm::orc::ThreadSafeModule(std::move(M), std::move(Ctx)))) { + std::string err_msg; + llvm::handleAllErrors(std::move(Err), + [&](const llvm::ErrorInfoBase& eib) { err_msg = eib.message(); }); + TVM_FFI_THROW(InternalError) << "Failed to add __dso_handle module: " << err_msg; } // Create the wrapper object From d27a5b02b77c305ec0943bbd1bfe208077a18607 Mon Sep 17 00:00:00 2001 From: Yaoyao Ding Date: Mon, 10 Nov 2025 17:30:12 -0500 Subject: [PATCH 12/47] wip --- .github/workflows/orcjit-tests.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/orcjit-tests.yml b/.github/workflows/orcjit-tests.yml index 97e343ce..1e6cd174 100644 --- a/.github/workflows/orcjit-tests.yml +++ b/.github/workflows/orcjit-tests.yml @@ -85,6 +85,12 @@ jobs: run: | uv pip install pytest --system + - name: Build test object files + working-directory: addons/tvm-ffi-orcjit/tests + run: | + cmake -B build + cmake --build build + - name: Run tests working-directory: addons/tvm-ffi-orcjit run: | From cf2da5dddd93cfa11894bb44edf53c0c000a48df Mon Sep 17 00:00:00 2001 From: Yaoyao Ding Date: Mon, 10 Nov 2025 17:38:57 -0500 Subject: [PATCH 13/47] fix --- addons/tvm-ffi-orcjit/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/addons/tvm-ffi-orcjit/CMakeLists.txt b/addons/tvm-ffi-orcjit/CMakeLists.txt index 92abf22b..1270b4f3 100644 --- a/addons/tvm-ffi-orcjit/CMakeLists.txt +++ b/addons/tvm-ffi-orcjit/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.18) project( tvm_ffi_orcjit VERSION 0.1.0 - LANGUAGES CXX + LANGUAGES C CXX ) set(CMAKE_CXX_STANDARD 17) From 76d13d5218667a227e2a88a74f83e72505b176a7 Mon Sep 17 00:00:00 2001 From: Yaoyao Ding Date: Mon, 10 Nov 2025 17:42:50 -0500 Subject: [PATCH 14/47] fix cmakelists.txt --- addons/tvm-ffi-orcjit/CMakeLists.txt | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/addons/tvm-ffi-orcjit/CMakeLists.txt b/addons/tvm-ffi-orcjit/CMakeLists.txt index 1270b4f3..a880093f 100644 --- 
a/addons/tvm-ffi-orcjit/CMakeLists.txt +++ b/addons/tvm-ffi-orcjit/CMakeLists.txt @@ -29,11 +29,20 @@ set(TVM_FFI_ROOT if (EXISTS "${TVM_FFI_ROOT}/include/tvm/ffi/c_api.h") message(STATUS "Using tvm-ffi from: ${TVM_FFI_ROOT}") + # Determine the library extension based on platform + if (APPLE) + set(TVM_FFI_LIB_EXT "dylib") + elseif (WIN32) + set(TVM_FFI_LIB_EXT "dll") + else () + set(TVM_FFI_LIB_EXT "so") + endif () + # Create imported target for tvm_ffi add_library(tvm_ffi SHARED IMPORTED) set_target_properties( tvm_ffi - PROPERTIES IMPORTED_LOCATION "${TVM_FFI_ROOT}/build/lib/libtvm_ffi.so" + PROPERTIES IMPORTED_LOCATION "${TVM_FFI_ROOT}/build/lib/libtvm_ffi.${TVM_FFI_LIB_EXT}" INTERFACE_INCLUDE_DIRECTORIES "${TVM_FFI_ROOT}/include;${TVM_FFI_ROOT}/3rdparty/dlpack/include" ) From b57756d696915408acc597f032664b5d8bee0b1b Mon Sep 17 00:00:00 2001 From: Yaoyao Ding Date: Mon, 10 Nov 2025 17:47:01 -0500 Subject: [PATCH 15/47] fix --- .../python/tvm_ffi_orcjit/__init__.py | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/__init__.py b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/__init__.py index 435c9b04..7d853b03 100644 --- a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/__init__.py +++ b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/__init__.py @@ -30,23 +30,41 @@ """ +import platform import sys from pathlib import Path from tvm_ffi import load_module +# Determine the library extension based on platform +if platform.system() == "Darwin": + _LIB_EXT = "dylib" +elif platform.system() == "Windows": + _LIB_EXT = "dll" +else: + _LIB_EXT = "so" + # Load the orcjit extension library to register functions -_LIB_PATH = Path(__file__).parent.parent.parent / "libtvm_ffi_orcjit.so" +_LIB_PATH = Path(__file__).parent.parent.parent / f"libtvm_ffi_orcjit.{_LIB_EXT}" if _LIB_PATH.exists(): load_module(str(_LIB_PATH)) else: # Fallback: search in site-packages (installed location) + found = False for site_pkg in sys.path: - candidate = Path(site_pkg) / "libtvm_ffi_orcjit.so" + candidate = Path(site_pkg) / f"libtvm_ffi_orcjit.{_LIB_EXT}" if candidate.exists(): load_module(str(candidate)) + found = True break + if not found: + raise RuntimeError( + f"Could not find libtvm_ffi_orcjit.{_LIB_EXT}. " + f"Searched in {_LIB_PATH} and site-packages. " + f"Please ensure the package is installed correctly." 
+ ) + from .dylib import DynamicLibrary from .session import ExecutionSession, create_session From 331404bd1784d07bfe956f4d1d1139c137498d56 Mon Sep 17 00:00:00 2001 From: Yaoyao Ding Date: Mon, 10 Nov 2025 18:24:23 -0500 Subject: [PATCH 16/47] fix tests on macos --- .../python/tvm_ffi_orcjit/__init__.py | 23 ++++++++++++++++--- addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc | 19 ++++++++++++++- 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/__init__.py b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/__init__.py index 7d853b03..066c5e57 100644 --- a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/__init__.py +++ b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/__init__.py @@ -30,6 +30,7 @@ """ +import ctypes import platform import sys from pathlib import Path @@ -44,17 +45,20 @@ else: _LIB_EXT = "so" -# Load the orcjit extension library to register functions +# Load the orcjit extension library _LIB_PATH = Path(__file__).parent.parent.parent / f"libtvm_ffi_orcjit.{_LIB_EXT}" +_lib_path_str = None if _LIB_PATH.exists(): - load_module(str(_LIB_PATH)) + _lib_module = load_module(str(_LIB_PATH)) + _lib_path_str = str(_LIB_PATH) else: # Fallback: search in site-packages (installed location) found = False for site_pkg in sys.path: candidate = Path(site_pkg) / f"libtvm_ffi_orcjit.{_LIB_EXT}" if candidate.exists(): - load_module(str(candidate)) + _lib_module = load_module(str(candidate)) + _lib_path_str = str(candidate) found = True break @@ -65,6 +69,19 @@ f"Please ensure the package is installed correctly." ) +# Explicitly initialize the library to register functions +# This is needed because static initializers may not run when loaded via dlopen +try: + # Load the library with ctypes and call the initialization function + c_lib = ctypes.CDLL(_lib_path_str, mode=ctypes.RTLD_GLOBAL) + init_func = c_lib.TVMFFIOrcJITInitialize + init_func.restype = None + init_func() +except Exception as e: + import warnings + + warnings.warn(f"Failed to explicitly initialize orcjit library: {e}") + from .dylib import DynamicLibrary from .session import ExecutionSession, create_session diff --git a/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc b/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc index 33d3a598..133609de 100644 --- a/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc +++ b/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -145,7 +146,11 @@ class DynamicLibraryModuleObj : public ModuleObj { // Registration //------------------------------------- -TVM_FFI_STATIC_INIT_BLOCK() { +static void RegisterOrcJITFunctions() { + static bool registered = false; + if (registered) return; + registered = true; + namespace refl = tvm::ffi::reflection; refl::GlobalDef() @@ -169,6 +174,18 @@ TVM_FFI_STATIC_INIT_BLOCK() { }); } +TVM_FFI_STATIC_INIT_BLOCK() { + // This block may not execute when loaded via dlopen on some platforms. + // Call TVMFFIOrcJITInitialize() explicitly if functions are not registered. 
+ RegisterOrcJITFunctions(); +} + } // namespace orcjit } // namespace ffi } // namespace tvm + +// C API for explicit initialization +extern "C" { + +TVM_FFI_DLL_EXPORT void TVMFFIOrcJITInitialize() { tvm::ffi::orcjit::RegisterOrcJITFunctions(); } +} From c15f03739f08ff57a024007543ac42e34f2194b6 Mon Sep 17 00:00:00 2001 From: Yaoyao Ding Date: Mon, 10 Nov 2025 18:40:23 -0500 Subject: [PATCH 17/47] Update addons/tvm-ffi-orcjit/pyproject.toml Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- addons/tvm-ffi-orcjit/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/addons/tvm-ffi-orcjit/pyproject.toml b/addons/tvm-ffi-orcjit/pyproject.toml index 3371b99d..365b5b0e 100644 --- a/addons/tvm-ffi-orcjit/pyproject.toml +++ b/addons/tvm-ffi-orcjit/pyproject.toml @@ -16,7 +16,7 @@ # under the License. [build-system] -requires = ["scikit-build-core>=0.3.3", "pybind11"] +requires = ["scikit-build-core>=0.3.3"] build-backend = "scikit_build_core.build" [project] From 1bc971c7a58099179eb21320fc1c43665d03e9fb Mon Sep 17 00:00:00 2001 From: Yaoyao Ding Date: Mon, 10 Nov 2025 18:40:39 -0500 Subject: [PATCH 18/47] Update addons/tvm-ffi-orcjit/examples/quick-start/compile.sh Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- addons/tvm-ffi-orcjit/examples/quick-start/compile.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/addons/tvm-ffi-orcjit/examples/quick-start/compile.sh b/addons/tvm-ffi-orcjit/examples/quick-start/compile.sh index e8c10042..3001d202 100755 --- a/addons/tvm-ffi-orcjit/examples/quick-start/compile.sh +++ b/addons/tvm-ffi-orcjit/examples/quick-start/compile.sh @@ -31,7 +31,7 @@ echo -e "${GREEN}Compiling add.cc to object file...${NC}" if ! 
command -v tvm-ffi-config &> /dev/null; then echo -e "${RED}Error: tvm-ffi-config not found${NC}" echo "Make sure apache-tvm-ffi is installed:" - echo " pip install -e ../../3rdparty/tvm-ffi" + echo " pip install -e ../../../" exit 1 fi From c9dc32056d6d3c12da759a7ac473a577f2bd9a02 Mon Sep 17 00:00:00 2001 From: Yaoyao Ding Date: Mon, 10 Nov 2025 18:40:57 -0500 Subject: [PATCH 19/47] Update addons/tvm-ffi-orcjit/README.md Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- addons/tvm-ffi-orcjit/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/addons/tvm-ffi-orcjit/README.md b/addons/tvm-ffi-orcjit/README.md index d94db69b..a9342cb0 100644 --- a/addons/tvm-ffi-orcjit/README.md +++ b/addons/tvm-ffi-orcjit/README.md @@ -175,7 +175,8 @@ tvm-ffi-orcjit/ └── python/ └── tvm_ffi_orcjit/ ├── __init__.py # Module exports - └── loader.py # Python ObjectLoader class + └── session.py # Python ExecutionSession class + └── dylib.py # Python DynamicLibrary class ``` ## Examples From b855a9ef3011e4919f1da85829b138b966771340 Mon Sep 17 00:00:00 2001 From: Yaoyao Ding Date: Mon, 10 Nov 2025 19:35:03 -0500 Subject: [PATCH 20/47] wip --- .github/workflows/orcjit-tests.yml | 2 +- addons/tvm-ffi-orcjit/README.md | 316 ++++++++++++------ .../examples/quick-start/compile.sh | 60 ---- .../include/tvm/ffi/orcjit/orcjit_dylib.h | 12 +- .../python/tvm_ffi_orcjit/dylib.py | 21 +- addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc | 40 ++- 6 files changed, 268 insertions(+), 183 deletions(-) delete mode 100755 addons/tvm-ffi-orcjit/examples/quick-start/compile.sh diff --git a/.github/workflows/orcjit-tests.yml b/.github/workflows/orcjit-tests.yml index 1e6cd174..98878172 100644 --- a/.github/workflows/orcjit-tests.yml +++ b/.github/workflows/orcjit-tests.yml @@ -39,7 +39,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, macos-latest] - python-version: ['3.9', '3.10', '3.11', '3.12'] + python-version: ['3.9'] steps: - uses: actions/checkout@v5 diff --git a/addons/tvm-ffi-orcjit/README.md b/addons/tvm-ffi-orcjit/README.md index a9342cb0..bbc3b102 100644 --- a/addons/tvm-ffi-orcjit/README.md +++ b/addons/tvm-ffi-orcjit/README.md @@ -17,71 +17,66 @@ # TVM-FFI OrcJIT -A Python package that enables dynamic loading of TVM-FFI exported object files (`.o`) using LLVM ORC JIT v2. +A Python package that enables dynamic loading of compiled object files (`.o`) using LLVM ORC JIT v2, providing a flexible JIT execution environment for TVM-FFI exported functions. 
## Features -- **Dynamic Loading**: Load compiled object files at runtime using LLVM's ORC JIT v2 -- **Incremental Loading**: Add multiple object files to the same loader instance +- **JIT Execution**: Load and execute compiled object files at runtime using LLVM's ORC JIT v2 +- **Multiple Libraries**: Create separate dynamic libraries with independent symbol namespaces +- **Incremental Loading**: Add multiple object files to the same library incrementally +- **Symbol Isolation**: Different libraries can define the same symbol without conflicts - **TVM-FFI Integration**: Seamlessly works with TVM-FFI's stable C ABI -- **Python API**: Simple Pythonic interface for loading and calling compiled functions -- **Standalone Package**: Works alongside apache-tvm-ffi without conflicts +- **Cross-Platform**: Supports Linux, macOS, and Windows (on the plan) +- **Python API**: Simple Pythonic interface for JIT compilation and execution ## Installation ### Prerequisites -- Python 3.8+ +- Python 3.9+ - CMake 3.18+ -- LLVM 14+ (with ORC JIT support) -- Ninja build system (recommended) +- LLVM 18+ (with ORC JIT support) +- C/C++ compiler with C++17 support +- Ninja build system (optional, recommended) -### Build from Source +### Installing LLVM -1. Clone the repository with submodules: +**Ubuntu/Debian:** ```bash -git clone --recursive https://github.com/apache/tvm-ffi.git -cd tvm-ffi/addons/tvm-ffi-orcjit +sudo apt-get install -y llvm-18-dev ``` -1. Build TVM-FFI dependency (from the root of tvm-ffi repository): +**macOS:** ```bash -cd ../.. # Go to tvm-ffi root -mkdir -p build && cd build -cmake .. -G Ninja -ninja -cd addons/tvm-ffi-orcjit +brew install llvm@18 +export LLVM_DIR=$(brew --prefix llvm@18) +export CC=$(brew --prefix llvm@18)/bin/clang +export CXX=$(brew --prefix llvm@18)/bin/clang++ ``` -1. Create build directory and configure with CMake: +### Build from Source -```bash -mkdir -p build -cd build -cmake .. \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ - -G Ninja -``` +1. Install the core TVM-FFI package: -1. Build the project: + ```bash + pip install apache-tvm-ffi + ``` -```bash -cmake --build . -j$(nproc) -cd .. -``` +1. Install tvm-ffi-orcjit: -The shared library will be created at: `build/libtvm_ffi_orcjit.so` + ```bash + pip install tvm-ffi-orcjit + ``` -1. Install the Python package: +### Development Installation -```bash -# Using pip -pip install . +For development, install from source: -# Or for development (editable install) +```bash +git clone --recursive https://github.com/yaoyaoding/tvm-ffi.git +cd tvm-ffi/addons/tvm-ffi-orcjit pip install -e . ``` @@ -90,72 +85,99 @@ pip install -e . 
### Basic Example ```python -from tvm_ffi_orcjit import ObjectLoader +from tvm_ffi_orcjit import create_session + +# Create an execution session +session = create_session() -# Create a loader instance -loader = ObjectLoader() +# Create a dynamic library +lib = session.create_library() # Load an object file -loader.load("example.o") +lib.add("example.o") # Get and call a function -add_func = loader.get_function("simple_add") +add_func = lib.get_function("test_add") result = add_func(1, 2) print(f"Result: {result}") # Output: Result: 3 ``` -### Incremental Loading +### Multiple Libraries with Symbol Isolation -Load multiple object files and access functions from all of them: +Create separate libraries to avoid symbol conflicts: ```python -from tvm_ffi_orcjit import ObjectLoader +from tvm_ffi_orcjit import create_session + +session = create_session() -loader = ObjectLoader() +# Create two separate libraries +lib1 = session.create_library("lib1") +lib2 = session.create_library("lib2") -# Load first object file -loader.load("math_ops.o") -add = loader.get_function("simple_add") +# Each library can have its own version of the same symbol +lib1.add("implementation_v1.o") # Contains test_add +lib2.add("implementation_v2.o") # Contains test_add with different behavior -# Load second object file - functions from first remain accessible -loader.load("string_ops.o") -concat = loader.get_function("string_concat") +# Get functions from different libraries +add_v1 = lib1.get_function("test_add") +add_v2 = lib2.get_function("test_add") -# Both functions work -print(add(10, 20)) # From math_ops.o -print(concat("Hello", "World")) # From string_ops.o +print(add_v1(5, 3)) # Uses implementation from lib1 +print(add_v2(5, 3)) # Uses implementation from lib2 ``` -### Direct Module Access +### Incremental Loading -You can also use TVM-FFI's `load_module` directly (`.o` files are automatically handled): +Add multiple object files to the same library: ```python -import tvm_ffi +from tvm_ffi_orcjit import create_session + +session = create_session() +lib = session.create_library() -# Load object file as a module -module = tvm_ffi.load_module("example.o") +# Load multiple object files incrementally +lib.add("math_ops.o") +lib.add("string_ops.o") +lib.add("utils.o") -# Get function -func = module.get_function("my_function") -result = func(arg1, arg2) +# Access functions from any loaded object file +add = lib.get_function("test_add") +subtract = lib.get_function("test_subtract") +concat = lib.get_function("string_concat") + +print(add(10, 5)) # From math_ops.o +print(subtract(10, 5)) # From math_ops.o +print(concat("Hello", " World")) # From string_ops.o ``` ## How It Works -1. **C++ Backend**: The package implements a `Library` subclass using LLVM's ORC JIT v2 (`LLJIT`) -2. **Registration**: Registers with TVM-FFI as a loader for `.o` files via `ffi.Module.load_from_file.o` -3. **Symbol Resolution**: Uses LLJIT's `lookup()` to resolve TVM-FFI exported symbols -4. **Module Integration**: Wraps the ORC JIT library in `LibraryModuleObj` for seamless TVM-FFI integration +1. **ExecutionSession**: Manages the LLVM ORC JIT execution session and multiple dynamic libraries +2. **DynamicLibrary**: Represents a JITDylib that can load object files and resolve symbols +3. **Symbol Resolution**: Uses LLVM's ORC JIT v2 symbol lookup with proper linkage semantics +4. **Memory Management**: Allocates `__dso_handle` in JIT memory to ensure proper relocations +5. 
**TVM-FFI Integration**: Functions are exposed through TVM-FFI's PackedFunc interface + +### Technical Details + +- **ORC JIT v2**: Uses LLVM's modern JIT infrastructure (LLJIT) +- **Weak Linkage**: Each library gets its own `__dso_handle` with weak linkage +- **IR-based Allocation**: Creates LLVM IR modules for runtime symbols to ensure JIT memory allocation +- **Cross-Platform**: Correctly handles `.so` (Linux), `.dylib` (macOS), and `.dll` (Windows) ## Development -### Building with CMake +### Building Tests + +The project includes comprehensive tests with CMake-built test objects: ```bash -mkdir build && cd build -cmake .. -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -cmake --build . +cd tests +cmake -B build +cmake --build build +pytest -v ``` ### Project Structure @@ -165,30 +187,80 @@ tvm-ffi-orcjit/ ├── CMakeLists.txt # CMake build configuration ├── pyproject.toml # Python package metadata ├── README.md # This file -├── example.py # Usage example ├── include/ │ └── tvm/ffi/orcjit/ -│ └── orcjit_library.h # C++ header +│ ├── orcjit_session.h # ExecutionSession C++ header +│ └── orcjit_dylib.h # DynamicLibrary C++ header ├── src/ │ └── ffi/ -│ └── orcjit_library.cc # C++ implementation -└── python/ - └── tvm_ffi_orcjit/ - ├── __init__.py # Module exports - └── session.py # Python ExecutionSession class - └── dylib.py # Python DynamicLibrary class +│ ├── orcjit_session.cc # ExecutionSession implementation +│ └── orcjit_dylib.cc # DynamicLibrary implementation +├── python/ +│ └── tvm_ffi_orcjit/ +│ ├── __init__.py # Module exports and library loading +│ ├── session.py # Python ExecutionSession wrapper +│ └── dylib.py # Python DynamicLibrary wrapper +├── tests/ +│ ├── CMakeLists.txt # Test object file builds +│ ├── test_basic.py # Python tests +│ └── sources/ +│ ├── test_funcs.cc # Test functions +│ ├── test_funcs2.cc # Additional test functions +│ └── test_funcs_conflict.cc # Conflicting symbols for testing +└── examples/ + └── quick-start/ # Complete example with CMake ``` ## Examples -See `example.py` for a complete demonstration of incremental loading. +Complete examples are available in the `examples/` directory: + +- **quick-start/**: Demonstrates basic usage with a simple add function + - Shows how to compile C++ code with TVM-FFI exports + - Loads and executes the compiled object file + - Uses CMake for building the example + +## Writing Functions for OrcJIT + +Functions must use TVM-FFI's export macros: + +```cpp +#include + +// Simple function +TVM_FFI_DLL_EXPORT_TYPED_FUNC(simple_add, [](int a, int b) { + return a + b; +}); + +// Function with implementation +static int multiply_impl(int a, int b) { + return a * b; +} + +TVM_FFI_DLL_EXPORT_TYPED_FUNC(simple_multiply, multiply_impl); +``` + +Compile with C++17: + +```bash +clang++ -std=c++17 -fPIC -c -o example.o example.cc +``` ## Requirements The package depends on: -- `apache-tvm-ffi>=0.1.0` - TVM-FFI library -- LLVM 14+ (linked at build time) - For ORC JIT functionality +- `apache-tvm-ffi>=0.1.0` - TVM-FFI core library +- LLVM 18+ (linked at build time) - For ORC JIT v2 functionality +- Python 3.9+ - For runtime + +## Known Limitations + +### Optimized Code and Relocations + +When compiling object files with optimization enabled (`-O2`, `-O3`), ensure your code doesn't generate PC-relative relocations that exceed ±2GB range. The package allocates `__dso_handle` in JIT memory to mitigate this, but extremely large programs may still encounter issues. 
+ +**Workaround**: Compile test objects with `-O0` if you encounter "relocation out of range" errors during sequential test runs. ## License @@ -198,34 +270,80 @@ Apache License 2.0 Contributions are welcome! Please ensure that: -1. Code follows the existing style -2. New features include tests -3. Documentation is updated +1. Code follows the existing C++17 and Python style +2. New features include tests in `tests/test_basic.py` +3. Documentation is updated (README and docstrings) +4. CI tests pass on all platforms (Linux, macOS) ## Troubleshooting -### Symbol not found errors +### "Cannot find global function" error -Make sure your object file was compiled with TVM-FFI export macros: +The shared library wasn't loaded. This usually means: -```cpp -#include +- The library file extension doesn't match your platform +- The library wasn't installed correctly +- Python can't find the library file -TVM_FFI_DLL_EXPORT_TYPED_FUNC(my_function, [](int a, int b) { - return a + b; -}); +**Solution**: Reinstall the package: + +```bash +pip install --force-reinstall tvm-ffi-orcjit +``` + +### "Duplicate definition of symbol" error + +You're adding multiple object files with the same symbol to the same library. + +**Solution**: Use separate libraries for different implementations: + +```python +lib1 = session.create_library("lib1") +lib2 = session.create_library("lib2") ``` +### "Symbol not found" error + +The symbol wasn't exported with TVM-FFI macros. + +**Solution**: Use `TVM_FFI_DLL_EXPORT_TYPED_FUNC`: + +```cpp +TVM_FFI_DLL_EXPORT_TYPED_FUNC(my_function, impl); +``` + +### Relocation errors with optimized code + +Object files compiled with `-O2` or higher may fail with "relocation out of range" in some scenarios. + +**Solution**: + +- Use `-O0` for test/development builds +- Run tests in separate processes (using `pytest-xdist`) +- This limitation primarily affects test scenarios, not production use + ### LLVM version mismatch -Ensure the LLVM version used to build this package matches your system's LLVM installation. +The package requires LLVM 18+. Using older versions will cause build failures. + +**Solution**: Install LLVM 18: + +```bash +# Ubuntu +sudo ./llvm.sh 18 + +# macOS +brew install llvm@18 +``` -### TVM-FFI not found +### CMake can't find LLVM -Make sure TVM-FFI is built in the parent repository: +Set the `LLVM_DIR` environment variable: ```bash -cd ../../ # Go to tvm-ffi root -mkdir -p build && cd build -cmake .. -G Ninja && ninja +# macOS +export LLVM_DIR=$(brew --prefix llvm@18)/lib/cmake/llvm + +# Linux +export LLVM_DIR=/usr/lib/llvm-18/cmake ``` diff --git a/addons/tvm-ffi-orcjit/examples/quick-start/compile.sh b/addons/tvm-ffi-orcjit/examples/quick-start/compile.sh deleted file mode 100755 index 3001d202..00000000 --- a/addons/tvm-ffi-orcjit/examples/quick-start/compile.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# Compile script for quick-start example - -set -e - -# Colors for output -GREEN='\\033[0;32m' -RED='\\033[0;31m' -NC='\\033[0m' # No Color - -echo -e "${GREEN}Compiling add.cc to object file...${NC}" - -# Check if tvm-ffi-config is available -if ! command -v tvm-ffi-config &> /dev/null; then - echo -e "${RED}Error: tvm-ffi-config not found${NC}" - echo "Make sure apache-tvm-ffi is installed:" - echo " pip install -e ../../../" - exit 1 -fi - -# Get compilation flags from tvm-ffi-config -echo -e "${GREEN}Getting compilation flags from tvm-ffi-config...${NC}" -CXXFLAGS=$(tvm-ffi-config --cxxflags) -LDFLAGS=$(tvm-ffi-config --ldflags) - -# Override C++ standard to C++20 (needed for lambda in unevaluated context) -CXXFLAGS="${CXXFLAGS/-std=c++17/-std=c++20}" - -echo " CXXFLAGS: $CXXFLAGS" -echo " LDFLAGS: $LDFLAGS" - -# Compile to object file -echo -e "${GREEN}Compiling...${NC}" -# shellcheck disable=SC2086 -g++ -c add.cc \ - -o add.o \ - $CXXFLAGS \ - -fPIC \ - -O2 - -echo -e "${GREEN}Successfully compiled add.o${NC}" -echo "" -echo "You can now run: python run.py" diff --git a/addons/tvm-ffi-orcjit/include/tvm/ffi/orcjit/orcjit_dylib.h b/addons/tvm-ffi-orcjit/include/tvm/ffi/orcjit/orcjit_dylib.h index 8280f80f..9916a2df 100644 --- a/addons/tvm-ffi-orcjit/include/tvm/ffi/orcjit/orcjit_dylib.h +++ b/addons/tvm-ffi-orcjit/include/tvm/ffi/orcjit/orcjit_dylib.h @@ -55,12 +55,13 @@ class ORCJITDynamicLibrary : public Object { void AddObjectFile(const String& path); /*! - * \brief Link this library against another library - * \param other The library to link against + * \brief Set the link order for symbol resolution + * \param libraries Vector of libraries to search for symbols (in order) * - * After this call, this library can resolve symbols from 'other'. + * When resolving symbols, this library will search in the specified libraries + * in the order provided. This replaces any previous link order. */ - void LinkAgainst(const ORCJITDynamicLibrary& other); + void SetLinkOrder(const std::vector>& libraries); /*! * \brief Look up a symbol in this library @@ -106,6 +107,9 @@ class ORCJITDynamicLibrary : public Object { /*! \brief Library name */ String name_; + + /*! 
\brief Link order tracking (to support incremental linking) */ + llvm::orc::JITDylibSearchOrder link_order_; }; } // namespace orcjit diff --git a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/dylib.py b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/dylib.py index 83c9149f..7bf9c8ce 100644 --- a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/dylib.py +++ b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/dylib.py @@ -59,7 +59,7 @@ def __init__(self, handle: Any, session: ExecutionSession) -> None: self._handle = handle self._session = session # Keep session alive self._add_func = get_global_func("orcjit.DynamicLibraryAdd") - self._link_func = get_global_func("orcjit.DynamicLibraryLinkAgainst") + self._set_link_order_func = get_global_func("orcjit.DynamicLibrarySetLinkOrder") self._to_module_func = get_global_func("orcjit.DynamicLibraryToModule") def add(self, object_file: str | Path) -> None: @@ -80,29 +80,32 @@ def add(self, object_file: str | Path) -> None: object_file = str(object_file) self._add_func(self._handle, object_file) - def link_against(self, *libraries: DynamicLibrary) -> None: - """Link this library against other dynamic libraries. + def set_link_order(self, *libraries: DynamicLibrary) -> None: + """Set the link order for symbol resolution. - Sets the search order for symbol resolution. Symbols not found in this library - will be searched in the linked libraries in the order specified. + When resolving symbols, this library will search in the specified libraries + in the order provided. This replaces any previous link order. Parameters ---------- *libraries : DynamicLibrary - One or more dynamic libraries to link against. + One or more dynamic libraries to search for symbols (in order). Examples -------- >>> session = create_session() >>> lib_utils = session.create_library() >>> lib_utils.add("utils.o") + >>> lib_core = session.create_library() + >>> lib_core.add("core.o") >>> lib_main = session.create_library() >>> lib_main.add("main.o") - >>> lib_main.link_against(lib_utils) # main can call utils symbols + >>> # main can call symbols from utils and core (utils searched first) + >>> lib_main.set_link_order(lib_utils, lib_core) """ - handles = [lib._handle for lib in libraries] - self._link_func(self._handle, *handles) + lib_handles = [lib._handle for lib in libraries] + self._set_link_order_func(self._handle, lib_handles) def get_function(self, name: str) -> Function: """Get a function from this dynamic library. 
diff --git a/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc b/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc index 133609de..0dcaa8d7 100644 --- a/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc +++ b/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -64,18 +65,29 @@ void ORCJITDynamicLibrary::AddObjectFile(const String& path) { } } -void ORCJITDynamicLibrary::LinkAgainst(const ORCJITDynamicLibrary& other) { - // Set up link order: this dylib should search in other dylib - llvm::orc::JITDylibSearchOrder search_order; - search_order.push_back({other.dylib_, llvm::orc::JITDylibLookupFlags::MatchAllSymbols}); +void ORCJITDynamicLibrary::SetLinkOrder( + const std::vector>& libraries) { + // Clear and rebuild the link order + link_order_.clear(); + + for (const auto& lib : libraries) { + link_order_.push_back({lib->dylib_, llvm::orc::JITDylibLookupFlags::MatchAllSymbols}); + } - dylib_->setLinkOrder(search_order, false); + // Set the link order in the LLVM JITDylib + dylib_->setLinkOrder(link_order_, false); } void* ORCJITDynamicLibrary::GetSymbol(const String& name) { - // Look up symbol + // Build search order: this dylib first, then all linked dylibs + llvm::orc::JITDylibSearchOrder search_order; + search_order.push_back({dylib_, llvm::orc::JITDylibLookupFlags::MatchAllSymbols}); + // Append linked libraries + search_order.insert(search_order.end(), link_order_.begin(), link_order_.end()); + + // Look up symbol using the full search order auto symbol_or_err = - jit_->getExecutionSession().lookup({dylib_}, jit_->mangleAndIntern(name.c_str())); + jit_->getExecutionSession().lookup(search_order, jit_->mangleAndIntern(name.c_str())); if (!symbol_or_err) { auto err = symbol_or_err.takeError(); std::string err_msg; @@ -162,9 +174,17 @@ static void RegisterOrcJITFunctions() { }) .def("orcjit.DynamicLibraryAdd", [](ORCJITDynamicLibrary* dylib, String path) { dylib->AddObjectFile(path); }) - .def("orcjit.DynamicLibraryLinkAgainst", - [](ORCJITDynamicLibrary* dylib, ORCJITDynamicLibrary* other) { - dylib->LinkAgainst(*other); + .def("orcjit.DynamicLibrarySetLinkOrder", + [](ORCJITDynamicLibrary* dylib, Array libraries) { + std::vector> lib_ptrs; + lib_ptrs.reserve(libraries.size()); + for (const auto& lib_ref : libraries) { + auto* lib = lib_ref.as(); + auto lib_ptr = + GetObjectPtr(const_cast(lib)); + lib_ptrs.push_back(lib_ptr); + } + dylib->SetLinkOrder(lib_ptrs); }) .def("orcjit.DynamicLibraryGetName", [](ORCJITDynamicLibrary* dylib) -> String { return dylib->GetName(); }) From 0dfc2d14e2e4d0cd531ec7e3c4ab64fd08b93284 Mon Sep 17 00:00:00 2001 From: Yaoyao Ding Date: Mon, 10 Nov 2025 19:42:51 -0500 Subject: [PATCH 21/47] use the right way to define object --- .../include/tvm/ffi/orcjit/orcjit_dylib.h | 6 +-- .../include/tvm/ffi/orcjit/orcjit_session.h | 44 ++++++++++++------- addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc | 10 +++-- .../tvm-ffi-orcjit/src/ffi/orcjit_session.cc | 21 ++++----- 4 files changed, 48 insertions(+), 33 deletions(-) diff --git a/addons/tvm-ffi-orcjit/include/tvm/ffi/orcjit/orcjit_dylib.h b/addons/tvm-ffi-orcjit/include/tvm/ffi/orcjit/orcjit_dylib.h index 9916a2df..a028e45b 100644 --- a/addons/tvm-ffi-orcjit/include/tvm/ffi/orcjit/orcjit_dylib.h +++ b/addons/tvm-ffi-orcjit/include/tvm/ffi/orcjit/orcjit_dylib.h @@ -36,7 +36,7 @@ namespace tvm { namespace ffi { namespace orcjit { -class ORCJITExecutionSession; +class ORCJITExecutionSessionObj; /*! 
* \brief DynamicLibrary wrapper for LLVM ORC JIT v2 JITDylib @@ -92,12 +92,12 @@ class ORCJITDynamicLibrary : public Object { * \param jit The LLJIT instance * \param name The library name */ - ORCJITDynamicLibrary(ObjectPtr session, llvm::orc::JITDylib* dylib, + ORCJITDynamicLibrary(ObjectPtr session, llvm::orc::JITDylib* dylib, llvm::orc::LLJIT* jit, String name); private: /*! \brief Parent execution session (for lifetime management) */ - ObjectPtr session_; + ObjectPtr session_; /*! \brief The LLVM JITDylib */ llvm::orc::JITDylib* dylib_; diff --git a/addons/tvm-ffi-orcjit/include/tvm/ffi/orcjit/orcjit_session.h b/addons/tvm-ffi-orcjit/include/tvm/ffi/orcjit/orcjit_session.h index 835b07c6..1115aeac 100644 --- a/addons/tvm-ffi-orcjit/include/tvm/ffi/orcjit/orcjit_session.h +++ b/addons/tvm-ffi-orcjit/include/tvm/ffi/orcjit/orcjit_session.h @@ -40,18 +40,22 @@ namespace orcjit { class ORCJITDynamicLibrary; /*! - * \brief ExecutionSession wrapper for LLVM ORC JIT v2 + * \brief ExecutionSession object for LLVM ORC JIT v2 * * This class manages the lifetime of an LLVM ExecutionSession and provides * functionality to create and manage multiple JITDylibs (DynamicLibraries). */ -class ORCJITExecutionSession : public Object { +class ORCJITExecutionSessionObj : public Object { public: /*! - * \brief Create a new ExecutionSession - * \return The created execution session instance + * \brief Default constructor (for make_object) + */ + ORCJITExecutionSessionObj(); + + /*! + * \brief Initialize the LLJIT instance */ - static ObjectPtr Create(); + void Initialize(); /*! * \brief Create a new DynamicLibrary (JITDylib) in this session @@ -73,19 +77,9 @@ class ORCJITExecutionSession : public Object { llvm::orc::LLJIT& GetLLJIT(); static constexpr bool _type_mutable = true; - TVM_FFI_DECLARE_OBJECT_INFO_FINAL("orcjit.ExecutionSession", ORCJITExecutionSession, Object); - - /*! - * \brief Default constructor (for make_object) - */ - ORCJITExecutionSession(); + TVM_FFI_DECLARE_OBJECT_INFO_FINAL("orcjit.ExecutionSession", ORCJITExecutionSessionObj, Object); private: - /*! - * \brief Initialize the LLJIT instance - */ - void Initialize(); - /*! \brief The LLVM ORC JIT instance */ std::unique_ptr jit_; @@ -96,6 +90,24 @@ class ORCJITExecutionSession : public Object { std::unordered_map> dylibs_; }; +/*! + * \brief Reference wrapper for ORCJITExecutionSessionObj + * + * A reference wrapper serves as a reference-counted pointer to the session object. + */ +class ORCJITExecutionSession : public ObjectRef { + public: + /*! 
+ * \brief Create a new ExecutionSession + * \return The created execution session instance + */ + static ORCJITExecutionSession Create(); + + // Required: define object reference methods + TVM_FFI_DEFINE_OBJECT_REF_METHODS_NULLABLE(ORCJITExecutionSession, ObjectRef, + ORCJITExecutionSessionObj); +}; + } // namespace orcjit } // namespace ffi } // namespace tvm diff --git a/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc b/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc index 0dcaa8d7..df0ef0b9 100644 --- a/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc +++ b/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc @@ -40,7 +40,7 @@ namespace tvm { namespace ffi { namespace orcjit { -ORCJITDynamicLibrary::ORCJITDynamicLibrary(ObjectPtr session, +ORCJITDynamicLibrary::ORCJITDynamicLibrary(ObjectPtr session, llvm::orc::JITDylib* dylib, llvm::orc::LLJIT* jit, String name) : session_(std::move(session)), dylib_(dylib), jit_(jit), name_(std::move(name)) { @@ -167,10 +167,12 @@ static void RegisterOrcJITFunctions() { refl::GlobalDef() .def("orcjit.CreateExecutionSession", - []() -> ObjectRef { return ObjectRef(ORCJITExecutionSession::Create()); }) + []() -> ORCJITExecutionSession { return ORCJITExecutionSession::Create(); }) .def("orcjit.SessionCreateDynamicLibrary", - [](ORCJITExecutionSession* session, String name) -> ObjectRef { - return ObjectRef(session->CreateDynamicLibrary(name)); + [](ORCJITExecutionSession session, String name) -> ObjectRef { + auto session_obj = GetObjectPtr( + const_cast(session.as())); + return ObjectRef(session_obj->CreateDynamicLibrary(name)); }) .def("orcjit.DynamicLibraryAdd", [](ORCJITDynamicLibrary* dylib, String path) { dylib->AddObjectFile(path); }) diff --git a/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.cc b/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.cc index 0392c1f5..8d4037b0 100644 --- a/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.cc +++ b/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.cc @@ -52,9 +52,9 @@ struct LLVMInitializer { static LLVMInitializer llvm_initializer; -ORCJITExecutionSession::ORCJITExecutionSession() : jit_(nullptr), dylib_counter_(0) {} +ORCJITExecutionSessionObj::ORCJITExecutionSessionObj() : jit_(nullptr), dylib_counter_(0) {} -void ORCJITExecutionSession::Initialize() { +void ORCJITExecutionSessionObj::Initialize() { // Create LLJIT instance auto jit_or_err = llvm::orc::LLJITBuilder().create(); if (!jit_or_err) { @@ -67,13 +67,14 @@ void ORCJITExecutionSession::Initialize() { jit_ = std::move(*jit_or_err); } -ObjectPtr ORCJITExecutionSession::Create() { - auto session = make_object(); - session->Initialize(); - return session; +ORCJITExecutionSession ORCJITExecutionSession::Create() { + auto obj = make_object(); + obj->Initialize(); + return ORCJITExecutionSession(obj); } -ObjectPtr ORCJITExecutionSession::CreateDynamicLibrary(const String& name) { +ObjectPtr ORCJITExecutionSessionObj::CreateDynamicLibrary( + const String& name) { TVM_FFI_CHECK(jit_ != nullptr, InternalError) << "ExecutionSession not initialized"; // Generate name if not provided @@ -129,7 +130,7 @@ ObjectPtr ORCJITExecutionSession::CreateDynamicLibrary(con } // Create the wrapper object - auto dylib = make_object(GetObjectPtr(this), &jd, + auto dylib = make_object(GetObjectPtr(this), &jd, jit_.get(), lib_name); // Store for lifetime management @@ -138,12 +139,12 @@ ObjectPtr ORCJITExecutionSession::CreateDynamicLibrary(con return dylib; } -llvm::orc::ExecutionSession& ORCJITExecutionSession::GetLLVMExecutionSession() { +llvm::orc::ExecutionSession& 
ORCJITExecutionSessionObj::GetLLVMExecutionSession() { TVM_FFI_CHECK(jit_ != nullptr, InternalError) << "ExecutionSession not initialized"; return jit_->getExecutionSession(); } -llvm::orc::LLJIT& ORCJITExecutionSession::GetLLJIT() { +llvm::orc::LLJIT& ORCJITExecutionSessionObj::GetLLJIT() { TVM_FFI_CHECK(jit_ != nullptr, InternalError) << "ExecutionSession not initialized"; return *jit_; } From 5af6c7791774601f0256e02ba85b9d8db66445e6 Mon Sep 17 00:00:00 2001 From: Yaoyao Ding Date: Mon, 10 Nov 2025 19:54:59 -0500 Subject: [PATCH 22/47] move include to src --- addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc | 6 ++++-- .../{include/tvm/ffi/orcjit => src/ffi}/orcjit_dylib.h | 0 addons/tvm-ffi-orcjit/src/ffi/orcjit_session.cc | 6 ++++-- .../{include/tvm/ffi/orcjit => src/ffi}/orcjit_session.h | 0 4 files changed, 8 insertions(+), 4 deletions(-) rename addons/tvm-ffi-orcjit/{include/tvm/ffi/orcjit => src/ffi}/orcjit_dylib.h (100%) rename addons/tvm-ffi-orcjit/{include/tvm/ffi/orcjit => src/ffi}/orcjit_session.h (100%) diff --git a/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc b/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc index df0ef0b9..c61a3a33 100644 --- a/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc +++ b/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc @@ -22,6 +22,8 @@ * \brief LLVM ORC JIT DynamicLibrary implementation */ +#include "orcjit_dylib.h" + #include #include #include @@ -32,10 +34,10 @@ #include #include #include -#include -#include #include +#include "orcjit_session.h" + namespace tvm { namespace ffi { namespace orcjit { diff --git a/addons/tvm-ffi-orcjit/include/tvm/ffi/orcjit/orcjit_dylib.h b/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.h similarity index 100% rename from addons/tvm-ffi-orcjit/include/tvm/ffi/orcjit/orcjit_dylib.h rename to addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.h diff --git a/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.cc b/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.cc index 8d4037b0..e4a638f7 100644 --- a/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.cc +++ b/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.cc @@ -22,6 +22,8 @@ * \brief LLVM ORC JIT ExecutionSession implementation */ +#include "orcjit_session.h" + #include #include #include @@ -31,12 +33,12 @@ #include #include #include -#include -#include #include #include +#include "orcjit_dylib.h" + namespace tvm { namespace ffi { namespace orcjit { diff --git a/addons/tvm-ffi-orcjit/include/tvm/ffi/orcjit/orcjit_session.h b/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.h similarity index 100% rename from addons/tvm-ffi-orcjit/include/tvm/ffi/orcjit/orcjit_session.h rename to addons/tvm-ffi-orcjit/src/ffi/orcjit_session.h From b9eb2aa9b864ae550db064da5d26b2aebffe650d Mon Sep 17 00:00:00 2001 From: Yaoyao Ding Date: Mon, 10 Nov 2025 20:12:55 -0500 Subject: [PATCH 23/47] update cmakelists.txt --- addons/tvm-ffi-orcjit/CMakeLists.txt | 69 +++++++--------------------- addons/tvm-ffi-orcjit/pyproject.toml | 2 +- 2 files changed, 17 insertions(+), 54 deletions(-) diff --git a/addons/tvm-ffi-orcjit/CMakeLists.txt b/addons/tvm-ffi-orcjit/CMakeLists.txt index a880093f..24b87575 100644 --- a/addons/tvm-ffi-orcjit/CMakeLists.txt +++ b/addons/tvm-ffi-orcjit/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.18) +cmake_minimum_required(VERSION 3.20) project( tvm_ffi_orcjit VERSION 0.1.0 @@ -15,46 +15,22 @@ find_package(LLVM REQUIRED CONFIG) message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") -# Add LLVM definitions and include directories early 
-separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND ${LLVM_DEFINITIONS}) -add_definitions(${LLVM_DEFINITIONS_LIST}) -include_directories(${LLVM_INCLUDE_DIRS}) - -# Find tvm-ffi Look for tvm-ffi in the parent repository first, then system -set(TVM_FFI_ROOT - "${CMAKE_CURRENT_SOURCE_DIR}/../.." - CACHE PATH "Path to tvm-ffi" +# Find tvm-ffi package using the same method as quickstart example +find_package( + Python + COMPONENTS Interpreter + REQUIRED ) -if (EXISTS "${TVM_FFI_ROOT}/include/tvm/ffi/c_api.h") - message(STATUS "Using tvm-ffi from: ${TVM_FFI_ROOT}") - - # Determine the library extension based on platform - if (APPLE) - set(TVM_FFI_LIB_EXT "dylib") - elseif (WIN32) - set(TVM_FFI_LIB_EXT "dll") - else () - set(TVM_FFI_LIB_EXT "so") - endif () - - # Create imported target for tvm_ffi - add_library(tvm_ffi SHARED IMPORTED) - set_target_properties( - tvm_ffi - PROPERTIES IMPORTED_LOCATION "${TVM_FFI_ROOT}/build/lib/libtvm_ffi.${TVM_FFI_LIB_EXT}" - INTERFACE_INCLUDE_DIRECTORIES - "${TVM_FFI_ROOT}/include;${TVM_FFI_ROOT}/3rdparty/dlpack/include" - ) +# Get the cmake directory for tvm-ffi +execute_process( + COMMAND "${Python_EXECUTABLE}" -m tvm_ffi.config --cmakedir + OUTPUT_STRIP_TRAILING_WHITESPACE + OUTPUT_VARIABLE tvm_ffi_ROOT +) +message(STATUS "tvm_ffi_ROOT: ${tvm_ffi_ROOT}") - # Set include directories (including src for internal headers) - include_directories(${TVM_FFI_ROOT}/include) - include_directories(${TVM_FFI_ROOT}/src) - include_directories(${TVM_FFI_ROOT}/3rdparty/dlpack/include) -else () - message(STATUS "Looking for system tvm-ffi") - find_package(tvm-ffi REQUIRED CONFIG) -endif () +find_package(tvm_ffi CONFIG REQUIRED) # LLVM components needed for ORC JIT v2 llvm_map_components_to_libnames(LLVM_LIBS Core OrcJIT Support native) @@ -77,16 +53,10 @@ set(SOURCES src/ffi/orcjit_session.cc src/ffi/orcjit_dylib.cc) add_library(tvm_ffi_orcjit SHARED ${SOURCES}) target_include_directories( - tvm_ffi_orcjit - PUBLIC $ $ - PRIVATE ${LLVM_INCLUDE_DIRS} + tvm_ffi_orcjit PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/src ${LLVM_INCLUDE_DIRS} ) -target_link_libraries( - tvm_ffi_orcjit - PUBLIC tvm_ffi - PRIVATE LLVM -) +target_link_libraries(tvm_ffi_orcjit PRIVATE tvm_ffi_header tvm_ffi_shared LLVM) # Compile definitions separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND ${LLVM_DEFINITIONS}) @@ -100,13 +70,6 @@ install( RUNTIME DESTINATION bin ) -install( - DIRECTORY include/ - DESTINATION include - FILES_MATCHING - PATTERN "*.h" -) - # For Python package building if (SKBUILD) # Install shared library alongside Python modules diff --git a/addons/tvm-ffi-orcjit/pyproject.toml b/addons/tvm-ffi-orcjit/pyproject.toml index 365b5b0e..5f7549fe 100644 --- a/addons/tvm-ffi-orcjit/pyproject.toml +++ b/addons/tvm-ffi-orcjit/pyproject.toml @@ -16,7 +16,7 @@ # under the License. 
[build-system] -requires = ["scikit-build-core>=0.3.3"] +requires = ["scikit-build-core>=0.3.3", "apache-tvm-ffi"] build-backend = "scikit_build_core.build" [project] From b78c6db3d844ad5ff8cb25a1222c6ba067eac2f9 Mon Sep 17 00:00:00 2001 From: Yaxing Cai Date: Sat, 15 Nov 2025 08:37:14 +0000 Subject: [PATCH 24/47] refactor --- docs/guides/kernel_library_guide.rst | 155 ++++++++++++++++----------- 1 file changed, 93 insertions(+), 62 deletions(-) diff --git a/docs/guides/kernel_library_guide.rst b/docs/guides/kernel_library_guide.rst index 034adedd..f000af0a 100644 --- a/docs/guides/kernel_library_guide.rst +++ b/docs/guides/kernel_library_guide.rst @@ -19,30 +19,30 @@ Kernel Library Guide ==================== -This guide serves as a quick start for shipping python version and framework agnostic kernel libraries with TVM FFI. +This guide serves as a quick start for shipping python version and machine learning(ML) framework agnostic kernel libraries with TVM FFI. With the help of TVM FFI, we can connect the kernel libraries to multiple ML framework, such as PyTorch, XLA, JAX, together with the minimal efforts. Tensor ====== -TVM FFI provide minimal set of data structures to represent tensors from frameworks and allows us to build kernels for frameworks. In TVM FFI, we support two types of tensor constructs, ``ffi::Tensor`` and ``ffi::TensorView`` that can be used to represent a tensor from machine learning frameworks, such as PyTorch, XLA, JAX, and so on. +Almost all kernel libraries are about tensor computation and manipulation. For better adaptation to different ML frameworks, TVM FFI provides a minimal set of data structures to represent tensors from ML frameworks, including the tensor basic attributes and storage pointer. To be specific, in TVM FFI, two types of tensor constructs, ``ffi::Tensor`` and ``ffi::TensorView``, can be used to represent a tensor from ML frameworks. Tensor and TensorView --------------------- -Both ``ffi::Tensor`` and ``ffi::TensorView`` are designed to represent tensors from ML frameworks that interact with the TVM FFI ABI. The main difference is whether it is an owning tensor structure. +Though both ``ffi::Tensor`` and ``ffi::TensorView`` are designed to represent tensors from ML frameworks that interact with the TVM FFI ABI. The main difference is whether it is an owning tensor structure. ffi::Tensor ``ffi::Tensor`` is a completely onwing tensor pointer, pointing to a TVM FFI tensor object. TVM FFI handles the lifetime of ``ffi::Tensor`` by retaining a strong reference. ffi::TensorView - ``ffi::TensorView`` is non-owning view of an existing tensor. It is backed by ``DLTensor`` structure in DLPack. Since it is a non-owning view, it is the user's responsibility to ensure the lifetime of underlying tensor data and attributes of the viewed tensor object. + ``ffi::TensorView`` is a non-owning view of an existing tensor, pointint to an existing ML framework tensor. It is backed by ``DLTensor`` structure in DLPack in practice. And TVM FFI does not guarantee its lifetime also. -We **recommend** to use ``ffi::TensorView`` when possible, that helps us to support more cases, including cases where only view but not strong reference are passed, like XLA buffer. It is also more lightweight. +It is **recommended** to use ``ffi::TensorView`` when possible, that helps us to support more cases, including cases where only view but not strong reference are passed, like XLA buffer. It is also more lightweight. 
However, since ``ffi::TensorView`` is a non-owning view, it is the user's responsibility to ensure the lifetime of underlying tensor data and attributes of the viewed tensor object. Tensor Attributes ----------------- -For the sake of convenience, ``ffi::TensorView`` and ``ffi::Tensor`` align the following attributes retrieval mehtods to ``at::Tensor`` interface: +For the sake of convenience, ``ffi::TensorView`` and ``ffi::Tensor`` align the following attributes retrieval mehtods to ``at::Tensor`` interface, to obtain tensor basic attributes and storage pointer: ``dim``, ``sizes``, ``size``, ``strides``, ``stride``, ``numel``, ``data_ptr``, ``device``, ``is_contiguous`` @@ -58,52 +58,61 @@ ShapeView Tensor Allocation ----------------- -TVM FFI provides several methods to allocate tensors, when dynamic tensor allocation is necessary. +TVM FFI provides several methods to allocate tensors at C++ runtime. Generally, there are two types of tensor allocation: + +* Allocate a tensor with new storage from scratch, i.e. ``FromEnvAlloc`` and ``FromNDAlloc``. By this types of methods, the shapes, strides, data types, devices and other attributes are required for the allocation. +* Allocate a tensor with existing storage following DLPack protocol, i.e. ``FromDLPack`` and ``FromDLPackVersioned``. By this types of methods, the shapes, data types, devices and other attributes can be inferred from the DLPack attributes. FromEnvAlloc - Usually TVM FFI works together with a ML framework with its own tensor allocator. ``FromEnvAlloc`` is tailor-made for this case, so that it is possible to use framework tensor allocator when allocating ``ffi::Tensor``. And TVM FFI automatically sets the framework tensor allocator when the corresponding framework tensor exists in FFI arguments. For example, when calling TVM FFI packed kernels, if there are any input arguments of type ``torch.Tensor`` at Python side, TVM FFI will bind the ``at::Empty`` as the global framework tensor allocator - ``TVMFFIEnvTensorAlloc``. Here is an example: +^^^^^^^^^^^^ + +To better adapt to the ML framework, it is **recommended** to reuse the framework tensor allocator anyway, instead of directly allocating the tensors via CUDA runtime API, like ``cudaMalloc``. Since reusing the framework tensor allocator: + +* Benefit from the framework's native caching allocator or related allocation mechanism. +* Help framework tracking memory usage and planning globally. + +For this case, TVM FFI provides ``FromEnvAlloc``. It internally calls the framework tensor allocator. To determine which framework tensor allocator, TVM FFI infers it from the passed-in framework tensors. For example, when calling the kernel library at Python side, there is an input framework tensor if of type ``torch.Tensor``, TVM FFI will automatically bind the ``at::empty`` as the current framework tensor allocator by ``TVMFFIEnvTensorAlloc``. And then the ``FromEnvAlloc`` is calling the ``at::empty`` actually: - .. code-block:: c++ +.. code-block:: c++ + + ffi::Tensor tensor = ffi::Tensor::FromEnvAlloc(TVMFFIEnvTensorAlloc, ...); - void func(ffi::TensorView arg0, ffi::TensorView arg1, ...) { - ffi::Tensor tensor0 = ffi::Tensor::FromEnvAlloc(TVMFFIEnvTensorAlloc, ...); - ffi::Tensor tensor1 = ffi::Tensor::FromDLPackVersioned(at::toDLPackImpl(at::empty(...))) - // tensor0 and tensor1 are equivalent once arg{i} at Python side has type of torch.Tensor. 
- } +which is equivalent to: - We **recommend** to use ``FromEnvAlloc`` when possible, since the framework tensor allocator has adavantages: +.. code-block:: c++ - * Benefit from the framework's native caching allocator or related mechanism. - * Help framework tracking memory usage and planning globally. + at::Tensor tensor = at::empty(...); FromNDAlloc - ``FromNDAlloc`` is the most basic tensor allocator. Besides of the basic attributes like shape, data type and device, it requires a custom allocator struct to handle the allocation and free. The allocator must consist of ``void AllocData(DLTensor*)`` and ``void FreeData(DLTensor*)`` methods. Here are the examples of CPU, CUDA and NVSHMEM allocation: +^^^^^^^^^^^ - .. code-block:: c++ +``FromNDAlloc`` is the most basic tensor allocator. It is designed for simple cases where framework tensor allocator is no longer needed. ``FromNDAlloc`` just requires a custom allocator struct to handle the tensor allocation and free, with fixed interface ``void AllocData(DLTensor*)`` and ``void FreeData(DLTensor*)`` methods. Here are the examples of CPU, CUDA and NVSHMEM allocation: - // CPU Allocator - struct CPUNDAlloc { - void AllocData(DLTensor* tensor) { tensor->data = malloc(ffi::GetDataSize(*tensor)); } - void FreeData(DLTensor* tensor) { free(tensor->data); } - }; +.. code-block:: c++ - // CUDA Allocator - struct CUDANDAlloc { - void AllocData(DLTensor* tensor) { - size_t data_size = ffi::GetDataSize(*tensor); - void* ptr = nullptr; - cudaError_t err = cudaMalloc(&ptr, data_size); - TVM_FFI_ICHECK_EQ(err, cudaSuccess) << "cudaMalloc failed: " << cudaGetErrorString(err); - tensor->data = ptr; - } - void FreeData(DLTensor* tensor) { - if (tensor->data != nullptr) { - cudaError_t err = cudaFree(tensor->data); - TVM_FFI_ICHECK_EQ(err, cudaSuccess) << "cudaFree failed: " << cudaGetErrorString(err); - tensor->data = nullptr; - } - } - }; + // CPU Allocator + struct CPUNDAlloc { + void AllocData(DLTensor* tensor) { tensor->data = malloc(ffi::GetDataSize(*tensor)); } + void FreeData(DLTensor* tensor) { free(tensor->data); } + }; + + // CUDA Allocator + struct CUDANDAlloc { + void AllocData(DLTensor* tensor) { + size_t data_size = ffi::GetDataSize(*tensor); + void* ptr = nullptr; + cudaError_t err = cudaMalloc(&ptr, data_size); + TVM_FFI_ICHECK_EQ(err, cudaSuccess) << "cudaMalloc failed: " << cudaGetErrorString(err); + tensor->data = ptr; + } + void FreeData(DLTensor* tensor) { + if (tensor->data != nullptr) { + cudaError_t err = cudaFree(tensor->data); + TVM_FFI_ICHECK_EQ(err, cudaSuccess) << "cudaFree failed: " << cudaGetErrorString(err); + tensor->data = nullptr; + } + } + }; // NVSHMEM Allocator struct NVSHMEMNDAlloc { @@ -121,41 +130,63 @@ FromNDAlloc ffi::Tensor nvshmem_tensor = ffi::Tensor::FromNDAlloc(NVSHMEMNDAlloc(), ...); FromDLPack - ``FromDLPack`` enables creating ``ffi::Tensor`` from ``DLManagedTensor*``, working with ``ToDLPack`` for DLPack C Tensor Object ``DLTensor`` exchange protocol. Both are used for DLPack pre V1.0 API. +^^^^^^^^^^ + +``FromDLPack`` enables creating ``ffi::Tensor`` from ``DLManagedTensor*``, working with ``ToDLPack`` for DLPack C Tensor Object ``DLTensor`` exchange protocol. Both are used for DLPack pre V1.0 API. It is used for wrapping the existing framework tensor to ``ffi::Tensor``. FromDLPackVersioned - ``FromDLPackVersioned`` enables creating ``ffi::Tensor`` from ``DLManagedTensorVersioned*``, working with ``ToDLPackVersioned`` for DLPack C Tensor Object ``DLTensor`` exchange protocol. 
Both are used for DLPack post V1.0 API. +^^^^^^^^^^^^^^^^^^^ -Tensor Passing FFI ------------------- +``FromDLPackVersioned`` enables creating ``ffi::Tensor`` from ``DLManagedTensorVersioned*``, working with ``ToDLPackVersioned`` for DLPack C Tensor Object ``DLTensor`` exchange protocol. Both are used for DLPack post V1.0 API. It is used for wrapping the existing framework tensor to ``ffi::Tensor`` too. -TVM FFI does two conversions when calling the compiled kernels to pass the tensor across FFI. It first converts the framework tensor at Python side to ``ffi::Tensor`` or ``ffi::TensorView``. And then it converts the output ``ffi::Tensor`` back to the framework tensor at Python side. When converting back, TVM FFI will convert to the same framework as arguments. If there are no framework tensors provided in the arguments, TVM FFI will output tensors with the type of ``tvm_ffi.core.Tensor`` still. +Python Calling FFI +================== -Actually, in practie, we **recommend** that all input and output tensors are pre-allocated at Python side by framework alreadly. As for the optional arguments, use ``ffi::Optional`` as wrapper. So, for the kernel function, it returns nothing with a ``void`` return type. Here is a paradigm of TVM FFI interact with Pytorch: +As we already have our kernel library wrapped with TVM FFI interface, our next and final step is exporting kernel library to Python side and enabling interaction with runtime environment or context. + +Function Exporting +------------------ + +TVM FFI provides macro ``TVM_FFI_DLL_EXPORT_TYPED_FUNC`` for exporting the kernel functions to the output library files. So that at Python side, it is possible to load the library files and call the kernel functions directly. For example, we export our kernels as: .. code-block:: c++ - // Kernel definition - void func(ffi::TensorView input, ffi::Optional optional_input, ffi::TensorView output, ffi::TensorView workspace); + void func(ffi::TensorView input, ffi::TensorView output); + TVM_FFI_DLL_EXPORT_TYPED_FUNC(func, func); + +And then we compile the sources into ``func.so``, or ``func.dylib`` for macOS, or ``func.dll`` for Windows. Finally, we can load and call our kernel functions at Python side as: .. code-block:: python - # Kernel calling - input: torch.Tensor = ... - output: torch.Tensor = ... - workspace: torch.Tensor = ... - func(input, None, output, workspace) + mod = tvm_ffi.load_module("func.so") + x = ... + y = ... + mod.func(x, y) -Stream -====== +``x`` and ``y`` here can be any ML framework tensors, such as ``torch.Tensor``, ``numpy.NDArray``, ``cupy.ndarray``, or other tensors as long as TVM FFI supports. TVM FFI detects the tensor types in arguments and converts them into ``ffi::TensorView`` automatically. So that we do not have to write the specific conversion codes per framework. + +In constrast, if the kernel function returns ``ffi::Tensor`` instead of ``void`` in the example above. TVM FFI automatically converts the output ``ffi::Tensor`` to framework tensors also. The output framework is inferred from the input framework tensors. For example, if the input framework tensors are of ``torch.Tensor``, TVM FFI will convert the output tensor to ``torch.Tensor``. And if none of the input tensors are from ML framework, the output tensor will be the ``tvm_ffi.core.Tensor`` as fallback. + +Actually, it is **recommended** to pre-allocated input and output tensors from framework at Python side alreadly. So that the return type of kernel functions at C++ side should be ``void`` always. 
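+
+For reference, here is a minimal sketch of the alternative style in which the kernel allocates and returns the result itself (the function name ``make_output`` is only illustrative); the returned ``ffi::Tensor`` is converted back to the caller's framework tensor as described above:
+
+.. code-block:: c++
+
+   ffi::Tensor make_output(ffi::TensorView input) {
+     // Allocate through the framework allocator bound for this call;
+     // the allocation arguments follow ``FromEnvAlloc`` as shown earlier.
+     ffi::Tensor output = ffi::Tensor::FromEnvAlloc(TVMFFIEnvTensorAlloc, ...);
+     // ... fill ``output`` here ...
+     return output;
+   }
+   TVM_FFI_DLL_EXPORT_TYPED_FUNC(make_output, make_output);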
+ +Context Inherit +--------------- + +Also, when calling our kernel library at Python side, we usually need to pass the important context to the kernel library, for example, the CUDA stream context from ``torch.cuda.stream`` or ``torch.cuda.graph``. So that the kernels can be dispatched to the expected CUDA stream. TVM FFI has already made it by maintaining the stream context table per device type and index. And when converting the framework tensors as mentioned above, TVM FFI automatically updates the stream context table, by the device on which the converted framework tensors. For example, if there is an framework tensor as ``torch.Tensor(device="cuda:3")``, TVM FFI will automatically update the current stream of cuda device 3 to torch current context stream, by ``TVMFFIEnvSetStream``. And then at C++ side, we just use ``TVMFFIEnvGetStream`` to get the updated current stream on the specific device. Here is an example: + +.. code-block:: c++ + + void func(ffi::TensorView input, ...) { + ffi::DLDevice device = input.device(); + cudaStream_t stream = reinterpret_cast(TVMFFIEnvGetStream(device.device_type, device.device_id)); + } + +which is equivalent to: -TVM FFI maintains the stream context per device type and index. And TVM FFI automatically updates the context stream when handling the arguments. For example, if there is an argument of ``torch.Tensor(device="cuda:3")``, TVM FFI will set the current stream of cuda device 3 from torch current context stream. Then at C++ side, use ``TVMFFIEnvGetStream`` to get the current stream on the specific device. Here is an example: .. code-block:: c++ - void func(ffi::TensorView arg0, ...) { - ffi::DLDevice device = arg0.device(); - cudaStream_t stream0 = reinterpret_cast(TVMFFIEnvGetStream(device.device_type, device.device_id)); - cudaStream_t stream1 = reinterpret_cast(at::cuda::getCurrentCUDAStream(device.device_id).stream()); - // stream0 and stream1 are the same cuda stream handle once arg0 is of type torch.Tensor at Python side, or any other torch.Tensor arguments at PYthon side are on the same device as arg0. + void func(at::Tensor input, ...) 
{ + c10::Device = input.device(); + cudaStream_t stream = reinterpret_cast(c10::cuda::getCurrentCUDAStream(device.index()).stream()); } From df7105417717a44f556ab9b63055e250e5ccf690 Mon Sep 17 00:00:00 2001 From: Yaoyao Ding Date: Mon, 17 Nov 2025 14:12:07 -0500 Subject: [PATCH 25/47] test with empty commit From 00e6bdbc0a5415e4751ff9de7ef12e352930b8e2 Mon Sep 17 00:00:00 2001 From: Yaoyao Ding Date: Mon, 17 Nov 2025 16:16:44 -0500 Subject: [PATCH 26/47] add more links; some modifications --- docs/conf.py | 5 ++ docs/guides/kernel_library_guide.rst | 93 ++++++++-------------------- include/tvm/ffi/container/tensor.h | 44 +++++++++++++ 3 files changed, 76 insertions(+), 66 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 3aa19690..664e8827 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -157,6 +157,8 @@ "pillow": ("https://pillow.readthedocs.io/en/stable", None), "numpy": ("https://numpy.org/doc/stable", None), "torch": ("https://pytorch.org/docs/stable", None), + "torch-cpp": ("https://docs.pytorch.org/cppdocs/", None), + "dlpack": ("https://dmlc.github.io/dlpack/latest", None), } autosummary_generate = True # actually create stub pages @@ -463,3 +465,6 @@ def footer_html() -> str: html_css_files = ["custom.css"] + + +show_warning_types = True diff --git a/docs/guides/kernel_library_guide.rst b/docs/guides/kernel_library_guide.rst index f000af0a..4115d026 100644 --- a/docs/guides/kernel_library_guide.rst +++ b/docs/guides/kernel_library_guide.rst @@ -24,44 +24,45 @@ This guide serves as a quick start for shipping python version and machine learn Tensor ====== -Almost all kernel libraries are about tensor computation and manipulation. For better adaptation to different ML frameworks, TVM FFI provides a minimal set of data structures to represent tensors from ML frameworks, including the tensor basic attributes and storage pointer. To be specific, in TVM FFI, two types of tensor constructs, ``ffi::Tensor`` and ``ffi::TensorView``, can be used to represent a tensor from ML frameworks. +Almost all kernel libraries are about tensor computation and manipulation. For better adaptation to different ML frameworks, TVM FFI provides a minimal set of data structures to represent tensors from ML frameworks, including the tensor basic attributes and storage pointer. +To be specific, in TVM FFI, two types of tensor constructs, :cpp:class:`~tvm::ffi::Tensor` and :cpp:class:`~tvm::ffi::TensorView`, can be used to represent a tensor from ML frameworks. Tensor and TensorView --------------------- -Though both ``ffi::Tensor`` and ``ffi::TensorView`` are designed to represent tensors from ML frameworks that interact with the TVM FFI ABI. The main difference is whether it is an owning tensor structure. +Though both :cpp:class:`~tvm::ffi::Tensor` and :cpp:class:`~tvm::ffi::TensorView` are designed to represent tensors from ML frameworks that interact with the TVM FFI ABI. They are backed by the `DLTensor` in DLPack in practice. The main difference is whether it is an owning tensor structure. -ffi::Tensor - ``ffi::Tensor`` is a completely onwing tensor pointer, pointing to a TVM FFI tensor object. TVM FFI handles the lifetime of ``ffi::Tensor`` by retaining a strong reference. +:cpp:class:`tvm::ffi::Tensor` + :cpp:class:`~tvm::ffi::Tensor` is a completely owning tensor with reference counting. It can be created and passed between C++ and Python side safely. When the counting reference goes to zero, its underlying deleter function will be called to free the tensor storage. 
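
A rough sketch of how the owning and non-owning types are typically used in a kernel library (``run_kernel`` and ``KernelCache`` are hypothetical names used for illustration):

.. code-block:: c++

    // Non-owning: suitable for arguments that are only read or written during the call.
    void run_kernel(ffi::TensorView input, ffi::TensorView output);

    // Owning: hold an ffi::Tensor when the kernel library must keep the storage
    // alive beyond a single call, e.g. for a persistent cache; the strong
    // reference keeps the underlying tensor object from being freed.
    struct KernelCache {
      ffi::Tensor cached_weights;
    };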
-ffi::TensorView - ``ffi::TensorView`` is a non-owning view of an existing tensor, pointint to an existing ML framework tensor. It is backed by ``DLTensor`` structure in DLPack in practice. And TVM FFI does not guarantee its lifetime also. +:cpp:class:`tvm::ffi::TensorView` + :cpp:class:`~tvm::ffi::TensorView` is a non-owning view of an existing tensor, pointing to an existing tensor (e.g., a tensor allocated by PyTorch). -It is **recommended** to use ``ffi::TensorView`` when possible, that helps us to support more cases, including cases where only view but not strong reference are passed, like XLA buffer. It is also more lightweight. However, since ``ffi::TensorView`` is a non-owning view, it is the user's responsibility to ensure the lifetime of underlying tensor data and attributes of the viewed tensor object. +It is **recommended** to use :cpp:class:`~tvm::ffi::TensorView` when possible, that helps us to support more cases, including cases where only view but not strong reference are passed, like XLA buffer. +It is also more lightweight. However, since :cpp:class:`~tvm::ffi::TensorView` is a non-owning view, it is the user's responsibility to ensure the lifetime of underlying tensor data and attributes of the viewed tensor object. Tensor Attributes ----------------- -For the sake of convenience, ``ffi::TensorView`` and ``ffi::Tensor`` align the following attributes retrieval mehtods to ``at::Tensor`` interface, to obtain tensor basic attributes and storage pointer: +For the sake of convenience, :cpp:class:`~tvm::ffi::TensorView` and :cpp:class:`~tvm::ffi::Tensor` align the following attributes retrieval mehtods to :cpp:class:`torch-cpp:at::Tensor` interface, to obtain tensor basic attributes and storage pointer: +``dim``, ``dtype``, ``sizes``, ``size``, ``strides``, ``stride``, ``numel``, ``data_ptr``, ``device``, ``is_contiguous`` -``dim``, ``sizes``, ``size``, ``strides``, ``stride``, ``numel``, ``data_ptr``, ``device``, ``is_contiguous`` +:c:struct:`dlpack:DLDataType` + The ``dtype`` of the tensor. It's represented by a struct with three fields: code, bits, and lanes, defined by DLPack protocol. -DLDataType - In TVM FFI, tensor data types are stored as ``DLDataType`` which is defined by DLPack protocol. +:c:struct:`dlpack:DLDevice` + The ``device`` where the tensor is stored. It is represented by a struct with two fields: device_type and device_id, defined by DLPack protocol. -DLDevice - In TVM FFI, tensor device information are stored as ``DLDevice`` which is defined by DLPack protocol. - -ShapeView - In TVM FFI, tensor shapes and strides attributes retrieval are returned as ``ShapeView``. It is an iterate-able data structure storing the shapes or strides data as ``int64_t`` array. +:cpp:class:`tvm::ffi::ShapeView` + The ``sizes`` and ``strides`` attributes retrieval are returned as :cpp:class:`~tvm::ffi::ShapeView`. It is an iterate-able data structure storing the shapes or strides data as ``int64_t`` array. Tensor Allocation ----------------- -TVM FFI provides several methods to allocate tensors at C++ runtime. Generally, there are two types of tensor allocation: +TVM FFI provides several methods to create or allocate tensors at C++ runtime. Generally, there are two types of tensor creation methods: -* Allocate a tensor with new storage from scratch, i.e. ``FromEnvAlloc`` and ``FromNDAlloc``. By this types of methods, the shapes, strides, data types, devices and other attributes are required for the allocation. 
-* Allocate a tensor with existing storage following DLPack protocol, i.e. ``FromDLPack`` and ``FromDLPackVersioned``. By this types of methods, the shapes, data types, devices and other attributes can be inferred from the DLPack attributes. +* Allocate a tensor with new storage from scratch, i.e. :cpp:func:`~tvm::ffi::Tensor::FromEnvAlloc` and :cpp:func:`~tvm::ffi::Tensor::FromNDAlloc`. By this types of methods, the shapes, strides, data types, devices and other attributes are required for the allocation. +* Create a tensor with existing storage following DLPack protocol, i.e. :cpp:func:`~tvm::ffi::Tensor::FromDLPack` and :cpp:func:`~tvm::ffi::Tensor::FromDLPackVersioned`. By this types of methods, the shapes, data types, devices and other attributes can be inferred from the DLPack attributes. FromEnvAlloc ^^^^^^^^^^^^ @@ -71,7 +72,7 @@ To better adapt to the ML framework, it is **recommended** to reuse the framewor * Benefit from the framework's native caching allocator or related allocation mechanism. * Help framework tracking memory usage and planning globally. -For this case, TVM FFI provides ``FromEnvAlloc``. It internally calls the framework tensor allocator. To determine which framework tensor allocator, TVM FFI infers it from the passed-in framework tensors. For example, when calling the kernel library at Python side, there is an input framework tensor if of type ``torch.Tensor``, TVM FFI will automatically bind the ``at::empty`` as the current framework tensor allocator by ``TVMFFIEnvTensorAlloc``. And then the ``FromEnvAlloc`` is calling the ``at::empty`` actually: +For this case, TVM FFI provides :cpp:func:`tvm::ffi::Tensor::FromEnvAlloc`. It internally calls the framework tensor allocator. To determine which framework tensor allocator, TVM FFI infers it from the passed-in framework tensors. For example, when calling the kernel library at Python side, there is an input framework tensor if of type ``torch.Tensor``, TVM FFI will automatically bind the ``at::empty`` as the current framework tensor allocator by ``TVMFFIEnvTensorAlloc``. And then the :cpp:func:`~tvm::ffi::Tensor::FromEnvAlloc` is calling the ``at::empty`` actually: .. code-block:: c++ @@ -86,58 +87,18 @@ which is equivalent to: FromNDAlloc ^^^^^^^^^^^ -``FromNDAlloc`` is the most basic tensor allocator. It is designed for simple cases where framework tensor allocator is no longer needed. ``FromNDAlloc`` just requires a custom allocator struct to handle the tensor allocation and free, with fixed interface ``void AllocData(DLTensor*)`` and ``void FreeData(DLTensor*)`` methods. Here are the examples of CPU, CUDA and NVSHMEM allocation: - -.. code-block:: c++ +:cpp:func:`tvm::ffi::Tensor::FromNDAlloc` can be used to create a tensor with custom memory allocator. It's used by the kernel provider if they don't want to rely on the framework tensor allocator. Instead, they provide their own custom allocator for tensor allocation and free. 
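
A rough lifetime sketch of this custom-allocator path (assuming the ``CPUNDAlloc`` helper shown in the C++ API reference; the remaining ``FromNDAlloc`` arguments are elided with ``...`` as in the other snippets of this guide):

.. code-block:: c++

    {
      // AllocData() of the custom allocator is invoked here to back the tensor.
      ffi::Tensor t = ffi::Tensor::FromNDAlloc(CPUNDAlloc(), ...);
      ffi::Tensor alias = t;  // reference counting: two strong references now exist
    }
    // Both references are gone, so the matching FreeData() runs exactly once.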
- // CPU Allocator - struct CPUNDAlloc { - void AllocData(DLTensor* tensor) { tensor->data = malloc(ffi::GetDataSize(*tensor)); } - void FreeData(DLTensor* tensor) { free(tensor->data); } - }; - - // CUDA Allocator - struct CUDANDAlloc { - void AllocData(DLTensor* tensor) { - size_t data_size = ffi::GetDataSize(*tensor); - void* ptr = nullptr; - cudaError_t err = cudaMalloc(&ptr, data_size); - TVM_FFI_ICHECK_EQ(err, cudaSuccess) << "cudaMalloc failed: " << cudaGetErrorString(err); - tensor->data = ptr; - } - void FreeData(DLTensor* tensor) { - if (tensor->data != nullptr) { - cudaError_t err = cudaFree(tensor->data); - TVM_FFI_ICHECK_EQ(err, cudaSuccess) << "cudaFree failed: " << cudaGetErrorString(err); - tensor->data = nullptr; - } - } - }; - - // NVSHMEM Allocator - struct NVSHMEMNDAlloc { - void AllocData(DLTensor* tensor) { - size_t size = tvm::ffi::GetDataSize(*tensor); - tensor->data = nvshmem_malloc(size); - TVM_FFI_ICHECK_NE(tensor->data, nullptr) << "nvshmem_malloc failed. size: " << size; - } - void FreeData(DLTensor* tensor) { nvshmem_free(tensor->data); } - }; - - // Allocator usage - ffi::Tensor cpu_tensor = ffi::Tensor::FromNDAlloc(CPUNDAlloc(), ...); - ffi::Tensor cuda_tensor = ffi::Tensor::FromNDAlloc(CUDANDAlloc(), ...); - ffi::Tensor nvshmem_tensor = ffi::Tensor::FromNDAlloc(NVSHMEMNDAlloc(), ...); FromDLPack ^^^^^^^^^^ -``FromDLPack`` enables creating ``ffi::Tensor`` from ``DLManagedTensor*``, working with ``ToDLPack`` for DLPack C Tensor Object ``DLTensor`` exchange protocol. Both are used for DLPack pre V1.0 API. It is used for wrapping the existing framework tensor to ``ffi::Tensor``. +:cpp:func:`tvm::ffi::Tensor::FromDLPack` enables creating :cpp:class:`~tvm::ffi::Tensor` from ``DLManagedTensor*``, working with ``ToDLPack`` for DLPack C Tensor Object ``DLTensor`` exchange protocol. Both are used for DLPack pre V1.0 API. It is used for wrapping the existing framework tensor to :cpp:class:`~tvm::ffi::Tensor`. FromDLPackVersioned ^^^^^^^^^^^^^^^^^^^ -``FromDLPackVersioned`` enables creating ``ffi::Tensor`` from ``DLManagedTensorVersioned*``, working with ``ToDLPackVersioned`` for DLPack C Tensor Object ``DLTensor`` exchange protocol. Both are used for DLPack post V1.0 API. It is used for wrapping the existing framework tensor to ``ffi::Tensor`` too. +:cpp:func:`tvm::ffi::Tensor::FromDLPackVersioned` enables creating :cpp:class:`~tvm::ffi::Tensor` from ``DLManagedTensorVersioned*``, working with ``ToDLPackVersioned`` for DLPack C Tensor Object ``DLTensor`` exchange protocol. Both are used for DLPack post V1.0 API. It is used for wrapping the existing framework tensor to :cpp:class:`~tvm::ffi::Tensor` too. Python Calling FFI ================== @@ -147,7 +108,7 @@ As we already have our kernel library wrapped with TVM FFI interface, our next a Function Exporting ------------------ -TVM FFI provides macro ``TVM_FFI_DLL_EXPORT_TYPED_FUNC`` for exporting the kernel functions to the output library files. So that at Python side, it is possible to load the library files and call the kernel functions directly. For example, we export our kernels as: +TVM FFI provides macro :c:macro:`TVM_FFI_DLL_EXPORT_TYPED_FUNC` for exporting the kernel functions to the output library files. So that at Python side, it is possible to load the library files and call the kernel functions directly. For example, we export our kernels as: .. code-block:: c++ @@ -163,9 +124,9 @@ And then we compile the sources into ``func.so``, or ``func.dylib`` for macOS, o y = ... 
mod.func(x, y) -``x`` and ``y`` here can be any ML framework tensors, such as ``torch.Tensor``, ``numpy.NDArray``, ``cupy.ndarray``, or other tensors as long as TVM FFI supports. TVM FFI detects the tensor types in arguments and converts them into ``ffi::TensorView`` automatically. So that we do not have to write the specific conversion codes per framework. +``x`` and ``y`` here can be any ML framework tensors, such as ``torch.Tensor``, ``numpy.NDArray``, ``cupy.ndarray``, or other tensors as long as TVM FFI supports. TVM FFI detects the tensor types in arguments and converts them into :cpp:class:`~tvm::ffi::TensorView` automatically. So that we do not have to write the specific conversion codes per framework. -In constrast, if the kernel function returns ``ffi::Tensor`` instead of ``void`` in the example above. TVM FFI automatically converts the output ``ffi::Tensor`` to framework tensors also. The output framework is inferred from the input framework tensors. For example, if the input framework tensors are of ``torch.Tensor``, TVM FFI will convert the output tensor to ``torch.Tensor``. And if none of the input tensors are from ML framework, the output tensor will be the ``tvm_ffi.core.Tensor`` as fallback. +In constrast, if the kernel function returns :cpp:class:`~tvm::ffi::Tensor` instead of ``void`` in the example above. TVM FFI automatically converts the output :cpp:class:`~tvm::ffi::Tensor` to framework tensors also. The output framework is inferred from the input framework tensors. For example, if the input framework tensors are of ``torch.Tensor``, TVM FFI will convert the output tensor to ``torch.Tensor``. And if none of the input tensors are from ML framework, the output tensor will be the ``tvm_ffi.core.Tensor`` as fallback. Actually, it is **recommended** to pre-allocated input and output tensors from framework at Python side alreadly. So that the return type of kernel functions at C++ side should be ``void`` always. diff --git a/include/tvm/ffi/container/tensor.h b/include/tvm/ffi/container/tensor.h index da750e65..944bb2c5 100644 --- a/include/tvm/ffi/container/tensor.h +++ b/include/tvm/ffi/container/tensor.h @@ -350,6 +350,50 @@ class Tensor : public ObjectRef { bool IsAligned(size_t alignment) const { return tvm::ffi::IsAligned(*get(), alignment); } /*! * \brief Create a Tensor from a NDAllocator. + * + * + * Example usage: + * \code + * // CPU Allocator + * struct CPUNDAlloc { + * void AllocData(DLTensor* tensor) { tensor->data = malloc(ffi::GetDataSize(*tensor)); } + * void FreeData(DLTensor* tensor) { free(tensor->data); } + * }; + * + * // CUDA Allocator + * struct CUDANDAlloc { + * void AllocData(DLTensor* tensor) { + * size_t data_size = ffi::GetDataSize(*tensor); + * void* ptr = nullptr; + * cudaError_t err = cudaMalloc(&ptr, data_size); + * TVM_FFI_ICHECK_EQ(err, cudaSuccess) << "cudaMalloc failed: " << cudaGetErrorString(err); + * tensor->data = ptr; + * } + * void FreeData(DLTensor* tensor) { + * if (tensor->data != nullptr) { + * cudaError_t err = cudaFree(tensor->data); + * TVM_FFI_ICHECK_EQ(err, cudaSuccess) << "cudaFree failed: " << cudaGetErrorString(err); + * tensor->data = nullptr; + * } + * } + * }; + * + * // NVSHMEM Allocator + * struct NVSHMEMNDAlloc { + * void AllocData(DLTensor* tensor) { + * size_t size = tvm::ffi::GetDataSize(*tensor); + * tensor->data = nvshmem_malloc(size); + * TVM_FFI_ICHECK_NE(tensor->data, nullptr) << "nvshmem_malloc failed. 
size: " << size; + * } + * void FreeData(DLTensor* tensor) { nvshmem_free(tensor->data); } + * }; + * + * // Allocator usage + * ffi::Tensor cpu_tensor = ffi::Tensor::FromNDAlloc(CPUNDAlloc(), ...); + * ffi::Tensor cuda_tensor = ffi::Tensor::FromNDAlloc(CUDANDAlloc(), ...); + * ffi::Tensor nvshmem_tensor = ffi::Tensor::FromNDAlloc(NVSHMEMNDAlloc(), ...); + * \endcode + * * \param alloc The NDAllocator. * \param shape The shape of the Tensor. * \param dtype The data type of the Tensor. From 3e968b2d266445a7125b8a68bd30875eaa03e2da Mon Sep 17 00:00:00 2001 From: Yaxing Cai Date: Tue, 18 Nov 2025 01:26:15 +0000 Subject: [PATCH 27/47] fix --- docs/guides/kernel_library_guide.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/guides/kernel_library_guide.rst b/docs/guides/kernel_library_guide.rst index 4115d026..3a47958f 100644 --- a/docs/guides/kernel_library_guide.rst +++ b/docs/guides/kernel_library_guide.rst @@ -44,13 +44,13 @@ It is also more lightweight. However, since :cpp:class:`~tvm::ffi::TensorView` i Tensor Attributes ----------------- -For the sake of convenience, :cpp:class:`~tvm::ffi::TensorView` and :cpp:class:`~tvm::ffi::Tensor` align the following attributes retrieval mehtods to :cpp:class:`torch-cpp:at::Tensor` interface, to obtain tensor basic attributes and storage pointer: +For the sake of convenience, :cpp:class:`~tvm::ffi::TensorView` and :cpp:class:`~tvm::ffi::Tensor` align the following attributes retrieval mehtods to :cpp:class:`at::Tensor` interface, to obtain tensor basic attributes and storage pointer: ``dim``, ``dtype``, ``sizes``, ``size``, ``strides``, ``stride``, ``numel``, ``data_ptr``, ``device``, ``is_contiguous`` -:c:struct:`dlpack:DLDataType` +:c:struct:`DLDataType` The ``dtype`` of the tensor. It's represented by a struct with three fields: code, bits, and lanes, defined by DLPack protocol. -:c:struct:`dlpack:DLDevice` +:c:struct:`DLDevice` The ``device`` where the tensor is stored. It is represented by a struct with two fields: device_type and device_id, defined by DLPack protocol. :cpp:class:`tvm::ffi::ShapeView` From 3e36d6dfb81413097f1486e0ba27c4dac53715a3 Mon Sep 17 00:00:00 2001 From: Yaxing Cai Date: Tue, 18 Nov 2025 01:50:50 +0000 Subject: [PATCH 28/47] upd --- docs/guides/kernel_library_guide.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/guides/kernel_library_guide.rst b/docs/guides/kernel_library_guide.rst index 3a47958f..cec29de6 100644 --- a/docs/guides/kernel_library_guide.rst +++ b/docs/guides/kernel_library_guide.rst @@ -72,7 +72,7 @@ To better adapt to the ML framework, it is **recommended** to reuse the framewor * Benefit from the framework's native caching allocator or related allocation mechanism. * Help framework tracking memory usage and planning globally. -For this case, TVM FFI provides :cpp:func:`tvm::ffi::Tensor::FromEnvAlloc`. It internally calls the framework tensor allocator. To determine which framework tensor allocator, TVM FFI infers it from the passed-in framework tensors. For example, when calling the kernel library at Python side, there is an input framework tensor if of type ``torch.Tensor``, TVM FFI will automatically bind the ``at::empty`` as the current framework tensor allocator by ``TVMFFIEnvTensorAlloc``. And then the :cpp:func:`~tvm::ffi::Tensor::FromEnvAlloc` is calling the ``at::empty`` actually: +For this case, TVM FFI provides :cpp:func:`tvm::ffi::Tensor::FromEnvAlloc`. It internally calls the framework tensor allocator. 
To determine which framework tensor allocator, TVM FFI infers it from the passed-in framework tensors. For example, when calling the kernel library at Python side, there is an input framework tensor if of type ``torch.Tensor``, TVM FFI will automatically bind the :cpp:function:`at::empty` as the current framework tensor allocator by ``TVMFFIEnvTensorAlloc``. And then the :cpp:func:`~tvm::ffi::Tensor::FromEnvAlloc` is calling the :cpp:class:`at::empty` actually: .. code-block:: c++ @@ -87,7 +87,7 @@ which is equivalent to: FromNDAlloc ^^^^^^^^^^^ -:cpp:func:`tvm::ffi::Tensor::FromNDAlloc` can be used to create a tensor with custom memory allocator. It's used by the kernel provider if they don't want to rely on the framework tensor allocator. Instead, they provide their own custom allocator for tensor allocation and free. +:cpp:func:`tvm::ffi::Tensor::FromNDAlloc` can be used to create a tensor with custom memory allocator. It's used by the kernel provider if they don't want to rely on the framework tensor allocator. Instead, they provide their own custom allocator and deleter for tensor allocation and free. However, the tensors allocated by ``FromNDAlloc`` only retain the function pointer to its custom allocator and deleter. The custom allocators and deletes are all owned by the kernel library still. So it is important to make sure the loaded kernel library, :py:class:`tvm_ffi.Module`, outlives the tensors allocated by ``FromNDAlloc``. Otherwise, the function pointers to the custom deleter will be invalid. Here a typical approach is to retain the loaded :py:class:`tvm_ffi.Module` globally or for the period of time. But ``FromEnvAlloc`` is free of this issue, which is more **recommended** in practice. FromDLPack From 2d6c522540a58e4ec1b74a797be80266f86e2c27 Mon Sep 17 00:00:00 2001 From: Yaxing Cai Date: Tue, 18 Nov 2025 01:52:36 +0000 Subject: [PATCH 29/47] upd --- docs/guides/kernel_library_guide.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guides/kernel_library_guide.rst b/docs/guides/kernel_library_guide.rst index cec29de6..a5b78782 100644 --- a/docs/guides/kernel_library_guide.rst +++ b/docs/guides/kernel_library_guide.rst @@ -87,7 +87,7 @@ which is equivalent to: FromNDAlloc ^^^^^^^^^^^ -:cpp:func:`tvm::ffi::Tensor::FromNDAlloc` can be used to create a tensor with custom memory allocator. It's used by the kernel provider if they don't want to rely on the framework tensor allocator. Instead, they provide their own custom allocator and deleter for tensor allocation and free. However, the tensors allocated by ``FromNDAlloc`` only retain the function pointer to its custom allocator and deleter. The custom allocators and deletes are all owned by the kernel library still. So it is important to make sure the loaded kernel library, :py:class:`tvm_ffi.Module`, outlives the tensors allocated by ``FromNDAlloc``. Otherwise, the function pointers to the custom deleter will be invalid. Here a typical approach is to retain the loaded :py:class:`tvm_ffi.Module` globally or for the period of time. But ``FromEnvAlloc`` is free of this issue, which is more **recommended** in practice. +:cpp:func:`tvm::ffi::Tensor::FromNDAlloc` can be used to create a tensor with custom memory allocator. It's used by the kernel provider if they don't want to rely on the framework tensor allocator. Instead, they provide their own custom allocator and deleter for tensor allocation and free. 
However, the tensors allocated by ``FromNDAlloc`` only retain the function pointer to its custom deleter for destruction. The custom deleters themselves are still owned by the kernel library. So it is important to make sure the loaded kernel library, :py:class:`tvm_ffi.Module`, outlives the tensors allocated by ``FromNDAlloc``. Otherwise, the function pointers to the custom deleter will become invalid. A typical approach here is to retain the loaded :py:class:`tvm_ffi.Module` globally or for the required period of time. ``FromEnvAlloc`` is free of this issue and is therefore more **recommended** in practice.
 
 
 FromDLPack

From b6eae620f1f151fa4cebce60c887437a5f54b40d Mon Sep 17 00:00:00 2001
From: Yaxing Cai
Date: Tue, 18 Nov 2025 01:57:36 +0000
Subject: [PATCH 30/47] fix

---
 docs/guides/kernel_library_guide.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/guides/kernel_library_guide.rst b/docs/guides/kernel_library_guide.rst
index a5b78782..aeed0300 100644
--- a/docs/guides/kernel_library_guide.rst
+++ b/docs/guides/kernel_library_guide.rst
@@ -72,7 +72,7 @@ To better adapt to the ML framework, it is **recommended** to reuse the framewor
 * Benefit from the framework's native caching allocator or related allocation mechanism.
 * Help framework tracking memory usage and planning globally.
 
-For this case, TVM FFI provides :cpp:func:`tvm::ffi::Tensor::FromEnvAlloc`. It internally calls the framework tensor allocator. To determine which framework tensor allocator, TVM FFI infers it from the passed-in framework tensors. For example, when calling the kernel library at Python side, there is an input framework tensor if of type ``torch.Tensor``, TVM FFI will automatically bind the :cpp:function:`at::empty` as the current framework tensor allocator by ``TVMFFIEnvTensorAlloc``. And then the :cpp:func:`~tvm::ffi::Tensor::FromEnvAlloc` is calling the :cpp:class:`at::empty` actually:
+For this case, TVM FFI provides :cpp:func:`tvm::ffi::Tensor::FromEnvAlloc`. It internally calls the framework tensor allocator. To determine which framework tensor allocator to use, TVM FFI infers it from the passed-in framework tensors. For example, when calling the kernel library at Python side, if there is an input framework tensor of type ``torch.Tensor``, TVM FFI will automatically bind :cpp:func:`at::empty` as the current framework tensor allocator by ``TVMFFIEnvTensorAlloc``. And then :cpp:func:`~tvm::ffi::Tensor::FromEnvAlloc` actually calls :cpp:func:`at::empty`:
 
 .. code-block:: c++
 
From 5f246044029875a5c174230578fccbd34827193b Mon Sep 17 00:00:00 2001
From: Yaxing Cai
Date: Tue, 18 Nov 2025 23:28:23 +0000
Subject: [PATCH 31/47] upd

---
 docs/guides/kernel_library_guide.rst | 63 ++++++++++++++++++----------
 1 file changed, 41 insertions(+), 22 deletions(-)

diff --git a/docs/guides/kernel_library_guide.rst b/docs/guides/kernel_library_guide.rst
index aeed0300..9e21e6b9 100644
--- a/docs/guides/kernel_library_guide.rst
+++ b/docs/guides/kernel_library_guide.rst
@@ -100,6 +100,47 @@ FromDLPackVersioned
 
 :cpp:func:`tvm::ffi::Tensor::FromDLPackVersioned` enables creating :cpp:class:`~tvm::ffi::Tensor` from ``DLManagedTensorVersioned*``, working with ``ToDLPackVersioned`` for DLPack C Tensor Object ``DLTensor`` exchange protocol. Both are used for DLPack post V1.0 API. It is used for wrapping the existing framework tensor to :cpp:class:`~tvm::ffi::Tensor` too.
 
+Stream
+======
+
+Besides tensors, the stream context is another key concept in a kernel library, especially for kernel execution.
And the kernel library should be able to obtain the current stream context from the ML framework via TVM FFI.
+
+Stream Obtaining
+----------------
+
+In practice, TVM FFI maintains a stream context table per device type and index. Kernel libraries can obtain the current stream context on a specific device via :cpp:func:`TVMFFIEnvGetStream`. Here is an example:
+
+.. code-block:: c++
+
+    void func(ffi::TensorView input, ...) {
+      ffi::DLDevice device = input.device();
+      cudaStream_t stream = reinterpret_cast<cudaStream_t>(TVMFFIEnvGetStream(device.device_type, device.device_id));
+    }
+
+which is equivalent to:
+
+.. code-block:: c++
+
+    void func(at::Tensor input, ...) {
+      c10::Device device = input.device();
+      cudaStream_t stream = c10::cuda::getCurrentCUDAStream(device.index()).stream();
+    }
+
+Stream Update
+-------------
+
+Corresponding to :cpp:func:`TVMFFIEnvGetStream`, TVM FFI updates the stream context table via the interface :cpp:func:`TVMFFIEnvSetStream`. The update can happen either implicitly or explicitly.
+
+Implicit Update
+^^^^^^^^^^^^^^^
+
+Similar to the tensor allocation in :ref:`guides/kernel_library_guide:FromEnvAlloc`, TVM FFI performs an implicit update of the stream context table as well. When converting the framework tensors as mentioned above, TVM FFI automatically updates the stream context table based on the devices on which the converted framework tensors reside. For example, if there is a framework tensor such as ``torch.Tensor(device="cuda:3")``, TVM FFI would automatically update the current stream of CUDA device 3 to the current torch context stream. So the kernel library does not need to do anything for stream context updating, as long as the framework tensors cover all the devices on which the stream contexts reside.
+
+Explicit Update
+^^^^^^^^^^^^^^^
+
+When the devices on which the stream contexts reside cannot be inferred from the tensors, an explicit update of the stream context table is necessary. TVM FFI provides :py:func:`tvm_ffi.use_torch_stream` and :py:func:`tvm_ffi.use_raw_stream` for manual stream context updates. However, it is **recommended** to rely on the implicit update above to reduce code complexity.
+
 Python Calling FFI
 ==================
 
@@ -129,25 +170,3 @@ And then we compile the sources into ``func.so``, or ``func.dylib`` for macOS, o
 In constrast, if the kernel function returns :cpp:class:`~tvm::ffi::Tensor` instead of ``void`` in the example above. TVM FFI automatically converts the output :cpp:class:`~tvm::ffi::Tensor` to framework tensors also. The output framework is inferred from the input framework tensors. For example, if the input framework tensors are of ``torch.Tensor``, TVM FFI will convert the output tensor to ``torch.Tensor``. And if none of the input tensors are from ML framework, the output tensor will be the ``tvm_ffi.core.Tensor`` as fallback.
 
 Actually, it is **recommended** to pre-allocated input and output tensors from framework at Python side alreadly. So that the return type of kernel functions at C++ side should be ``void`` always.
-
-Context Inherit
----------------
-
-Also, when calling our kernel library at Python side, we usually need to pass the important context to the kernel library, for example, the CUDA stream context from ``torch.cuda.stream`` or ``torch.cuda.graph``. So that the kernels can be dispatched to the expected CUDA stream. TVM FFI has already made it by maintaining the stream context table per device type and index.
And when converting the framework tensors as mentioned above, TVM FFI automatically updates the stream context table, by the device on which the converted framework tensors. For example, if there is an framework tensor as ``torch.Tensor(device="cuda:3")``, TVM FFI will automatically update the current stream of cuda device 3 to torch current context stream, by ``TVMFFIEnvSetStream``. And then at C++ side, we just use ``TVMFFIEnvGetStream`` to get the updated current stream on the specific device. Here is an example: - -.. code-block:: c++ - - void func(ffi::TensorView input, ...) { - ffi::DLDevice device = input.device(); - cudaStream_t stream = reinterpret_cast(TVMFFIEnvGetStream(device.device_type, device.device_id)); - } - -which is equivalent to: - - -.. code-block:: c++ - - void func(at::Tensor input, ...) { - c10::Device = input.device(); - cudaStream_t stream = reinterpret_cast(c10::cuda::getCurrentCUDAStream(device.index()).stream()); - } From 264dfad84c1ce172ac8e6f430f6ddeb013eb1224 Mon Sep 17 00:00:00 2001 From: Yaxing Cai Date: Tue, 18 Nov 2025 23:30:08 +0000 Subject: [PATCH 32/47] upd --- docs/guides/kernel_library_guide.rst | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/docs/guides/kernel_library_guide.rst b/docs/guides/kernel_library_guide.rst index 9e21e6b9..b71be993 100644 --- a/docs/guides/kernel_library_guide.rst +++ b/docs/guides/kernel_library_guide.rst @@ -141,15 +141,10 @@ Explicit Update Once the devices on which the stream contexts reside cannot be inferred from the tensors, the explicit update on stream context table is necessary. TVM FFI provides :py:func:`tvm_ffi.use_torch_stream` and :py:func:`tvm_ffi.use_raw_stream` for manual stream context update. However, it is **recommended** to use implicit update above, to reduce code complexity. -Python Calling FFI -================== - -As we already have our kernel library wrapped with TVM FFI interface, our next and final step is exporting kernel library to Python side and enabling interaction with runtime environment or context. - Function Exporting ------------------- +================== -TVM FFI provides macro :c:macro:`TVM_FFI_DLL_EXPORT_TYPED_FUNC` for exporting the kernel functions to the output library files. So that at Python side, it is possible to load the library files and call the kernel functions directly. For example, we export our kernels as: +As we already have our kernel library wrapped with TVM FFI interface, our next and final step is exporting kernel library to Python side. TVM FFI provides macro :c:macro:`TVM_FFI_DLL_EXPORT_TYPED_FUNC` for exporting the kernel functions to the output library files. So that at Python side, it is possible to load the library files and call the kernel functions directly. For example, we export our kernels as: .. 
code-block:: c++ From 3d98e3b78fa091400a90d0ab687c31279f1e827d Mon Sep 17 00:00:00 2001 From: Yaxing Cai Date: Wed, 19 Nov 2025 09:15:39 +0000 Subject: [PATCH 33/47] wip --- .../python/tvm_ffi_orcjit/__init__.py | 4 +- .../python/tvm_ffi_orcjit/_ffi_api.py | 20 +++ .../python/tvm_ffi_orcjit/dylib.py | 15 +-- .../python/tvm_ffi_orcjit/session.py | 47 ++----- addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc | 121 ++++++------------ addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.h | 41 +++--- .../tvm-ffi-orcjit/src/ffi/orcjit_session.cc | 9 +- .../tvm-ffi-orcjit/src/ffi/orcjit_session.h | 6 +- addons/tvm-ffi-orcjit/tests/test_basic.py | 20 +-- 9 files changed, 121 insertions(+), 162 deletions(-) create mode 100644 addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/_ffi_api.py diff --git a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/__init__.py b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/__init__.py index 066c5e57..986ea7be 100644 --- a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/__init__.py +++ b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/__init__.py @@ -83,7 +83,7 @@ warnings.warn(f"Failed to explicitly initialize orcjit library: {e}") from .dylib import DynamicLibrary -from .session import ExecutionSession, create_session +from .session import ExecutionSession -__all__ = ["DynamicLibrary", "ExecutionSession", "create_session"] +__all__ = ["DynamicLibrary", "ExecutionSession"] __version__ = "0.1.0" diff --git a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/_ffi_api.py b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/_ffi_api.py new file mode 100644 index 00000000..28078e84 --- /dev/null +++ b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/_ffi_api.py @@ -0,0 +1,20 @@ +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""FFI APIs for orcjit""" + +import tvm_ffi + +tvm_ffi.init_ffi_api("orcjit", __name__) diff --git a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/dylib.py b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/dylib.py index 7bf9c8ce..e0372262 100644 --- a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/dylib.py +++ b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/dylib.py @@ -21,9 +21,11 @@ from pathlib import Path from typing import TYPE_CHECKING, Any -from tvm_ffi import Function, get_global_func +from tvm_ffi import Function, Module from tvm_ffi._ffi_api import ModuleGetFunction +from . import _ffi_api + if TYPE_CHECKING: from .session import ExecutionSession @@ -58,9 +60,6 @@ def __init__(self, handle: Any, session: ExecutionSession) -> None: """ self._handle = handle self._session = session # Keep session alive - self._add_func = get_global_func("orcjit.DynamicLibraryAdd") - self._set_link_order_func = get_global_func("orcjit.DynamicLibrarySetLinkOrder") - self._to_module_func = get_global_func("orcjit.DynamicLibraryToModule") def add(self, object_file: str | Path) -> None: """Add an object file to this dynamic library. 
@@ -78,7 +77,7 @@ def add(self, object_file: str | Path) -> None: """ if isinstance(object_file, Path): object_file = str(object_file) - self._add_func(self._handle, object_file) + _ffi_api.DynamicLibraryAdd(self._handle, object_file) def set_link_order(self, *libraries: DynamicLibrary) -> None: """Set the link order for symbol resolution. @@ -105,7 +104,7 @@ def set_link_order(self, *libraries: DynamicLibrary) -> None: """ lib_handles = [lib._handle for lib in libraries] - self._set_link_order_func(self._handle, lib_handles) + _ffi_api.DynamicLibrarySetLinkOrder(self._handle, lib_handles) def get_function(self, name: str) -> Function: """Get a function from this dynamic library. @@ -128,9 +127,7 @@ def get_function(self, name: str) -> Function: """ # Get the module handle and use ModuleGetFunction - module_handle = self._to_module_func(self._handle) - - func = ModuleGetFunction(module_handle, name, False) + func = ModuleGetFunction(self._handle, name, False) if func is None: raise AttributeError(f"Module has no function '{name}'") return func diff --git a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/session.py b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/session.py index 96ed7146..a4a0d54c 100644 --- a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/session.py +++ b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/session.py @@ -18,14 +18,15 @@ from __future__ import annotations -from typing import Any -from tvm_ffi import get_global_func +from tvm_ffi import Object, register_object from .dylib import DynamicLibrary +from . import _ffi_api -class ExecutionSession: + +class ExecutionSession(Object): """ORC JIT Execution Session. Manages the LLVM ORC JIT execution environment and creates dynamic libraries (JITDylibs). @@ -33,24 +34,16 @@ class ExecutionSession: Examples -------- - >>> session = create_session() + >>> session = ExecutionSession() >>> lib = session.create_library(name="main") >>> lib.add("add.o") >>> add_func = lib.get_function("add") """ - def __init__(self, handle: Any) -> None: - """Initialize ExecutionSession from a handle. - - Parameters - ---------- - handle : object - The underlying C++ ORCJITExecutionSession object. - - """ - self._handle = handle - self._create_dylib_func = get_global_func("orcjit.SessionCreateDynamicLibrary") + def __init__(self) -> None: + """Initialize ExecutionSession from a handle.""" + self.__init_handle_by_constructor__(_ffi_api.ExecutionSession) # type: ignore def create_library(self, name: str = "") -> DynamicLibrary: """Create a new dynamic library associated with this execution session. @@ -62,27 +55,5 @@ def create_library(self, name: str = "") -> DynamicLibrary: A new DynamicLibrary instance. """ - handle = self._create_dylib_func(self._handle, name) + handle = _ffi_api.ExecutionSessionCreateDynamicLibrary(self, name) # type: ignore return DynamicLibrary(handle, self) - - -def create_session() -> ExecutionSession: - """Create a new ORC JIT execution session. - - This is the main entry point for using the ORC JIT system. The session - manages the LLVM ORC JIT infrastructure and allows creating dynamic libraries. - - Returns - ------- - ExecutionSession - A new execution session instance. 
- - Examples - -------- - >>> session = create_session() - >>> lib = session.create_library() - - """ - create_func = get_global_func("orcjit.CreateExecutionSession") - handle = create_func() - return ExecutionSession(handle) diff --git a/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc b/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc index c61a3a33..18d9a7f9 100644 --- a/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc +++ b/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -42,15 +43,14 @@ namespace tvm { namespace ffi { namespace orcjit { -ORCJITDynamicLibrary::ORCJITDynamicLibrary(ObjectPtr session, - llvm::orc::JITDylib* dylib, llvm::orc::LLJIT* jit, - String name) +DynamicLibraryObj::DynamicLibraryObj(ObjectPtr session, + llvm::orc::JITDylib* dylib, llvm::orc::LLJIT* jit, String name) : session_(std::move(session)), dylib_(dylib), jit_(jit), name_(std::move(name)) { TVM_FFI_CHECK(dylib_ != nullptr, ValueError) << "JITDylib cannot be null"; TVM_FFI_CHECK(jit_ != nullptr, ValueError) << "LLJIT cannot be null"; } -void ORCJITDynamicLibrary::AddObjectFile(const String& path) { +void DynamicLibraryObj::AddObjectFile(const String& path) { // Read object file auto buffer_or_err = llvm::MemoryBuffer::getFile(path.c_str()); if (!buffer_or_err) { @@ -67,23 +67,22 @@ void ORCJITDynamicLibrary::AddObjectFile(const String& path) { } } -void ORCJITDynamicLibrary::SetLinkOrder( - const std::vector>& libraries) { +void DynamicLibraryObj::SetLinkOrder(const std::vector& dylibs) { // Clear and rebuild the link order link_order_.clear(); - for (const auto& lib : libraries) { - link_order_.push_back({lib->dylib_, llvm::orc::JITDylibLookupFlags::MatchAllSymbols}); + for (auto* lib : dylibs) { + link_order_.emplace_back(lib, llvm::orc::JITDylibLookupFlags::MatchAllSymbols); } // Set the link order in the LLVM JITDylib dylib_->setLinkOrder(link_order_, false); } -void* ORCJITDynamicLibrary::GetSymbol(const String& name) { +void* DynamicLibraryObj::GetSymbol(const String& name) { // Build search order: this dylib first, then all linked dylibs llvm::orc::JITDylibSearchOrder search_order; - search_order.push_back({dylib_, llvm::orc::JITDylibLookupFlags::MatchAllSymbols}); + search_order.emplace_back(dylib_, llvm::orc::JITDylibLookupFlags::MatchAllSymbols); // Append linked libraries search_order.insert(search_order.end(), link_order_.begin(), link_order_.end()); @@ -102,59 +101,33 @@ void* ORCJITDynamicLibrary::GetSymbol(const String& name) { return symbol_or_err->getAddress().toPtr(); } -llvm::orc::JITDylib& ORCJITDynamicLibrary::GetJITDylib() { +llvm::orc::JITDylib& DynamicLibraryObj::GetJITDylib() { TVM_FFI_CHECK(dylib_ != nullptr, InternalError) << "JITDylib is null"; return *dylib_; } -//------------------------------------- -// Module wrapper for DynamicLibrary -//------------------------------------- - -class DynamicLibraryModuleObj : public ModuleObj { - public: - explicit DynamicLibraryModuleObj(ObjectPtr dylib) - : dylib_(std::move(dylib)) {} - - const char* kind() const final { return "orcjit_dynamic_library"; } - - Optional GetFunction(const String& name) override { - // TVM-FFI exports have __tvm_ffi_ prefix - std::string symbol_name = "__tvm_ffi_" + std::string(name); - - // Try to get the symbol - return NullOpt if not found - void* symbol = nullptr; - try { - symbol = dylib_->GetSymbol(symbol_name); - } catch (const Error& e) { - // Symbol not found - return Optional(); - } - - // Wrap C function pointer as tvm-ffi Function - 
using TVMFFISafeCallType = - int (*)(void* handle, const TVMFFIAny* args, int32_t num_args, TVMFFIAny* rv); - auto c_func = reinterpret_cast(symbol); - - return Function::FromPacked([c_func, name](PackedArgs args, Any* rv) { - std::vector arg_views; - arg_views.reserve(args.size()); - for (int i = 0; i < args.size(); ++i) { - arg_views.push_back(args[i]); - } - - int ret_code = c_func(nullptr, reinterpret_cast(arg_views.data()), - static_cast(args.size()), reinterpret_cast(rv)); - - if (ret_code != 0) { - TVM_FFI_THROW(RuntimeError) << "Function '" << name << "' returned error code " << ret_code; - } - }); +Optional DynamicLibraryObj::GetFunction(const String& name) { + // TVM-FFI exports have __tvm_ffi_ prefix + std::string symbol_name = "__tvm_ffi_" + std::string(name); + + // Try to get the symbol - return NullOpt if not found + void* symbol = nullptr; + try { + symbol = GetSymbol(symbol_name); + } catch (const Error& e) { + // Symbol not found + return std::nullopt; } - private: - ObjectPtr dylib_; -}; + // Wrap C function pointer as tvm-ffi Function + auto c_func = reinterpret_cast(symbol); + + return Function::FromPacked([c_func, name](PackedArgs args, Any* rv) { + TVM_FFI_ICHECK_LT(rv->type_index(), ffi::TypeIndex::kTVMFFIStaticObjectBegin); + TVM_FFI_CHECK_SAFE_CALL((*c_func)(nullptr, reinterpret_cast(args.data()), + args.size(), reinterpret_cast(rv))); + }); +} //------------------------------------- // Registration @@ -168,34 +141,24 @@ static void RegisterOrcJITFunctions() { namespace refl = tvm::ffi::reflection; refl::GlobalDef() - .def("orcjit.CreateExecutionSession", - []() -> ORCJITExecutionSession { return ORCJITExecutionSession::Create(); }) - .def("orcjit.SessionCreateDynamicLibrary", - [](ORCJITExecutionSession session, String name) -> ObjectRef { - auto session_obj = GetObjectPtr( - const_cast(session.as())); - return ObjectRef(session_obj->CreateDynamicLibrary(name)); + .def("orcjit.ExecutionSession", ORCJITExecutionSession::Create) + .def("orcjit.ExecutionSessionCreateDynamicLibrary", + [](const ORCJITExecutionSession& session, const String& name) -> ObjectRef { + return session->CreateDynamicLibrary(name); }) .def("orcjit.DynamicLibraryAdd", - [](ORCJITDynamicLibrary* dylib, String path) { dylib->AddObjectFile(path); }) + [](const DynamicLibrary& dylib, const String& path) { dylib->AddObjectFile(path); }) .def("orcjit.DynamicLibrarySetLinkOrder", - [](ORCJITDynamicLibrary* dylib, Array libraries) { - std::vector> lib_ptrs; - lib_ptrs.reserve(libraries.size()); - for (const auto& lib_ref : libraries) { - auto* lib = lib_ref.as(); - auto lib_ptr = - GetObjectPtr(const_cast(lib)); - lib_ptrs.push_back(lib_ptr); + [](const DynamicLibrary& dylib, const Array& libraries) { + std::vector libs; + libs.reserve(libraries.size()); + for (const auto& lib : libraries) { + libs.push_back(&lib->GetJITDylib()); } - dylib->SetLinkOrder(lib_ptrs); + dylib->SetLinkOrder(libs); }) .def("orcjit.DynamicLibraryGetName", - [](ORCJITDynamicLibrary* dylib) -> String { return dylib->GetName(); }) - .def("orcjit.DynamicLibraryToModule", [](ORCJITDynamicLibrary* dylib) -> Module { - return Module( - make_object(GetObjectPtr(dylib))); - }); + [](const DynamicLibrary& dylib) -> String { return dylib->GetName(); }); } TVM_FFI_STATIC_INIT_BLOCK() { diff --git a/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.h b/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.h index a028e45b..e7784e12 100644 --- a/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.h +++ b/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.h @@ -26,6 +26,8 
@@ #include #include +#include +#include #include #include @@ -38,15 +40,7 @@ namespace orcjit { class ORCJITExecutionSessionObj; -/*! - * \brief DynamicLibrary wrapper for LLVM ORC JIT v2 JITDylib - * - * This class wraps an LLVM JITDylib and provides functionality to: - * - Load object files - * - Link against other dynamic libraries - * - Look up symbols - */ -class ORCJITDynamicLibrary : public Object { +class DynamicLibraryObj : public ModuleObj { public: /*! * \brief Add an object file to this library @@ -56,12 +50,12 @@ class ORCJITDynamicLibrary : public Object { /*! * \brief Set the link order for symbol resolution - * \param libraries Vector of libraries to search for symbols (in order) + * \param dylibs Vector of libraries to search for symbols (in order) * * When resolving symbols, this library will search in the specified libraries * in the order provided. This replaces any previous link order. */ - void SetLinkOrder(const std::vector>& libraries); + void SetLinkOrder(const std::vector& dylibs); /*! * \brief Look up a symbol in this library @@ -82,9 +76,6 @@ class ORCJITDynamicLibrary : public Object { */ String GetName() const { return name_; } - static constexpr bool _type_mutable = true; - TVM_FFI_DECLARE_OBJECT_INFO_FINAL("orcjit.DynamicLibrary", ORCJITDynamicLibrary, Object); - /*! * \brief Constructor * \param session The parent execution session @@ -92,8 +83,12 @@ class ORCJITDynamicLibrary : public Object { * \param jit The LLJIT instance * \param name The library name */ - ORCJITDynamicLibrary(ObjectPtr session, llvm::orc::JITDylib* dylib, - llvm::orc::LLJIT* jit, String name); + DynamicLibraryObj(ObjectPtr session, llvm::orc::JITDylib* dylib, + llvm::orc::LLJIT* jit, String name); + + const char* kind() const final { return "orcjit_dynamic_library"; } + + Optional GetFunction(const String& name) override; private: /*! \brief Parent execution session (for lifetime management) */ @@ -112,6 +107,20 @@ class ORCJITDynamicLibrary : public Object { llvm::orc::JITDylibSearchOrder link_order_; }; +/*! 
+ * \brief DynamicLibrary wrapper for LLVM ORC JIT v2 JITDylib + * + * This class wraps an LLVM JITDylib and provides functionality to: + * - Load object files + * - Link against other dynamic libraries + * - Look up symbols + */ +class DynamicLibrary : public Module { + public: + explicit DynamicLibrary(const ObjectPtr& ptr) : Module(ptr){}; + TVM_FFI_DEFINE_OBJECT_REF_METHODS_NOTNULLABLE(DynamicLibrary, Module, DynamicLibraryObj); +}; + } // namespace orcjit } // namespace ffi } // namespace tvm diff --git a/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.cc b/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.cc index e4a638f7..13e8118a 100644 --- a/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.cc +++ b/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.cc @@ -75,8 +75,7 @@ ORCJITExecutionSession ORCJITExecutionSession::Create() { return ORCJITExecutionSession(obj); } -ObjectPtr ORCJITExecutionSessionObj::CreateDynamicLibrary( - const String& name) { +DynamicLibrary ORCJITExecutionSessionObj::CreateDynamicLibrary(const String& name) { TVM_FFI_CHECK(jit_ != nullptr, InternalError) << "ExecutionSession not initialized"; // Generate name if not provided @@ -132,11 +131,11 @@ ObjectPtr ORCJITExecutionSessionObj::CreateDynamicLibrary( } // Create the wrapper object - auto dylib = make_object(GetObjectPtr(this), &jd, - jit_.get(), lib_name); + auto dylib = DynamicLibrary(make_object( + GetObjectPtr(this), &jd, jit_.get(), lib_name)); // Store for lifetime management - dylibs_[lib_name] = dylib; + dylibs_.insert({lib_name, dylib}); return dylib; } diff --git a/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.h b/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.h index 1115aeac..fb6dbff3 100644 --- a/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.h +++ b/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.h @@ -37,7 +37,7 @@ namespace ffi { namespace orcjit { // Forward declaration -class ORCJITDynamicLibrary; +class DynamicLibrary; /*! * \brief ExecutionSession object for LLVM ORC JIT v2 @@ -62,7 +62,7 @@ class ORCJITExecutionSessionObj : public Object { * \param name Optional name for the library (for debugging) * \return The created dynamic library instance */ - ObjectPtr CreateDynamicLibrary(const String& name); + DynamicLibrary CreateDynamicLibrary(const String& name); /*! * \brief Get the underlying LLVM ExecutionSession @@ -87,7 +87,7 @@ class ORCJITExecutionSessionObj : public Object { int dylib_counter_ = 0; /*! \brief Map of created dynamic libraries for lifetime management */ - std::unordered_map> dylibs_; + std::unordered_map dylibs_; }; /*! 
diff --git a/addons/tvm-ffi-orcjit/tests/test_basic.py b/addons/tvm-ffi-orcjit/tests/test_basic.py index b3577030..396b1c07 100644 --- a/addons/tvm-ffi-orcjit/tests/test_basic.py +++ b/addons/tvm-ffi-orcjit/tests/test_basic.py @@ -21,7 +21,7 @@ from pathlib import Path import pytest -from tvm_ffi_orcjit import create_session +from tvm_ffi_orcjit import ExecutionSession def get_test_obj_file() -> Path: @@ -93,13 +93,13 @@ def get_test_obj_file_conflict() -> Path: def test_create_session() -> None: """Test creating an execution session.""" - session = create_session() + session = ExecutionSession() assert session is not None def test_create_library() -> None: """Test creating a dynamic library.""" - session = create_session() + session = ExecutionSession() lib = session.create_library() assert lib is not None @@ -110,7 +110,7 @@ def test_load_and_execute_function() -> None: obj_file = get_test_obj_file() # Create session and library - session = create_session() + session = ExecutionSession() lib = session.create_library() # Load object file @@ -129,7 +129,7 @@ def test_load_and_execute_function() -> None: def test_multiple_libraries() -> None: """Test creating and using multiple libraries.""" - session = create_session() + session = ExecutionSession() lib1 = session.create_library("lib1") lib2 = session.create_library("lib2") @@ -143,7 +143,7 @@ def test_function_not_found() -> None: # Get pre-built test object file obj_file = get_test_obj_file() - session = create_session() + session = ExecutionSession() lib = session.create_library() lib.add(str(obj_file)) @@ -156,7 +156,7 @@ def test_gradually_add_objects_to_same_library() -> None: obj_file1 = get_test_obj_file() obj_file2 = get_test_obj_file2() - session = create_session() + session = ExecutionSession() lib = session.create_library() # Add first object file @@ -189,7 +189,7 @@ def test_two_separate_libraries() -> None: obj_file1 = get_test_obj_file() obj_file2 = get_test_obj_file2() - session = create_session() + session = ExecutionSession() # Create first library with first object lib1 = session.create_library("lib1") @@ -227,7 +227,7 @@ def test_symbol_conflict_same_library() -> None: obj_file1 = get_test_obj_file() obj_file_conflict = get_test_obj_file_conflict() - session = create_session() + session = ExecutionSession() lib = session.create_library() # Add first object file @@ -247,7 +247,7 @@ def test_symbol_conflict_different_libraries() -> None: obj_file1 = get_test_obj_file() obj_file_conflict = get_test_obj_file_conflict() - session = create_session() + session = ExecutionSession() # Create first library with first object lib1 = session.create_library("lib1") From 876213fba5036a11ff8f1fac10d56a885c6a2487 Mon Sep 17 00:00:00 2001 From: Yaxing Cai Date: Wed, 19 Nov 2025 09:38:58 +0000 Subject: [PATCH 34/47] fix --- addons/tvm-ffi-orcjit/examples/quick-start/run.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/addons/tvm-ffi-orcjit/examples/quick-start/run.py b/addons/tvm-ffi-orcjit/examples/quick-start/run.py index 28926190..5077f26d 100755 --- a/addons/tvm-ffi-orcjit/examples/quick-start/run.py +++ b/addons/tvm-ffi-orcjit/examples/quick-start/run.py @@ -33,7 +33,7 @@ sys.path.insert(0, str(Path(__file__).parent.parent.parent / "python")) -from tvm_ffi_orcjit import create_session +from tvm_ffi_orcjit import ExecutionSession def main() -> int: @@ -48,7 +48,7 @@ def main() -> int: print(f"Loading object file: {obj_file}") # Create execution session and dynamic library - session = create_session() + session 
= ExecutionSession() lib = session.create_library() lib.add(str(obj_file)) From 90933d7035216bd7520f4e494d0f10d511ad8eb7 Mon Sep 17 00:00:00 2001 From: Yaxing Cai Date: Thu, 20 Nov 2025 18:10:18 +0000 Subject: [PATCH 35/47] wip --- addons/tvm-ffi-orcjit/CMakeLists.txt | 6 ---- addons/tvm-ffi-orcjit/MANIFEST.in | 7 ---- addons/tvm-ffi-orcjit/pyproject.toml | 20 +++++++++-- .../python/tvm_ffi_orcjit/__init__.py | 36 ++++++++----------- .../python/tvm_ffi_orcjit/session.py | 1 + addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc | 6 ++-- .../tvm-ffi-orcjit/src/ffi/orcjit_session.cc | 8 +++-- .../tvm-ffi-orcjit/src/ffi/orcjit_session.h | 6 ++-- 8 files changed, 45 insertions(+), 45 deletions(-) delete mode 100644 addons/tvm-ffi-orcjit/MANIFEST.in diff --git a/addons/tvm-ffi-orcjit/CMakeLists.txt b/addons/tvm-ffi-orcjit/CMakeLists.txt index 24b87575..7c5e575a 100644 --- a/addons/tvm-ffi-orcjit/CMakeLists.txt +++ b/addons/tvm-ffi-orcjit/CMakeLists.txt @@ -69,9 +69,3 @@ install( ARCHIVE DESTINATION lib RUNTIME DESTINATION bin ) - -# For Python package building -if (SKBUILD) - # Install shared library alongside Python modules - install(TARGETS tvm_ffi_orcjit LIBRARY DESTINATION .) -endif () diff --git a/addons/tvm-ffi-orcjit/MANIFEST.in b/addons/tvm-ffi-orcjit/MANIFEST.in deleted file mode 100644 index 83b838e8..00000000 --- a/addons/tvm-ffi-orcjit/MANIFEST.in +++ /dev/null @@ -1,7 +0,0 @@ -include README.md -include LICENSE -include pyproject.toml -include CMakeLists.txt -recursive-include include *.h -recursive-include src *.cc *.cpp -recursive-include python *.py diff --git a/addons/tvm-ffi-orcjit/pyproject.toml b/addons/tvm-ffi-orcjit/pyproject.toml index 5f7549fe..f6c51812 100644 --- a/addons/tvm-ffi-orcjit/pyproject.toml +++ b/addons/tvm-ffi-orcjit/pyproject.toml @@ -16,7 +16,7 @@ # under the License. 
[build-system]
-requires = ["scikit-build-core>=0.3.3", "apache-tvm-ffi"]
+requires = ["scikit-build-core>=0.10.0", "apache-tvm-ffi"]
 build-backend = "scikit_build_core.build"
 
 [project]
@@ -46,10 +46,26 @@ Homepage = "https://github.com/apache/tvm-ffi"
 Repository = "https://github.com/apache/tvm-ffi"
 
 [tool.scikit-build]
-cmake.minimum-version = "3.18"
+cmake.version = "CMakeLists.txt"
 cmake.build-type = "Release"
 wheel.py-api = "py3"
+build-dir = "build"
+build.verbose = true
+editable.rebuild = false
+editable.verbose = true
+wheel.packages = ["python/tvm_ffi_orcjit"]
+wheel.install-dir = "tvm_ffi_orcjit"  # Install CMake artifacts under the tvm_ffi_orcjit package
+sdist.include = [
+  "/README.md",
+  "/LICENSE",
+  "/pyproject.toml",
+  "/CMakeLists.txt",
+  "/include/**/*.h",
+  "/src/**/*.cc",
+  "/src/**/*.cpp",
+  "/python/**/*.py"
+]
 
 [tool.scikit-build.cmake.define]
 CMAKE_EXPORT_COMPILE_COMMANDS = "ON"
diff --git a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/__init__.py b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/__init__.py
index 986ea7be..566c7e70 100644
--- a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/__init__.py
+++ b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/__init__.py
@@ -32,7 +32,6 @@
 import ctypes
 import platform
-import sys
 from pathlib import Path
 
 from tvm_ffi import load_module
@@ -46,28 +45,21 @@
     _LIB_EXT = "so"
 
 # Load the orcjit extension library
-_LIB_PATH = Path(__file__).parent.parent.parent / f"libtvm_ffi_orcjit.{_LIB_EXT}"
+_LIB_PATH = [
+    Path(__file__).parent.parent.parent / "lib" / f"libtvm_ffi_orcjit.{_LIB_EXT}",
+    Path(__file__).parent.parent.parent / "build" / f"libtvm_ffi_orcjit.{_LIB_EXT}",
+]
 _lib_path_str = None
-if _LIB_PATH.exists():
-    _lib_module = load_module(str(_LIB_PATH))
-    _lib_path_str = str(_LIB_PATH)
-else:
-    # Fallback: search in site-packages (installed location)
-    found = False
-    for site_pkg in sys.path:
-        candidate = Path(site_pkg) / f"libtvm_ffi_orcjit.{_LIB_EXT}"
-        if candidate.exists():
-            _lib_module = load_module(str(candidate))
-            _lib_path_str = str(candidate)
-            found = True
-            break
-
-    if not found:
-        raise RuntimeError(
-            f"Could not find libtvm_ffi_orcjit.{_LIB_EXT}. "
-            f"Searched in {_LIB_PATH} and site-packages. "
-            f"Please ensure the package is installed correctly."
-        )
+for path in _LIB_PATH:
+    if path.exists():
+        _ = load_module(str(path))
+        _lib_path_str = str(path)
+if _lib_path_str is None:
+    raise RuntimeError(
+        f"Could not find libtvm_ffi_orcjit.{_LIB_EXT}. "
+        f"Searched in {_LIB_PATH}. "
+        f"Please ensure the package is installed correctly."
+    )
 
 # Explicitly initialize the library to register functions
 # This is needed because static initializers may not run when loaded via dlopen
diff --git a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/session.py b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/session.py
index a4a0d54c..404cb571 100644
--- a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/session.py
+++ b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/session.py
@@ -26,6 +26,7 @@
 from . import _ffi_api
 
 
+@register_object("orcjit.ExecutionSession")
 class ExecutionSession(Object):
     """ORC JIT Execution Session.
diff --git a/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc b/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc index 18d9a7f9..3ab08ebf 100644 --- a/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc +++ b/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc @@ -140,10 +140,12 @@ static void RegisterOrcJITFunctions() { namespace refl = tvm::ffi::reflection; + refl::ObjectDef(); + refl::GlobalDef() - .def("orcjit.ExecutionSession", ORCJITExecutionSession::Create) + .def("orcjit.ExecutionSession", []() { return ORCJITExecutionSession(); }) .def("orcjit.ExecutionSessionCreateDynamicLibrary", - [](const ORCJITExecutionSession& session, const String& name) -> ObjectRef { + [](const ORCJITExecutionSession& session, const String& name) -> Module { return session->CreateDynamicLibrary(name); }) .def("orcjit.DynamicLibraryAdd", diff --git a/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.cc b/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.cc index 13e8118a..adc4a303 100644 --- a/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.cc +++ b/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.cc @@ -38,6 +38,8 @@ #include #include "orcjit_dylib.h" +#include "tvm/ffi/extra/module.h" +#include "tvm/ffi/object.h" namespace tvm { namespace ffi { @@ -69,10 +71,10 @@ void ORCJITExecutionSessionObj::Initialize() { jit_ = std::move(*jit_or_err); } -ORCJITExecutionSession ORCJITExecutionSession::Create() { - auto obj = make_object(); +ORCJITExecutionSession::ORCJITExecutionSession() { + ObjectPtr obj = make_object(); obj->Initialize(); - return ORCJITExecutionSession(obj); + data_ = std::move(obj); } DynamicLibrary ORCJITExecutionSessionObj::CreateDynamicLibrary(const String& name) { diff --git a/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.h b/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.h index fb6dbff3..9809fe44 100644 --- a/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.h +++ b/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.h @@ -101,11 +101,11 @@ class ORCJITExecutionSession : public ObjectRef { * \brief Create a new ExecutionSession * \return The created execution session instance */ - static ORCJITExecutionSession Create(); + ORCJITExecutionSession(); // Required: define object reference methods - TVM_FFI_DEFINE_OBJECT_REF_METHODS_NULLABLE(ORCJITExecutionSession, ObjectRef, - ORCJITExecutionSessionObj); + TVM_FFI_DEFINE_OBJECT_REF_METHODS_NOTNULLABLE(ORCJITExecutionSession, ObjectRef, + ORCJITExecutionSessionObj); }; } // namespace orcjit From 6d7061bf09c9fee4f1660f6a85045b858b1b8f34 Mon Sep 17 00:00:00 2001 From: Yaxing Cai Date: Thu, 20 Nov 2025 21:05:31 +0000 Subject: [PATCH 36/47] upd --- .../python/tvm_ffi_orcjit/dylib.py | 56 ++----------------- .../python/tvm_ffi_orcjit/session.py | 3 +- addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc | 47 ++++++++-------- addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.h | 39 ++++++------- .../tvm-ffi-orcjit/src/ffi/orcjit_session.cc | 5 +- .../tvm-ffi-orcjit/src/ffi/orcjit_session.h | 6 +- 6 files changed, 55 insertions(+), 101 deletions(-) diff --git a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/dylib.py b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/dylib.py index e0372262..36b98e3c 100644 --- a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/dylib.py +++ b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/dylib.py @@ -19,18 +19,11 @@ from __future__ import annotations from pathlib import Path -from typing import TYPE_CHECKING, Any -from tvm_ffi import Function, Module -from tvm_ffi._ffi_api import ModuleGetFunction +from tvm_ffi import Module -from . 
import _ffi_api -if TYPE_CHECKING: - from .session import ExecutionSession - - -class DynamicLibrary: +class DynamicLibrary(Module): """ORC JIT Dynamic Library (JITDylib). Represents a collection of symbols that can be loaded from object files and linked @@ -47,20 +40,6 @@ class DynamicLibrary: """ - def __init__(self, handle: Any, session: ExecutionSession) -> None: - """Initialize DynamicLibrary from a handle. - - Parameters - ---------- - handle : object - The underlying C++ ORCJITDynamicLibrary object. - session : ExecutionSession - The parent execution session (kept alive for the library's lifetime). - - """ - self._handle = handle - self._session = session # Keep session alive - def add(self, object_file: str | Path) -> None: """Add an object file to this dynamic library. @@ -77,7 +56,7 @@ def add(self, object_file: str | Path) -> None: """ if isinstance(object_file, Path): object_file = str(object_file) - _ffi_api.DynamicLibraryAdd(self._handle, object_file) + self.get_function("add")(object_file) def set_link_order(self, *libraries: DynamicLibrary) -> None: """Set the link order for symbol resolution. @@ -103,31 +82,4 @@ def set_link_order(self, *libraries: DynamicLibrary) -> None: >>> lib_main.set_link_order(lib_utils, lib_core) """ - lib_handles = [lib._handle for lib in libraries] - _ffi_api.DynamicLibrarySetLinkOrder(self._handle, lib_handles) - - def get_function(self, name: str) -> Function: - """Get a function from this dynamic library. - - Parameters - ---------- - name : str - The name of the function to retrieve. - - Returns - ------- - callable - The function object that can be called from Python. - - Examples - -------- - >>> lib.add("add.o") - >>> add_func = lib.get_function("add") - >>> result = add_func(1, 2) - - """ - # Get the module handle and use ModuleGetFunction - func = ModuleGetFunction(self._handle, name, False) - if func is None: - raise AttributeError(f"Module has no function '{name}'") - return func + self.get_function("set_link_order")(libraries) diff --git a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/session.py b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/session.py index 404cb571..f4ffdedd 100644 --- a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/session.py +++ b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/session.py @@ -56,5 +56,4 @@ def create_library(self, name: str = "") -> DynamicLibrary: A new DynamicLibrary instance. 
""" - handle = _ffi_api.ExecutionSessionCreateDynamicLibrary(self, name) # type: ignore - return DynamicLibrary(handle, self) + return _ffi_api.ExecutionSessionCreateDynamicLibrary(self, name) # type: ignore diff --git a/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc b/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc index 3ab08ebf..fbe7db15 100644 --- a/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc +++ b/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc @@ -38,19 +38,21 @@ #include #include "orcjit_session.h" +#include "tvm/ffi/function.h" namespace tvm { namespace ffi { namespace orcjit { -DynamicLibraryObj::DynamicLibraryObj(ObjectPtr session, - llvm::orc::JITDylib* dylib, llvm::orc::LLJIT* jit, String name) +ORCJITDynamicLibraryObj::ORCJITDynamicLibraryObj(ObjectPtr session, + llvm::orc::JITDylib* dylib, llvm::orc::LLJIT* jit, + String name) : session_(std::move(session)), dylib_(dylib), jit_(jit), name_(std::move(name)) { TVM_FFI_CHECK(dylib_ != nullptr, ValueError) << "JITDylib cannot be null"; TVM_FFI_CHECK(jit_ != nullptr, ValueError) << "LLJIT cannot be null"; } -void DynamicLibraryObj::AddObjectFile(const String& path) { +void ORCJITDynamicLibraryObj::AddObjectFile(const String& path) { // Read object file auto buffer_or_err = llvm::MemoryBuffer::getFile(path.c_str()); if (!buffer_or_err) { @@ -67,7 +69,7 @@ void DynamicLibraryObj::AddObjectFile(const String& path) { } } -void DynamicLibraryObj::SetLinkOrder(const std::vector& dylibs) { +void ORCJITDynamicLibraryObj::SetLinkOrder(const std::vector& dylibs) { // Clear and rebuild the link order link_order_.clear(); @@ -79,7 +81,7 @@ void DynamicLibraryObj::SetLinkOrder(const std::vector& dy dylib_->setLinkOrder(link_order_, false); } -void* DynamicLibraryObj::GetSymbol(const String& name) { +void* ORCJITDynamicLibraryObj::GetSymbol(const String& name) { // Build search order: this dylib first, then all linked dylibs llvm::orc::JITDylibSearchOrder search_order; search_order.emplace_back(dylib_, llvm::orc::JITDylibLookupFlags::MatchAllSymbols); @@ -101,14 +103,28 @@ void* DynamicLibraryObj::GetSymbol(const String& name) { return symbol_or_err->getAddress().toPtr(); } -llvm::orc::JITDylib& DynamicLibraryObj::GetJITDylib() { +llvm::orc::JITDylib& ORCJITDynamicLibraryObj::GetJITDylib() { TVM_FFI_CHECK(dylib_ != nullptr, InternalError) << "JITDylib is null"; return *dylib_; } -Optional DynamicLibraryObj::GetFunction(const String& name) { +Optional ORCJITDynamicLibraryObj::GetFunction(const String& name) { + if (name == "add") { + return Function::FromTyped([this](const String& path) { AddObjectFile(path); }); + } + if (name == "set_link_order") { + return Function::FromTyped([this](const Array& libraries) { + std::vector libs; + libs.reserve(libraries.size()); + for (const auto& lib : libraries) { + libs.push_back(&GetJITDylib()); + } + SetLinkOrder(libs); + }); + } + // TVM-FFI exports have __tvm_ffi_ prefix - std::string symbol_name = "__tvm_ffi_" + std::string(name); + std::string symbol_name = symbol::tvm_ffi_symbol_prefix + std::string(name); // Try to get the symbol - return NullOpt if not found void* symbol = nullptr; @@ -147,20 +163,7 @@ static void RegisterOrcJITFunctions() { .def("orcjit.ExecutionSessionCreateDynamicLibrary", [](const ORCJITExecutionSession& session, const String& name) -> Module { return session->CreateDynamicLibrary(name); - }) - .def("orcjit.DynamicLibraryAdd", - [](const DynamicLibrary& dylib, const String& path) { dylib->AddObjectFile(path); }) - .def("orcjit.DynamicLibrarySetLinkOrder", - [](const 
DynamicLibrary& dylib, const Array& libraries) { - std::vector libs; - libs.reserve(libraries.size()); - for (const auto& lib : libraries) { - libs.push_back(&lib->GetJITDylib()); - } - dylib->SetLinkOrder(libs); - }) - .def("orcjit.DynamicLibraryGetName", - [](const DynamicLibrary& dylib) -> String { return dylib->GetName(); }); + }); } TVM_FFI_STATIC_INIT_BLOCK() { diff --git a/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.h b/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.h index e7784e12..975150b9 100644 --- a/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.h +++ b/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.h @@ -40,8 +40,23 @@ namespace orcjit { class ORCJITExecutionSessionObj; -class DynamicLibraryObj : public ModuleObj { +class ORCJITDynamicLibraryObj : public ModuleObj { public: + /*! + * \brief Constructor + * \param session The parent execution session + * \param dylib The LLVM JITDylib + * \param jit The LLJIT instance + * \param name The library name + */ + ORCJITDynamicLibraryObj(ObjectPtr session, llvm::orc::JITDylib* dylib, + llvm::orc::LLJIT* jit, String name); + + const char* kind() const final { return "orcjit"; } + + Optional GetFunction(const String& name) override; + + private: /*! * \brief Add an object file to this library * \param path Path to the object file to load @@ -76,21 +91,6 @@ class DynamicLibraryObj : public ModuleObj { */ String GetName() const { return name_; } - /*! - * \brief Constructor - * \param session The parent execution session - * \param dylib The LLVM JITDylib - * \param jit The LLJIT instance - * \param name The library name - */ - DynamicLibraryObj(ObjectPtr session, llvm::orc::JITDylib* dylib, - llvm::orc::LLJIT* jit, String name); - - const char* kind() const final { return "orcjit_dynamic_library"; } - - Optional GetFunction(const String& name) override; - - private: /*! 
\brief Parent execution session (for lifetime management) */ ObjectPtr session_; @@ -115,10 +115,11 @@ class DynamicLibraryObj : public ModuleObj { * - Link against other dynamic libraries * - Look up symbols */ -class DynamicLibrary : public Module { +class ORCJITDynamicLibrary : public Module { public: - explicit DynamicLibrary(const ObjectPtr& ptr) : Module(ptr){}; - TVM_FFI_DEFINE_OBJECT_REF_METHODS_NOTNULLABLE(DynamicLibrary, Module, DynamicLibraryObj); + explicit ORCJITDynamicLibrary(const ObjectPtr& ptr) : Module(ptr){}; + TVM_FFI_DEFINE_OBJECT_REF_METHODS_NOTNULLABLE(ORCJITDynamicLibrary, Module, + ORCJITDynamicLibraryObj); }; } // namespace orcjit diff --git a/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.cc b/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.cc index adc4a303..06dedf22 100644 --- a/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.cc +++ b/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.cc @@ -38,7 +38,6 @@ #include #include "orcjit_dylib.h" -#include "tvm/ffi/extra/module.h" #include "tvm/ffi/object.h" namespace tvm { @@ -77,7 +76,7 @@ ORCJITExecutionSession::ORCJITExecutionSession() { data_ = std::move(obj); } -DynamicLibrary ORCJITExecutionSessionObj::CreateDynamicLibrary(const String& name) { +ORCJITDynamicLibrary ORCJITExecutionSessionObj::CreateDynamicLibrary(const String& name) { TVM_FFI_CHECK(jit_ != nullptr, InternalError) << "ExecutionSession not initialized"; // Generate name if not provided @@ -133,7 +132,7 @@ DynamicLibrary ORCJITExecutionSessionObj::CreateDynamicLibrary(const String& nam } // Create the wrapper object - auto dylib = DynamicLibrary(make_object( + auto dylib = ORCJITDynamicLibrary(make_object( GetObjectPtr(this), &jd, jit_.get(), lib_name)); // Store for lifetime management diff --git a/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.h b/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.h index 9809fe44..016d0b18 100644 --- a/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.h +++ b/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.h @@ -37,7 +37,7 @@ namespace ffi { namespace orcjit { // Forward declaration -class DynamicLibrary; +class ORCJITDynamicLibrary; /*! * \brief ExecutionSession object for LLVM ORC JIT v2 @@ -62,7 +62,7 @@ class ORCJITExecutionSessionObj : public Object { * \param name Optional name for the library (for debugging) * \return The created dynamic library instance */ - DynamicLibrary CreateDynamicLibrary(const String& name); + ORCJITDynamicLibrary CreateDynamicLibrary(const String& name); /*! * \brief Get the underlying LLVM ExecutionSession @@ -87,7 +87,7 @@ class ORCJITExecutionSessionObj : public Object { int dylib_counter_ = 0; /*! \brief Map of created dynamic libraries for lifetime management */ - std::unordered_map dylibs_; + std::unordered_map dylibs_; }; /*! 
From c831c7a08ea3d8db58e19239abe78c135060c33e Mon Sep 17 00:00:00 2001 From: Yaxing Cai Date: Thu, 20 Nov 2025 22:11:17 +0000 Subject: [PATCH 37/47] upd --- addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/dylib.py | 4 ++-- addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/dylib.py b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/dylib.py index 36b98e3c..446b8a40 100644 --- a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/dylib.py +++ b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/dylib.py @@ -56,7 +56,7 @@ def add(self, object_file: str | Path) -> None: """ if isinstance(object_file, Path): object_file = str(object_file) - self.get_function("add")(object_file) + self.get_function("orcjit.add_object_file")(object_file) def set_link_order(self, *libraries: DynamicLibrary) -> None: """Set the link order for symbol resolution. @@ -82,4 +82,4 @@ def set_link_order(self, *libraries: DynamicLibrary) -> None: >>> lib_main.set_link_order(lib_utils, lib_core) """ - self.get_function("set_link_order")(libraries) + self.get_function("orcjit.set_link_order")(libraries) diff --git a/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc b/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc index fbe7db15..4dc63000 100644 --- a/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc +++ b/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc @@ -109,10 +109,10 @@ llvm::orc::JITDylib& ORCJITDynamicLibraryObj::GetJITDylib() { } Optional ORCJITDynamicLibraryObj::GetFunction(const String& name) { - if (name == "add") { + if (name == "orcjit.add_object_file") { return Function::FromTyped([this](const String& path) { AddObjectFile(path); }); } - if (name == "set_link_order") { + if (name == "orcjit.set_link_order") { return Function::FromTyped([this](const Array& libraries) { std::vector libs; libs.reserve(libraries.size()); From 9868ad45dcb724a9c7a7f2172246b2f3d56c15d5 Mon Sep 17 00:00:00 2001 From: Yaxing Cai Date: Mon, 24 Nov 2025 04:47:00 +0000 Subject: [PATCH 38/47] upd --- .../python/tvm_ffi_orcjit/session.py | 5 ++- addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc | 44 +++++++++---------- addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.h | 3 -- 3 files changed, 24 insertions(+), 28 deletions(-) diff --git a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/session.py b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/session.py index f4ffdedd..8cdc3d60 100644 --- a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/session.py +++ b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/session.py @@ -56,4 +56,7 @@ def create_library(self, name: str = "") -> DynamicLibrary: A new DynamicLibrary instance. 
""" - return _ffi_api.ExecutionSessionCreateDynamicLibrary(self, name) # type: ignore + handle = _ffi_api.ExecutionSessionCreateDynamicLibrary(self, name) # type: ignore + lib = DynamicLibrary.__new__(DynamicLibrary) + lib.__move_handle_from__(handle) + return lib diff --git a/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc b/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc index 4dc63000..6e47ba9c 100644 --- a/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc +++ b/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc @@ -48,6 +48,14 @@ ORCJITDynamicLibraryObj::ORCJITDynamicLibraryObj(ObjectPtr(GetSymbol(ffi::symbol::tvm_ffi_library_ctx))) { + *ctx_addr = this; + } + Module::VisitContextSymbols([this](const ffi::String& name, void* symbol) { + if (void** ctx_addr = reinterpret_cast(GetSymbol(ffi::symbol::tvm_ffi_library_ctx))) { + *ctx_addr = symbol; + } + }); TVM_FFI_CHECK(dylib_ != nullptr, ValueError) << "JITDylib cannot be null"; TVM_FFI_CHECK(jit_ != nullptr, ValueError) << "LLJIT cannot be null"; } @@ -91,16 +99,9 @@ void* ORCJITDynamicLibraryObj::GetSymbol(const String& name) { // Look up symbol using the full search order auto symbol_or_err = jit_->getExecutionSession().lookup(search_order, jit_->mangleAndIntern(name.c_str())); - if (!symbol_or_err) { - auto err = symbol_or_err.takeError(); - std::string err_msg; - llvm::handleAllErrors(std::move(err), - [&](const llvm::ErrorInfoBase& eib) { err_msg = eib.message(); }); - TVM_FFI_THROW(ValueError) << "Failed to find symbol '" << name << "': " << err_msg; - } // Convert ExecutorAddr to pointer - return symbol_or_err->getAddress().toPtr(); + return symbol_or_err ? symbol_or_err->getAddress().toPtr() : nullptr; } llvm::orc::JITDylib& ORCJITDynamicLibraryObj::GetJITDylib() { @@ -116,7 +117,7 @@ Optional ORCJITDynamicLibraryObj::GetFunction(const String& name) { return Function::FromTyped([this](const Array& libraries) { std::vector libs; libs.reserve(libraries.size()); - for (const auto& lib : libraries) { + for (const ORCJITDynamicLibrary& lib : libraries) { libs.push_back(&GetJITDylib()); } SetLinkOrder(libs); @@ -127,22 +128,17 @@ Optional ORCJITDynamicLibraryObj::GetFunction(const String& name) { std::string symbol_name = symbol::tvm_ffi_symbol_prefix + std::string(name); // Try to get the symbol - return NullOpt if not found - void* symbol = nullptr; - try { - symbol = GetSymbol(symbol_name); - } catch (const Error& e) { - // Symbol not found - return std::nullopt; + if (void* symbol = GetSymbol(symbol_name)) { + // Wrap C function pointer as tvm-ffi Function + TVMFFISafeCallType c_func = reinterpret_cast(symbol); + + return Function::FromPacked([c_func, name](PackedArgs args, Any* rv) { + TVM_FFI_ICHECK_LT(rv->type_index(), ffi::TypeIndex::kTVMFFIStaticObjectBegin); + TVM_FFI_CHECK_SAFE_CALL((*c_func)(nullptr, reinterpret_cast(args.data()), + args.size(), reinterpret_cast(rv))); + }); } - - // Wrap C function pointer as tvm-ffi Function - auto c_func = reinterpret_cast(symbol); - - return Function::FromPacked([c_func, name](PackedArgs args, Any* rv) { - TVM_FFI_ICHECK_LT(rv->type_index(), ffi::TypeIndex::kTVMFFIStaticObjectBegin); - TVM_FFI_CHECK_SAFE_CALL((*c_func)(nullptr, reinterpret_cast(args.data()), - args.size(), reinterpret_cast(rv))); - }); + return std::nullopt; } //------------------------------------- diff --git a/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.h b/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.h index 975150b9..1bdd63c7 100644 --- a/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.h +++ 
b/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.h @@ -31,9 +31,6 @@ #include #include -#include -#include - namespace tvm { namespace ffi { namespace orcjit { From 3f042781217c883eca1e7a97c098128b33cd56a1 Mon Sep 17 00:00:00 2001 From: Yaxing Cai Date: Mon, 24 Nov 2025 04:52:21 +0000 Subject: [PATCH 39/47] fix --- addons/tvm-ffi-orcjit/pyproject.toml | 2 +- addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/session.py | 4 +--- addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.h | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/addons/tvm-ffi-orcjit/pyproject.toml b/addons/tvm-ffi-orcjit/pyproject.toml index f6c51812..f02b7103 100644 --- a/addons/tvm-ffi-orcjit/pyproject.toml +++ b/addons/tvm-ffi-orcjit/pyproject.toml @@ -64,7 +64,7 @@ sdist.include = [ "/include/**/*.h", "/src/**/*.cc", "/src/**/*.cpp", - "/python/**/*.py" + "/python/**/*.py", ] [tool.scikit-build.cmake.define] diff --git a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/session.py b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/session.py index 8cdc3d60..1269c7b3 100644 --- a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/session.py +++ b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/session.py @@ -18,12 +18,10 @@ from __future__ import annotations - from tvm_ffi import Object, register_object -from .dylib import DynamicLibrary - from . import _ffi_api +from .dylib import DynamicLibrary @register_object("orcjit.ExecutionSession") diff --git a/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.h b/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.h index 1bdd63c7..1d1eec13 100644 --- a/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.h +++ b/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.h @@ -114,7 +114,7 @@ class ORCJITDynamicLibraryObj : public ModuleObj { */ class ORCJITDynamicLibrary : public Module { public: - explicit ORCJITDynamicLibrary(const ObjectPtr& ptr) : Module(ptr){}; + explicit ORCJITDynamicLibrary(const ObjectPtr& ptr) : Module(ptr) {}; TVM_FFI_DEFINE_OBJECT_REF_METHODS_NOTNULLABLE(ORCJITDynamicLibrary, Module, ORCJITDynamicLibraryObj); }; From 6ae4991f7dbabf90540e98ac43f6748cc5dbe1c3 Mon Sep 17 00:00:00 2001 From: Yaxing Cai Date: Mon, 24 Nov 2025 04:54:59 +0000 Subject: [PATCH 40/47] upd --- addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/_ffi_api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/_ffi_api.py b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/_ffi_api.py index 28078e84..8181b3c4 100644 --- a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/_ffi_api.py +++ b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/_ffi_api.py @@ -13,7 +13,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -"""FFI APIs for orcjit""" +"""FFI APIs for orcjit.""" import tvm_ffi From 9496a0c3545baee104c94c2e6c99397e2a9e851d Mon Sep 17 00:00:00 2001 From: Yaxing Cai Date: Mon, 24 Nov 2025 04:57:39 +0000 Subject: [PATCH 41/47] fix --- addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/_ffi_api.py | 1 + 1 file changed, 1 insertion(+) diff --git a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/_ffi_api.py b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/_ffi_api.py index 8181b3c4..fb2464b4 100644 --- a/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/_ffi_api.py +++ b/addons/tvm-ffi-orcjit/python/tvm_ffi_orcjit/_ffi_api.py @@ -1,3 +1,4 @@ +# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. 
See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file From 2f504dde2d373e168f93b2d8148ce304fde6be18 Mon Sep 17 00:00:00 2001 From: Yaxing Cai Date: Thu, 4 Dec 2025 06:14:37 +0000 Subject: [PATCH 42/47] wip --- addons/tvm-ffi-orcjit/tests/CMakeLists.txt | 31 +++--- addons/tvm-ffi-orcjit/tests/README.md | 2 +- .../tests/sources/test_funcs_cuda.cu | 83 ++++++++++++++++ addons/tvm-ffi-orcjit/tests/test_basic.py | 94 ++++++++----------- 4 files changed, 139 insertions(+), 71 deletions(-) create mode 100644 addons/tvm-ffi-orcjit/tests/sources/test_funcs_cuda.cu diff --git a/addons/tvm-ffi-orcjit/tests/CMakeLists.txt b/addons/tvm-ffi-orcjit/tests/CMakeLists.txt index 1f53f7eb..5aa168f6 100644 --- a/addons/tvm-ffi-orcjit/tests/CMakeLists.txt +++ b/addons/tvm-ffi-orcjit/tests/CMakeLists.txt @@ -39,27 +39,32 @@ find_package(tvm_ffi CONFIG REQUIRED) add_library(test_funcs_obj OBJECT sources/test_funcs.cc) target_link_libraries(test_funcs_obj PRIVATE tvm_ffi_header) target_compile_options(test_funcs_obj PRIVATE -fPIC -O2) +install(FILES $ + DESTINATION ${CMAKE_CURRENT_SOURCE_DIR} RENAME test_funcs.o) # Create object library for second set of test functions add_library(test_funcs2_obj OBJECT sources/test_funcs2.cc) target_link_libraries(test_funcs2_obj PRIVATE tvm_ffi_header) target_compile_options(test_funcs2_obj PRIVATE -fPIC -O2) +install(FILES $ + DESTINATION ${CMAKE_CURRENT_SOURCE_DIR} RENAME test_funcs2.o) # Create object library for conflicting test functions add_library(test_funcs_conflict_obj OBJECT sources/test_funcs_conflict.cc) target_link_libraries(test_funcs_conflict_obj PRIVATE tvm_ffi_header) target_compile_options(test_funcs_conflict_obj PRIVATE -fPIC -O2) +install(FILES $ + DESTINATION ${CMAKE_CURRENT_SOURCE_DIR} RENAME test_funcs_conflict.o) -# Custom target to copy all object files to the test directory -add_custom_target( - copy_test_obj_files ALL - COMMAND ${CMAKE_COMMAND} -E copy $ - ${CMAKE_CURRENT_SOURCE_DIR}/test_funcs.o - COMMAND ${CMAKE_COMMAND} -E copy $ - ${CMAKE_CURRENT_SOURCE_DIR}/test_funcs2.o - COMMAND ${CMAKE_COMMAND} -E copy $ - ${CMAKE_CURRENT_SOURCE_DIR}/test_funcs_conflict.o - COMMAND ${CMAKE_COMMAND} -E echo "Successfully compiled all test object files" - DEPENDS test_funcs_obj test_funcs2_obj test_funcs_conflict_obj - COMMENT "Copying all test object files to test directory" -) + +find_package(CUDAToolkit) + +if(CUDAToolkit_FOUND) + enable_language(CUDA) + message(STATUS "CUDA found: ${CUDAToolkit_VERSION}") + add_library(test_funcs_cuda_obj OBJECT sources/test_funcs_cuda.cu) + target_link_libraries(test_funcs_cuda_obj PRIVATE tvm_ffi_header) + target_compile_options(test_funcs_cuda_obj PRIVATE -fPIC -O2) + install(FILES $ + DESTINATION ${CMAKE_CURRENT_SOURCE_DIR} RENAME test_funcs_cuda.o) +endif() diff --git a/addons/tvm-ffi-orcjit/tests/README.md b/addons/tvm-ffi-orcjit/tests/README.md index 1a90808f..852ff6ff 100644 --- a/addons/tvm-ffi-orcjit/tests/README.md +++ b/addons/tvm-ffi-orcjit/tests/README.md @@ -26,7 +26,7 @@ The tests require pre-built object files. To build them: ```bash cd tests cmake -B build -cmake --build build +cmake --build build --target install ``` This will compile `sources/test_funcs.cc` and generate `test_funcs.o` in the tests directory. 
diff --git a/addons/tvm-ffi-orcjit/tests/sources/test_funcs_cuda.cu b/addons/tvm-ffi-orcjit/tests/sources/test_funcs_cuda.cu new file mode 100644 index 00000000..3e9d9efd --- /dev/null +++ b/addons/tvm-ffi-orcjit/tests/sources/test_funcs_cuda.cu @@ -0,0 +1,83 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include +#include + +void checkPtr(void* ptr) { + cudaPointerAttributes attr; + cudaError_t err = cudaPointerGetAttributes(&attr, ptr); + + if (err != cudaSuccess) { + printf("Pointer check failed: %s\n", cudaGetErrorString(err)); + return; + } + + printf("Pointer is valid:\n"); + printf(" type : %d\n", attr.type); + printf(" device : %d\n", attr.device); + printf(" devicePointer: %p\n", attr.devicePointer); + printf(" hostPointer : %p\n", attr.hostPointer); +} + +// Simple addition function +__global__ void test_add_kernel(int* a, int* b, int* c) { *c = *a + *b; } +int test_add_impl(int a, int b) { + int c; + int *d_a, *d_b, *d_c; + cudaMalloc(&d_a, sizeof(int)); + cudaMalloc(&d_b, sizeof(int)); + cudaMalloc(&d_c, sizeof(int)); + cudaMemcpy(d_a, &a, sizeof(int), cudaMemcpyHostToDevice); + cudaMemcpy(d_b, &b, sizeof(int), cudaMemcpyHostToDevice); + printf("ttt %p %p %p\n", d_a, d_b, d_c); + checkPtr(d_a); + checkPtr(d_b); + checkPtr(d_c); + test_add_kernel<<<1, 1>>>(d_a, d_b, d_c); + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) printf("Kernel launch error: %s\n", cudaGetErrorString(err)); + cudaMemcpy(&c, d_c, sizeof(int), cudaMemcpyDeviceToHost); + printf("ggg %d %d %d", a, b, c); + cudaFree(d_a); + cudaFree(d_b); + cudaFree(d_c); + return c; +} +TVM_FFI_DLL_EXPORT_TYPED_FUNC(test_add, test_add_impl); + +// Multiplication function + +__global__ void test_multiply_kernel(int* a, int* b, int* c) { *c = *a * *b; } +int test_multiply_impl(int a, int b) { + int c; + int *d_a, *d_b, *d_c; + cudaMalloc(&d_a, sizeof(int)); + cudaMalloc(&d_b, sizeof(int)); + cudaMalloc(&d_c, sizeof(int)); + cudaMemcpy(d_a, &a, sizeof(int), cudaMemcpyHostToDevice); + cudaMemcpy(d_b, &b, sizeof(int), cudaMemcpyHostToDevice); + test_multiply_kernel<<<1, 1>>>(d_a, d_b, d_c); + cudaMemcpy(&c, d_c, sizeof(int), cudaMemcpyDeviceToHost); + cudaFree(d_a); + cudaFree(d_b); + cudaFree(d_c); + return c; +} +TVM_FFI_DLL_EXPORT_TYPED_FUNC(test_multiply, test_multiply_impl); diff --git a/addons/tvm-ffi-orcjit/tests/test_basic.py b/addons/tvm-ffi-orcjit/tests/test_basic.py index 396b1c07..78121d93 100644 --- a/addons/tvm-ffi-orcjit/tests/test_basic.py +++ b/addons/tvm-ffi-orcjit/tests/test_basic.py @@ -24,7 +24,7 @@ from tvm_ffi_orcjit import ExecutionSession -def get_test_obj_file() -> Path: +def get_test_obj_file(object_file: str) -> Path: """Get the path to the pre-built test object file. 
Returns @@ -35,51 +35,7 @@ def get_test_obj_file() -> Path: """ # The object file should be built by CMake and located in the tests directory test_dir = Path(__file__).parent - obj_file = test_dir / "test_funcs.o" - - if not obj_file.exists(): - raise FileNotFoundError( - f"Test object file not found: {obj_file}\n" - "Please build the test object file first:\n" - " cd tests && cmake -B build && cmake --build build" - ) - - return obj_file - - -def get_test_obj_file2() -> Path: - """Get the path to the second pre-built test object file. - - Returns - ------- - Path - Path to the test_funcs2.o object file. - - """ - test_dir = Path(__file__).parent - obj_file = test_dir / "test_funcs2.o" - - if not obj_file.exists(): - raise FileNotFoundError( - f"Test object file not found: {obj_file}\n" - "Please build the test object file first:\n" - " cd tests && cmake -B build && cmake --build build" - ) - - return obj_file - - -def get_test_obj_file_conflict() -> Path: - """Get the path to the conflicting test object file. - - Returns - ------- - Path - Path to the test_funcs_conflict.o object file. - - """ - test_dir = Path(__file__).parent - obj_file = test_dir / "test_funcs_conflict.o" + obj_file = test_dir / object_file if not obj_file.exists(): raise FileNotFoundError( @@ -107,7 +63,7 @@ def test_create_library() -> None: def test_load_and_execute_function() -> None: """Test loading an object file and executing a function.""" # Get pre-built test object file - obj_file = get_test_obj_file() + obj_file = get_test_obj_file("test_funcs.o") # Create session and library session = ExecutionSession() @@ -141,7 +97,7 @@ def test_multiple_libraries() -> None: def test_function_not_found() -> None: """Test that getting a non-existent function raises an error.""" # Get pre-built test object file - obj_file = get_test_obj_file() + obj_file = get_test_obj_file("test_funcs.o") session = ExecutionSession() lib = session.create_library() @@ -153,8 +109,8 @@ def test_function_not_found() -> None: def test_gradually_add_objects_to_same_library() -> None: """Test gradually adding multiple object files to the same library.""" - obj_file1 = get_test_obj_file() - obj_file2 = get_test_obj_file2() + obj_file1 = get_test_obj_file("test_funcs.o") + obj_file2 = get_test_obj_file("test_funcs2.o") session = ExecutionSession() lib = session.create_library() @@ -186,8 +142,8 @@ def test_gradually_add_objects_to_same_library() -> None: def test_two_separate_libraries() -> None: """Test creating two separate libraries each with its own object file.""" - obj_file1 = get_test_obj_file() - obj_file2 = get_test_obj_file2() + obj_file1 = get_test_obj_file("test_funcs.o") + obj_file2 = get_test_obj_file("test_funcs2.o") session = ExecutionSession() @@ -224,8 +180,8 @@ def test_two_separate_libraries() -> None: def test_symbol_conflict_same_library() -> None: """Test that adding objects with conflicting symbols to same library fails.""" - obj_file1 = get_test_obj_file() - obj_file_conflict = get_test_obj_file_conflict() + obj_file1 = get_test_obj_file("test_funcs.o") + obj_file_conflict = get_test_obj_file("test_funcs_conflict.o") session = ExecutionSession() lib = session.create_library() @@ -244,8 +200,8 @@ def test_symbol_conflict_same_library() -> None: def test_symbol_conflict_different_libraries() -> None: """Test that adding objects with conflicting symbols to different libraries works.""" - obj_file1 = get_test_obj_file() - obj_file_conflict = get_test_obj_file_conflict() + obj_file1 = get_test_obj_file("test_funcs.o") + 
obj_file_conflict = get_test_obj_file("test_funcs_conflict.o") session = ExecutionSession() @@ -274,5 +230,29 @@ def test_symbol_conflict_different_libraries() -> None: assert mul_func2(5, 6) == 60 # Conflict: (5 * 6) * 2 +def test_load_and_execute_cuda_function() -> None: + """Test loading an object file and executing a function.""" + # Get pre-built test object file + obj_file = get_test_obj_file("test_funcs_cuda.o") + + # Create session and library + session = ExecutionSession() + lib = session.create_library() + + # Load object file + lib.add(str(obj_file)) + + # Get and call test_add function + add_func = lib.get_function("test_add") + result = add_func(10, 20) + assert result == 30 + + # Get and call test_multiply function + mul_func = lib.get_function("test_multiply") + result = mul_func(7, 6) + assert result == 42 + + if __name__ == "__main__": - pytest.main([__file__, "-v"]) + # pytest.main([__file__, "-v"]) + test_load_and_execute_cuda_function() From 3066c29ea776aac18d6ba229ed6f0144c2e784c4 Mon Sep 17 00:00:00 2001 From: Yaxing Cai Date: Thu, 4 Dec 2025 06:15:07 +0000 Subject: [PATCH 43/47] fix --- .github/workflows/orcjit-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/orcjit-tests.yml b/.github/workflows/orcjit-tests.yml index 98878172..1e9807bc 100644 --- a/.github/workflows/orcjit-tests.yml +++ b/.github/workflows/orcjit-tests.yml @@ -100,5 +100,5 @@ jobs: working-directory: addons/tvm-ffi-orcjit/examples/quick-start run: | cmake -B build - cmake --build build + cmake --build build --target install python run.py From cbcd811d8f83deb535ab618b535fa36ef4408d4b Mon Sep 17 00:00:00 2001 From: Yaxing Cai Date: Thu, 4 Dec 2025 06:17:46 +0000 Subject: [PATCH 44/47] fix --- addons/tvm-ffi-orcjit/tests/CMakeLists.txt | 28 +++++++++++++++------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/addons/tvm-ffi-orcjit/tests/CMakeLists.txt b/addons/tvm-ffi-orcjit/tests/CMakeLists.txt index 5aa168f6..2873e006 100644 --- a/addons/tvm-ffi-orcjit/tests/CMakeLists.txt +++ b/addons/tvm-ffi-orcjit/tests/CMakeLists.txt @@ -39,22 +39,31 @@ find_package(tvm_ffi CONFIG REQUIRED) add_library(test_funcs_obj OBJECT sources/test_funcs.cc) target_link_libraries(test_funcs_obj PRIVATE tvm_ffi_header) target_compile_options(test_funcs_obj PRIVATE -fPIC -O2) -install(FILES $ - DESTINATION ${CMAKE_CURRENT_SOURCE_DIR} RENAME test_funcs.o) +install( + FILES $ + DESTINATION ${CMAKE_CURRENT_SOURCE_DIR} + RENAME test_funcs.o +) # Create object library for second set of test functions add_library(test_funcs2_obj OBJECT sources/test_funcs2.cc) target_link_libraries(test_funcs2_obj PRIVATE tvm_ffi_header) target_compile_options(test_funcs2_obj PRIVATE -fPIC -O2) -install(FILES $ - DESTINATION ${CMAKE_CURRENT_SOURCE_DIR} RENAME test_funcs2.o) +install( + FILES $ + DESTINATION ${CMAKE_CURRENT_SOURCE_DIR} + RENAME test_funcs2.o +) # Create object library for conflicting test functions add_library(test_funcs_conflict_obj OBJECT sources/test_funcs_conflict.cc) target_link_libraries(test_funcs_conflict_obj PRIVATE tvm_ffi_header) target_compile_options(test_funcs_conflict_obj PRIVATE -fPIC -O2) -install(FILES $ - DESTINATION ${CMAKE_CURRENT_SOURCE_DIR} RENAME test_funcs_conflict.o) +install( + FILES $ + DESTINATION ${CMAKE_CURRENT_SOURCE_DIR} + RENAME test_funcs_conflict.o +) find_package(CUDAToolkit) @@ -65,6 +74,9 @@ if(CUDAToolkit_FOUND) add_library(test_funcs_cuda_obj OBJECT sources/test_funcs_cuda.cu) target_link_libraries(test_funcs_cuda_obj PRIVATE 
tvm_ffi_header) target_compile_options(test_funcs_cuda_obj PRIVATE -fPIC -O2) - install(FILES $ - DESTINATION ${CMAKE_CURRENT_SOURCE_DIR} RENAME test_funcs_cuda.o) + install( + FILES $ + DESTINATION ${CMAKE_CURRENT_SOURCE_DIR} + RENAME test_funcs_cuda.o + ) endif() From 5464735c5761450803cfca5badfd4bd760550384 Mon Sep 17 00:00:00 2001 From: Yaxing Cai Date: Thu, 4 Dec 2025 06:19:53 +0000 Subject: [PATCH 45/47] fix --- .github/workflows/orcjit-tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/orcjit-tests.yml b/.github/workflows/orcjit-tests.yml index 1e9807bc..33bf486b 100644 --- a/.github/workflows/orcjit-tests.yml +++ b/.github/workflows/orcjit-tests.yml @@ -89,7 +89,7 @@ jobs: working-directory: addons/tvm-ffi-orcjit/tests run: | cmake -B build - cmake --build build + cmake --build build --target install - name: Run tests working-directory: addons/tvm-ffi-orcjit @@ -100,5 +100,5 @@ jobs: working-directory: addons/tvm-ffi-orcjit/examples/quick-start run: | cmake -B build - cmake --build build --target install + cmake --build build python run.py From 5aa9c61e902a62b64eab38889b9e2e49b1e2bd09 Mon Sep 17 00:00:00 2001 From: Yaxing Cai Date: Thu, 4 Dec 2025 06:24:03 +0000 Subject: [PATCH 46/47] fix --- addons/tvm-ffi-orcjit/tests/CMakeLists.txt | 5 ++--- addons/tvm-ffi-orcjit/tests/test_basic.py | 5 ++++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/addons/tvm-ffi-orcjit/tests/CMakeLists.txt b/addons/tvm-ffi-orcjit/tests/CMakeLists.txt index 2873e006..9d3f5826 100644 --- a/addons/tvm-ffi-orcjit/tests/CMakeLists.txt +++ b/addons/tvm-ffi-orcjit/tests/CMakeLists.txt @@ -65,10 +65,9 @@ install( RENAME test_funcs_conflict.o ) - find_package(CUDAToolkit) -if(CUDAToolkit_FOUND) +if (CUDAToolkit_FOUND) enable_language(CUDA) message(STATUS "CUDA found: ${CUDAToolkit_VERSION}") add_library(test_funcs_cuda_obj OBJECT sources/test_funcs_cuda.cu) @@ -79,4 +78,4 @@ if(CUDAToolkit_FOUND) DESTINATION ${CMAKE_CURRENT_SOURCE_DIR} RENAME test_funcs_cuda.o ) -endif() +endif () diff --git a/addons/tvm-ffi-orcjit/tests/test_basic.py b/addons/tvm-ffi-orcjit/tests/test_basic.py index 78121d93..7c21b48a 100644 --- a/addons/tvm-ffi-orcjit/tests/test_basic.py +++ b/addons/tvm-ffi-orcjit/tests/test_basic.py @@ -233,7 +233,10 @@ def test_symbol_conflict_different_libraries() -> None: def test_load_and_execute_cuda_function() -> None: """Test loading an object file and executing a function.""" # Get pre-built test object file - obj_file = get_test_obj_file("test_funcs_cuda.o") + try: + obj_file = get_test_obj_file("test_funcs_cuda.o") + except FileNotFoundError: + return # Create session and library session = ExecutionSession() From 316ea618ee0ece1ef6e553ebcb644ed3a8bf5a0d Mon Sep 17 00:00:00 2001 From: Yaxing Cai Date: Tue, 23 Dec 2025 10:03:27 +0000 Subject: [PATCH 47/47] wip --- addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc | 1 + .../tvm-ffi-orcjit/src/ffi/orcjit_session.cc | 8 +++++-- .../tests/sources/test_funcs_cuda.cu | 22 ------------------- 3 files changed, 7 insertions(+), 24 deletions(-) diff --git a/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc b/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc index 6e47ba9c..4c716e01 100644 --- a/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc +++ b/addons/tvm-ffi-orcjit/src/ffi/orcjit_dylib.cc @@ -129,6 +129,7 @@ Optional ORCJITDynamicLibraryObj::GetFunction(const String& name) { // Try to get the symbol - return NullOpt if not found if (void* symbol = GetSymbol(symbol_name)) { + 
cantFail(jit_->initialize(*dylib_)); // Wrap C function pointer as tvm-ffi Function TVMFFISafeCallType c_func = reinterpret_cast(symbol); diff --git a/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.cc b/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.cc index 06dedf22..5d7de046 100644 --- a/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.cc +++ b/addons/tvm-ffi-orcjit/src/ffi/orcjit_session.cc @@ -59,7 +59,10 @@ ORCJITExecutionSessionObj::ORCJITExecutionSessionObj() : jit_(nullptr), dylib_co void ORCJITExecutionSessionObj::Initialize() { // Create LLJIT instance - auto jit_or_err = llvm::orc::LLJITBuilder().create(); + auto jit_or_err = llvm::orc::LLJITBuilder() + .setPlatformSetUp(llvm::orc::ExecutorNativePlatform( + "/usr/lib/llvm-20/lib/clang/20/lib/linux/liborc_rt-x86_64.a")) + .create(); if (!jit_or_err) { auto err = jit_or_err.takeError(); std::string err_msg; @@ -92,7 +95,8 @@ ORCJITDynamicLibrary ORCJITExecutionSessionObj::CreateDynamicLibrary(const Strin << "DynamicLibrary with name '" << lib_name << "' already exists"; // Create a new JITDylib - auto& jd = jit_->getExecutionSession().createBareJITDylib(lib_name.c_str()); + // auto& jd = jit_->getExecutionSession().createBareJITDylib(lib_name.c_str()); + auto& jd = jit_->getMainJITDylib(); // Add process symbol resolver to make C/C++ stdlib available auto dlsg = llvm::orc::DynamicLibrarySearchGenerator::GetForCurrentProcess( diff --git a/addons/tvm-ffi-orcjit/tests/sources/test_funcs_cuda.cu b/addons/tvm-ffi-orcjit/tests/sources/test_funcs_cuda.cu index 3e9d9efd..fda0d7c3 100644 --- a/addons/tvm-ffi-orcjit/tests/sources/test_funcs_cuda.cu +++ b/addons/tvm-ffi-orcjit/tests/sources/test_funcs_cuda.cu @@ -17,25 +17,8 @@ #include -#include #include -void checkPtr(void* ptr) { - cudaPointerAttributes attr; - cudaError_t err = cudaPointerGetAttributes(&attr, ptr); - - if (err != cudaSuccess) { - printf("Pointer check failed: %s\n", cudaGetErrorString(err)); - return; - } - - printf("Pointer is valid:\n"); - printf(" type : %d\n", attr.type); - printf(" device : %d\n", attr.device); - printf(" devicePointer: %p\n", attr.devicePointer); - printf(" hostPointer : %p\n", attr.hostPointer); -} - // Simple addition function __global__ void test_add_kernel(int* a, int* b, int* c) { *c = *a + *b; } int test_add_impl(int a, int b) { @@ -46,15 +29,10 @@ int test_add_impl(int a, int b) { cudaMalloc(&d_c, sizeof(int)); cudaMemcpy(d_a, &a, sizeof(int), cudaMemcpyHostToDevice); cudaMemcpy(d_b, &b, sizeof(int), cudaMemcpyHostToDevice); - printf("ttt %p %p %p\n", d_a, d_b, d_c); - checkPtr(d_a); - checkPtr(d_b); - checkPtr(d_c); test_add_kernel<<<1, 1>>>(d_a, d_b, d_c); cudaError_t err = cudaGetLastError(); if (err != cudaSuccess) printf("Kernel launch error: %s\n", cudaGetErrorString(err)); cudaMemcpy(&c, d_c, sizeof(int), cudaMemcpyDeviceToHost); - printf("ggg %d %d %d", a, b, c); cudaFree(d_a); cudaFree(d_b); cudaFree(d_c);