Commit 6964967

refactor: replace TORCH_CHECK with CHECK macros and optimize code layout.
1 parent d95324a commit 6964967

14 files changed: +88 -101 lines changed
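
The change is mechanical across the 14 files: every TORCH_CHECK(cond, msg, ...) becomes the matching glog macro (CHECK, CHECK_EQ, CHECK_GT, CHECK_NE) with the message streamed via operator<<, and throw std::runtime_error(...) becomes LOG(FATAL). A minimal sketch of the pattern, with hypothetical helper names (setup_or_die and reject_mode are not xllm functions):

#include <glog/logging.h>

#include <string>

// Sketch only: the TORCH_CHECK -> glog conversion used throughout this commit.
void setup_or_die(int status, const std::string& op_name) {
  // Before: TORCH_CHECK(status == 0, op_name, " setup failed!");
  // CHECK_EQ stringifies the expression and logs both operand values
  // before aborting on failure.
  CHECK_EQ(status, 0) << op_name << " setup failed!";
}

void reject_mode(const std::string& mode) {
  // Before: throw std::runtime_error("unsupported mode: " + mode);
  // LOG(FATAL) writes the message to the log and terminates the process;
  // unlike the thrown exception, callers cannot catch it.
  LOG(FATAL) << "unsupported mode: " << mode;
}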

xllm/core/kernels/npu/active.cpp

Lines changed: 2 additions & 2 deletions
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/

+#include <glog/logging.h>
 #include <torch_npu/csrc/aten/CustomFunctions.h>

 #include "npu_ops_api.h"
@@ -22,8 +23,7 @@ namespace xllm::kernel::npu {

 torch::Tensor active(const torch::Tensor& input, const std::string& act_mode) {
   if (act_mode != "silu" && act_mode != "swiglu") {
-    throw std::runtime_error(
-        "Only swiglu activation is supported in NPU active");
+    LOG(FATAL) << "Only swiglu activation is supported in NPU active";
   }
   return at_npu::native::custom_ops::npu_swiglu(input);
 }
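
This is the first of several throw-to-LOG(FATAL) swaps in the commit, and it changes what callers can observe: the old std::runtime_error was catchable, while LOG(FATAL) logs and aborts. A caller-side sketch (hypothetical call site; input assumed to be a valid NPU tensor):

// Sketch: before this commit a caller could recover from a bad act_mode;
// afterwards LOG(FATAL) terminates the process and the catch block is dead code.
try {
  auto out = xllm::kernel::npu::active(input, "gelu");  // unsupported mode
} catch (const std::runtime_error& e) {
  // reachable only with the old throw-based error path
}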

xllm/core/kernels/npu/attention.cpp

Lines changed: 3 additions & 3 deletions
@@ -46,9 +46,9 @@ void batch_decode(const torch::Tensor& query,
                   const torch::Tensor& block_table,
                   const torch::Tensor& seq_lens,
                   torch::Tensor& output) {
-  auto head_size = query.size(-1);
-  auto num_heads = query.size(-2);
-  auto num_kv_heads = k_cache.size(-2);
+  int64_t head_size = query.size(-1);
+  int64_t num_heads = query.size(-2);
+  int64_t num_kv_heads = k_cache.size(-2);
   auto q = query.view({-1, num_heads, head_size});
   auto o = output.view({-1, num_heads, head_size});
   atb::_npu_paged_attention(q,

xllm/core/kernels/npu/custom_functions_npu/atb_common.cpp

Lines changed: 4 additions & 5 deletions
@@ -33,7 +33,7 @@ atb::Tensor at_tensor_to_atb_tensor(const at::Tensor at_tensor) {
       {at::ScalarType::ComplexDouble, ACL_COMPLEX128},
   };

-  TORCH_CHECK(at_tensor.is_contiguous(), "at_tensor is not contiguous");
+  CHECK(at_tensor.is_contiguous()) << "at_tensor is not contiguous";
   atb::Tensor tensor;
   tensor.desc.format = atb::utils::get_format_for_atb(at_tensor);
   if (at_tensor.device().type() == at::kCPU) {
@@ -48,9 +48,8 @@ atb::Tensor at_tensor_to_atb_tensor(const at::Tensor at_tensor) {
   }

   auto dtype_iterator = dtype_map.find(at_tensor.scalar_type());
-  TORCH_CHECK(dtype_iterator != dtype_map.end(),
-              "not support dtype: ",
-              at_tensor.scalar_type());
+  CHECK(dtype_iterator != dtype_map.end())
+      << "not support dtype: " << at_tensor.scalar_type();
   tensor.desc.dtype = dtype_iterator->second;

   tensor.dataSize = atb::Utils::GetTensorSize(tensor);
@@ -168,7 +167,7 @@ uint64_t operation_setup(atb::VariantPack variant_pack,
   uint64_t workspace_size = 0;
   atb::Status status =
       operation->Setup(variant_pack, workspace_size, context_ptr);
-  TORCH_CHECK(status == 0, operation->GetName(), " setup failed!");
+  CHECK_EQ(status, 0) << operation->GetName() << " setup failed!";
   return workspace_size;
 }


xllm/core/kernels/npu/custom_functions_npu/atb_common.h

Lines changed: 20 additions & 31 deletions
@@ -16,6 +16,7 @@ limitations under the License.
 #pragma once

 #include <dlfcn.h>
+#include <glog/logging.h>
 #include <torch/library.h>
 #include <torch_npu/csrc/core/npu/NPUStream.h>
 #include <torch_npu/csrc/core/npu/NPUWorkspaceAllocator.h>
@@ -30,7 +31,7 @@ namespace atb {

 using aclTensor = struct aclTensor;
 constexpr int64_t MAX_DIM_NUM = 5;
-const int N = 32;
+const int64_t N = 32;

 using _aclCreateTensor = aclTensor* (*)(const int64_t* view_dims,
                                         uint64_t view_dims_num,
@@ -87,7 +88,7 @@ inline void* get_api_func_addr(const char* api_name) {
     if (func_addr != nullptr) {
       return func_addr;
     }
-    TORCH_CHECK(false, "get_api_func_addr not found ", api_name);
+    LOG(FATAL) << "get_api_func_addr not found " << api_name;
   }
 }

@@ -119,8 +120,8 @@ inline aclTensor* convert_type(TensorMaintainer& maintainer,
   c10::SmallVector<int64_t, MAX_DIM_NUM> storageDims;
   // if acl_data_type is ACL_STRING, storageDims is empty.
   if (acl_data_type != ACL_STRING) {
-    TORCH_CHECK(at_tensor.itemsize() > 0,
-                "the itemsize of tensor must be greater than 0.");
+    CHECK_GT(at_tensor.itemsize(), 0)
+        << "the itemsize of tensor must be greater than 0.";
     storageDims.push_back(at_tensor.storage().nbytes() / at_tensor.itemsize());
   }

@@ -245,8 +246,8 @@ inline aclTensor* convert_type_v2(TensorStructPtr at_tensor) {
       atb::utils::convert_to_acl_data_type(scalar_data_type);
   c10::SmallVector<int64_t, MAX_DIM_NUM> storageDims;
   if (acl_data_type != ACL_STRING) {
-    TORCH_CHECK((*at_tensor).itemsize > 0,
-                "the itemsize of tensor must be greater than 0.");
+    CHECK_GT((*at_tensor).itemsize, 0)
+        << "the itemsize of tensor must be greater than 0.";
     storageDims.push_back((*at_tensor).nbytes / (*at_tensor).itemsize);
   }

@@ -349,16 +350,10 @@ void release_convert_types(Tuple& t) {
   static const auto getWorkspaceSizeFuncAddr = \
       get_api_func_addr(#atb_api "GetWorkspaceSize"); \
   static const auto atbApiFuncAddr = get_api_func_addr(#atb_api); \
-  TORCH_CHECK( \
-      getWorkspaceSizeFuncAddr != nullptr && atbApiFuncAddr != nullptr, \
-      #atb_api, \
-      " or ", \
-      #atb_api "GetWorkspaceSize", \
-      " not in ", \
-      get_atb_api_lib_name(), \
-      ", or ", \
-      get_atb_api_lib_name(), \
-      "not found."); \
+  CHECK(getWorkspaceSizeFuncAddr != nullptr && atbApiFuncAddr != nullptr) \
+      << #atb_api << " or " << #atb_api "GetWorkspaceSize" << " not in " \
+      << get_atb_api_lib_name() << ", or " << get_atb_api_lib_name() \
+      << "not found."; \
   auto acl_stream = c10_npu::getCurrentNPUStream().stream(false); \
   auto context_ptr = atb::utils::get_context(acl_stream); \
   uint64_t workspace_size = 0; \
@@ -374,7 +369,7 @@ void release_convert_types(Tuple& t) {
   static auto getWorkspaceSizeFunc = \
       convert_to_op_api_func(converted_params, getWorkspaceSizeFuncAddr); \
   auto workspace_status = call(getWorkspaceSizeFunc, converted_params); \
-  TORCH_CHECK(workspace_status == 0, "call " #atb_api " failed, detail:"); \
+  CHECK_EQ(workspace_status, 0) << "call " #atb_api " failed, detail:"; \
   void* workspace_addr = nullptr; \
   at::Tensor workspace_tensor; \
   if (workspace_size != 0) { \
@@ -395,7 +390,7 @@ void release_convert_types(Tuple& t) {
   AtbApiFunc atbApiFunc = reinterpret_cast<AtbApiFunc>(atbApiFuncAddr); \
   auto api_ret = \
       atbApiFunc(workspace_addr, workspace_size, op, context_ptr); \
-  TORCH_CHECK(api_ret == 0, "call " #atb_api " failed, detail:"); \
+  CHECK_EQ(api_ret, 0) << "call " #atb_api " failed, detail:"; \
   DestroyOperation(op); \
   release_convert_types(converted_params); \
   return api_ret; \
@@ -408,16 +403,10 @@ void release_convert_types(Tuple& t) {
   static const auto getWorkspaceSizeFuncAddr = \
       get_api_func_addr(#atb_api "GetWorkspaceSize"); \
   static const auto AtbApiFuncAddr = get_api_func_addr(#atb_api); \
-  TORCH_CHECK( \
-      getWorkspaceSizeFuncAddr != nullptr && AtbApiFuncAddr != nullptr, \
-      #atb_api, \
-      " or ", \
-      #atb_api "GetWorkspaceSize", \
-      " not in ", \
-      get_atb_api_lib_name(), \
-      ", or ", \
-      get_atb_api_lib_name(), \
-      "not found."); \
+  CHECK(getWorkspaceSizeFuncAddr != nullptr && AtbApiFuncAddr != nullptr) \
+      << #atb_api << " or " << #atb_api "GetWorkspaceSize" << " not in " \
+      << get_atb_api_lib_name() << ", or " << get_atb_api_lib_name() \
+      << "not found."; \
   auto acl_stream = c10_npu::getCurrentNPUStream().stream(false); \
   TensorMaintainer tensor_maintainer; \
   auto copied_params = copy_types_v2(tensor_maintainer, __VA_ARGS__); \
@@ -440,8 +429,8 @@ void release_convert_types(Tuple& t) {
       convert_to_op_api_func(converted_params, getWorkspaceSizeFuncAddr); \
   auto workspace_status = call(getWorkspaceSizeFunc, converted_params); \
   opParamCache.save_operation(hash_id, op); \
-  TORCH_CHECK(workspace_status == 0, \
-              "call " #atb_api "GetWorkspaceSize failed"); \
+  CHECK_EQ(workspace_status, 0) \
+      << "call " #atb_api "GetWorkspaceSize failed"; \
   void* workspace_addr = nullptr; \
   at::Tensor workspace_tensor; \
   if (workspace_size != 0) { \
@@ -451,7 +440,7 @@ void release_convert_types(Tuple& t) {
   } \
   AtbApiFunc atbApiFunc = reinterpret_cast<AtbApiFunc>(AtbApiFuncAddr); \
   api_ret = atbApiFunc(workspace_addr, workspace_size, op, context_ptr); \
-  TORCH_CHECK(api_ret == 0, "call " #atb_api " failed"); \
+  CHECK_EQ(api_ret, 0) << "call " #atb_api " failed"; \
   release_convert_types(converted_params); \
   return api_ret; \
 }; \
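
A side benefit of CHECK_EQ inside these backslash-continued macros: glog stringifies the comparison and prints both operand values, so the old multi-line, hand-formatted TORCH_CHECK messages collapse into a short streamed suffix. A standalone sketch of the diagnostic (the operation name AtbRmsNorm is made up):

#include <glog/logging.h>

// Sketch: on failure glog emits something like
//   Check failed: status == 0 (1 vs. 0) call AtbRmsNorm failed
// and then aborts, so the call site need not format the status itself.
void check_atb_status(int status) {
  CHECK_EQ(status, 0) << "call AtbRmsNorm failed";
}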

xllm/core/kernels/npu/custom_functions_npu/operation_create.h

Lines changed: 2 additions & 1 deletion
@@ -15,6 +15,7 @@ limitations under the License.

 #pragma once

+#include <glog/logging.h>
 #include <torch_npu/csrc/core/npu/NPUGraphsUtils.h>
 #include <torch_npu/csrc/framework/OpCommand.h>

@@ -55,7 +56,7 @@ atb::Operation* create_atb_operation(const ParamType& param,
                                      const std::string& name) {
   atb::Operation* op = nullptr;
   atb::CreateOperation(param, &op);
-  TORCH_CHECK(op != nullptr, name, " CreateOperation failed!");
+  CHECK(op != nullptr) << name << " CreateOperation failed!";
   return op;
 }


xllm/core/kernels/npu/custom_functions_npu/utils.cpp

Lines changed: 4 additions & 4 deletions
@@ -30,15 +30,15 @@ ContextManager::ContextManager() : atb_context_(nullptr) {}
 ContextManager::~ContextManager() {
   if (atb_context_) {
     auto status = atb::DestroyContext(atb_context_);
-    TORCH_CHECK(status == 0, "Destroy context failed!");
+    CHECK_EQ(status, 0) << "Destroy context failed!";
     atb_context_ = nullptr;
   }
 }

 atb::Context* ContextManager::get_context(aclrtStream stream) {
   std::call_once(create_flag_, [this]() {
     auto status = atb::CreateContext(&atb_context_);
-    TORCH_CHECK(status == 0, "Create context failed!");
+    CHECK_EQ(status, 0) << "Create context failed!";
   });

   atb_context_->SetExecuteStream(stream);
@@ -52,8 +52,8 @@ atb::Context* get_context(aclrtStream stream) {
 aclDataType convert_to_acl_data_type(const at::ScalarType& data_type) {
   auto acl_dtype =
       kATenScalarTypeToAclDataTypeTable[static_cast<int64_t>(data_type)];
-  TORCH_CHECK(acl_dtype != ACL_DT_UNDEFINED,
-              std::string(c10::toString(data_type)) + " has not been supported")
+  CHECK_NE(acl_dtype, ACL_DT_UNDEFINED)
+      << std::string(c10::toString(data_type)) << " has not been supported";
   return acl_dtype;
 }


xllm/core/kernels/npu/custom_functions_npu/utils.h

Lines changed: 3 additions & 6 deletions
@@ -17,6 +17,7 @@ limitations under the License.

 #include <ATen/ATen.h>
 #include <acl/acl.h>
+#include <glog/logging.h>
 #include <torch_npu/csrc/core/npu/NPUFormat.h>

 #include "atb/atb_infer.h"
@@ -88,12 +89,8 @@ inline int get_op_mode(const MapType& mode_map,
                        const char* mode_name) {
   c10::string_view mode_str = mode_opt.value_or(default_mode);
   auto it = mode_map.find(mode_str);
-  TORCH_CHECK(it != mode_map.end(),
-              "Unsupported ",
-              mode_name,
-              " value: '",
-              mode_str,
-              "'");
+  CHECK(it != mode_map.end())
+      << "Unsupported " << mode_name << " value: '" << mode_str << "'";
   return it->second;
 }
 }  // namespace utils

xllm/core/kernels/npu/fused_layernorm.cpp

Lines changed: 6 additions & 6 deletions
@@ -12,20 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+#include <glog/logging.h>
 #include <torch_npu/csrc/aten/CustomFunctions.h>

 #include "npu_ops_api.h"
 #include "ops_npu/npu_ops.h"

 namespace xllm::kernel::npu {

-torch::Tensor fused_layernorm(const torch::Tensor& input,
-                              const torch::Tensor& weight,
-                              double eps,
-                              const std::string& mode) {
+torch::Tensor rms_norm(const torch::Tensor& input,
+                       const torch::Tensor& weight,
+                       double eps,
+                       const std::string& mode) {
   if (mode != "rmsnorm") {
-    throw std::runtime_error(
-        "Only rmsnorm mode is supported in NPU fused_layernorm");
+    LOG(FATAL) << "Only rmsnorm mode is supported in NPU rms_norm";
   }
   std::tuple<at::Tensor, at::Tensor> result =
       at_npu::native::custom_ops::npu_rms_norm(input, weight, eps);
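
Call sites migrate from fused_layernorm(...) to rms_norm(...) with an otherwise unchanged signature, as the npu_ops_api.h diff below confirms. A hedged usage sketch (shapes illustrative; assumes an NPU build with the tensors on the NPU device):

// Sketch: calling the renamed entry point.
torch::Tensor x = torch::randn({4, 1024});  // illustrative activation
torch::Tensor w = torch::ones({1024});      // per-channel RMSNorm weight
torch::Tensor y = xllm::kernel::npu::rms_norm(x, w, /*eps=*/1e-6, "rmsnorm");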

xllm/core/kernels/npu/npu_ops_api.h

Lines changed: 5 additions & 5 deletions
@@ -18,7 +18,7 @@ limitations under the License.

 #include <optional>

-#include "./custom_functions_npu/atb_common.h"
+#include "custom_functions_npu/atb_common.h"

 namespace xllm::kernel::npu {

@@ -50,10 +50,10 @@ torch::Tensor matmul(const torch::Tensor& a,

 torch::Tensor active(const torch::Tensor& input, const std::string& act_mode);

-torch::Tensor fused_layernorm(const torch::Tensor& input,
-                              const torch::Tensor& weight,
-                              double eps,
-                              const std::string& mode);
+torch::Tensor rms_norm(const torch::Tensor& input,
+                       const torch::Tensor& weight,
+                       double eps,
+                       const std::string& mode);

 void apply_rotary(torch::Tensor& q,
                   torch::Tensor& k,

xllm/core/kernels/npu/ops_npu/npu_ops.h

Lines changed: 1 addition & 5 deletions
@@ -14,16 +14,12 @@ limitations under the License.
 ==============================================================================*/
 #pragma once

-#include "../custom_functions_npu/atb_common.h"
+#include "kernels/npu/custom_functions_npu/atb_common.h"

 using namespace std;

 namespace atb {

-using PagedAttentionParam = atb::infer::PagedAttentionParam;
-using ReshapeAndCacheParam = atb::infer::ReshapeAndCacheParam;
-using SelfAttentionParam = atb::infer::SelfAttentionParam;
-
 void _npu_paged_attention(const at::Tensor& query,
                           const at::Tensor& key_cache,
                           const at::Tensor& value_cache,
