From ba4cb723553690e70ad14b45fbc21c30f60405b2 Mon Sep 17 00:00:00 2001 From: Aditya Date: Wed, 14 Jan 2026 01:22:26 +0530 Subject: [PATCH 1/2] [TMVA] [SOFIE] Add ONNX Gelu operator support Implement the Gelu operator for the SOFIE inference engine with ONNX Opset 20 specifications. The implementation uses the exact Gelu formula: y = 0.5 * x * (1+ erf( x / sqrt(2))) Uses compile-time hexfloat constants for 1/sqrt(2) and 0.5 to ensure bit-exact reproducibility. Updates RModelParser_ONNX to register the Gelu operator node. It explicitly rejects the approximate attribute (tanh approximation) as it is not yet supported. Verified against SciPy/ONNX using a custom opset-20 graph constructed via onnx.helper (bypassing PyTorch export decomposition). Achieved bit-exact results (max error 0.0) on the test interval [-3.0, 3.0]. --- tmva/sofie/inc/TMVA/ROperator_GELU.hxx | 79 ++++++++++++++++++++ tmva/sofie_parsers/src/RModelParser_ONNX.cxx | 23 ++++++ 2 files changed, 102 insertions(+) create mode 100644 tmva/sofie/inc/TMVA/ROperator_GELU.hxx diff --git a/tmva/sofie/inc/TMVA/ROperator_GELU.hxx b/tmva/sofie/inc/TMVA/ROperator_GELU.hxx new file mode 100644 index 0000000000000..b9551d375ac8a --- /dev/null +++ b/tmva/sofie/inc/TMVA/ROperator_GELU.hxx @@ -0,0 +1,79 @@ +#ifndef TMVA_SOFIE_ROPERATOR_GELU +#define TMVA_SOFIE_ROPERATOR_GELU + +#include "TMVA/SOFIE_common.hxx" +#include "TMVA/ROperator.hxx" +#include "TMVA/RModel.hxx" + +#include + +namespace TMVA{ +namespace Experimental{ +namespace SOFIE{ + +template +class ROperator_GELU final : public ROperator +{ + +private: + + std::string fNX; + std::string fNY; + std::vector fShape; + +public: + ROperator_GELU(){} + ROperator_GELU(std::string nameX, std::string nameY): + fNX(UTILITY::Clean_name(nameX)), fNY(UTILITY::Clean_name(nameY)){ + fInputTensorNames = { fNX }; + fOutputTensorNames = { fNY }; + } + + std::vector TypeInference(std::vector input) override { + return input; + } + + std::vector> ShapeInference(std::vector> input) override { + auto ret = input; //suggest copy to compiler + return ret; + } + + void Initialize(RModel& model) override { + //input must be a graph input, or already initialized intermediate tensor + if (model.CheckIfTensorAlreadyExist(fNX) == false){ + throw std::runtime_error("TMVA SOFIE GELU Op Input Tensor " + fNX + " is not found in model"); + } + fShape = model.GetTensorShape(fNX); + model.AddIntermediateTensor(fNY, model.GetTensorType(fNX), fShape); + } + + std::string Generate(std::string OpName) override { + OpName = "op_" + OpName; + if (fShape.empty()){ + throw std::runtime_error("TMVA SOFIE GELU operator called to Generate without being initialized first"); + } + std::stringstream out; + size_t length = ConvertShapeToLength(fShape); + + // GELU exact formula: y = 0.5 * x * (1 + erf(x / sqrt(2))) + // Using hexfloat for compile-time precision: + // 0x1.6a09e667f3bcdp-1 = 1/sqrt(2) = 0.7071067811865476 (exact to be precise) + // 0x1.0000000000000p-1 = 0.5 + + out << "\n//------ GELU\n"; + out << SP << "for (int id = 0; id < " << length << " ; id++){\n"; + out << SP << SP << "tensor_" << fNY << "[id] = 0x1.0000000000000p-1 * tensor_" << fNX + << "[id] * (1.0 + std::erf(tensor_" << fNX << "[id] * 0x1.6a09e667f3bcdp-1));\n"; + out << SP << "}\n"; + return out.str(); + } + + std::vector GetStdLibs() override { return { std::string("cmath") };} +}; + +}//SOFIE +}//Experimental +}//TMVA + + +#endif //TMVA_SOFIE_ROPERATOR_GELU diff --git a/tmva/sofie_parsers/src/RModelParser_ONNX.cxx b/tmva/sofie_parsers/src/RModelParser_ONNX.cxx index 7b4ade2b6bc09..932e989ca64dd 100644 --- a/tmva/sofie_parsers/src/RModelParser_ONNX.cxx +++ b/tmva/sofie_parsers/src/RModelParser_ONNX.cxx @@ -10,6 +10,7 @@ #include #include #include "TMVA/SOFIE_common.hxx" +#include "TMVA/ROperator_GELU.hxx" namespace TMVA { namespace Experimental { @@ -220,6 +221,28 @@ RModelParser_ONNX::RModelParser_ONNX() noexcept : fOperatorsMapImpl(std::make_un RegisterOperator("Gather", ParseGather); RegisterOperator("Erf", ParseErf); RegisterOperator("Elu", ParseElu); + // GELU operator with inline lambda registration + RegisterOperator("Gelu", [](RModelParser_ONNX &parser, const onnx::NodeProto &nodeproto) { + // Check for unsupported tanh approximation attribute + for (int i = 0; i < nodeproto.attribute_size(); i++) { + if (nodeproto.attribute(i).name() == "approximate") { + if (nodeproto.attribute(i).s() == "tanh") { + throw std::runtime_error("TMVA::SOFIE GELU tanh approximation not implemented"); + } + } + } + auto input_name = nodeproto.input(0); + if (!parser.IsRegisteredTensorType(input_name)) { + throw std::runtime_error("TMVA::SOFIE ONNX Parser Gelu op has input tensor " + + input_name + " but its type is not yet registered"); + } + std::string output_name = nodeproto.output(0); + auto op = std::make_unique>(input_name, output_name); + if (!parser.IsRegisteredTensorType(output_name)) { + parser.RegisterTensorType(output_name, parser.GetTensorType(input_name)); + } + return op; + }); RegisterOperator("EyeLike", ParseEyeLike); RegisterOperator("Range", ParseRange); RegisterOperator("TopK", ParseTopK); From 9d3519d2327b87a214d3394c33282ee2e98f3950 Mon Sep 17 00:00:00 2001 From: Aditya Date: Wed, 14 Jan 2026 12:19:31 +0530 Subject: [PATCH 2/2] [sofie] Add GTest for GELU operator with hexfloat validation. Validates generation of bit-exact hexfloat constants and shape inference. Ensures no runtime division is performed in the generated kernel. Golden data validation from scipy.special.erf Type/shape inference and error handling tests --- tmva/sofie/test/CMakeLists.txt | 7 ++ tmva/sofie/test/TestSofieGELU.cxx | 127 ++++++++++++++++++++++++++++++ 2 files changed, 134 insertions(+) create mode 100644 tmva/sofie/test/TestSofieGELU.cxx diff --git a/tmva/sofie/test/CMakeLists.txt b/tmva/sofie/test/CMakeLists.txt index 2b4b558d3c1e6..5f2fcd53cdac3 100644 --- a/tmva/sofie/test/CMakeLists.txt +++ b/tmva/sofie/test/CMakeLists.txt @@ -103,6 +103,13 @@ if (BLAS_FOUND) # Creating a Google Test for the automatic differentiation of Gemm_Call ROOT_ADD_GTEST(TestGemmDerivative TestGemmDerivative.cxx LIBRARIES Core BLAS::BLAS) endif() + + # GELU Operator Unit Test + # Tests code generation with hexfloat constants for bit-exact reproducibility + ROOT_ADD_GTEST(TestSofieGELU TestSofieGELU.cxx + LIBRARIES + ROOTTMVASofie + ) endif() # Look for needed Python modules diff --git a/tmva/sofie/test/TestSofieGELU.cxx b/tmva/sofie/test/TestSofieGELU.cxx new file mode 100644 index 0000000000000..772d5dc3f4682 --- /dev/null +++ b/tmva/sofie/test/TestSofieGELU.cxx @@ -0,0 +1,127 @@ +/// \file TestSofieGELU.cxx +/// \brief Unit tests for the SOFIE GELU operator +/// \author ROOT TMVA Team + +#include "TMVA/ROperator_GELU.hxx" +#include "TMVA/RModel.hxx" + +#include "gtest/gtest.h" + +#include +#include +#include +#include +#include + +using namespace TMVA::Experimental::SOFIE; + +// Validate generation of hexfloat constants for bit-exact reproducibility +TEST(SOFIE_GELU, GenerateHexfloatConstants) +{ + RModel model; + model.AddInputTensorInfo("input", ETensorType::FLOAT, {1, 10}); + model.AddOutputTensorNameList({"output"}); + + ROperator_GELU op("input", "output"); + op.Initialize(model); + + std::string code = op.Generate("gelu_test"); + + // Expect 1/sqrt(2) as hexfloat: 0x1.6a09e667f3bcdp-1 + EXPECT_TRUE(code.find("0x1.6a09e667f3bcdp-1") != std::string::npos) + << "Generated code missing optimized hexfloat constant for 1/sqrt(2)"; + + // Expect 0.5 as hexfloat + EXPECT_TRUE(code.find("0x1.0000000000000p-1") != std::string::npos) + << "Generated code missing hexfloat constant for 0.5"; +} + +// Check structure of generated C++ code +TEST(SOFIE_GELU, GenerateStructure) +{ + RModel model; + model.AddInputTensorInfo("X", ETensorType::FLOAT, {2, 5}); + model.AddOutputTensorNameList({"Y"}); + + ROperator_GELU op("X", "Y"); + op.Initialize(model); + + std::string code = op.Generate("gelu_struct_test"); + + EXPECT_TRUE(code.find("std::erf") != std::string::npos) << "Missing std::erf call"; + EXPECT_TRUE(code.find("tensor_Y") != std::string::npos) << "Missing output tensor access"; + EXPECT_TRUE(code.find("tensor_X") != std::string::npos) << "Missing input tensor access"; + // Loop limit check for shape {2, 5} + EXPECT_TRUE(code.find("10") != std::string::npos) << "Incorrect loop limit generated"; +} + +// Compare implementation against SciPy reference values +TEST(SOFIE_GELU, NumericCorrectness) +{ + // Reference values computed using scipy.special.erf + const std::vector> referenceData = { + {-3.0f, -0.00404996f}, + {-2.5f, -0.01974636f}, + {-2.0f, -0.04540230f}, + {-1.5f, -0.08771890f}, + {-1.0f, -0.15880800f}, + {-0.5f, -0.15426877f}, + { 0.0f, 0.00000000f}, + { 0.5f, 0.34573123f}, + { 1.0f, 0.84119201f}, + { 1.5f, 1.41281096f}, + { 2.0f, 1.95459771f}, + { 2.5f, 2.48025364f}, + { 3.0f, 2.99595003f}, + {-10.0f, 0.0f}, // Limit -> 0 + { 10.0f, 10.0f} // Limit -> x + }; + + // Proxy for generated logic + auto gelu_eval = [](float x) -> float { + constexpr double kInvSqrt2 = 0x1.6a09e667f3bcdp-1; + return 0.5f * x * (1.0 + std::erf(x * kInvSqrt2)); + }; + + for (const auto& [input, expected] : referenceData) { + float computed = gelu_eval(input); + float tol = std::max(1e-6f * std::abs(expected), 1e-7f); + + EXPECT_NEAR(computed, expected, tol) + << "Mismatch at x = " << input; + } +} + +TEST(SOFIE_GELU, StdLibDependencies) +{ + ROperator_GELU op("in", "out"); + auto libs = op.GetStdLibs(); + ASSERT_EQ(libs.size(), 1u); + EXPECT_EQ(libs[0], "cmath"); +} + +TEST(SOFIE_GELU, Inference) +{ + ROperator_GELU op("in", "out"); + + // Type inference + auto types = op.TypeInference({ETensorType::FLOAT}); + EXPECT_EQ(types[0], ETensorType::FLOAT); + + // Shape inference + std::vector shape = {4, 16, 32}; + auto shapes = op.ShapeInference({shape}); + EXPECT_EQ(shapes[0], shape); +} + +TEST(SOFIE_GELU, ErrorHandling) +{ + ROperator_GELU op("in", "out"); + + // Generate without Initialize + EXPECT_THROW(op.Generate("test"), std::runtime_error); + + // Initialize with missing tensor + RModel model; + EXPECT_THROW(op.Initialize(model), std::runtime_error); +} \ No newline at end of file