Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/llm/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -136,25 +136,30 @@ ovms_cc_library( # TODO split further so we don't have to recompile everything w
hdrs = [
"io_processing/hermes3/tool_parser.hpp",
"io_processing/llama3/tool_parser.hpp",
"io_processing/llama3/genai_tool_parser.hpp",
"io_processing/phi4/tool_parser.hpp",
"io_processing/devstral/tool_parser.hpp",
"io_processing/mistral/tool_parser.hpp",
"io_processing/qwen3/reasoning_parser.hpp",
"io_processing/gptoss/reasoning_parser.hpp",
"io_processing/gptoss/tool_parser.hpp",
"io_processing/gptoss/harmony.hpp",
"io_processing/base_genai_parser.hpp",
"io_processing/genai_parser_adapter.hpp",
"io_processing/output_parser.hpp",
],
srcs = [
"io_processing/hermes3/tool_parser.cpp",
"io_processing/llama3/tool_parser.cpp",
"io_processing/llama3/genai_tool_parser.cpp",
"io_processing/phi4/tool_parser.cpp",
"io_processing/devstral/tool_parser.cpp",
"io_processing/mistral/tool_parser.cpp",
"io_processing/qwen3/reasoning_parser.cpp",
"io_processing/gptoss/reasoning_parser.cpp",
"io_processing/gptoss/tool_parser.cpp",
"io_processing/gptoss/harmony.cpp",
"io_processing/genai_parser_adapter.cpp",
"io_processing/output_parser.cpp",
],
deps = [
Expand Down
133 changes: 133 additions & 0 deletions src/llm/io_processing/base_genai_parser.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
//*****************************************************************************
// Copyright 2026 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once

#include <string>
#include <vector>

#include <openvino/genai/parsers.hpp>

namespace ovms {

/**
* @brief Extension of ov::genai::Parser that exposes OVMS OutputParser metadata.
*
* Concrete implementations should override the tag getters to tell
* GenAIParserAdapter (and therefore the surrounding OutputParser machinery)
* where parsing segments begin and end in the generated text, and whether
* the text streamer must preserve special tokens.
*
* Defaults:
* - getParsingStartTags() pure virtual — must be overridden
* - getSpecialParsingStartTags() returns empty vector
* - getParsingEndTag() pure virtual — must be overridden
* - requiresStreamingWithSpecialTokens() returns false
*/
/**
 * @brief ov::genai::Parser enriched with the metadata OVMS OutputParser needs.
 *
 * Implementations override the tag getters so GenAIParserAdapter can tell the
 * surrounding OutputParser machinery where this parser's segment starts and
 * ends in the generated text, and whether special tokens must survive
 * detokenization.
 */
class BaseGenAIParser : public ov::genai::Parser {
public:
    BaseGenAIParser() = default;
    ~BaseGenAIParser() override = default;

    /// Tags that switch OutputParser into the phase handled by this parser.
    /// Return an empty vector when the parser claims all output unconditionally.
    virtual const std::vector<std::string>& getParsingStartTags() const = 0;

    /// Tags that trigger parsing only at the very beginning of the output;
    /// consulted exclusively while OutputParser is in its UNKNOWN phase.
    /// Default: no such tags.
    virtual const std::vector<std::string>& getSpecialParsingStartTags() const {
        static const std::vector<std::string> kNoTags{};
        return kNoTags;
    }

    /// Tag closing the segment this parser processes. Return an empty string
    /// when the parser consumes everything up to end-of-generation.
    virtual const std::string& getParsingEndTag() const = 0;

    /// When true, the tokenizer feeding the TextStreamer must keep special
    /// tokens in the text so this parser can use them as boundaries.
    /// Default: special tokens may be skipped.
    virtual bool requiresStreamingWithSpecialTokens() const {
        return false;
    }
};

/**
* @brief Extension of ov::genai::IncrementalParser that exposes OVMS OutputParser metadata.
*
* Mirrors BaseGenAIParser but for the streaming (incremental) variant.
* The same defaults apply.
*/
/**
 * @brief ov::genai::IncrementalParser enriched with OVMS OutputParser metadata.
 *
 * Streaming counterpart of BaseGenAIParser; the same tag-getter contract and
 * defaults apply, plus a flush() hook for draining look-ahead buffers.
 */
class BaseGenAIIncrementalParser : public ov::genai::IncrementalParser {
public:
    BaseGenAIIncrementalParser() = default;
    ~BaseGenAIIncrementalParser() override = default;

    /// Tags that switch OutputParser into the phase handled by this parser.
    virtual const std::vector<std::string>& getParsingStartTags() const = 0;

    /// Tags that trigger parsing only at the very beginning of the output.
    /// Default: no such tags.
    virtual const std::vector<std::string>& getSpecialParsingStartTags() const {
        static const std::vector<std::string> kNoTags{};
        return kNoTags;
    }

    /// Tag closing the segment this parser processes.
    virtual const std::string& getParsingEndTag() const = 0;

    /// When true, the text streamer must keep special tokens in the stream.
    /// Default: special tokens may be skipped.
    virtual bool requiresStreamingWithSpecialTokens() const {
        return false;
    }

    /**
     * @brief Drain any internally buffered state after the final chunk.
     *
     * GenAIParserAdapter::parseChunk invokes this when finishReason != NONE,
     * right after the last parse() call. Implementations that hold back
     * content in a delay window (or any look-ahead buffer) must emit the
     * pending material here as a structured delta.
     *
     * The base implementation does nothing.
     *
     * @param delta_message Filled with a final delta when pending content
     *                      exists; otherwise left untouched.
     */
    virtual void flush(ov::genai::JsonContainer& /*delta_message*/) {}
};

} // namespace ovms
197 changes: 197 additions & 0 deletions src/llm/io_processing/genai_parser_adapter.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
//*****************************************************************************
// Copyright 2026 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************

#include "genai_parser_adapter.hpp"

#include <string>
#include <utility>
#include <vector>

namespace ovms {

// Wraps a whole-output GenAI parser and its streaming (incremental)
// counterpart behind the OVMS BaseOutputParser interface. Both shared_ptr
// handles are taken by value and moved into the members; the tokenizer
// reference is forwarded to the BaseOutputParser base.
GenAIParserAdapter::GenAIParserAdapter(
    ov::genai::Tokenizer& tokenizer,
    std::shared_ptr<BaseGenAIParser> parser,
    std::shared_ptr<BaseGenAIIncrementalParser> incrementalParser) :
    BaseOutputParser(tokenizer),
    parser(std::move(parser)),
    incrementalParser(std::move(incrementalParser)) {}

// static
// Copies every well-formed entry of message["tool_calls"] into toolCalls.
// Fields that are missing or of an unexpected JSON type are silently left at
// their defaults; malformed entries still produce a (partially filled) ToolCall.
void GenAIParserAdapter::extractToolCalls(const ov::genai::JsonContainer& message, ToolCalls_t& toolCalls) {
    if (!message.contains("tool_calls") || !message["tool_calls"].is_array()) {
        return;  // nothing produced by the parser
    }
    const ov::genai::JsonContainer callsArray = message["tool_calls"];
    for (size_t idx = 0; idx < callsArray.size(); ++idx) {
        const ov::genai::JsonContainer entry = callsArray[idx];
        ToolCall call;
        if (entry.contains("id") && entry["id"].is_string()) {
            call.id = entry["id"].get_string();
        }
        if (entry.contains("function") && entry["function"].is_object()) {
            const ov::genai::JsonContainer function = entry["function"];
            if (function.contains("name") && function["name"].is_string()) {
                call.name = function["name"].get_string();
            }
            if (function.contains("arguments") && function["arguments"].is_string()) {
                call.arguments = function["arguments"].get_string();
            }
        }
        toolCalls.push_back(std::move(call));
    }
}

// static
// Converts a JsonContainer to a rapidjson::Document by round-tripping through
// its JSON text form. Callers are responsible for checking HasParseError()
// on the returned document before using it.
rapidjson::Document GenAIParserAdapter::jsonContainerToDocument(const ov::genai::JsonContainer& container) {
    rapidjson::Document document;
    const std::string serialized = container.to_json_string();
    document.Parse(serialized.c_str(), static_cast<rapidjson::SizeType>(serialized.size()));
    return document;
}

// Runs the unary (whole-output) GenAI parser over the already-decoded content
// and maps the resulting message fields back onto ParsedOutput. The token ids
// are unused: the GenAI parser operates purely on text.
void GenAIParserAdapter::parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& /*generatedTokens*/) {
    // The parser mutates "content" in place and may add "tool_calls" and
    // "reasoning_content" keys to the message object.
    ov::genai::JsonContainer message({{"content", parsedOutput.content}});
    parser->parse(message);

    if (message.contains("content") && message["content"].is_string()) {
        parsedOutput.content = message["content"].get_string();
    }
    if (message.contains("reasoning_content") && message["reasoning_content"].is_string()) {
        parsedOutput.reasoning = message["reasoning_content"].get_string();
    }
    extractToolCalls(message, parsedOutput.toolCalls);
}

// Processes one streamed chunk. Precedence of the returned delta:
//   1. on the final chunk, a flushed delta (possibly merged with this chunk's
//      structured delta),
//   2. a structured delta emitted by the incremental parser (tool calls,
//      reasoning),
//   3. a plain-content delta built from the filtered text,
//   4. std::nullopt when there is nothing to emit.
std::optional<rapidjson::Document> GenAIParserAdapter::parseChunk(
    const std::string& chunkResponse,
    ov::genai::GenerationFinishReason finishReason) {

    ov::genai::JsonContainer deltaMessage = ov::genai::JsonContainer::object();
    std::string deltaText = chunkResponse;

    // parse() may populate deltaMessage with a structured delta and/or return
    // filtered plain-text content with parser tags removed.
    const std::string filteredContent = incrementalParser->parse(deltaMessage, deltaText);

    // On the final chunk, drain any content still buffered in the parser's delay window.
    if (finishReason != ov::genai::GenerationFinishReason::NONE) {
        ov::genai::JsonContainer flushedDelta = ov::genai::JsonContainer::object();
        incrementalParser->flush(flushedDelta);

        if (!flushedDelta.empty()) {
            rapidjson::Document flushedDoc = jsonContainerToDocument(flushedDelta);
            if (!flushedDoc.HasParseError() && flushedDoc.IsObject() && !flushedDoc.ObjectEmpty()) {
                // If parse() also produced a structured delta, combine argument strings
                // so a single document is returned.
                if (!deltaMessage.empty()) {
                    rapidjson::Document primaryDoc = jsonContainerToDocument(deltaMessage);
                    if (!primaryDoc.HasParseError() && primaryDoc.IsObject() && !primaryDoc.ObjectEmpty()) {
                        return combineArgumentDeltas(std::move(primaryDoc), std::move(flushedDoc));
                    }
                }
                // NOTE(review): when a flushed delta is returned here, any
                // non-empty filteredContent from this same chunk is not
                // emitted — confirm this drop is intentional.
                return flushedDoc;
            }
        }
    }

    // Prefer structured deltas (tool calls, reasoning) if the incremental parser emitted any
    if (!deltaMessage.empty()) {
        rapidjson::Document doc = jsonContainerToDocument(deltaMessage);
        if (!doc.HasParseError() && doc.IsObject() && !doc.ObjectEmpty()) {
            return doc;
        }
    }

    // Fall back to plain content delta
    if (!filteredContent.empty()) {
        rapidjson::Document doc;
        doc.SetObject();
        rapidjson::Value contentVal(
            filteredContent.c_str(),
            static_cast<rapidjson::SizeType>(filteredContent.size()),
            doc.GetAllocator());
        doc.AddMember("content", contentVal, doc.GetAllocator());
        return doc;
    }

    // Nothing to emit for this chunk.
    return std::nullopt;
}

// static
// Merges two tool-call deltas produced on the final chunk: appends the flushed
// document's delta.tool_calls[0].function.arguments string onto the primary
// document's, then returns the primary. If either document does not carry a
// string at that exact path, the primary is returned unmodified.
rapidjson::Document GenAIParserAdapter::combineArgumentDeltas(
    rapidjson::Document primary,
    rapidjson::Document flushed) {
    // Resolves delta.tool_calls[0].function.arguments to a string Value,
    // or nullptr when any step of the path is missing or mistyped.
    auto argumentsOf = [](rapidjson::Document& doc) -> rapidjson::Value* {
        if (!doc.HasMember("delta")) {
            return nullptr;
        }
        auto& delta = doc["delta"];
        if (!delta.IsObject() || !delta.HasMember("tool_calls")) {
            return nullptr;
        }
        auto& calls = delta["tool_calls"];
        if (!calls.IsArray() || calls.Empty()) {
            return nullptr;
        }
        auto& first = calls[rapidjson::SizeType(0)];
        if (!first.IsObject() || !first.HasMember("function")) {
            return nullptr;
        }
        auto& function = first["function"];
        if (!function.IsObject() || !function.HasMember("arguments")) {
            return nullptr;
        }
        auto& arguments = function["arguments"];
        return arguments.IsString() ? &arguments : nullptr;
    };

    rapidjson::Value* target = argumentsOf(primary);
    rapidjson::Value* appended = argumentsOf(flushed);
    if (target == nullptr || appended == nullptr) {
        return primary;
    }

    const std::string merged =
        std::string(target->GetString()) + std::string(appended->GetString());
    target->SetString(
        merged.c_str(),
        static_cast<rapidjson::SizeType>(merged.size()),
        primary.GetAllocator());
    return primary;
}

// Delegates to the wrapped unary parser's start-tag metadata.
const std::vector<std::string>& GenAIParserAdapter::getParsingStartTags() const {
    return parser->getParsingStartTags();
}

// Delegates to the wrapped unary parser's start-of-output-only tags.
const std::vector<std::string>& GenAIParserAdapter::getSpecialParsingStartTags() const {
    return parser->getSpecialParsingStartTags();
}

// Delegates to the wrapped unary parser's end-tag metadata.
const std::string& GenAIParserAdapter::getParsingEndTag() const {
    return parser->getParsingEndTag();
}

// Special tokens must be streamed if EITHER the unary or the incremental
// parser needs them as parsing boundaries.
bool GenAIParserAdapter::requiresStreamingWithSpecialTokens() const {
    return parser->requiresStreamingWithSpecialTokens() ||
           incrementalParser->requiresStreamingWithSpecialTokens();
}

} // namespace ovms
Loading