Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/llm/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -136,25 +136,30 @@ ovms_cc_library( # TODO split further so we don't have to recompile everything w
hdrs = [
"io_processing/hermes3/tool_parser.hpp",
"io_processing/llama3/tool_parser.hpp",
"io_processing/llama3/genai_tool_parser.hpp",
"io_processing/phi4/tool_parser.hpp",
"io_processing/devstral/tool_parser.hpp",
"io_processing/mistral/tool_parser.hpp",
"io_processing/qwen3/reasoning_parser.hpp",
"io_processing/gptoss/reasoning_parser.hpp",
"io_processing/gptoss/tool_parser.hpp",
"io_processing/gptoss/harmony.hpp",
"io_processing/base_genai_parser.hpp",
"io_processing/genai_parser_adapter.hpp",
"io_processing/output_parser.hpp",
],
srcs = [
"io_processing/hermes3/tool_parser.cpp",
"io_processing/llama3/tool_parser.cpp",
"io_processing/llama3/genai_tool_parser.cpp",
"io_processing/phi4/tool_parser.cpp",
"io_processing/devstral/tool_parser.cpp",
"io_processing/mistral/tool_parser.cpp",
"io_processing/qwen3/reasoning_parser.cpp",
"io_processing/gptoss/reasoning_parser.cpp",
"io_processing/gptoss/tool_parser.cpp",
"io_processing/gptoss/harmony.cpp",
"io_processing/genai_parser_adapter.cpp",
"io_processing/output_parser.cpp",
],
deps = [
Expand Down
133 changes: 133 additions & 0 deletions src/llm/io_processing/base_genai_parser.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
//*****************************************************************************
// Copyright 2026 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once

#include <string>
#include <vector>

#include <openvino/genai/parsers.hpp>

namespace ovms {

/**
* @brief Extension of ov::genai::Parser that exposes OVMS OutputParser metadata.
*
* Concrete implementations should override the tag getters to tell
* GenAIParserAdapter (and therefore the surrounding OutputParser machinery)
* where parsing segments begin and end in the generated text, and whether
* the text streamer must preserve special tokens.
*
* Defaults:
* - getParsingStartTags() pure virtual — must be overridden
* - getSpecialParsingStartTags() returns empty vector
* - getParsingEndTag() pure virtual — must be overridden
* - requiresStreamingWithSpecialTokens() returns false
*/
/**
 * @brief ov::genai::Parser enriched with the metadata OVMS OutputParser needs.
 *
 * Implementations override the tag getters so GenAIParserAdapter can tell the
 * surrounding OutputParser machinery where this parser's segment starts and
 * ends in the generated text, and whether special tokens must survive
 * detokenization.
 */
class BaseGenAIParser : public ov::genai::Parser {
public:
    BaseGenAIParser() = default;
    ~BaseGenAIParser() override = default;

    /// Tags that switch OutputParser into the phase handled by this parser.
    /// Return an empty vector when the parser claims all output unconditionally.
    virtual const std::vector<std::string>& getParsingStartTags() const = 0;

    /// Tags that trigger parsing only at the very beginning of the output;
    /// consulted exclusively while OutputParser is in its UNKNOWN phase.
    /// Default: no such tags.
    virtual const std::vector<std::string>& getSpecialParsingStartTags() const {
        static const std::vector<std::string> kNoTags{};
        return kNoTags;
    }

    /// Tag closing the segment this parser processes. Return an empty string
    /// when the parser consumes everything up to end-of-generation.
    virtual const std::string& getParsingEndTag() const = 0;

    /// When true, the tokenizer feeding the TextStreamer must keep special
    /// tokens in the text so this parser can use them as boundaries.
    /// Default: special tokens may be skipped.
    virtual bool requiresStreamingWithSpecialTokens() const {
        return false;
    }
};

/**
* @brief Extension of ov::genai::IncrementalParser that exposes OVMS OutputParser metadata.
*
* Mirrors BaseGenAIParser but for the streaming (incremental) variant.
* The same defaults apply.
*/
/**
 * @brief ov::genai::IncrementalParser enriched with OVMS OutputParser metadata.
 *
 * Streaming counterpart of BaseGenAIParser; the same tag-getter contract and
 * defaults apply, plus a flush() hook for draining look-ahead buffers.
 */
class BaseGenAIIncrementalParser : public ov::genai::IncrementalParser {
public:
    BaseGenAIIncrementalParser() = default;
    ~BaseGenAIIncrementalParser() override = default;

    /// Tags that switch OutputParser into the phase handled by this parser.
    virtual const std::vector<std::string>& getParsingStartTags() const = 0;

    /// Tags that trigger parsing only at the very beginning of the output.
    /// Default: no such tags.
    virtual const std::vector<std::string>& getSpecialParsingStartTags() const {
        static const std::vector<std::string> kNoTags{};
        return kNoTags;
    }

    /// Tag closing the segment this parser processes.
    virtual const std::string& getParsingEndTag() const = 0;

    /// When true, the text streamer must keep special tokens in the stream.
    /// Default: special tokens may be skipped.
    virtual bool requiresStreamingWithSpecialTokens() const {
        return false;
    }

    /**
     * @brief Drain any internally buffered state after the final chunk.
     *
     * GenAIParserAdapter::parseChunk invokes this when finishReason != NONE,
     * right after the last parse() call. Implementations that hold back
     * content in a delay window (or any look-ahead buffer) must emit the
     * pending material here as a structured delta.
     *
     * The base implementation does nothing.
     *
     * @param delta_message Filled with a final delta when pending content
     *                      exists; otherwise left untouched.
     */
    virtual void flush(ov::genai::JsonContainer& /*delta_message*/) {}
};

} // namespace ovms
197 changes: 197 additions & 0 deletions src/llm/io_processing/genai_parser_adapter.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
//*****************************************************************************
// Copyright 2026 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************

#include "genai_parser_adapter.hpp"

#include <string>
#include <utility>
#include <vector>

namespace ovms {

// Wraps a whole-output GenAI parser and its streaming (incremental)
// counterpart behind the OVMS BaseOutputParser interface. Both shared_ptr
// handles are taken by value and moved into the members; the tokenizer
// reference is forwarded to the BaseOutputParser base.
GenAIParserAdapter::GenAIParserAdapter(
    ov::genai::Tokenizer& tokenizer,
    std::shared_ptr<BaseGenAIParser> parser,
    std::shared_ptr<BaseGenAIIncrementalParser> incrementalParser) :
    BaseOutputParser(tokenizer),
    parser(std::move(parser)),
    incrementalParser(std::move(incrementalParser)) {}

// static
// Copies every well-formed entry of message["tool_calls"] into toolCalls.
// Fields that are missing or of an unexpected JSON type are silently left at
// their defaults; malformed entries still produce a (partially filled) ToolCall.
void GenAIParserAdapter::extractToolCalls(const ov::genai::JsonContainer& message, ToolCalls_t& toolCalls) {
    if (!message.contains("tool_calls") || !message["tool_calls"].is_array()) {
        return;  // nothing produced by the parser
    }
    const ov::genai::JsonContainer callsArray = message["tool_calls"];
    for (size_t idx = 0; idx < callsArray.size(); ++idx) {
        const ov::genai::JsonContainer entry = callsArray[idx];
        ToolCall call;
        if (entry.contains("id") && entry["id"].is_string()) {
            call.id = entry["id"].get_string();
        }
        if (entry.contains("function") && entry["function"].is_object()) {
            const ov::genai::JsonContainer function = entry["function"];
            if (function.contains("name") && function["name"].is_string()) {
                call.name = function["name"].get_string();
            }
            if (function.contains("arguments") && function["arguments"].is_string()) {
                call.arguments = function["arguments"].get_string();
            }
        }
        toolCalls.push_back(std::move(call));
    }
}

// static
// Converts a JsonContainer to a rapidjson::Document by round-tripping through
// its JSON text form. Callers are responsible for checking HasParseError()
// on the returned document before using it.
rapidjson::Document GenAIParserAdapter::jsonContainerToDocument(const ov::genai::JsonContainer& container) {
    rapidjson::Document document;
    const std::string serialized = container.to_json_string();
    document.Parse(serialized.c_str(), static_cast<rapidjson::SizeType>(serialized.size()));
    return document;
}

// Runs the unary (whole-output) GenAI parser over the already-decoded content
// and maps the resulting message fields back onto ParsedOutput. The token ids
// are unused: the GenAI parser operates purely on text.
void GenAIParserAdapter::parse(ParsedOutput& parsedOutput, const std::vector<int64_t>& /*generatedTokens*/) {
    // The parser mutates "content" in place and may add "tool_calls" and
    // "reasoning_content" keys to the message object.
    ov::genai::JsonContainer message({{"content", parsedOutput.content}});
    parser->parse(message);

    if (message.contains("content") && message["content"].is_string()) {
        parsedOutput.content = message["content"].get_string();
    }
    if (message.contains("reasoning_content") && message["reasoning_content"].is_string()) {
        parsedOutput.reasoning = message["reasoning_content"].get_string();
    }
    extractToolCalls(message, parsedOutput.toolCalls);
}

// Processes one streamed chunk. Precedence of the returned delta:
//   1. on the final chunk, a flushed delta (possibly merged with this chunk's
//      structured delta),
//   2. a structured delta emitted by the incremental parser (tool calls,
//      reasoning),
//   3. a plain-content delta built from the filtered text,
//   4. std::nullopt when there is nothing to emit.
std::optional<rapidjson::Document> GenAIParserAdapter::parseChunk(
    const std::string& chunkResponse,
    ov::genai::GenerationFinishReason finishReason) {

    ov::genai::JsonContainer deltaMessage = ov::genai::JsonContainer::object();
    std::string deltaText = chunkResponse;

    // parse() may populate deltaMessage with a structured delta and/or return
    // filtered plain-text content with parser tags removed.
    const std::string filteredContent = incrementalParser->parse(deltaMessage, deltaText);

    // On the final chunk, drain any content still buffered in the parser's delay window.
    if (finishReason != ov::genai::GenerationFinishReason::NONE) {
        ov::genai::JsonContainer flushedDelta = ov::genai::JsonContainer::object();
        incrementalParser->flush(flushedDelta);

        if (!flushedDelta.empty()) {
            rapidjson::Document flushedDoc = jsonContainerToDocument(flushedDelta);
            if (!flushedDoc.HasParseError() && flushedDoc.IsObject() && !flushedDoc.ObjectEmpty()) {
                // If parse() also produced a structured delta, combine argument strings
                // so a single document is returned.
                if (!deltaMessage.empty()) {
                    rapidjson::Document primaryDoc = jsonContainerToDocument(deltaMessage);
                    if (!primaryDoc.HasParseError() && primaryDoc.IsObject() && !primaryDoc.ObjectEmpty()) {
                        return combineArgumentDeltas(std::move(primaryDoc), std::move(flushedDoc));
                    }
                }
                // NOTE(review): when a flushed delta is returned here, any
                // non-empty filteredContent from this same chunk is not
                // emitted — confirm this drop is intentional.
                return flushedDoc;
            }
        }
    }

    // Prefer structured deltas (tool calls, reasoning) if the incremental parser emitted any
    if (!deltaMessage.empty()) {
        rapidjson::Document doc = jsonContainerToDocument(deltaMessage);
        if (!doc.HasParseError() && doc.IsObject() && !doc.ObjectEmpty()) {
            return doc;
        }
    }

    // Fall back to plain content delta
    if (!filteredContent.empty()) {
        rapidjson::Document doc;
        doc.SetObject();
        rapidjson::Value contentVal(
            filteredContent.c_str(),
            static_cast<rapidjson::SizeType>(filteredContent.size()),
            doc.GetAllocator());
        doc.AddMember("content", contentVal, doc.GetAllocator());
        return doc;
    }

    // Nothing to emit for this chunk.
    return std::nullopt;
}

// static
// Merges two tool-call deltas produced on the final chunk: appends the flushed
// document's delta.tool_calls[0].function.arguments string onto the primary
// document's, then returns the primary. If either document does not carry a
// string at that exact path, the primary is returned unmodified.
rapidjson::Document GenAIParserAdapter::combineArgumentDeltas(
    rapidjson::Document primary,
    rapidjson::Document flushed) {
    // Resolves delta.tool_calls[0].function.arguments to a string Value,
    // or nullptr when any step of the path is missing or mistyped.
    auto argumentsOf = [](rapidjson::Document& doc) -> rapidjson::Value* {
        if (!doc.HasMember("delta")) {
            return nullptr;
        }
        auto& delta = doc["delta"];
        if (!delta.IsObject() || !delta.HasMember("tool_calls")) {
            return nullptr;
        }
        auto& calls = delta["tool_calls"];
        if (!calls.IsArray() || calls.Empty()) {
            return nullptr;
        }
        auto& first = calls[rapidjson::SizeType(0)];
        if (!first.IsObject() || !first.HasMember("function")) {
            return nullptr;
        }
        auto& function = first["function"];
        if (!function.IsObject() || !function.HasMember("arguments")) {
            return nullptr;
        }
        auto& arguments = function["arguments"];
        return arguments.IsString() ? &arguments : nullptr;
    };

    rapidjson::Value* target = argumentsOf(primary);
    rapidjson::Value* appended = argumentsOf(flushed);
    if (target == nullptr || appended == nullptr) {
        return primary;
    }

    const std::string merged =
        std::string(target->GetString()) + std::string(appended->GetString());
    target->SetString(
        merged.c_str(),
        static_cast<rapidjson::SizeType>(merged.size()),
        primary.GetAllocator());
    return primary;
}

// Delegates to the wrapped unary parser's start-tag metadata.
const std::vector<std::string>& GenAIParserAdapter::getParsingStartTags() const {
    return parser->getParsingStartTags();
}

// Delegates to the wrapped unary parser's start-of-output-only tags.
const std::vector<std::string>& GenAIParserAdapter::getSpecialParsingStartTags() const {
    return parser->getSpecialParsingStartTags();
}

// Delegates to the wrapped unary parser's end-tag metadata.
const std::string& GenAIParserAdapter::getParsingEndTag() const {
    return parser->getParsingEndTag();
}

// Special tokens must be streamed if EITHER the unary or the incremental
// parser needs them as parsing boundaries.
bool GenAIParserAdapter::requiresStreamingWithSpecialTokens() const {
    return parser->requiresStreamingWithSpecialTokens() ||
           incrementalParser->requiresStreamingWithSpecialTokens();
}

} // namespace ovms
Loading