@@ -68,10 +68,12 @@ absl::Status GenAiServable::loadRequest(std::shared_ptr<GenAiServableExecutionCo
6868 executionContext->endpoint = Endpoint::CHAT_COMPLETIONS;
6969 } else if (payload.uri == "/v3/completions" || payload.uri == "/v3/v1/completions") {
7070 executionContext->endpoint = Endpoint::COMPLETIONS;
71+ } else if (payload.uri == "/v3/responses" || payload.uri == "/v3/v1/responses") {
72+ executionContext->endpoint = Endpoint::RESPONSES;
7173 } else if (TokenizeParser::isTokenizeEndpoint (payload.uri )) {
7274 executionContext->endpoint = Endpoint::TOKENIZE;
7375 } else {
74- return absl::InvalidArgumentError (" Wrong endpoint. Allowed endpoints: /v3/chat/completions, /v3/completions" );
76+ return absl::InvalidArgumentError("Wrong endpoint. Allowed endpoints: /v3/chat/completions, /v3/completions, /v3/responses, /v3/tokenize");
7577 }
7678 executionContext->payload = payload;
7779 return absl::OkStatus ();
@@ -204,6 +206,50 @@ absl::Status GenAiServable::prepareInputs(std::shared_ptr<GenAiServableExecution
204206 }
205207 break ;
206208 }
209+ case Endpoint::RESPONSES: {
210+ if (executionContext->apiHandler ->getChatHistory ().size () > 0 ) {
211+ #if (PYTHON_DISABLE == 0)
212+ bool success;
213+ if (executionContext->apiHandler ->getProcessedJson ().size () > 0 ) {
214+ success = PyJinjaTemplateProcessor::applyChatTemplate (getProperties ()->templateProcessor , getProperties ()->modelsPath , executionContext->apiHandler ->getProcessedJson (), inputText);
215+ } else {
216+ success = PyJinjaTemplateProcessor::applyChatTemplate (getProperties ()->templateProcessor , getProperties ()->modelsPath , executionContext->payload .body , inputText);
217+ }
218+ if (!success) {
219+ return absl::Status (absl::StatusCode::kInvalidArgument , inputText);
220+ }
221+ #else
222+ ov::genai::ChatHistory& chatHistory = executionContext->apiHandler ->getChatHistory ();
223+ constexpr bool add_generation_prompt = true ;
224+ auto toolsStatus = executionContext->apiHandler ->parseToolsToJsonContainer ();
225+ if (!toolsStatus.ok ()) {
226+ return toolsStatus.status ();
227+ }
228+ const auto & tools = toolsStatus.value ();
229+ auto chatTemplateKwargsStatus = executionContext->apiHandler ->parseChatTemplateKwargsToJsonContainer ();
230+ if (!chatTemplateKwargsStatus.ok ()) {
231+ return chatTemplateKwargsStatus.status ();
232+ }
233+ const auto & chatTemplateKwargs = chatTemplateKwargsStatus.value ();
234+ try {
235+ inputText = getProperties ()->tokenizer .apply_chat_template (chatHistory, add_generation_prompt, {}, tools, chatTemplateKwargs);
236+ } catch (const std::exception& e) {
237+ SPDLOG_LOGGER_DEBUG (llm_calculator_logger, " Failed to apply chat template: {}" , e.what ());
238+ return absl::Status (absl::StatusCode::kInvalidArgument , " Failed to apply chat template. The model either does not have chat template or has an invalid one." );
239+ }
240+ #endif
241+ if (inputText.size () == 0 ) {
242+ return absl::Status (absl::StatusCode::kInvalidArgument , " Final prompt after applying chat template is empty" );
243+ }
244+ } else {
245+ auto prompt = executionContext->apiHandler ->getPrompt ();
246+ if (!prompt.has_value ()) {
247+ return absl::Status (absl::StatusCode::kInvalidArgument , " input is missing" );
248+ }
249+ inputText = prompt.value ();
250+ }
251+ break ;
252+ }
207253 case Endpoint::COMPLETIONS: {
208254 inputText = executionContext->apiHandler ->getPrompt ().value ();
209255 break ;
@@ -277,8 +323,12 @@ absl::Status GenAiServable::preparePartialResponse(std::shared_ptr<GenAiServable
277323 if (!serializedChunk.empty ()) {
278324 executionContext->response = wrapTextInServerSideEventMessage (serializedChunk);
279325 }
280- if (executionContext->apiHandler ->getStreamOptions ().includeUsage )
281- executionContext->response += wrapTextInServerSideEventMessage (executionContext->apiHandler ->serializeStreamingUsageChunk ());
326+ if (executionContext->apiHandler ->getStreamOptions ().includeUsage ) {
327+ std::string usageChunk = executionContext->apiHandler ->serializeStreamingUsageChunk ();
328+ if (!usageChunk.empty ()) {
329+ executionContext->response += wrapTextInServerSideEventMessage (usageChunk);
330+ }
331+ }
282332
283333 executionContext->response += wrapTextInServerSideEventMessage("[DONE]");
284334
0 commit comments