Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/CONFIGURATION.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ Some providers support multiple API formats (OpenAI chat, Anthropic messages, em
|----------|----------|
| `chat` | OpenAI-compatible chat completions |
| `messages` | Anthropic Claude Messages API |
| `embeddings` | OpenAI-compatible embeddings |
| `embeddings` | OpenAI-compatible embeddings (Gemini providers auto-transformed) |
| `image` | Image generation (DALL-E, etc.) |
| `transcriptions` | Speech-to-text (Whisper) |
| `speech` | Text-to-speech |
Expand Down
3 changes: 2 additions & 1 deletion docs/openapi/components/schemas/AliasConfig.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@ properties:

- **text** — Text generation (LLM). Supports all text wire protocols:
chat completions, messages, gemini, responses, ollama.
- **embeddings** — Vector embeddings.
- **embeddings** — Vector embeddings, including Gemini backends via
automatic request/response transformation.
- **transcriptions** — Audio to text.
- **speech** — Text to audio.
- **image** — Image generation/editing.
Expand Down
12 changes: 9 additions & 3 deletions docs/openapi/paths/v1_embeddings.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
post:
tags:
- Inference — Embeddings
summary: OpenAI-compatible embeddings (always pass-through)
summary: OpenAI-compatible embeddings with provider-aware transformation
description: |
Requires a model configured with `type: embeddings`. The request and
response are forwarded verbatim.
Requires a model configured with `type: embeddings`. Accepts requests in
OpenAI embeddings format. For OpenAI-compatible providers, requests and
responses are forwarded directly. For Gemini providers, Plexus transforms
the request to the Gemini `embedContent`/`batchEmbedContents` format and
normalises the response back to OpenAI format.

Supported provider types: `openai`, `chat` (pass-through), `gemini`
(transformed).
requestBody:
required: true
content:
Expand Down
21 changes: 15 additions & 6 deletions packages/backend/src/routes/inference/embeddings.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import { FastifyInstance } from 'fastify';
import { logger } from '../../utils/logger';
import { Dispatcher } from '../../services/dispatcher';
import { EmbeddingsTransformer } from '../../transformers';
import { OpenAIEmbeddingsTransformer } from '../../transformers/embeddings';
import { UnifiedEmbeddingsRequest } from '../../types/unified';
import { UsageStorageService } from '../../services/usage-storage';
import { UsageRecord } from '../../types/usage';
import { getClientIp } from '../../utils/ip';
Expand Down Expand Up @@ -54,11 +55,17 @@ export async function registerEmbeddingsRoute(

logger.silly('Incoming Embeddings Request', body);

const transformer = new EmbeddingsTransformer();
let unifiedRequest = await transformer.parseRequest(body);
unifiedRequest.incomingApiType = 'embeddings';
unifiedRequest.originalBody = body;
unifiedRequest.requestId = requestId;
const transformer = new OpenAIEmbeddingsTransformer();
let unifiedRequest: UnifiedEmbeddingsRequest = {
model: body.model,
input: body.input,
encoding_format: body.encoding_format,
dimensions: body.dimensions,
user: body.user,
incomingApiType: 'embeddings',
originalBody: body,
requestId,
};
unifiedRequest = attachKeyAccessPolicy(request, unifiedRequest);

DebugManager.getInstance().startLog(requestId, body, sanitizeHeaders(request.headers as any));
Expand All @@ -78,6 +85,8 @@ export async function registerEmbeddingsRoute(
usageRecord.selectedModelName = unifiedResponse.plexus?.model;
usageRecord.canonicalModelName = unifiedResponse.plexus?.canonicalModel;
usageRecord.outgoingApiType = unifiedResponse.plexus?.apiType;
usageRecord.attemptCount = unifiedResponse.plexus?.attemptCount ?? 1;
usageRecord.retryHistory = unifiedResponse.plexus?.retryHistory ?? null;
usageRecord.isPassthrough = true; // Embeddings are always pass-through (OpenAI format)
usageRecord.tokensInput = unifiedResponse.usage?.prompt_tokens ?? 0;
usageRecord.tokensOutput = 0; // Embeddings don't have output tokens
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import { test, expect, describe } from 'vitest';
import { EmbeddingsTransformerFactory } from '../../services/embeddings-transformer-factory';

// Covers EmbeddingsTransformerFactory.getTransformer: one provider type in,
// one transformer out, identified by its `name`.
describe('EmbeddingsTransformerFactory', () => {
  // Each row: [test title, provider type handed to getTransformer, expected transformer name].
  const cases: ReadonlyArray<readonly [string, string, string]> = [
    ['should return GeminiEmbeddingsTransformer for gemini type', 'gemini', 'gemini'],
    ['should return OpenAIEmbeddingsTransformer for openai type', 'openai', 'openai'],
    ['should return OpenAIEmbeddingsTransformer for chat type', 'chat', 'openai'],
    ['should default to OpenAI for unknown type', 'unknown', 'openai'],
  ];

  // Register the rows in order so the suite lists the same tests as before.
  for (const [title, providerType, expectedName] of cases) {
    test(title, () => {
      const transformer = EmbeddingsTransformerFactory.getTransformer(providerType);
      expect(transformer.name).toBe(expectedName);
    });
  }
});

// Covers EmbeddingsTransformerFactory.resolveTransformer: a provider's full
// type list in, one transformer out, identified by its `name`.
describe('resolveTransformer', () => {
  // Each row: [test title, provider type list, expected transformer name].
  const cases: ReadonlyArray<readonly [string, string[], string]> = [
    [
      'should resolve Gemini transformer when gemini is in provider types',
      ['chat', 'gemini'],
      'gemini',
    ],
    [
      'should fall back to OpenAI when no dedicated type matches',
      ['chat', 'openai'],
      'openai',
    ],
    ['should fall back to OpenAI for unknown provider types', ['anthropic'], 'openai'],
    ['should fall back to OpenAI for empty provider types', [], 'openai'],
    [
      'should resolve Gemini when gemini appears alongside other types',
      ['ollama', 'gemini', 'chat'],
      'gemini',
    ],
  ];

  // Register the rows in order so the suite lists the same tests as before.
  for (const [title, providerTypes, expectedName] of cases) {
    test(title, () => {
      const transformer = EmbeddingsTransformerFactory.resolveTransformer(providerTypes);
      expect(transformer.name).toBe(expectedName);
    });
  }
});
109 changes: 74 additions & 35 deletions packages/backend/src/services/dispatcher.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import { UsageStorageService } from './usage-storage';
import { CooldownParserRegistry } from './cooldown-parsers';
import { getConfig, getProviderTypes } from '../config';
import { applyModelBehaviors } from './model-behaviors';
import { EmbeddingsTransformerFactory } from './embeddings-transformer-factory';
import { resolveAdapters } from './adapter-resolver';
import type { ResolvedAdapter } from '../types/provider-adapter';
import { getModels } from '@earendil-works/pi-ai';
Expand Down Expand Up @@ -62,12 +63,25 @@ interface RetryHistoryLikeEntry {

type ResolveTimeoutMs = (timeoutMs?: number | null) => number;

/**
 * Fallback URL keys for request-level API types.
 *
 * Request-level API types (e.g. embeddings, transcriptions) share base URLs
 * with their provider-level counterparts (e.g. chat, gemini). For each
 * request-level type, this map lists the provider-level URL keys to try, in
 * order, when no exact or default URL is configured.
 *
 * Every request-level type gets its own `['chat', 'gemini']` array, so the
 * entries do not alias one another.
 */
const API_TYPE_ALIASES: Record<string, string[]> = Object.fromEntries(
  ['embeddings', 'transcriptions', 'speech', 'images'].map(
    (requestLevelType): [string, string[]] => [requestLevelType, ['chat', 'gemini']]
  )
);

/**
 * Strips a trailing beta API-version path segment from a Gemini base URL.
 *
 * Gemini's transformer adds /v1beta to the path, so the base URL must not
 * already end with it, otherwise the final URL contains /v1beta/v1beta/...
 *
 * Only beta versions are removed (e.g. /v1beta, /v1beta1, case-insensitive);
 * plain /v1 is valid for other APIs and is left alone. Trailing slashes after
 * the beta segment (e.g. ".../v1beta/") are removed together with it, since a
 * base URL written that way would otherwise slip past the check and still
 * produce a duplicated version segment.
 *
 * @param url Base URL to normalise.
 * @returns The URL without its trailing beta version segment, or the input
 *          unchanged when it does not end with one.
 */
function stripTrailingApiVersion(url: string): string {
  // `\/*` before `$` lets ".../v1beta/" match as well as ".../v1beta".
  return url.replace(/\/v\d+beta\d*\/*$/i, '');
}
Expand Down Expand Up @@ -1684,26 +1698,33 @@ export class Dispatcher {
rawBaseUrl = defaultUrl;
logger.debug(`Dispatcher: Using default base URL.`);
} else {
// If we can't find a specific URL for this type, and no default, fall back to the first one?
// Or throw error.
const firstKey = Object.keys(urlMap)[0];

if (firstKey) {
const firstUrl = urlMap[firstKey];
if (firstUrl) {
rawBaseUrl = firstUrl;
logger.warn(
`No specific base URL found for api type '${targetApiType}'. using '${firstKey}' as fallback.`
);
// Resolve via API_TYPE_ALIASES before falling back to the first key.
const aliases = API_TYPE_ALIASES[typeKey];
const aliasKey = aliases?.find((a) => urlMap[a]);

if (aliasKey) {
rawBaseUrl = urlMap[aliasKey]!;
logger.debug(`Dispatcher: Using '${aliasKey}' base URL for api type '${targetApiType}'.`);
} else {
const firstKey = Object.keys(urlMap)[0];

if (firstKey) {
const firstUrl = urlMap[firstKey];
if (firstUrl) {
rawBaseUrl = firstUrl;
logger.warn(
`No specific base URL found for api type '${targetApiType}'. using '${firstKey}' as fallback.`
);
} else {
throw new Error(
`No base URL configured for api type '${targetApiType}' and no default found.`
);
}
} else {
throw new Error(
`No base URL configured for api type '${targetApiType}' and no default found.`
);
}
} else {
throw new Error(
`No base URL configured for api type '${targetApiType}' and no default found.`
);
}
}
}
Expand Down Expand Up @@ -3158,10 +3179,10 @@ export class Dispatcher {

/**
* Dispatch embeddings request to provider
* Simplified version of dispatch() since embeddings:
* - Don't support streaming
* - Use universal API format (no transformation needed)
* - Always use /embeddings endpoint
* Uses EmbeddingsTransformerFactory for provider-type-aware:
* - URL construction (e.g. Gemini /v1beta/models/{model}:embedContent)
* - Auth headers (e.g. x-goog-api-key for Gemini)
* - Request/response transformation
*/
async dispatchEmbeddings(request: any): Promise<any> {
const config = getConfig();
Expand Down Expand Up @@ -3230,32 +3251,40 @@ export class Dispatcher {
this.emitRoutingUpdate(request.requestId, route);

try {
const providerTypes = getProviderTypes(route.config);
const transformer = EmbeddingsTransformerFactory.resolveTransformer(providerTypes);
const requestWithModel = { ...request, model: route.model };

const baseUrl = this.resolveBaseUrl(route, 'embeddings');
const url = `${baseUrl}/embeddings`;
const endpoint = transformer.getEndpoint
? transformer.getEndpoint(requestWithModel)
: transformer.defaultEndpoint;
const url = `${baseUrl}${endpoint}`;

const headers: Record<string, string> = {
'Content-Type': 'application/json',
Accept: 'application/json',
};

if (route.config.api_key) {
headers['Authorization'] = `Bearer ${route.config.api_key}`;
if (transformer.getAuthHeaders) {
transformer.getAuthHeaders(route.config.api_key, headers);
} else {
headers['Authorization'] = `Bearer ${route.config.api_key}`;
}
}

if (route.config.headers) {
Object.assign(headers, route.config.headers);
}

const payload = {
...request.originalBody,
model: route.model,
};

let payload = await transformer.transformRequest(requestWithModel);
if (route.config.extraBody) {
Object.assign(payload, route.config.extraBody);
}

// Merge alias-level extraBody (overrides provider level)
// Merge model-level extraBody (overrides provider level)
if (route.modelConfig?.extraBody) {
Object.assign(payload, route.modelConfig.extraBody);
}
// Merge alias-level extraBody (overrides provider and model level)
if (route.canonicalModel) {
const aliasConfig = getConfig().models?.[route.canonicalModel];
if (aliasConfig?.extraBody) {
Expand Down Expand Up @@ -3326,19 +3355,28 @@ export class Dispatcher {
}
}

const responseBody = await response.json();
logger.silly('Embeddings Response Payload', responseBody);
const rawResponseBody = await this.parseJsonResponseBody(
response,
request.requestId,
route,
'embeddings'
);
logger.silly('Embeddings Response Payload', rawResponseBody);

if (request.requestId) {
DebugManager.getInstance().addRawResponse(request.requestId, responseBody);
DebugManager.getInstance().addRawResponse(request.requestId, rawResponseBody);
}

const transformedResponse = await transformer.transformResponse(
rawResponseBody,
requestWithModel
);
const enrichedResponse: any = {
...responseBody,
...transformedResponse,
plexus: {
provider: route.provider,
model: route.model,
apiType: 'embeddings',
isPassthrough: true,
pricing: route.modelConfig?.pricing,
providerDiscount: route.config.discount,
canonicalModel: route.canonicalModel,
Expand All @@ -3347,6 +3385,7 @@ export class Dispatcher {
};

await this.recordAttemptMetric(route, request.requestId, true);
CooldownManager.getInstance().markProviderSuccess(route.provider, route.model);
this.appendSuccessAttempt(retryHistory, route, 'embeddings');
this.attachAttemptMetadata(
enrichedResponse,
Expand Down
45 changes: 45 additions & 0 deletions packages/backend/src/services/embeddings-transformer-factory.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import { logger } from '../utils/logger';
import { EmbeddingsTransformer } from '../types/embeddings-transformer';
import { OpenAIEmbeddingsTransformer } from '../transformers/embeddings/openai';
import { GeminiEmbeddingsTransformer } from '../transformers/embeddings/gemini';

/**
 * EmbeddingsTransformerFactory
 *
 * Factory for retrieving the correct embeddings transformer based on the
 * provider's API type. Supports 'gemini' (Google) and 'openai'/'chat'
 * (OpenAI-compatible). Unknown types default to OpenAI format.
 *
 * The static methods reference the class by name rather than through `this`,
 * so they keep working when passed around as plain function values.
 */
export class EmbeddingsTransformerFactory {
  /**
   * Provider types with dedicated (non-OpenAI) embeddings transformers,
   * in priority order. Used by resolveTransformer to pick the best match.
   */
  static readonly DEDICATED_TYPES = ['gemini'] as const;

  /**
   * Resolve the best embeddings transformer for a provider based on its type list.
   *
   * Walks DEDICATED_TYPES in its declared priority order and picks the first
   * one the provider lists (compared case-insensitively). When none is listed,
   * including for an empty list, the OpenAI-format transformer is returned.
   *
   * @param providerTypes API types configured for the provider.
   * @returns A new transformer instance.
   */
  static resolveTransformer(providerTypes: string[]): EmbeddingsTransformer {
    const configured = new Set(providerTypes.map((type) => type.toLowerCase()));
    // Iterate the priority list, not the provider list, so the winner does not
    // depend on the order in which a provider happens to declare its types.
    const dedicated = EmbeddingsTransformerFactory.DEDICATED_TYPES.find((type) =>
      configured.has(type)
    );
    return EmbeddingsTransformerFactory.getTransformer(dedicated ?? 'openai');
  }

  /**
   * Return the embeddings transformer for a single provider type.
   *
   * @param providerType Provider API type, matched case-insensitively.
   * @returns A Gemini transformer for 'gemini'; an OpenAI-format transformer
   *          for 'openai', 'chat', and any other value (other values are
   *          additionally logged as a warning).
   */
  static getTransformer(providerType: string): EmbeddingsTransformer {
    const normalized = providerType.toLowerCase();

    if (normalized === 'gemini') {
      return new GeminiEmbeddingsTransformer();
    }

    if (normalized !== 'openai' && normalized !== 'chat') {
      logger.warn(
        `Unknown embeddings provider type '${providerType}', defaulting to OpenAI format`
      );
    }
    return new OpenAIEmbeddingsTransformer();
  }
}
2 changes: 2 additions & 0 deletions packages/backend/src/services/probe-service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,10 @@ export class ProbeService {

response = await this.dispatcher.dispatch(unifiedRequest as any);
} else if (apiType === 'embeddings') {
const embReq = testRequest as { model: string; input: string | string[] };
response = await this.dispatcher.dispatchEmbeddings({
model: directModelPath,
input: embReq.input,
originalBody: testRequest,
requestId,
incomingApiType: 'embeddings',
Expand Down
3 changes: 2 additions & 1 deletion packages/backend/src/services/router.ts
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,8 @@ async function filterGroupTargets(
}

if (alias.type === 'embeddings') return true;
return getProviderTypes(providerConfig).includes('embeddings');
const providerTypes = getProviderTypes(providerConfig);
return providerTypes.includes('embeddings') || providerTypes.includes('gemini');
});

if (embeddingsTargets.length > 0) {
Expand Down
Loading