software-mansion · NorbertKlockiewicz · Jun 22, 2026 · Jun 22, 2026 · Jun 22, 2026 · Jun 22, 2026
diff --git a/apps/text-embeddings/app/text-embeddings/index.tsx b/apps/text-embeddings/app/text-embeddings/index.tsx
diff --git a/apps/text-embeddings/utils/math.ts b/apps/text-embeddings/utils/math.ts
@@ -1,6 +1,7 @@
 import {
   RnExecutorchError,
   RnExecutorchErrorCode,
+  EmbeddingResult,
 } from 'react-native-executorch';
 
 export const dotProduct = (a: Float32Array, b: Float32Array) => {
@@ -17,3 +18,28 @@ export const dotProduct = (a: Float32Array, b: Float32Array) => {
   }
   return sum;
 };
+
+export const maxSim = ( // MaxSim score: sum over query tokens of the best dot product against any kept document token
+  query: EmbeddingResult, // per-token query vectors; every query token is scored (the skip list is not applied to it)
+  doc: EmbeddingResult, // per-token document vectors; tokens whose id is in skipListIds are ignored
+  skipListIds: number[] = [] // document token ids to exclude; defaults to excluding nothing
+) => {
+  const dim = query.embeddingDim; // stride for BOTH buffers — assumes doc.embeddingDim matches (not checked here)
+  const skip = new Set(skipListIds); // Set so the per-token check below is a lookup, not an array scan
+  let score = 0;
+  for (let qi = 0; qi < query.numTokens; qi++) {
+    const qOff = qi * dim; // start of query token qi in the flat `vectors` buffer
+    let best = -Infinity; // sentinel: no document token scored yet for this query token
+    for (let di = 0; di < doc.numTokens; di++) {
+      if (skip.has(doc.tokenIds[di]!)) continue; // skip-listed document token
+      const dOff = di * dim; // start of document token di in the flat `vectors` buffer
+      let dot = 0;
+      for (let k = 0; k < dim; k++) {
+        dot += (query.vectors[qOff + k] ?? 0) * (doc.vectors[dOff + k] ?? 0); // out-of-range reads count as 0
+      }
+      if (dot > best) best = dot;
+    }
+    if (best !== -Infinity) score += best; // empty or fully skipped doc: this query token adds nothing
+  }
+  return score;
+};
diff --git a/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md b/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md
@@ -45,7 +45,13 @@ try {
 
 `useTextEmbeddings` takes [`TextEmbeddingsProps`](../../06-api-reference/interfaces/TextEmbeddingsProps.md) that consists of:
 
-- `model` of type `object` containing the [model source](../../06-api-reference/interfaces/TextEmbeddingsProps.md#modelsource) and [tokenizer source](../../06-api-reference/interfaces/TextEmbeddingsProps.md#tokenizersource).
+- `model` of type `object` ([`TextEmbeddingsModel`](../../06-api-reference/interfaces/TextEmbeddingsModel.md)) containing:
+  - `modelName` - Unique name identifying the model.
+  - `modelSource` - Location of the used model.
+  - `tokenizerSource` - Location of the used tokenizer.
+  - `prompts` _(optional)_ - Asymmetric `query`/`document` prompts the model is trained with. When present, `forward` requires a `role` and prepends the matching prompt.
+  - `multiVector` _(optional)_ - When `true`, `forward` returns the per-token [`EmbeddingResult`](../../06-api-reference/interfaces/EmbeddingResult.md) instead of a single pooled `Float32Array`.
+  - `skipListIds` _(optional)_ - Token ids to exclude from late-interaction (MaxSim) scoring.
 - An optional flag [`preventLoad`](../../06-api-reference/interfaces/TextEmbeddingsProps.md#preventload) which prevents auto-loading of the model.
 
 You need more details? Check the following resources:
@@ -60,7 +66,21 @@ You need more details? Check the following resources:
 
 ## Running the model
 
-To run the model, you can use the [`forward`](../../06-api-reference/interfaces/TextEmbeddingsType.md#forward) method. It accepts one argument, which is a string representing the text you want to embed. The function returns a promise, which can resolve either to an error or an array of numbers representing the embedding.
+To run the model, you can use the [`forward`](../../06-api-reference/interfaces/TextEmbeddingsType.md#forward) method. It accepts the text to embed and, for models trained with asymmetric prompts, a `role` (`'query'` or `'document'`) that is required for those models. The return type depends on the model:
+
+- **Pooled models** (the default, e.g. MiniLM, MPNet, LFM2.5-Embedding) resolve to a single `Float32Array` — one normalized vector for the whole input.
+- **Multi-vector models** (`multiVector: true`, e.g. LFM2.5-ColBERT) resolve to an [`EmbeddingResult`](../../06-api-reference/interfaces/EmbeddingResult.md) with the per-token vectors (`vectors`, `numTokens`, `embeddingDim`, `tokenIds`).
+
+### Asymmetric prompts (`role`)
+
+Some retrieval models are trained to embed queries and documents with different prefixes (e.g. LFM2.5 uses `query: `/`document: `, ColBERT uses `[Q] `/`[D] `). For these models the model config carries the prompts and `forward` requires a `role`:
+
+```typescript
+const queryEmbedding = await model.forward('What is the weather?', 'query');
+const docEmbedding = await model.forward('It is sunny today.', 'document');
+```
+
+The matching prompt is prepended automatically. For models without prompts, omit the `role` argument.
 
 ## Example
 
@@ -112,6 +132,8 @@ function App() {
 | [distiluse-base-multilingual-cased-v2](https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2)   | 50+ languages |    126     |         512          | Multilingual DistilBERT with a 768→512 projection head. Recommended when broader language coverage matters more than the exact English quality of MiniLM/MPNet.                                                                                                                                                                                                                                                                  |
 | [paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) | 50+ languages |    126     |         384          | Multilingual MiniLM-L12 distilled from paraphrase-multilingual-mpnet-base-v2. Compact (≈118 M params) sentence encoder for cross-lingual semantic similarity and retrieval across 50+ languages.                                                                                                                                                                                                                                 |
 | [clip-vit-base-patch32-text](https://huggingface.co/openai/clip-vit-base-patch32)                                           |    English    |     74     |         512          | CLIP (Contrastive Language-Image Pre-Training) is a neural network trained on a variety of (image, text) pairs. CLIP allows to embed images and text into the same vector space. This allows to find similar images as well as to implement image search. This is the text encoder part of the CLIP model. To embed images checkout [clip-vit-base-patch32-image](../02-computer-vision/useImageEmbeddings.md#supported-models). |
+| [LFM2.5-Embedding-350M](https://huggingface.co/LiquidAI/LFM2.5-Embedding-350M)                                              |  Multilingual |    512     |         1024         | Dense bi-encoder from Liquid AI with CLS pooling. Trained with asymmetric `query: `/`document: ` prompts, so `forward` requires a `role`. On iOS it runs on the GPU via the MLX backend (physical device only); Android uses XNNPACK.                                                                                                                                                                                            |
+| [LFM2.5-ColBERT-350M](https://huggingface.co/LiquidAI/LFM2.5-ColBERT-350M)                                                  |  Multilingual |    512     |     128 (per token)  | Late-interaction (multi-vector) retriever from Liquid AI: a `Linear(1024→128)` head emits one normalized vector per token. `forward` returns an `EmbeddingResult`; score query/document pairs with MaxSim (see below). Uses `[Q] `/`[D] ` role prompts.                                                                                                                                                                          |
 
 **`Max Tokens`** - The maximum number of tokens that can be processed by the model. If the input text exceeds this limit, it will be truncated.
 
@@ -120,3 +142,53 @@ function App() {
 :::note
 For the supported models, the returned embedding vector is normalized, meaning that its length is equal to 1. This allows for easier comparison of vectors using cosine similarity, just calculate the dot product of two vectors to get the cosine similarity score.
 :::
+
+## Late interaction (multi-vector models)
+
+Multi-vector models such as LFM2.5-ColBERT do not pool the sequence into a single vector. Instead, `forward` returns an [`EmbeddingResult`](../../06-api-reference/interfaces/EmbeddingResult.md) holding one normalized vector per token. You score a query against a document with **MaxSim**: for every query-token vector, take its highest dot product against the document-token vectors, then sum those maxima.
+
+The library is a pure embedder — it gives you the per-token vectors and the model's punctuation `skipListIds`, but scoring is your concern (so it can run wherever you store the vectors). A reference `maxSim` implementation:
+
+```typescript
+import { models, useTextEmbeddings, EmbeddingResult } from 'react-native-executorch';
+
+const colbert = models.text_embedding.lfm2_5_colbert_350m();
+const skipListIds = colbert.skipListIds ?? [];
+
+const maxSim = ( // sums, over query tokens, the best dot product against any kept document token
+  query: EmbeddingResult,
+  doc: EmbeddingResult,
+  skip: number[] = [] // document token ids to ignore; empty by default
+) => {
+  const dim = query.embeddingDim; // stride used for both buffers — assumes doc.embeddingDim is the same
+  const skipped = new Set(skip);
+  let score = 0;
+  for (let qi = 0; qi < query.numTokens; qi++) {
+    const qOff = qi * dim; // offset of query token qi in the flat `vectors` buffer
+    let best = -Infinity; // stays -Infinity until a document token is scored
+    for (let di = 0; di < doc.numTokens; di++) {
+      if (skipped.has(doc.tokenIds[di])) continue; // only document tokens are filtered
+      const dOff = di * dim;
+      let dot = 0;
+      for (let k = 0; k < dim; k++) {
+        dot += query.vectors[qOff + k] * doc.vectors[dOff + k];
+      }
+      if (dot > best) best = dot;
+    }
+    if (best !== -Infinity) score += best; // no scorable document token: add nothing
+  }
+  return score;
+};
+
+function App() {
+  const model = useTextEmbeddings({ model: colbert });
+  // `forward` returns a promise: await it in an async handler (e.g. from an effect or event), not in the render body.
+  const scorePair = async () => {
+    const query = await model.forward('What is the weather?', 'query');
+    const doc = await model.forward('It is sunny today.', 'document');
+    return maxSim(query, doc, skipListIds);
+  };
+}
+```
+
+The `skipListIds` shipped on the model config are the punctuation token ids excluded from scoring (derived from the model's training config). Per-token vectors are L2-normalized by the graph, so the dot product equals cosine similarity.
diff --git a/docs/docs/04-typescript-api/01-natural-language-processing/TextEmbeddingsModule.md b/docs/docs/04-typescript-api/01-natural-language-processing/TextEmbeddingsModule.md
@@ -30,13 +30,20 @@ All methods of `TextEmbeddingsModule` are explained in details here: [`TextEmbed
 
 Use the static [`fromModelName`](../../06-api-reference/classes/TextEmbeddingsModule.md#frommodelname) factory method. It accepts a model config object (e.g. `ALL_MINILM_L6_V2`) containing:
 
-- [`modelSource`](../../06-api-reference/classes/TextEmbeddingsModule.md#modelsource) - Location of the used model.
-- [`tokenizerSource`](../../06-api-reference/classes/TextEmbeddingsModule.md#tokenizersource) - Location of the used tokenizer.
+- `modelName` - Unique name identifying the model.
+- `modelSource` - Location of the used model.
+- `tokenizerSource` - Location of the used tokenizer.
+- `prompts` _(optional)_ - Asymmetric `query`/`document` prompts the model is trained with. When present, `forward` requires a `role` and prepends the matching prompt.
+- `multiVector` _(optional)_ - When `true`, `forward` returns the per-token `EmbeddingResult` instead of a single pooled `Float32Array`.
+- `skipListIds` _(optional)_ - Token ids to exclude from late-interaction (MaxSim) scoring.
 
-And an optional `onDownloadProgress` callback. It returns a promise resolving to a `TextEmbeddingsModule` instance.
+And an optional `onDownloadProgress` callback (receiving a value between 0 and 1). It returns a promise resolving to a `TextEmbeddingsModule` instance.
 
 For more information on loading resources, take a look at [loading models](../../01-fundamentals/02-loading-models.md) page.
 
 ## Running the model
 
-To run the model, you can use the [`forward`](../../06-api-reference/classes/TextEmbeddingsModule.md#forward) method. It accepts one argument, which is the text you want to embed. The method returns a promise, which can resolve either to an error or an array of numbers representing the embedding.
+To run the model, use the [`forward`](../../06-api-reference/classes/TextEmbeddingsModule.md#forward) method. It accepts the text to embed and, for models with asymmetric prompts, a required `role` (`'query' | 'document'`). The method returns a promise resolving to:
+
+- a `Float32Array` — a single pooled vector — for standard models, or
+- an [`EmbeddingResult`](../../06-api-reference/interfaces/EmbeddingResult.md) with the per-token vectors for `multiVector` models.