From 50e80e1adce6bd93e88250ca3dae0b6bd03b5b2b Mon Sep 17 00:00:00 2001
From: Norbert Klockiewicz <Nklockiewicz12@gmail.com>
Date: Mon, 22 Jun 2026 12:44:04 +0200
Subject: [PATCH 01/14] feat: LFM2.5 text-embedding + ColBERT (MLX/XNNPACK)
 with prompts & MaxSim
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add the LFM2.5-Embedding-350M and LFM2.5-ColBERT-350M models, served from
HuggingFace (MLX on iOS, XNNPACK on Android / iOS simulator).

Text embeddings are unified into one runner and one hook: the native
TextEmbeddings model returns the raw [numTokens, embeddingDim] matrix
(numTokens === 1 for pooled models, the full sequence for multi-vector /
late-interaction models like ColBERT), plus the input token ids. The TS
layer reduces it — toVector() for the single-vector case, getTokenVectors()
and maxSim() for late interaction.

Models trained with asymmetric query/document prompts (LFM uses "query:" /
"document:", ColBERT uses "[Q] " / "[D] ") carry a "prompts" config; forward()
then requires a role argument ('query' | 'document') and auto-prepends the
matching prompt. The role is type-enforced: required for prompted models,
forbidden for plain ones.

Also: tokenizer post_processor is now applied for text embeddings so the
BOS special token is added (CLS-pooled models depend on it), and the
text-to-image Encoder reads the new EmbeddingResult.

Example app gains a semantic-search screen and a ColBERT late-interaction
search screen demonstrating MaxSim.

Authored with Claude.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 apps/text-embeddings/app/_layout.tsx          |   8 +
 .../app/clip-embeddings/index.tsx             |   3 +-
 apps/text-embeddings/app/colbert/index.tsx    | 289 ++++++++++++
 .../app/text-embeddings/index.tsx             | 439 ++++++++++--------
 .../common/rnexecutorch/TokenizerModule.cpp   |  19 +
 .../common/rnexecutorch/TokenizerModule.h     |   6 +
 .../host_objects/JsiConversions.h             |  30 ++
 .../rnexecutorch/models/embeddings/Types.h    |  23 +
 .../models/embeddings/text/TextEmbeddings.cpp |  22 +-
 .../models/embeddings/text/TextEmbeddings.h   |   8 +-
 .../models/text_to_image/Encoder.cpp          |   7 +-
 .../src/constants/modelRegistry.ts            |  58 +++
 .../src/constants/modelUrls.ts                |  15 +
 .../useTextEmbeddings.ts                      |  18 +-
 packages/react-native-executorch/src/index.ts |   1 +
 .../TextEmbeddingsModule.ts                   |  64 ++-
 .../src/types/textEmbeddings.ts               | 129 +++--
 .../src/utils/textEmbeddings.ts               |  74 +++
 18 files changed, 938 insertions(+), 275 deletions(-)
 create mode 100644 apps/text-embeddings/app/colbert/index.tsx
 create mode 100644 packages/react-native-executorch/common/rnexecutorch/models/embeddings/Types.h
 create mode 100644 packages/react-native-executorch/src/utils/textEmbeddings.ts
diff --git a/apps/text-embeddings/app/_layout.tsx b/apps/text-embeddings/app/_layout.tsx
index bb8e1deeb8..57acb26eb2 100644
--- a/apps/text-embeddings/app/_layout.tsx
+++ b/apps/text-embeddings/app/_layout.tsx
@@ -109,6 +109,14 @@ export default function _layout() {
             headerTitleStyle: { color: ColorPalette.primary },
           }}
         />
+        <Drawer.Screen
+          name="colbert/index"
+          options={{
+            drawerLabel: 'ColBERT search',
+            title: 'ColBERT search',
+            headerTitleStyle: { color: ColorPalette.primary },
+          }}
+        />
       </Drawer>
     </GeneratingContext>
   );
diff --git a/apps/text-embeddings/app/clip-embeddings/index.tsx b/apps/text-embeddings/app/clip-embeddings/index.tsx
index 02a8a9c656..e0232d3440 100644
--- a/apps/text-embeddings/app/clip-embeddings/index.tsx
+++ b/apps/text-embeddings/app/clip-embeddings/index.tsx
@@ -16,6 +16,7 @@ import {
   models,
   useTextEmbeddings,
   useImageEmbeddings,
+  toVector,
   ImageEmbeddingsProps,
 } from 'react-native-executorch';
 
@@ -101,7 +102,7 @@ function ClipEmbeddingsScreen() {
       const txtStart = Date.now();
       const scored: { label: string; similarity: number }[] = [];
       for (const label of labels) {
-        const textEmbedding = await textModel.forward(label);
+        const textEmbedding = toVector(await textModel.forward(label));
         scored.push({
           label,
           similarity: dotProduct(imageEmbedding, textEmbedding),
diff --git a/apps/text-embeddings/app/colbert/index.tsx b/apps/text-embeddings/app/colbert/index.tsx
new file mode 100644
index 0000000000..d686168f43
--- /dev/null
+++ b/apps/text-embeddings/app/colbert/index.tsx
@@ -0,0 +1,289 @@
+import { useEffect, useState } from 'react';
+import {
+  StyleSheet,
+  Text,
+  TextInput,
+  TouchableOpacity,
+  View,
+  SafeAreaView,
+  ScrollView,
+  KeyboardAvoidingView,
+  Platform,
+} from 'react-native';
+import { Ionicons } from '@expo/vector-icons';
+import { useIsFocused } from 'expo-router';
+import {
+  models,
+  useTextEmbeddings,
+  maxSim,
+  EmbeddingResult,
+} from 'react-native-executorch';
+import ColorPalette from '../../colors';
+import ErrorBanner from '../../components/ErrorBanner';
+
+const colbertModel = models.text_embedding.lfm2_5_colbert_350m();
+
+// The library auto-applies the model's [Q]/[D] prompts via forward(text, role).
+// Late-interaction MaxSim is a shipped util; the document skiplist (punctuation
+// token ids excluded from scoring) is the consumer's choice — these are the
+// LFM2.5-ColBERT skiplist ids.
+const SKIPLIST = [
+  510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524,
+  535, 536, 537, 538, 539, 540, 541, 568, 569, 570, 571, 572, 573, 600, 601,
+  602, 603,
+];
+
+const CORPUS: string[] = [
+  'The forecast says heavy showers this afternoon.',
+  "It's so sunny outside today!",
+  'The home team scored in the final minute to win the match.',
+  'Fans packed the stadium for the championship game.',
+  'Simmer the tomatoes with garlic before adding the pasta.',
+  'He whisked the eggs and folded in the melted chocolate.',
+  'The new phone has a faster chip and a brighter screen.',
+  'The flight to Tokyo was delayed by three hours.',
+  'We hiked along the coast and camped near the cliffs.',
+];
+
+const EXAMPLE_QUERIES: string[] = [
+  "What's the weather like?",
+  'Who won the match?',
+  'How do I cook dinner?',
+  'Tell me about the latest technology',
+];
+
+type Ranked = { sentence: string; score: number };
+
+export default function ColbertScreenWrapper() {
+  return useIsFocused() ? <ColbertScreen /> : null;
+}
+
+function ColbertScreen() {
+  const model = useTextEmbeddings({ model: colbertModel });
+  const [error, setError] = useState<string | null>(null);
+  const [query, setQuery] = useState('');
+  const [docEncs, setDocEncs] = useState<
+    { sentence: string; enc: EmbeddingResult }[]
+  >([]);
+  const [results, setResults] = useState<Ranked[]>([]);
+  const [indexing, setIndexing] = useState(false);
+  const [encodeTime, setEncodeTime] = useState<number | null>(null);
+
+  useEffect(
+    () => {
+      let cancelled = false;
+      const indexCorpus = async () => {
+        if (!model.isReady) return;
+        setIndexing(true);
+        setResults([]);
+        try {
+          const encs = [];
+          for (const sentence of CORPUS) {
+            const enc = await model.forward(sentence, 'document');
+            if (cancelled) return;
+            encs.push({ sentence, enc });
+          }
+          setDocEncs(encs);
+        } catch (e) {
+          setError(e instanceof Error ? e.message : String(e));
+        } finally {
+          if (!cancelled) setIndexing(false);
+        }
+      };
+      indexCorpus();
+      return () => {
+        cancelled = true;
+      };
+    },
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+    [model.isReady]
+  );
+
+  const runSearch = async (queryText: string = query) => {
+    const q = queryText.trim();
+    if (!model.isReady || !q || docEncs.length === 0) return;
+    setQuery(queryText);
+    try {
+      const start = Date.now();
+      const qEnc = await model.forward(q, 'query');
+      setEncodeTime(Date.now() - start);
+      const ranked = docEncs
+        .map(({ sentence, enc }) => ({
+          sentence,
+          score: maxSim(qEnc, enc, SKIPLIST),
+        }))
+        .sort((a, b) => b.score - a.score);
+      setResults(ranked);
+    } catch (e) {
+      setError(e instanceof Error ? e.message : String(e));
+    }
+  };
+
+  const ready = model.isReady && !indexing && docEncs.length > 0;
+  const canSearch = ready && !!query.trim();
+
+  const statusText = model.error
+    ? `Error: ${model.error}`
+    : !model.isReady
+      ? `Loading model ${(model.downloadProgress * 100).toFixed(0)}%`
+      : indexing
+        ? 'Indexing corpus…'
+        : 'Ready';
+
+  return (
+    <SafeAreaView style={styles.container}>
+      <KeyboardAvoidingView
+        style={styles.flex}
+        behavior={Platform.OS === 'ios' ? 'padding' : undefined}
+      >
+        <ScrollView contentContainerStyle={styles.scroll}>
+          <Text style={styles.heading}>ColBERT Late-Interaction Search</Text>
+          <Text style={styles.status}>{statusText}</Text>
+          <ErrorBanner message={error} onDismiss={() => setError(null)} />
+
+          <View style={styles.card}>
+            <Text style={styles.sectionTitle}>
+              Search the corpus ({CORPUS.length} sentences)
+            </Text>
+            <Text style={styles.hint}>
+              Per-token vectors scored with MaxSim. Tap an example or type a
+              query.
+            </Text>
+            <View style={styles.chipRow}>
+              {EXAMPLE_QUERIES.map((q) => (
+                <TouchableOpacity
+                  key={q}
+                  style={[styles.chip, !ready && styles.chipDisabled]}
+                  disabled={!ready}
+                  onPress={() => runSearch(q)}
+                >
+                  <Text style={styles.chipText}>{q}</Text>
+                </TouchableOpacity>
+              ))}
+            </View>
+            <TextInput
+              placeholder="Type a search query..."
+              placeholderTextColor="#94A3B8"
+              style={styles.input}
+              value={query}
+              onChangeText={setQuery}
+              onSubmitEditing={() => runSearch()}
+              returnKeyType="search"
+            />
+            <TouchableOpacity
+              onPress={() => runSearch()}
+              style={[styles.button, !canSearch && styles.buttonDisabled]}
+              disabled={!canSearch}
+            >
+              <Ionicons
+                name="search"
+                size={16}
+                color={!canSearch ? 'gray' : 'white'}
+              />
+              <Text style={[styles.buttonText, !canSearch && styles.buttonTextDisabled]}>
+                {indexing ? 'Indexing…' : 'Search'}
+              </Text>
+            </TouchableOpacity>
+            {encodeTime !== null && (
+              <Text style={styles.stats}>Query encoded in {encodeTime} ms</Text>
+            )}
+          </View>
+
+          {results.length > 0 && (
+            <View style={styles.card}>
+              <Text style={styles.sectionTitle}>Results</Text>
+              {results.map((r, i) => (
+                <View key={i} style={styles.resultRow}>
+                  <View style={styles.resultHeader}>
+                    <Text style={styles.resultText}>{r.sentence}</Text>
+                    <Text style={styles.resultScore}>{r.score.toFixed(2)}</Text>
+                  </View>
+                  <View style={styles.barTrack}>
+                    <View
+                      style={[
+                        styles.barFill,
+                        {
+                          width: `${Math.round(
+                            (results[0].score > 0 ? r.score / results[0].score : 0) * 100
+                          )}%`,
+                        },
+                        i === 0 && styles.barFillTop,
+                      ]}
+                    />
+                  </View>
+                </View>
+              ))}
+            </View>
+          )}
+        </ScrollView>
+      </KeyboardAvoidingView>
+    </SafeAreaView>
+  );
+}
+
+const styles = StyleSheet.create({
+  container: { flex: 1, backgroundColor: '#F8FAFC' },
+  flex: { flex: 1 },
+  scroll: { padding: 20 },
+  heading: { fontSize: 22, fontWeight: '500', marginBottom: 8, color: '#0F172A' },
+  status: { fontSize: 14, color: '#64748B', marginBottom: 12 },
+  card: {
+    backgroundColor: '#fff',
+    padding: 16,
+    borderRadius: 16,
+    borderColor: '#E2E8F0',
+    borderWidth: 2,
+    marginBottom: 20,
+  },
+  sectionTitle: { fontSize: 16, fontWeight: '500', marginBottom: 8, color: '#1E293B' },
+  hint: { fontSize: 13, color: '#64748B', marginBottom: 12, lineHeight: 18 },
+  chipRow: { flexDirection: 'row', flexWrap: 'wrap', gap: 8, marginBottom: 12 },
+  chip: {
+    backgroundColor: '#EEF2FF',
+    borderColor: '#C7D2FE',
+    borderWidth: 1,
+    borderRadius: 16,
+    paddingHorizontal: 12,
+    paddingVertical: 6,
+  },
+  chipDisabled: { opacity: 0.4 },
+  chipText: { fontSize: 13, color: 'navy' },
+  input: {
+    backgroundColor: '#F1F5F9',
+    borderRadius: 10,
+    padding: 10,
+    marginBottom: 10,
+    fontSize: 16,
+    color: '#0F172A',
+    minHeight: 40,
+  },
+  button: {
+    backgroundColor: 'navy',
+    borderRadius: 10,
+    paddingVertical: 12,
+    flexDirection: 'row',
+    alignItems: 'center',
+    justifyContent: 'center',
+  },
+  buttonDisabled: { backgroundColor: '#f0f0f0' },
+  buttonText: { color: '#fff', fontWeight: '500', marginLeft: 6 },
+  buttonTextDisabled: { color: 'gray' },
+  stats: { fontSize: 13, color: '#64748B', marginTop: 8, textAlign: 'center' },
+  resultRow: { marginBottom: 14 },
+  resultHeader: {
+    flexDirection: 'row',
+    justifyContent: 'space-between',
+    marginBottom: 6,
+    gap: 8,
+  },
+  resultText: { flex: 1, fontSize: 14, color: '#334155' },
+  resultScore: {
+    fontSize: 14,
+    fontWeight: '600',
+    color: '#0F172A',
+    fontVariant: ['tabular-nums'],
+  },
+  barTrack: { height: 8, borderRadius: 4, backgroundColor: '#E2E8F0', overflow: 'hidden' },
+  barFill: { height: '100%', borderRadius: 4, backgroundColor: '#94A3B8' },
+  barFillTop: { backgroundColor: 'navy' },
+});
diff --git a/apps/text-embeddings/app/text-embeddings/index.tsx b/apps/text-embeddings/app/text-embeddings/index.tsx
index 88e39ce063..470094da02 100644
--- a/apps/text-embeddings/app/text-embeddings/index.tsx
+++ b/apps/text-embeddings/app/text-embeddings/index.tsx
@@ -15,10 +15,13 @@ import { ModelPicker } from '../../components/ModelPicker';
 import {
   models,
   useTextEmbeddings,
+  toVector,
   TextEmbeddingsProps,
 } from 'react-native-executorch';
 const textEmbedding = models.text_embedding;
 
+// Single-vector (pooled) models: forward() returns the raw result; toVector()
+// gives the single embedding. The multi-vector ColBERT model has its own screen.
 type TextEmbeddingModel = TextEmbeddingsProps['model'];
 
 const MODELS: { label: string; value: TextEmbeddingModel }[] = [
@@ -43,6 +46,42 @@ const MODELS: { label: string; value: TextEmbeddingModel }[] = [
     label: 'Multilingual Paraphrase',
     value: textEmbedding.paraphrase_multilingual_minilm_l12_v2(),
   },
+  {
+    label: 'LFM2.5 Embedding XNNPACK',
+    value: textEmbedding.lfm2_5_embedding_350m({ backend: 'xnnpack' }),
+  },
+  {
+    label: 'LFM2.5 Embedding MLX',
+    value: textEmbedding.lfm2_5_embedding_350m({ backend: 'mlx' }),
+  },
+];
+
+// A multi-topic corpus so semantic ranking is visible: a weather query should
+// float the weather lines to the top and push sports/cooking/tech down, even
+// with no shared keywords.
+const CORPUS: string[] = [
+  'The forecast says heavy showers this afternoon.',
+  "It's so sunny outside today!",
+  'A thick fog rolled in over the harbor at dawn.',
+  'The home team scored in the final minute to win the match.',
+  'She sprinted the last lap and broke the national record.',
+  'Fans packed the stadium for the championship game.',
+  'Simmer the tomatoes with garlic before adding the pasta.',
+  'He whisked the eggs and folded in the melted chocolate.',
+  'The new phone has a faster chip and a brighter screen.',
+  'Our servers crashed under the sudden spike in traffic.',
+  'The flight to Tokyo was delayed by three hours.',
+  'We hiked along the coast and camped near the cliffs.',
+];
+
+// Tap-to-run example queries. Natural-language questions — how these models
+// are trained to be queried — give the cleanest separation.
+const EXAMPLE_QUERIES: string[] = [
+  "What's the weather like?",
+  'Who won the match?',
+  'Tell me about the latest technology',
+  'How do I cook dinner?',
+  'Where did they travel?',
 ];
 import { useIsFocused } from 'expo-router';
 import { dotProduct } from '../../utils/math';
@@ -54,6 +93,8 @@ export default function TextEmbeddingsScreenWrapper() {
   return isFocused ? <TextEmbeddingsScreen /> : null;
 }
 
+type RankedResult = { sentence: string; similarity: number };
+
 function TextEmbeddingsScreen() {
   const [selectedModel, setSelectedModel] = useState<TextEmbeddingModel>(
     textEmbedding.all_minilm_l6_v2()
@@ -61,88 +102,70 @@ function TextEmbeddingsScreen() {
   const model = useTextEmbeddings({ model: selectedModel });
   const [error, setError] = useState<string | null>(null);
 
-  const [inputSentence, setInputSentence] = useState('');
-  const [sentencesWithEmbeddings, setSentencesWithEmbeddings] = useState<
+  const [query, setQuery] = useState('');
+  const [corpusEmbeddings, setCorpusEmbeddings] = useState<
     { sentence: string; embedding: Float32Array }[]
   >([]);
-  const [topMatches, setTopMatches] = useState<
-    { sentence: string; similarity: number }[]
-  >([]);
+  const [results, setResults] = useState<RankedResult[]>([]);
   const [embeddingTime, setEmbeddingTime] = useState<number | null>(null);
+  const [indexing, setIndexing] = useState(false);
 
+  // Embed the whole corpus once the model is ready (re-runs on model change so
+  // prompts / weights match the active model).
   useEffect(
     () => {
-      const computeEmbeddings = async () => {
+      let cancelled = false;
+      const indexCorpus = async () => {
         if (!model.isReady) return;
-
-        const sentences = [
-          'The weather is lovely today.',
-          "It's so sunny outside!",
-          'He drove to the stadium.',
-        ];
-
+        setIndexing(true);
+        setResults([]);
         try {
-          const embeddings = [];
-          for (const sentence of sentences) {
-            const embedding = await model.forward(sentence);
-            embeddings.push({ sentence, embedding });
+          const embedded = [];
+          for (const sentence of CORPUS) {
+            // forward(_, 'document') auto-applies the model's document prompt
+            // (a no-op for models without one).
+            const embedding = toVector(
+              await model.forward(sentence, 'document')
+            );
+            if (cancelled) return;
+            embedded.push({ sentence, embedding });
           }
-
-          setSentencesWithEmbeddings(embeddings);
-        } catch (e) {
-          setError(e instanceof Error ? e.message : String(e));
+          setCorpusEmbeddings(embedded);
+        } catch {
+          // A transient "Model not loaded" can fire while the hook swaps
+          // models; the effect re-runs once the new model is ready.
+        } finally {
+          if (!cancelled) setIndexing(false);
         }
       };
-
-      computeEmbeddings();
+      indexCorpus();
+      return () => {
+        cancelled = true;
+      };
     },
+    // Re-index when the model becomes ready OR the selected model changes, so
+    // the corpus is embedded by the active model. The "Model not loaded" race
+    // is handled by the isReady gate plus clearing the corpus on switch;
+    // switching sets isReady false→true so the re-run sees the new model.
     // eslint-disable-next-line react-hooks/exhaustive-deps
-    [model.isReady]
+    [model.isReady, selectedModel]
   );
 
-  const checkSimilarities = async () => {
-    if (!model.isReady || !inputSentence.trim()) return;
-
+  const runSearch = async (queryText: string = query) => {
+    const q = queryText.trim();
+    if (!model.isReady || !q || corpusEmbeddings.length === 0) return;
+    setQuery(queryText);
     try {
       const start = Date.now();
-      const inputEmbedding = await model.forward(inputSentence);
+      const queryEmbedding = toVector(await model.forward(q, 'query'));
       setEmbeddingTime(Date.now() - start);
-      const matches = sentencesWithEmbeddings.map(
-        ({ sentence, embedding }) => ({
+      const ranked = corpusEmbeddings
+        .map(({ sentence, embedding }) => ({
           sentence,
-          similarity: dotProduct(inputEmbedding, embedding),
-        })
-      );
-      matches.sort((a, b) => b.similarity - a.similarity);
-      setTopMatches(matches.slice(0, 3));
-    } catch (e) {
-      setError(e instanceof Error ? e.message : String(e));
-    }
-  };
-
-  const addToSentences = async () => {
-    if (!model.isReady || !inputSentence.trim()) return;
-
-    try {
-      const start = Date.now();
-      const embedding = await model.forward(inputSentence);
-      setEmbeddingTime(Date.now() - start);
-      setSentencesWithEmbeddings((prev) => [
-        ...prev,
-        { sentence: inputSentence, embedding },
-      ]);
-    } catch (e) {
-      setError(e instanceof Error ? e.message : String(e));
-    }
-
-    setInputSentence('');
-    setTopMatches([]);
-  };
-
-  const clearList = async () => {
-    if (!model.isReady) return;
-    try {
-      setSentencesWithEmbeddings([]);
+          similarity: dotProduct(queryEmbedding, embedding),
+        }))
+        .sort((a, b) => b.similarity - a.similarity);
+      setResults(ranked);
     } catch (e) {
       setError(e instanceof Error ? e.message : String(e));
     }
@@ -158,6 +181,11 @@ function TextEmbeddingsScreen() {
     return model.isGenerating ? 'Generating...' : 'Model is ready';
   };
 
+  // Chips/examples just need a ready, indexed model; the Search button also
+  // needs a non-empty typed query.
+  const ready = model.isReady && !indexing && corpusEmbeddings.length > 0;
+  const canSearch = ready && !!query.trim();
+
   return (
     <SafeAreaView style={styles.container}>
       <KeyboardAvoidingView
@@ -165,133 +193,131 @@ function TextEmbeddingsScreen() {
         behavior={Platform.OS === 'ios' ? 'padding' : undefined}
       >
         <ScrollView contentContainerStyle={styles.scrollContainer}>
-          <Text style={styles.heading}>Text Embeddings Playground</Text>
+          <Text style={styles.heading}>Semantic Search</Text>
           <Text style={styles.sectionTitle}>{getModelStatusText()}</Text>
           <ModelPicker
             models={MODELS}
             selectedModel={selectedModel}
             onSelect={(m) => {
               setSelectedModel(m);
-              setSentencesWithEmbeddings([]);
-              setTopMatches([]);
+              setCorpusEmbeddings([]);
+              setResults([]);
+              setQuery('');
             }}
           />
           <ErrorBanner message={error} onDismiss={() => setError(null)} />
 
           <View style={styles.card}>
-            <Text style={styles.sectionTitle}>List of Existing Sentences</Text>
-            {sentencesWithEmbeddings.map((item, index) => (
-              <Text key={index} style={styles.sentenceText}>
-                - {item.sentence}
-              </Text>
-            ))}
-          </View>
-          <View style={styles.card}>
-            <Text style={styles.sectionTitle}>Try Your Sentence</Text>
+            <Text style={styles.sectionTitle}>
+              Search the corpus ({CORPUS.length} sentences)
+            </Text>
+            <Text style={styles.hint}>
+              Ranks every sentence by meaning. Ask a full question — tap an
+              example or type your own.
+            </Text>
+            <View style={styles.chipRow}>
+              {EXAMPLE_QUERIES.map((q) => (
+                <TouchableOpacity
+                  key={q}
+                  style={[styles.chip, !ready && styles.chipDisabled]}
+                  disabled={!ready}
+                  onPress={() => runSearch(q)}
+                >
+                  <Text style={styles.chipText}>{q}</Text>
+                </TouchableOpacity>
+              ))}
+            </View>
             <TextInput
-              placeholder="Type your sentence here..."
+              placeholder="Type a search query..."
               placeholderTextColor="#94A3B8"
               style={styles.input}
-              value={inputSentence}
-              onChangeText={setInputSentence}
-              multiline
+              value={query}
+              onChangeText={setQuery}
+              onSubmitEditing={() => runSearch()}
+              returnKeyType="search"
             />
-            <View style={styles.buttonContainer}>
-              <TouchableOpacity
-                onPress={checkSimilarities}
+            <TouchableOpacity
+              onPress={() => runSearch()}
+              style={[
+                styles.buttonPrimary,
+                !canSearch && styles.buttonDisabled,
+              ]}
+              disabled={!canSearch}
+            >
+              <Ionicons
+                name="search"
+                size={16}
+                color={!canSearch ? 'gray' : 'white'}
+              />
+              <Text
                 style={[
-                  styles.buttonPrimary,
-                  !inputSentence && styles.buttonDisabled,
+                  styles.buttonText,
+                  !canSearch && styles.buttonTextDisabled,
                 ]}
-                disabled={!inputSentence}
               >
-                <Ionicons
-                  name="search"
-                  size={16}
-                  color={!inputSentence ? 'gray' : 'white'}
-                />
-                <Text
-                  style={[
-                    styles.buttonText,
-                    !inputSentence && styles.buttonTextDisabled,
-                  ]}
-                >
-                  Find Similar
-                </Text>
-              </TouchableOpacity>
-              <View style={styles.buttonGroup}>
-                <TouchableOpacity
-                  onPress={addToSentences}
-                  style={[
-                    styles.buttonSecondary,
-                    !inputSentence && styles.buttonDisabled,
-                  ]}
-                  disabled={!inputSentence}
-                >
-                  <Ionicons
-                    name="add-circle-outline"
-                    size={16}
-                    color={!inputSentence ? 'gray' : 'navy'}
-                  />
-                  <Text
-                    style={[
-                      styles.buttonTextOutline,
-                      !inputSentence && styles.buttonTextDisabled,
-                    ]}
-                  >
-                    Add to List
-                  </Text>
-                </TouchableOpacity>
-                <TouchableOpacity
-                  onPress={clearList}
-                  style={[
-                    styles.buttonSecondary,
-                    sentencesWithEmbeddings.length === 0 &&
-                      styles.buttonDisabled,
-                  ]}
-                  disabled={sentencesWithEmbeddings.length === 0}
-                >
-                  <Ionicons
-                    name="close-outline"
-                    size={16}
-                    color={
-                      sentencesWithEmbeddings.length === 0 ? 'gray' : 'navy'
-                    }
-                  />
-                  <Text
-                    style={[
-                      styles.buttonTextOutline,
-                      sentencesWithEmbeddings.length === 0 &&
-                        styles.buttonTextDisabled,
-                    ]}
-                  >
-                    Clear List
-                  </Text>
-                </TouchableOpacity>
-              </View>
-            </View>
+                {indexing ? 'Indexing corpus…' : 'Search'}
+              </Text>
+            </TouchableOpacity>
             {embeddingTime !== null && (
               <Text style={styles.statsText}>
-                Embedding time: {embeddingTime} ms
+                Query embedded in {embeddingTime} ms
               </Text>
             )}
-            {topMatches.length > 0 && (
-              <View style={styles.topMatchesContainer}>
-                <Text style={styles.sectionTitle}>Top Matches</Text>
-                {topMatches.map((item, index) => (
-                  <Text key={index} style={styles.sentenceText}>
-                    {item.sentence} ({item.similarity.toFixed(2)})
-                  </Text>
-                ))}
-              </View>
-            )}
           </View>
+
+          {results.length > 0 && (
+            <View style={styles.card}>
+              <Text style={styles.sectionTitle}>Results</Text>
+              {results.map((item, index) => (
+                <ResultRow
+                  key={index}
+                  sentence={item.sentence}
+                  similarity={item.similarity}
+                  best={results[0].similarity}
+                  rank={index}
+                />
+              ))}
+            </View>
+          )}
         </ScrollView>
       </KeyboardAvoidingView>
     </SafeAreaView>
   );
 }
 
+// One ranked result with a similarity bar. The bar is scaled relative to the
+// top hit so the ranking is visually obvious; the raw score is shown too.
+function ResultRow({
+  sentence,
+  similarity,
+  best,
+  rank,
+}: {
+  sentence: string;
+  similarity: number;
+  best: number;
+  rank: number;
+}) {
+  const fraction = best > 0 ? Math.max(0, similarity / best) : 0;
+  return (
+    <View style={styles.resultRow}>
+      <View style={styles.resultHeader}>
+        <Text style={styles.resultText}>{sentence}</Text>
+        <Text style={styles.resultScore}>{similarity.toFixed(2)}</Text>
+      </View>
+      <View style={styles.barTrack}>
+        <View
+          style={[
+            styles.barFill,
+            { width: `${Math.round(fraction * 100)}%` },
+            rank === 0 && styles.barFillTop,
+          ]}
+        />
+      </View>
+    </View>
+  );
+}
+
 const styles = StyleSheet.create({
   container: {
     flex: 1,
@@ -323,11 +349,68 @@ const styles = StyleSheet.create({
     marginBottom: 12,
     color: '#1E293B',
   },
-  sentenceText: {
-    fontSize: 14,
+  hint: {
+    fontSize: 13,
+    color: '#64748B',
+    marginBottom: 12,
+    lineHeight: 18,
+  },
+  chipRow: {
+    flexDirection: 'row',
+    flexWrap: 'wrap',
+    gap: 8,
+    marginBottom: 12,
+  },
+  chip: {
+    backgroundColor: '#EEF2FF',
+    borderColor: '#C7D2FE',
+    borderWidth: 1,
+    borderRadius: 16,
+    paddingHorizontal: 12,
+    paddingVertical: 6,
+  },
+  chipDisabled: {
+    opacity: 0.4,
+  },
+  chipText: {
+    fontSize: 13,
+    color: 'navy',
+  },
+  resultRow: {
+    marginBottom: 14,
+  },
+  resultHeader: {
+    flexDirection: 'row',
+    justifyContent: 'space-between',
+    alignItems: 'flex-start',
     marginBottom: 6,
+    gap: 8,
+  },
+  resultText: {
+    flex: 1,
+    fontSize: 14,
     color: '#334155',
   },
+  resultScore: {
+    fontSize: 14,
+    fontWeight: '600',
+    color: '#0F172A',
+    fontVariant: ['tabular-nums'],
+  },
+  barTrack: {
+    height: 8,
+    borderRadius: 4,
+    backgroundColor: '#E2E8F0',
+    overflow: 'hidden',
+  },
+  barFill: {
+    height: '100%',
+    borderRadius: 4,
+    backgroundColor: '#94A3B8',
+  },
+  barFillTop: {
+    backgroundColor: 'navy',
+  },
   input: {
     backgroundColor: '#F1F5F9',
     borderRadius: 10,
@@ -338,17 +421,8 @@ const styles = StyleSheet.create({
     minHeight: 40,
     textAlignVertical: 'top',
   },
-  buttonContainer: {
-    width: '100%',
-    gap: 10,
-  },
-  buttonGroup: {
-    flexDirection: 'row',
-    justifyContent: 'space-between',
-    gap: 10,
-  },
   buttonPrimary: {
-    flex: 1,
+    width: '100%',
     backgroundColor: 'navy',
     padding: 12,
     borderRadius: 10,
@@ -356,17 +430,6 @@ const styles = StyleSheet.create({
     alignItems: 'center',
     justifyContent: 'center',
   },
-  buttonSecondary: {
-    flex: 1,
-    backgroundColor: 'transparent',
-    borderWidth: 2,
-    borderColor: 'navy',
-    padding: 12,
-    borderRadius: 10,
-    flexDirection: 'row',
-    alignItems: 'center',
-    justifyContent: 'center',
-  },
   buttonDisabled: {
     backgroundColor: '#f0f0f0',
     borderColor: '#d3d3d3',
@@ -376,17 +439,9 @@ const styles = StyleSheet.create({
     textAlign: 'center',
     fontWeight: '500',
   },
-  buttonTextOutline: {
-    color: 'navy',
-    textAlign: 'center',
-    fontWeight: '500',
-  },
   buttonTextDisabled: {
     color: 'gray',
   },
-  topMatchesContainer: {
-    marginTop: 20,
-  },
   statsText: {
     fontSize: 13,
     color: '#64748B',
diff --git a/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.cpp b/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.cpp
index 76e0fb90c7..3315baa2dd 100644
--- a/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.cpp
@@ -46,6 +46,25 @@ std::vector<uint64_t> TokenizerModule::encode(std::string s) const {
   return encodeResult.get();
 }
 
+std::vector<uint64_t>
+TokenizerModule::encodeWithSpecialTokens(std::string s) const {
+  if (!tokenizer) {
+    THROW_NOT_LOADED_ERROR();
+  }
+
+  // Non-zero bos/eos acts as an "add special tokens" switch: HFTokenizer then
+  // runs the tokenizer.json post_processor with add_special_token=true rather
+  // than treating the values as literal counts (when a post_processor exists).
+  auto encoded = tokenizer->encode(s, /*bos=*/1, /*eos=*/1);
+  if (encoded.ok()) {
+    return encoded.get();
+  }
+  const auto errorCode = static_cast<int32_t>(encoded.error());
+  throw RnExecutorchError(
+      RnExecutorchErrorCode::TokenizerError,
+      "Unexpected issue occurred while encoding: " + std::to_string(errorCode));
+}
+
 std::string TokenizerModule::decode(std::vector<uint64_t> vec,
                                     bool skipSpecialTokens) const {
   if (!tokenizer) {
diff --git a/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.h b/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.h
index 3c90b25557..a511340af6 100644
--- a/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.h
+++ b/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.h
@@ -13,6 +13,12 @@ class TokenizerModule {
                            std::shared_ptr<react::CallInvoker> callInvoker);
   [[nodiscard("Registered non-void function")]] std::vector<uint64_t>
   encode(std::string s) const;
+  // Like encode(), but also applies the tokenizer.json post_processor (e.g. a
+  // TemplateProcessing step that prepends BOS). Needed by models whose pooling
+  // depends on the BOS/CLS token (e.g. CLS-pooled text embeddings).
+  // Not bound to JS; encode() keeps its single-arg signature for the JS API.
+  [[nodiscard("Registered non-void function")]] std::vector<uint64_t>
+  encodeWithSpecialTokens(std::string s) const;
   [[nodiscard("Registered non-void function")]] std::string
   decode(std::vector<uint64_t> vec, bool skipSpecialTokens) const;
   [[nodiscard("Registered non-void function")]] std::string
diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h
index e4209b2f79..8e211f0028 100644
--- a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h
+++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h
@@ -17,6 +17,7 @@
 #include <rnexecutorch/jsi/OwningArrayBuffer.h>
 
 #include <rnexecutorch/metaprogramming/TypeConcepts.h>
+#include <rnexecutorch/models/embeddings/Types.h>
 #include <rnexecutorch/models/instance_segmentation/Types.h>
 #include <rnexecutorch/models/llm/Types.h>
 #include <rnexecutorch/models/object_detection/Constants.h>
@@ -707,6 +708,35 @@ getJsiValue(const models::style_transfer::PixelDataResult &result,
   return obj;
 }
 
+// Text embedding output -> JS { dataPtr: Float32Array over the [numTokens,
+// embeddingDim] fp32 buffer, numTokens, embeddingDim, tokenIds: number[] }.
+// Pooled models give numTokens == 1; multi-vector give the full sequence.
+inline jsi::Value
+getJsiValue(const models::embeddings::EmbeddingResult &result,
+            jsi::Runtime &runtime) {
+  jsi::Object obj(runtime);
+
+  auto arrayBuffer = jsi::ArrayBuffer(runtime, result.dataPtr);
+  auto float32ArrayCtor =
+      runtime.global().getPropertyAsFunction(runtime, "Float32Array");
+  auto float32Array =
+      float32ArrayCtor.callAsConstructor(runtime, arrayBuffer)
+          .getObject(runtime);
+  obj.setProperty(runtime, "dataPtr", float32Array);
+
+  obj.setProperty(runtime, "numTokens", jsi::Value(result.numTokens));
+  obj.setProperty(runtime, "embeddingDim", jsi::Value(result.embeddingDim));
+
+  auto idsArray = jsi::Array(runtime, result.tokenIds.size());
+  for (size_t i = 0; i < result.tokenIds.size(); ++i) {
+    idsArray.setValueAtIndex(
+        runtime, i, jsi::Value(static_cast<double>(result.tokenIds[i])));
+  }
+  obj.setProperty(runtime, "tokenIds", idsArray);
+
+  return obj;
+}
+
 inline jsi::Value getJsiValue(
     const rnexecutorch::models::semantic_segmentation::SegmentationResult
         &result,
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/Types.h b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/Types.h
new file mode 100644
index 0000000000..f2de1e899a
--- /dev/null
+++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/Types.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include <cstdint>
+#include <memory>
+#include <rnexecutorch/jsi/OwningArrayBuffer.h>
+#include <vector>
+
+namespace rnexecutorch::models::embeddings {
+
+// Text embedding output as a [numTokens, embeddingDim] fp32 matrix. Pooled
+// single-vector models output numTokens == 1 (the exported graph pools + L2-
+// normalizes); multi-vector (late-interaction / ColBERT) models output
+// numTokens == sequence length. The TS layer reduces to a single vector or
+// keeps the per-token matrix based on the model's config. `tokenIds` are the
+// input ids (used JS-side for late-interaction skiplist masking).
+struct EmbeddingResult {
+  std::shared_ptr<OwningArrayBuffer> dataPtr; // flat fp32 matrix, row-major
+  int32_t numTokens = 0;    // zero-initialized so a default-constructed
+  int32_t embeddingDim = 0; // result never carries indeterminate dimensions
+  std::vector<int64_t> tokenIds;
+};
+
+} // namespace rnexecutorch::models::embeddings
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp
index ba2c3243b2..d673f0ac87 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp
@@ -16,7 +16,10 @@ TextEmbeddings::TextEmbeddings(const std::string &modelSource,
           std::make_unique<TokenizerModule>(tokenizerSource, callInvoker)) {}
 
 TokenIdsWithAttentionMask TextEmbeddings::preprocess(const std::string &input) {
-  auto inputIds = tokenizer->encode(input);
+  // Apply the tokenizer's post_processor so declared special tokens (e.g. a
+  // BOS prepended via TemplateProcessing) are added. CLS-pooled embedding
+  // models read position 0, so a missing BOS corrupts the pooled vector.
+  auto inputIds = tokenizer->encodeWithSpecialTokens(input);
   // Tokenizers-cpp return tokens as int32, but text embedding models require
   // int64 as input
   std::vector<int64_t> inputIds64;
@@ -40,8 +43,7 @@ void TextEmbeddings::unload() noexcept {
   BaseModel::unload();
 }
 
-std::shared_ptr<OwningArrayBuffer>
-TextEmbeddings::generate(const std::string input) {
+EmbeddingResult TextEmbeddings::generate(const std::string input) {
   std::scoped_lock lock(inference_mutex_);
   auto preprocessed = preprocess(input);
 
@@ -58,7 +60,19 @@ TextEmbeddings::generate(const std::string input) {
   auto forwardResult = BaseModel::forward({tokenIds, attnMask});
   CHECK_OK_OR_THROW_FORWARD_ERROR(forwardResult);
 
-  return BaseEmbeddings::postprocess(forwardResult);
+  // Output is [..., numTokens, embeddingDim]: numTokens == 1 for pooled
+  // models (a rank-1 [embeddingDim] output counts as one row), == sequence
+  // length for multi-vector ones. The TS layer reduces or keeps the matrix.
+  auto out = forwardResult->at(0).toTensor();
+  auto sizes = out.sizes();
+  const auto rank = sizes.size(); // guard: rank - 2 would wrap for rank < 2
+  EmbeddingResult result;
+  result.dataPtr = std::make_shared<OwningArrayBuffer>(out.const_data_ptr(),
+                                                       out.nbytes());
+  result.numTokens = rank >= 2 ? static_cast<int32_t>(sizes[rank - 2]) : 1;
+  result.embeddingDim = rank >= 1 ? static_cast<int32_t>(sizes[rank - 1]) : 1;
+  result.tokenIds = std::move(preprocessed.inputIds);
+  return result;
 }
 
 } // namespace rnexecutorch::models::embeddings
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h
index 93d0988c04..cb6059b96e 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h
+++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h
@@ -4,6 +4,7 @@
 #include <mutex>
 #include <rnexecutorch/TokenizerModule.h>
 #include <rnexecutorch/models/embeddings/BaseEmbeddings.h>
+#include <rnexecutorch/models/embeddings/Types.h>
 
 namespace rnexecutorch {
 namespace models::embeddings {
@@ -18,8 +19,11 @@ class TextEmbeddings final : public BaseEmbeddings {
   TextEmbeddings(const std::string &modelSource,
                  const std::string &tokenizerSource,
                  std::shared_ptr<react::CallInvoker> callInvoker);
-  [[nodiscard(
-      "Registered non-void function")]] std::shared_ptr<OwningArrayBuffer>
+  // Returns the raw [numTokens, embeddingDim] output. Pooled models give
+  // numTokens == 1; multi-vector (late-interaction) models give the full
+  // sequence. The TS layer reduces to a single vector or keeps the matrix
+  // based on the model's config.
+  [[nodiscard("Registered non-void function")]] EmbeddingResult
   generate(const std::string input);
   void unload() noexcept;
 
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/Encoder.cpp b/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/Encoder.cpp
index 68a9a9fef4..6abbccb9c6 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/Encoder.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/Encoder.cpp
@@ -16,9 +16,12 @@ Encoder::Encoder(const std::string &tokenizerSource,
           encoderSource, tokenizerSource, callInvoker)) {}
 
 std::vector<float> Encoder::generate(std::string input) {
-  std::shared_ptr<OwningArrayBuffer> embeddingsText = encoder->generate(input);
+  // TextEmbeddings returns the raw [numTokens, embeddingDim] matrix; this
+  // encoder pools/uses the flat fp32 buffer directly (dataPtr).
+  std::shared_ptr<OwningArrayBuffer> embeddingsText =
+      encoder->generate(input).dataPtr;
   std::shared_ptr<OwningArrayBuffer> embeddingsUncond =
-      encoder->generate(std::string(constants::kBosToken));
+      encoder->generate(std::string(constants::kBosToken)).dataPtr;
 
   assert(embeddingsText->size() == embeddingsUncond->size());
   size_t embeddingsSize = embeddingsText->size() / sizeof(float);
diff --git a/packages/react-native-executorch/src/constants/modelRegistry.ts b/packages/react-native-executorch/src/constants/modelRegistry.ts
index eb0c98dae7..cb06ccb308 100644
--- a/packages/react-native-executorch/src/constants/modelRegistry.ts
+++ b/packages/react-native-executorch/src/constants/modelRegistry.ts
@@ -198,6 +198,7 @@ function pair<D extends { modelName: string }, Q extends { modelName: string }>(
   return variant({ xnnpack: { base: baseC, quant: quantC } });
 }
 
+
 // TTS presets bundle model + voice + phonemizer in a single config; they
 // don't share the `{ modelName: string }` shape of the rest of the registry,
 // and have no quant/backend axis. Expose them as a plain `() => Config`
@@ -260,6 +261,52 @@ const GEMMA4_E2B_MM_VARIANTS = {
   },
 };
 
+// Asymmetric query/document prompts the LFM models are trained with.
+// forward(text, role) auto-prepends these.
+const LFM_EMBEDDING_PROMPTS = { query: 'query: ', document: 'document: ' };
+const LFM_COLBERT_PROMPTS = { query: '[Q] ', document: '[D] ' };
+
+const LFM2_5_EMBEDDING_350M_VARIANTS = {
+  mlx: {
+    base: {
+      modelName: 'lfm2-5-embedding-350m' as const,
+      modelSource: M.LFM2_5_EMBEDDING_350M_MLX_MODEL,
+      tokenizerSource: M.LFM2_5_EMBEDDING_350M_TOKENIZER,
+      prompts: LFM_EMBEDDING_PROMPTS,
+    },
+  },
+  xnnpack: {
+    base: {
+      modelName: 'lfm2-5-embedding-350m' as const,
+      modelSource: M.LFM2_5_EMBEDDING_350M_XNNPACK_MODEL,
+      tokenizerSource: M.LFM2_5_EMBEDDING_350M_TOKENIZER,
+      prompts: LFM_EMBEDDING_PROMPTS,
+    },
+  },
+};
+
+// LFM2.5-ColBERT is a plain text-embedding model from the library's POV: it
+// returns per-token vectors and the library only auto-applies the role
+// prompts. Scoring is left to the caller, e.g. via the exported `maxSim` util.
+const LFM2_5_COLBERT_350M_VARIANTS = {
+  mlx: {
+    base: {
+      modelName: 'lfm2-5-colbert-350m' as const,
+      modelSource: M.LFM2_5_COLBERT_350M_MLX_MODEL,
+      tokenizerSource: M.LFM2_5_COLBERT_350M_TOKENIZER,
+      prompts: LFM_COLBERT_PROMPTS,
+    },
+  },
+  xnnpack: {
+    base: {
+      modelName: 'lfm2-5-colbert-350m' as const,
+      modelSource: M.LFM2_5_COLBERT_350M_XNNPACK_MODEL,
+      tokenizerSource: M.LFM2_5_COLBERT_350M_TOKENIZER,
+      prompts: LFM_COLBERT_PROMPTS,
+    },
+  },
+};
+
 const EFFICIENTNET_V2_S_VARIANTS = {
   xnnpack: {
     base: {
@@ -742,6 +789,17 @@ export const models = {
       M.PARAPHRASE_MULTILINGUAL_MINILM_L12_V2_QUANTIZED
     ),
     clip_vit_base_patch32_text: base(M.CLIP_VIT_BASE_PATCH32_TEXT),
+    lfm2_5_embedding_350m: variant(LFM2_5_EMBEDDING_350M_VARIANTS, {
+      ios: 'mlx',
+      android: 'xnnpack',
+    }),
+    // ColBERT (late-interaction): forward() returns per-token vectors. Scoring
+    // is up to the caller: use the exported `maxSim` util (MaxSim + skiplist);
+    // see the colbert example screen for a reference implementation.
+    lfm2_5_colbert_350m: variant(LFM2_5_COLBERT_350M_VARIANTS, {
+      ios: 'mlx',
+      android: 'xnnpack',
+    }),
   },
   image_embedding: {
     clip_vit_base_patch32_image: pair(
diff --git a/packages/react-native-executorch/src/constants/modelUrls.ts b/packages/react-native-executorch/src/constants/modelUrls.ts
index 0e36f812ff..7c4b73483c 100644
--- a/packages/react-native-executorch/src/constants/modelUrls.ts
+++ b/packages/react-native-executorch/src/constants/modelUrls.ts
@@ -1197,6 +1197,21 @@ const PARAPHRASE_MULTILINGUAL_MINILM_L12_V2_QUANTIZED_MODEL = `${URL_PREFIX}-par
 const PARAPHRASE_MULTILINGUAL_MINILM_L12_V2_TOKENIZER = `${URL_PREFIX}-paraphrase-multilingual-MiniLM-L12-v2/${PREVIOUS_VERSION_TAG}/tokenizer.json`;
 const CLIP_VIT_BASE_PATCH32_TEXT_MODEL = `${URL_PREFIX}-clip-vit-base-patch32/${PREVIOUS_VERSION_TAG}/xnnpack/clip_vit_base_patch32_text_xnnpack_fp32.pte`;
 const CLIP_VIT_BASE_PATCH32_TEXT_TOKENIZER = `${URL_PREFIX}-clip-vit-base-patch32/${PREVIOUS_VERSION_TAG}/tokenizer.json`;
+// LFM2.5-Embedding-350M: XNNPACK 8da4w (Android/CPU), MLX int4 bf16 (iOS GPU,
+// physical device only). The exported graph bakes in CLS pooling + L2 norm.
+// Requires the runner to add the BOS special token (CLS-pooled at index 0).
+export const LFM2_5_EMBEDDING_350M_XNNPACK_MODEL = `${URL_PREFIX}-lfm2.5-embedding-350m/${PREVIOUS_VERSION_TAG}/xnnpack/lfm_2_5_embedding_350m_xnnpack_8da4w.pte`;
+export const LFM2_5_EMBEDDING_350M_MLX_MODEL = `${URL_PREFIX}-lfm2.5-embedding-350m/${PREVIOUS_VERSION_TAG}/mlx/lfm_2_5_embedding_350m_mlx_int4.pte`;
+export const LFM2_5_EMBEDDING_350M_TOKENIZER = `${URL_PREFIX}-lfm2.5-embedding-350m/${PREVIOUS_VERSION_TAG}/tokenizer.json`;
+// LFM2.5-ColBERT-350M: late-interaction multi-vector retriever (per-token
+// [S,128]). Same bidirectional backbone as the embedding model + a Linear
+// 1024->128 head. forward() returns per-token vectors; late-interaction
+// scoring is done by the caller with `maxSim` (see the colbert example).
+// NOTE: pinned to `resolve/main` for testing — the v0.9.0 tag does not exist
+// on this repo yet. Switch to `${PREVIOUS_VERSION_TAG}` once the tag is cut.
+export const LFM2_5_COLBERT_350M_XNNPACK_MODEL = `${URL_PREFIX}-lfm2.5-colbert-350m/resolve/main/xnnpack/lfm_2_5_colbert_350m_xnnpack_8da4w.pte`;
+export const LFM2_5_COLBERT_350M_MLX_MODEL = `${URL_PREFIX}-lfm2.5-colbert-350m/resolve/main/mlx/lfm_2_5_colbert_350m_mlx_int4.pte`;
+export const LFM2_5_COLBERT_350M_TOKENIZER = `${URL_PREFIX}-lfm2.5-colbert-350m/resolve/main/tokenizer.json`;
 
 /**
  * @category Models - Text Embeddings
diff --git a/packages/react-native-executorch/src/hooks/natural_language_processing/useTextEmbeddings.ts b/packages/react-native-executorch/src/hooks/natural_language_processing/useTextEmbeddings.ts
index 31ee179925..b4679b4237 100644
--- a/packages/react-native-executorch/src/hooks/natural_language_processing/useTextEmbeddings.ts
+++ b/packages/react-native-executorch/src/hooks/natural_language_processing/useTextEmbeddings.ts
@@ -1,20 +1,25 @@
 import { TextEmbeddingsModule } from '../../modules/natural_language_processing/TextEmbeddingsModule';
 import { useModuleFactory } from '../useModuleFactory';
 import {
+  AnyTextEmbeddingsModel,
+  EmbeddingRole,
+  ForwardFn,
   TextEmbeddingsType,
   TextEmbeddingsProps,
 } from '../../types/textEmbeddings';
 
 /**
- * React hook for managing a Text Embeddings model instance.
+ * React hook for a Text Embeddings model.
  * @category Hooks
- * @param TextEmbeddingsProps - Configuration object containing `model` source and optional `preventLoad` flag.
- * @returns Ready to use Text Embeddings model.
+ * @param TextEmbeddingsProps - `model` source + optional `preventLoad`.
+ * @returns Ready to use embeddings model. `forward` returns the raw
+ *   [numTokens, embeddingDim] result; use `toVector` for a single vector.
+ *   Models with prompts require a `role` ('query' | 'document') on `forward`.
  */
-export const useTextEmbeddings = ({
+export const useTextEmbeddings = <M extends AnyTextEmbeddingsModel>({
   model,
   preventLoad = false,
-}: TextEmbeddingsProps): TextEmbeddingsType => {
+}: TextEmbeddingsProps<M>): TextEmbeddingsType<M> => {
   const { error, isReady, isGenerating, downloadProgress, runForward } =
     useModuleFactory({
       factory: (config, onProgress) =>
@@ -24,7 +29,8 @@ export const useTextEmbeddings = ({
       preventLoad,
     });
 
-  const forward = (input: string) => runForward((inst) => inst.forward(input));
+  const forward = ((input: string, role?: EmbeddingRole) =>
+    runForward((inst) => inst.forward(input, role))) as ForwardFn<M>;
 
   return { error, isReady, isGenerating, downloadProgress, forward };
 };
diff --git a/packages/react-native-executorch/src/index.ts b/packages/react-native-executorch/src/index.ts
index 1f190d41f5..34cdf97d8d 100644
--- a/packages/react-native-executorch/src/index.ts
+++ b/packages/react-native-executorch/src/index.ts
@@ -212,6 +212,7 @@ export * from './utils/ResourceFetcher';
 export * from './utils/ResourceFetcherUtils';
 export * from './utils/BaseResourceFetcherClass';
 export * from './utils/llm';
+export * from './utils/textEmbeddings';
 export * from './common/Logger';
 export * from './utils/llms/context_strategy';
 export * from './utils/segmentAnythingPrompts';
diff --git a/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts b/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts
index 27b0e59ceb..d9ab4f45da 100644
--- a/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts
+++ b/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts
@@ -1,5 +1,11 @@
 import { ResourceSource } from '../../types/common';
-import { TextEmbeddingsModelName } from '../../types/textEmbeddings';
+import {
+  AnyTextEmbeddingsModel,
+  EmbeddingPrompts,
+  EmbeddingResult,
+  EmbeddingRole,
+  TextEmbeddingsModelName,
+} from '../../types/textEmbeddings';
 import { ResourceFetcher } from '../../utils/ResourceFetcher';
 import { BaseModule } from '../BaseModule';
 import { RnExecutorchErrorCode } from '../../errors/ErrorCodes';
@@ -7,27 +13,28 @@ import { parseUnknownError, RnExecutorchError } from '../../errors/errorUtils';
 import { Logger } from '../../common/Logger';
 
 /**
- * Module for generating text embeddings from input text.
+ * Module for text embeddings. Returns the raw [numTokens, embeddingDim] output
+ * for any model — pooled (numTokens === 1) or multi-vector. Scoring / pooling
+ * is the consumer's concern (see the `toVector` util for the single-vector
+ * common case).
  * @category Typescript API
  */
 export class TextEmbeddingsModule extends BaseModule {
-  private constructor(nativeModule: unknown) {
+  private prompts?: EmbeddingPrompts;
+
+  private constructor(nativeModule: unknown, prompts?: EmbeddingPrompts) {
     super();
     this.nativeModule = nativeModule;
+    this.prompts = prompts;
   }
 
   /**
    * Creates a text embeddings instance for a built-in model.
-   * @param namedSources - An object specifying which built-in model to load and where to fetch it from.
-   * @param onDownloadProgress - Optional callback to monitor download progress, receiving a value between 0 and 1.
-   * @returns A Promise resolving to a `TextEmbeddingsModule` instance.
+   * @param namedSources - The model + tokenizer sources.
+   * @param onDownloadProgress - Optional download progress callback (0..1).
    */
   static async fromModelName(
-    namedSources: {
-      modelName: TextEmbeddingsModelName;
-      modelSource: ResourceSource;
-      tokenizerSource: ResourceSource;
-    },
+    namedSources: AnyTextEmbeddingsModel,
     onDownloadProgress: (progress: number) => void = () => {}
   ): Promise<TextEmbeddingsModule> {
     try {
@@ -41,7 +48,8 @@ export class TextEmbeddingsModule extends BaseModule {
         throw new RnExecutorchError(RnExecutorchErrorCode.DownloadInterrupted);
       }
       return new TextEmbeddingsModule(
-        await global.loadTextEmbeddings(modelPath, tokenizerPath)
+        await global.loadTextEmbeddings(modelPath, tokenizerPath),
+        namedSources.prompts
       );
     } catch (error) {
       Logger.error('Load failed:', error);
@@ -50,14 +58,9 @@ export class TextEmbeddingsModule extends BaseModule {
   }
 
   /**
-   * Creates a text embeddings instance with a user-provided model binary and tokenizer.
-   * Use this when working with a custom-exported model that is not one of the built-in presets.
-   * @remarks The native model contract for this method is not formally defined and may change
-   * between releases. Refer to the native source code for the current expected tensor interface.
-   * @param modelSource - A fetchable resource pointing to the model binary.
-   * @param tokenizerSource - A fetchable resource pointing to the tokenizer file.
-   * @param onDownloadProgress - Optional callback to monitor download progress, receiving a value between 0 and 1.
-   * @returns A Promise resolving to a `TextEmbeddingsModule` instance.
+   * Creates a text embeddings instance from a custom model binary + tokenizer.
+   * @remarks The native tensor contract is not formally guaranteed across
+   * releases.
    */
   static fromCustomModel(
     modelSource: ResourceSource,
@@ -75,13 +78,24 @@ export class TextEmbeddingsModule extends BaseModule {
   }
 
   /**
-   * Executes the model's forward pass to generate an embedding for the provided text.
-   * @param input - The text string to embed.
-   * @returns A Promise resolving to a `Float32Array` containing the embedding vector.
+   * Embed text. Returns the raw [numTokens, embeddingDim] result.
+   * @param input - The text to embed.
+   * @param role - Optional 'query' | 'document'; prepends the model's prompt
+   *   for that role when configured (no-op otherwise).
    */
-  async forward(input: string): Promise<Float32Array> {
+  async forward(
+    input: string,
+    role?: EmbeddingRole
+  ): Promise<EmbeddingResult> {
     if (this.nativeModule == null)
       throw new RnExecutorchError(RnExecutorchErrorCode.ModuleNotLoaded);
-    return new Float32Array(await this.nativeModule.generate(input));
+    const prefix = (role && this.prompts?.[role]) || '';
+    const res = await this.nativeModule.generate(prefix + input);
+    return {
+      vectors: new Float32Array(res.dataPtr),
+      numTokens: res.numTokens,
+      embeddingDim: res.embeddingDim,
+      tokenIds: res.tokenIds,
+    };
   }
 }
diff --git a/packages/react-native-executorch/src/types/textEmbeddings.ts b/packages/react-native-executorch/src/types/textEmbeddings.ts
index d9cd120e26..47e056794f 100644
--- a/packages/react-native-executorch/src/types/textEmbeddings.ts
+++ b/packages/react-native-executorch/src/types/textEmbeddings.ts
@@ -12,65 +12,108 @@ export type TextEmbeddingsModelName =
   | 'multi-qa-mpnet-base-dot-v1'
   | 'distiluse-base-multilingual-cased-v2-8da4w'
   | 'paraphrase-multilingual-minilm-l12-v2-quantized'
-  | 'clip-vit-base-patch32-text';
+  | 'clip-vit-base-patch32-text'
+  | 'lfm2-5-embedding-350m'
+  | 'lfm2-5-colbert-350m';
+
+/**
+ * Raw text embedding output: a [numTokens, embeddingDim] fp32 matrix (row-
+ * major) plus the input token ids. Single-vector (pooled) models give
+ * numTokens === 1 — use `toVector` for that common case. Multi-vector (late-
+ * interaction, e.g. ColBERT) models give the full per-token sequence; scoring
+ * is left to the consumer (e.g. with the `maxSim` util).
+ * @category Types
+ */
+export interface EmbeddingResult {
+  /** Flat [numTokens * embeddingDim] fp32 vectors (row-major). */
+  vectors: Float32Array;
+  /** Number of token rows (1 for pooled models). */
+  numTokens: number;
+  /** Per-token vector dimension. */
+  embeddingDim: number;
+  /** Token ids of the encoded input (the full sequence, even when pooled). */
+  tokenIds: number[];
+}
+
+/**
+ * Role for `forward`. Some models are trained with asymmetric query/document
+ * prompts (e.g. LFM2.5 uses `query: `/`document: `, ColBERT uses `[Q] `/`[D] `).
+ * Passing a role auto-prepends the model's configured prompt for that role.
+ * @category Types
+ */
+export type EmbeddingRole = 'query' | 'document';
+
+/**
+ * Asymmetric prompts a model is trained with. When a model config carries
+ * these, `forward` REQUIRES a `role` so the matching prompt is always applied
+ * (forgetting it would silently embed raw text and wreck asymmetric retrieval).
+ * @category Types
+ */
+export interface EmbeddingPrompts {
+  query: string;
+  document: string;
+}
+
+/** A standard (symmetric) embedding model — `forward(text)`, no role. */
+export interface TextEmbeddingsModel {
+  modelName: TextEmbeddingsModelName;
+  modelSource: ResourceSource;
+  tokenizerSource: ResourceSource;
+  prompts?: undefined;
+}
+
+/**
+ * An asymmetric model with query/document prompts — `forward(text, role)` with
+ * role REQUIRED.
+ */
+export interface PromptedTextEmbeddingsModel {
+  modelName: TextEmbeddingsModelName;
+  modelSource: ResourceSource;
+  tokenizerSource: ResourceSource;
+  prompts: EmbeddingPrompts;
+}
+
+export type AnyTextEmbeddingsModel =
+  | TextEmbeddingsModel
+  | PromptedTextEmbeddingsModel;
+
+/**
+ * `forward`'s signature, discriminated by the model: prompted models require a
+ * `role` argument; standard models take none.
+ */
+export type ForwardFn<M extends AnyTextEmbeddingsModel> =
+  M extends PromptedTextEmbeddingsModel
+    ? (input: string, role: EmbeddingRole) => Promise<EmbeddingResult>
+    : (input: string) => Promise<EmbeddingResult>;
 
 /**
  * Props for the useTextEmbeddings hook.
  * @category Types
- * @property {object} model - An object containing the model configuration.
- * @property {TextEmbeddingsModelName} model.modelName - Unique name identifying the model.
- * @property {ResourceSource} model.modelSource - The source of the text embeddings model binary.
- * @property {ResourceSource} model.tokenizerSource - The source of the tokenizer JSON file.
- * @property {boolean} [preventLoad] - Boolean that can prevent automatic model loading (and downloading the data if you load it for the first time) after running the hook.
  */
-export interface TextEmbeddingsProps {
-  model: {
-    /**
-     * The unique name of the text embeddings model.
-     */
-    modelName: TextEmbeddingsModelName;
-    /**
-     * The source of the text embeddings model binary.
-     */
-    modelSource: ResourceSource;
-    /**
-     * The source of the tokenizer JSON file.
-     */
-    tokenizerSource: ResourceSource;
-  };
+export interface TextEmbeddingsProps<
+  M extends AnyTextEmbeddingsModel = AnyTextEmbeddingsModel,
+> {
+  model: M;
   preventLoad?: boolean;
 }
 
 /**
- * React hook state and methods for managing a Text Embeddings model instance.
+ * React hook state and methods for a Text Embeddings model instance.
  * @category Types
  */
-export interface TextEmbeddingsType {
-  /**
-   * Contains the error message if the model failed to load or during inference.
-   */
+export interface TextEmbeddingsType<
+  M extends AnyTextEmbeddingsModel = AnyTextEmbeddingsModel,
+> {
   error: null | RnExecutorchError;
-
-  /**
-   * Indicates whether the embeddings model has successfully loaded and is ready for inference.
-   */
   isReady: boolean;
-
-  /**
-   * Indicates whether the model is currently generating embeddings.
-   */
   isGenerating: boolean;
-
-  /**
-   * Tracks the progress of the model download process (value between 0 and 1).
-   */
   downloadProgress: number;
 
   /**
-   * Runs the text embeddings model on the provided input string.
-   * @param input - The text string to embed.
-   * @returns A promise resolving to a Float32Array containing the vector embeddings.
-   * @throws {RnExecutorchError} If the model is not loaded or is currently processing another request.
+   * Embed text into a [numTokens, embeddingDim] result. Pooled models return
+   * numTokens === 1 (use `toVector`); multi-vector models return the full
+   * per-token sequence. Models with prompts require a `role`
+   * ('query' | 'document'); standard models take none.
    */
-  forward(input: string): Promise<Float32Array>;
+  forward: ForwardFn<M>;
 }
diff --git a/packages/react-native-executorch/src/utils/textEmbeddings.ts b/packages/react-native-executorch/src/utils/textEmbeddings.ts
new file mode 100644
index 0000000000..c396145489
--- /dev/null
+++ b/packages/react-native-executorch/src/utils/textEmbeddings.ts
@@ -0,0 +1,74 @@
+import { EmbeddingResult } from '../types/textEmbeddings';
+
+/**
+ * Get the single pooled embedding vector from a result. Convenience for the
+ * common single-vector case: the exported graph pools + L2-normalizes to a
+ * [1, embeddingDim] output, so this returns row 0.
+ *
+ * For multi-vector (late-interaction) models, prefer the full per-token
+ * vectors (`getTokenVectors`); row 0 alone is not a meaningful sentence
+ * embedding there.
+ *
+ * @category Utils
+ */
+export function toVector(result: EmbeddingResult): Float32Array {
+  return result.vectors.slice(0, result.embeddingDim);
+}
+
+/**
+ * Split a result's flat `vectors` buffer into per-token rows
+ * (`numTokens` arrays of length `embeddingDim`). Useful for inspecting or
+ * storing individual token vectors (e.g. a multi-vector vector DB).
+ *
+ * @category Utils
+ */
+export function getTokenVectors(result: EmbeddingResult): Float32Array[] {
+  const { vectors, numTokens, embeddingDim } = result;
+  const rows: Float32Array[] = [];
+  for (let i = 0; i < numTokens; i++) {
+    rows.push(vectors.subarray(i * embeddingDim, (i + 1) * embeddingDim));
+  }
+  return rows;
+}
+
+/**
+ * Late-interaction MaxSim score between a query and a document encoding:
+ *
+ *   score = Σ_q  max_d ( q · d )
+ *
+ * For each query token, takes the max dot product over all (non-skiplist)
+ * document tokens, then sums across query tokens. Per-token vectors are
+ * L2-normalized by the graph, so a dot product is a cosine.
+ *
+ * `skiplistIds` (e.g. punctuation token ids) are excluded from the document
+ * side, matching ColBERT's document skiplist. Pass `[]` to score every token.
+ *
+ * @category Utils
+ */
+export function maxSim(
+  query: EmbeddingResult,
+  doc: EmbeddingResult,
+  skiplistIds: number[] = []
+): number {
+  const dim = query.embeddingDim;
+  const q = query.vectors;
+  const d = doc.vectors;
+  const skip = new Set(skiplistIds);
+
+  let score = 0;
+  for (let qi = 0; qi < query.numTokens; qi++) {
+    const qOff = qi * dim;
+    let best = -Infinity;
+    for (let di = 0; di < doc.numTokens; di++) {
+      if (skip.has(doc.tokenIds[di]!)) continue;
+      const dOff = di * dim;
+      let dot = 0;
+      for (let k = 0; k < dim; k++) {
+        dot += (q[qOff + k] ?? 0) * (d[dOff + k] ?? 0);
+      }
+      if (dot > best) best = dot;
+    }
+    if (best !== -Infinity) score += best;
+  }
+  return score;
+}

From b2e7e78917bd846773efd6d1f246869ad18f07e9 Mon Sep 17 00:00:00 2001
From: Norbert Klockiewicz <Nklockiewicz12@gmail.com>
Date: Mon, 22 Jun 2026 14:34:58 +0200
Subject: [PATCH 02/14] fix: address review on text-embeddings/ColBERT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Migrate the segment-anything (SAM) screen to toVector(forward()) — its
  CLIP-text path broke when forward started returning EmbeddingResult.
- Update the C++ TextEmbeddings integration test for the EmbeddingResult
  return type (was still using the old OwningArrayBuffer pointer API).
- Guard the per-token invariant: throw InvalidModelOutput if output rows
  != input token count (pooled numTokens==1 exempt), so skiplist masking
  can't silently misalign if a graph pads/truncates.
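- Deduplicate encode()/encodeWithSpecialTokens() into a shared encodeImpl.
- Switch the LFM2.5-ColBERT model/tokenizer URLs from `resolve/main` to
  ${PREVIOUS_VERSION_TAG}, removing the temporary testing pin.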
- Drop the redundant Float32Array copy in TextEmbeddingsModule.forward
  (dataPtr already arrives from the JSI boundary as a Float32Array);
  document the getTokenVectors view lifetime; remove dead
  BaseEmbeddings::postprocess.

Authored with Claude.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../app/segment_anything/index.tsx            |  3 +-
 .../common/rnexecutorch/TokenizerModule.cpp   | 34 ++++++-------------
 .../common/rnexecutorch/TokenizerModule.h     |  5 +++
 .../models/embeddings/BaseEmbeddings.cpp      | 10 ------
 .../models/embeddings/BaseEmbeddings.h        |  4 ---
 .../models/embeddings/text/TextEmbeddings.cpp | 15 ++++++++
 .../tests/integration/TextEmbeddingsTest.cpp  | 30 ++++++++--------
 .../src/constants/modelRegistry.ts            |  1 -
 .../src/constants/modelUrls.ts                |  8 ++---
 .../TextEmbeddingsModule.ts                   |  4 ++-
 .../src/utils/textEmbeddings.ts               |  5 +++
 11 files changed, 59 insertions(+), 60 deletions(-)

diff --git a/apps/computer-vision/app/segment_anything/index.tsx b/apps/computer-vision/app/segment_anything/index.tsx
index ac7bbd06b5..0a7af9e1ed 100644
--- a/apps/computer-vision/app/segment_anything/index.tsx
+++ b/apps/computer-vision/app/segment_anything/index.tsx
@@ -25,6 +25,7 @@ import {
   useInstanceSegmentation,
   useImageEmbeddings,
   useTextEmbeddings,
+  toVector,
   InstanceSegmentationModelSources,
   SegmentedInstance,
   FastSAMLabel,
@@ -208,7 +209,7 @@ export default function SegmentAnythingScreen() {
         instanceEmbeddingsRef.current = embeddings;
         setEmbeddingProgress(null);
       }
-      const textEmb = await clipText.forward(textPrompt);
+      const textEmb = toVector(await clipText.forward(textPrompt));
       const match = selectByText(
         instances,
         instanceEmbeddingsRef.current,
diff --git a/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.cpp b/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.cpp
index 3315baa2dd..dfd9243c48 100644
--- a/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.cpp
@@ -26,17 +26,15 @@ TokenizerModule::TokenizerModule(
   memorySizeLowerBound = std::filesystem::file_size(modelPath);
 }
 
-std::vector<uint64_t> TokenizerModule::encode(std::string s) const {
+// When the tokenizer.json defines a post_processor, the underlying HFTokenizer
+// treats non-zero bos/eos as a flag to run it with add_special_token=true (not
+// a literal count). So bos=eos=0 skips special tokens; bos=eos=1 applies them.
+std::vector<uint64_t> TokenizerModule::encodeImpl(const std::string &s,
+                                                  int8_t bos, int8_t eos) const {
   if (!tokenizer) {
     THROW_NOT_LOADED_ERROR();
   }
-
-  // If the used tokenizer.json has defined post_processor field,
-  // setting any of bos or eos arguments to value other than provided constant
-  // ( which is 0) will result in running the post_processor with
-  // 'add_special_token' flag
-  auto encodeResult =
-      tokenizer->encode(s, numOfAddedBoSTokens, numOfAddedEoSTokens);
+  auto encodeResult = tokenizer->encode(s, bos, eos);
   if (!encodeResult.ok()) {
     throw RnExecutorchError(
         RnExecutorchErrorCode::TokenizerError,
@@ -46,23 +44,13 @@ std::vector<uint64_t> TokenizerModule::encode(std::string s) const {
   return encodeResult.get();
 }
 
+std::vector<uint64_t> TokenizerModule::encode(std::string s) const {
+  return encodeImpl(s, numOfAddedBoSTokens, numOfAddedEoSTokens);
+}
+
 std::vector<uint64_t>
 TokenizerModule::encodeWithSpecialTokens(std::string s) const {
-  if (!tokenizer) {
-    THROW_NOT_LOADED_ERROR();
-  }
-
-  // Passing non-zero bos/eos makes HFTokenizer run the tokenizer.json
-  // post_processor with add_special_token=true (the underlying encode treats
-  // these as a flag, not a literal count, when a post_processor is defined).
-  auto encodeResult = tokenizer->encode(s, /*bos=*/1, /*eos=*/1);
-  if (!encodeResult.ok()) {
-    throw RnExecutorchError(
-        RnExecutorchErrorCode::TokenizerError,
-        "Unexpected issue occurred while encoding: " +
-            std::to_string(static_cast<int32_t>(encodeResult.error())));
-  }
-  return encodeResult.get();
+  return encodeImpl(s, /*bos=*/1, /*eos=*/1);
 }
 
 std::string TokenizerModule::decode(std::vector<uint64_t> vec,
diff --git a/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.h b/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.h
index a511340af6..09877dfc65 100644
--- a/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.h
+++ b/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.h
@@ -30,6 +30,11 @@ class TokenizerModule {
   std::size_t getMemoryLowerBound() const noexcept;
 
 private:
+  // Shared encode implementation. bos/eos act as an add-special-tokens flag
+  // (not a literal count) when the tokenizer.json defines a post_processor.
+  std::vector<uint64_t> encodeImpl(const std::string &s, int8_t bos,
+                                   int8_t eos) const;
+
   std::unique_ptr<tokenizers::HFTokenizer> tokenizer;
   std::size_t memorySizeLowerBound{0};
 };
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/BaseEmbeddings.cpp b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/BaseEmbeddings.cpp
index bf291136c1..e777be6704 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/BaseEmbeddings.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/BaseEmbeddings.cpp
@@ -1,19 +1,9 @@
 #include "BaseEmbeddings.h"
 
-#include <span>
-
 namespace rnexecutorch::models::embeddings {
 
 BaseEmbeddings::BaseEmbeddings(const std::string &modelSource,
                                std::shared_ptr<react::CallInvoker> callInvoker)
     : BaseModel(modelSource, callInvoker) {}
 
-std::shared_ptr<OwningArrayBuffer>
-BaseEmbeddings::postprocess(const Result<std::vector<EValue>> &forwardResult) {
-  auto forwardResultTensor = forwardResult->at(0).toTensor();
-  auto buffer = std::make_shared<OwningArrayBuffer>(
-      forwardResultTensor.const_data_ptr(), forwardResultTensor.nbytes());
-  return buffer;
-}
-
 } // namespace rnexecutorch::models::embeddings
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/BaseEmbeddings.h b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/BaseEmbeddings.h
index 216d6bf8ce..4b37a3fe93 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/BaseEmbeddings.h
+++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/BaseEmbeddings.h
@@ -8,10 +8,6 @@ class BaseEmbeddings : public BaseModel {
 public:
   BaseEmbeddings(const std::string &modelSource,
                  std::shared_ptr<react::CallInvoker> callInvoker);
-
-protected:
-  std::shared_ptr<OwningArrayBuffer>
-  postprocess(const Result<std::vector<EValue>> &forwardResult);
 };
 
 }; // namespace rnexecutorch::models::embeddings
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp
index d673f0ac87..26f3157690 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp
@@ -72,6 +72,21 @@ EmbeddingResult TextEmbeddings::generate(const std::string input) {
   result.numTokens = static_cast<int32_t>(sizes[sizes.size() - 2]);
   result.embeddingDim = static_cast<int32_t>(sizes[sizes.size() - 1]);
   result.tokenIds = std::move(preprocessed.inputIds);
+
+  // Invariant for multi-vector models: one output row per input token, so
+  // numTokens (from the output tensor) must equal tokenIds.size() (from the
+  // input). Consumers index tokenIds[i] per output row (e.g. skiplist masking),
+  // which silently breaks if the graph ever pads/truncates the sequence.
+  // (Pooled models legitimately collapse to numTokens == 1.)
+  if (result.numTokens != 1 &&
+      result.numTokens != static_cast<int32_t>(result.tokenIds.size())) {
+    throw RnExecutorchError(
+        RnExecutorchErrorCode::InvalidModelOutput,
+        "Embedding output rows (" + std::to_string(result.numTokens) +
+            ") != input tokens (" +
+            std::to_string(result.tokenIds.size()) +
+            "); per-token tokenIds alignment is broken.");
+  }
   return result;
 }
 
diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/TextEmbeddingsTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/integration/TextEmbeddingsTest.cpp
index ff1abd4c30..0e0cc846b5 100644
--- a/packages/react-native-executorch/common/rnexecutorch/tests/integration/TextEmbeddingsTest.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/TextEmbeddingsTest.cpp
@@ -53,23 +53,23 @@ TEST(TextEmbeddingsGenerateTests, EmptyStringReturnsResults) {
   TextEmbeddings model(kValidTextEmbeddingsModelPath,
                        kValidTextEmbeddingsTokenizerPath, nullptr);
   auto result = model.generate("");
-  EXPECT_NE(result, nullptr);
-  EXPECT_GT(result->size(), 0u);
+  EXPECT_NE(result.dataPtr, nullptr);
+  EXPECT_GT(result.dataPtr->size(), 0u);
 }
 
 TEST(TextEmbeddingsGenerateTests, ValidTextReturnsResults) {
   TextEmbeddings model(kValidTextEmbeddingsModelPath,
                        kValidTextEmbeddingsTokenizerPath, nullptr);
   auto result = model.generate("Hello, world!");
-  EXPECT_NE(result, nullptr);
-  EXPECT_GT(result->size(), 0u);
+  EXPECT_NE(result.dataPtr, nullptr);
+  EXPECT_GT(result.dataPtr->size(), 0u);
 }
 
 TEST(TextEmbeddingsGenerateTests, ResultsHaveCorrectSize) {
   TextEmbeddings model(kValidTextEmbeddingsModelPath,
                        kValidTextEmbeddingsTokenizerPath, nullptr);
   auto result = model.generate("This is a test sentence.");
-  size_t numFloats = result->size() / sizeof(float);
+  size_t numFloats = result.dataPtr->size() / sizeof(float);
   EXPECT_EQ(numFloats, kMiniLmEmbeddingDimensions);
 }
 
@@ -78,8 +78,8 @@ TEST(TextEmbeddingsGenerateTests, ResultsAreNormalized) {
                        kValidTextEmbeddingsTokenizerPath, nullptr);
   auto result = model.generate("The quick brown fox jumps over the lazy dog.");
 
-  const float *data = reinterpret_cast<const float *>(result->data());
-  size_t numFloats = result->size() / sizeof(float);
+  const float *data = reinterpret_cast<const float *>(result.dataPtr->data());
+  size_t numFloats = result.dataPtr->size() / sizeof(float);
 
   float sumOfSquares = 0.0f;
   for (size_t i = 0; i < numFloats; ++i) {
@@ -94,8 +94,8 @@ TEST(TextEmbeddingsGenerateTests, ResultsContainValidValues) {
                        kValidTextEmbeddingsTokenizerPath, nullptr);
   auto result = model.generate("Testing valid values.");
 
-  const float *data = reinterpret_cast<const float *>(result->data());
-  size_t numFloats = result->size() / sizeof(float);
+  const float *data = reinterpret_cast<const float *>(result.dataPtr->data());
+  size_t numFloats = result.dataPtr->size() / sizeof(float);
 
   for (size_t i = 0; i < numFloats; ++i) {
     EXPECT_FALSE(std::isnan(data[i]));
@@ -110,9 +110,9 @@ TEST(TextEmbeddingsGenerateTests, DifferentTextProducesDifferentEmbeddings) {
   auto result1 = model.generate("Hello, world!");
   auto result2 = model.generate("Goodbye, moon!");
 
-  const float *data1 = reinterpret_cast<const float *>(result1->data());
-  const float *data2 = reinterpret_cast<const float *>(result2->data());
-  size_t numFloats = result1->size() / sizeof(float);
+  const float *data1 = reinterpret_cast<const float *>(result1.dataPtr->data());
+  const float *data2 = reinterpret_cast<const float *>(result2.dataPtr->data());
+  size_t numFloats = result1.dataPtr->size() / sizeof(float);
 
   bool allEqual = true;
   for (size_t i = 0; i < numFloats; ++i) {
@@ -131,9 +131,9 @@ TEST(TextEmbeddingsGenerateTests, SimilarTextProducesSimilarEmbeddings) {
   auto result1 = model.generate("I love programming");
   auto result2 = model.generate("I enjoy coding");
 
-  const float *data1 = reinterpret_cast<const float *>(result1->data());
-  const float *data2 = reinterpret_cast<const float *>(result2->data());
-  size_t numFloats = result1->size() / sizeof(float);
+  const float *data1 = reinterpret_cast<const float *>(result1.dataPtr->data());
+  const float *data2 = reinterpret_cast<const float *>(result2.dataPtr->data());
+  size_t numFloats = result1.dataPtr->size() / sizeof(float);
 
   float dotProduct = 0.0f;
   for (size_t i = 0; i < numFloats; ++i) {
diff --git a/packages/react-native-executorch/src/constants/modelRegistry.ts b/packages/react-native-executorch/src/constants/modelRegistry.ts
index cb06ccb308..f411631aac 100644
--- a/packages/react-native-executorch/src/constants/modelRegistry.ts
+++ b/packages/react-native-executorch/src/constants/modelRegistry.ts
@@ -198,7 +198,6 @@ function pair<D extends { modelName: string }, Q extends { modelName: string }>(
   return variant({ xnnpack: { base: baseC, quant: quantC } });
 }
 
-
 // TTS presets bundle model + voice + phonemizer in a single config; they
 // don't share the `{ modelName: string }` shape of the rest of the registry,
 // and have no quant/backend axis. Expose them as a plain `() => Config`
diff --git a/packages/react-native-executorch/src/constants/modelUrls.ts b/packages/react-native-executorch/src/constants/modelUrls.ts
index 7c4b73483c..8fdebb1a6d 100644
--- a/packages/react-native-executorch/src/constants/modelUrls.ts
+++ b/packages/react-native-executorch/src/constants/modelUrls.ts
@@ -1207,11 +1207,9 @@ export const LFM2_5_EMBEDDING_350M_TOKENIZER = `${URL_PREFIX}-lfm2.5-embedding-3
 // [S,128]). Same bidirectional backbone as the embedding model + a Linear
 // 1024->128 head. forward() returns per-token vectors; late-interaction
 // scoring (MaxSim) is the consumer's concern (see the colbert example).
-// NOTE: pinned to `resolve/main` for testing — the v0.9.0 tag does not exist
-// on this repo yet. Switch to `${PREVIOUS_VERSION_TAG}` once the tag is cut.
-export const LFM2_5_COLBERT_350M_XNNPACK_MODEL = `${URL_PREFIX}-lfm2.5-colbert-350m/resolve/main/xnnpack/lfm_2_5_colbert_350m_xnnpack_8da4w.pte`;
-export const LFM2_5_COLBERT_350M_MLX_MODEL = `${URL_PREFIX}-lfm2.5-colbert-350m/resolve/main/mlx/lfm_2_5_colbert_350m_mlx_int4.pte`;
-export const LFM2_5_COLBERT_350M_TOKENIZER = `${URL_PREFIX}-lfm2.5-colbert-350m/resolve/main/tokenizer.json`;
+export const LFM2_5_COLBERT_350M_XNNPACK_MODEL = `${URL_PREFIX}-lfm2.5-colbert-350m/${PREVIOUS_VERSION_TAG}/xnnpack/lfm_2_5_colbert_350m_xnnpack_8da4w.pte`;
+export const LFM2_5_COLBERT_350M_MLX_MODEL = `${URL_PREFIX}-lfm2.5-colbert-350m/${PREVIOUS_VERSION_TAG}/mlx/lfm_2_5_colbert_350m_mlx_int4.pte`;
+export const LFM2_5_COLBERT_350M_TOKENIZER = `${URL_PREFIX}-lfm2.5-colbert-350m/${PREVIOUS_VERSION_TAG}/tokenizer.json`;
 
 /**
  * @category Models - Text Embeddings
diff --git a/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts b/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts
index d9ab4f45da..c11b9c9aff 100644
--- a/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts
+++ b/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts
@@ -91,8 +91,10 @@ export class TextEmbeddingsModule extends BaseModule {
       throw new RnExecutorchError(RnExecutorchErrorCode.ModuleNotLoaded);
     const prefix = (role && this.prompts?.[role]) || '';
     const res = await this.nativeModule.generate(prefix + input);
+    // res.dataPtr is already a Float32Array view over the owned native buffer
+    // (built at the JSI boundary), so use it directly — no extra copy.
     return {
-      vectors: new Float32Array(res.dataPtr),
+      vectors: res.dataPtr as Float32Array,
       numTokens: res.numTokens,
       embeddingDim: res.embeddingDim,
       tokenIds: res.tokenIds,
diff --git a/packages/react-native-executorch/src/utils/textEmbeddings.ts b/packages/react-native-executorch/src/utils/textEmbeddings.ts
index c396145489..da10d9aa08 100644
--- a/packages/react-native-executorch/src/utils/textEmbeddings.ts
+++ b/packages/react-native-executorch/src/utils/textEmbeddings.ts
@@ -20,6 +20,11 @@ export function toVector(result: EmbeddingResult): Float32Array {
  * (`numTokens` arrays of length `embeddingDim`). Useful for inspecting or
  * storing individual token vectors (e.g. a multi-vector vector DB).
  *
+ * The rows are zero-copy `subarray` VIEWS over `result.vectors` — valid only
+ * while that buffer is alive and not mutated. Copy them (e.g. `new
+ * Float32Array(row)`) before storing beyond the result's lifetime. (`toVector`
+ * by contrast returns an independent copy.)
+ *
  * @category Utils
  */
 export function getTokenVectors(result: EmbeddingResult): Float32Array[] {

From cf74973f94ada255bcc00eaa910b724ff50658c4 Mon Sep 17 00:00:00 2001
From: Norbert Klockiewicz <Nklockiewicz12@gmail.com>
Date: Mon, 22 Jun 2026 14:50:16 +0200
Subject: [PATCH 03/14] refactor: make useTextEmbeddings.forward non-breaking
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

forward(text) returns a single pooled Float32Array again for every
single-vector model (prompted or not) — restoring the original API, so
MiniLM/MPNet/CLIP/SAM consumers need no migration. The reduction (row 0 of
the native [numTokens, embeddingDim] matrix, returned as a subarray view)
happens in the TS module, not at the call site.

Multi-vector (late-interaction) models opt in via a `multiVector: true`
config flag; for those, forward returns the full per-token EmbeddingResult
so MaxSim/skiplist work. Return type is discriminated by the flag, and the
role argument by `prompts` (required when prompted, none when not).

Authored with Claude.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../app/segment_anything/index.tsx            |  3 +-
 .../app/clip-embeddings/index.tsx             |  3 +-
 .../app/text-embeddings/index.tsx             | 14 ++--
 .../src/constants/modelRegistry.ts            |  2 +
 .../useTextEmbeddings.ts                      |  4 +-
 .../TextEmbeddingsModule.ts                   | 44 ++++++++----
 .../src/types/textEmbeddings.ts               | 70 ++++++++++---------
 7 files changed, 79 insertions(+), 61 deletions(-)

diff --git a/apps/computer-vision/app/segment_anything/index.tsx b/apps/computer-vision/app/segment_anything/index.tsx
index 0a7af9e1ed..ac7bbd06b5 100644
--- a/apps/computer-vision/app/segment_anything/index.tsx
+++ b/apps/computer-vision/app/segment_anything/index.tsx
@@ -25,7 +25,6 @@ import {
   useInstanceSegmentation,
   useImageEmbeddings,
   useTextEmbeddings,
-  toVector,
   InstanceSegmentationModelSources,
   SegmentedInstance,
   FastSAMLabel,
@@ -209,7 +208,7 @@ export default function SegmentAnythingScreen() {
         instanceEmbeddingsRef.current = embeddings;
         setEmbeddingProgress(null);
       }
-      const textEmb = toVector(await clipText.forward(textPrompt));
+      const textEmb = await clipText.forward(textPrompt);
       const match = selectByText(
         instances,
         instanceEmbeddingsRef.current,
diff --git a/apps/text-embeddings/app/clip-embeddings/index.tsx b/apps/text-embeddings/app/clip-embeddings/index.tsx
index e0232d3440..02a8a9c656 100644
--- a/apps/text-embeddings/app/clip-embeddings/index.tsx
+++ b/apps/text-embeddings/app/clip-embeddings/index.tsx
@@ -16,7 +16,6 @@ import {
   models,
   useTextEmbeddings,
   useImageEmbeddings,
-  toVector,
   ImageEmbeddingsProps,
 } from 'react-native-executorch';
 
@@ -102,7 +101,7 @@ function ClipEmbeddingsScreen() {
       const txtStart = Date.now();
       const scored: { label: string; similarity: number }[] = [];
       for (const label of labels) {
-        const textEmbedding = toVector(await textModel.forward(label));
+        const textEmbedding = await textModel.forward(label);
         scored.push({
           label,
           similarity: dotProduct(imageEmbedding, textEmbedding),
diff --git a/apps/text-embeddings/app/text-embeddings/index.tsx b/apps/text-embeddings/app/text-embeddings/index.tsx
index 470094da02..8cb6777843 100644
--- a/apps/text-embeddings/app/text-embeddings/index.tsx
+++ b/apps/text-embeddings/app/text-embeddings/index.tsx
@@ -15,13 +15,12 @@ import { ModelPicker } from '../../components/ModelPicker';
 import {
   models,
   useTextEmbeddings,
-  toVector,
   TextEmbeddingsProps,
 } from 'react-native-executorch';
 const textEmbedding = models.text_embedding;
 
-// Single-vector (pooled) models: forward() returns the raw result; toVector()
-// gives the single embedding. The multi-vector ColBERT model has its own screen.
+// Single-vector (pooled) models: forward() returns a Float32Array directly.
+// The multi-vector ColBERT model has its own screen.
 type TextEmbeddingModel = TextEmbeddingsProps['model'];
 
 const MODELS: { label: string; value: TextEmbeddingModel }[] = [
@@ -123,10 +122,9 @@ function TextEmbeddingsScreen() {
           const embedded = [];
           for (const sentence of CORPUS) {
             // forward(_, 'document') auto-applies the model's document prompt
-            // (a no-op for models without one).
-            const embedding = toVector(
-              await model.forward(sentence, 'document')
-            );
+            // (a no-op for models without one). Single-vector models return
+            // a Float32Array directly.
+            const embedding = await model.forward(sentence, 'document');
             if (cancelled) return;
             embedded.push({ sentence, embedding });
           }
@@ -157,7 +155,7 @@ function TextEmbeddingsScreen() {
     setQuery(queryText);
     try {
       const start = Date.now();
-      const queryEmbedding = toVector(await model.forward(q, 'query'));
+      const queryEmbedding = await model.forward(q, 'query');
       setEmbeddingTime(Date.now() - start);
       const ranked = corpusEmbeddings
         .map(({ sentence, embedding }) => ({
diff --git a/packages/react-native-executorch/src/constants/modelRegistry.ts b/packages/react-native-executorch/src/constants/modelRegistry.ts
index f411631aac..c2e3a2a21d 100644
--- a/packages/react-native-executorch/src/constants/modelRegistry.ts
+++ b/packages/react-native-executorch/src/constants/modelRegistry.ts
@@ -294,6 +294,7 @@ const LFM2_5_COLBERT_350M_VARIANTS = {
       modelSource: M.LFM2_5_COLBERT_350M_MLX_MODEL,
       tokenizerSource: M.LFM2_5_COLBERT_350M_TOKENIZER,
       prompts: LFM_COLBERT_PROMPTS,
+      multiVector: true as const,
     },
   },
   xnnpack: {
@@ -302,6 +303,7 @@ const LFM2_5_COLBERT_350M_VARIANTS = {
       modelSource: M.LFM2_5_COLBERT_350M_XNNPACK_MODEL,
       tokenizerSource: M.LFM2_5_COLBERT_350M_TOKENIZER,
       prompts: LFM_COLBERT_PROMPTS,
+      multiVector: true as const,
     },
   },
 };
diff --git a/packages/react-native-executorch/src/hooks/natural_language_processing/useTextEmbeddings.ts b/packages/react-native-executorch/src/hooks/natural_language_processing/useTextEmbeddings.ts
index b4679b4237..2f100b8cbb 100644
--- a/packages/react-native-executorch/src/hooks/natural_language_processing/useTextEmbeddings.ts
+++ b/packages/react-native-executorch/src/hooks/natural_language_processing/useTextEmbeddings.ts
@@ -1,9 +1,9 @@
 import { TextEmbeddingsModule } from '../../modules/natural_language_processing/TextEmbeddingsModule';
 import { useModuleFactory } from '../useModuleFactory';
 import {
-  AnyTextEmbeddingsModel,
   EmbeddingRole,
   ForwardFn,
+  TextEmbeddingsModel,
   TextEmbeddingsType,
   TextEmbeddingsProps,
 } from '../../types/textEmbeddings';
@@ -16,7 +16,7 @@ import {
  *   [numTokens, embeddingDim] result; use `toVector` for a single vector.
  *   Models with prompts require a `role` ('query' | 'document') on `forward`.
  */
-export const useTextEmbeddings = <M extends AnyTextEmbeddingsModel>({
+export const useTextEmbeddings = <M extends TextEmbeddingsModel>({
   model,
   preventLoad = false,
 }: TextEmbeddingsProps<M>): TextEmbeddingsType<M> => {
diff --git a/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts b/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts
index c11b9c9aff..abb620e981 100644
--- a/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts
+++ b/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts
@@ -1,9 +1,9 @@
 import { ResourceSource } from '../../types/common';
 import {
-  AnyTextEmbeddingsModel,
   EmbeddingPrompts,
   EmbeddingResult,
   EmbeddingRole,
+  TextEmbeddingsModel,
   TextEmbeddingsModelName,
 } from '../../types/textEmbeddings';
 import { ResourceFetcher } from '../../utils/ResourceFetcher';
@@ -13,28 +13,35 @@ import { parseUnknownError, RnExecutorchError } from '../../errors/errorUtils';
 import { Logger } from '../../common/Logger';
 
 /**
- * Module for text embeddings. Returns the raw [numTokens, embeddingDim] output
- * for any model — pooled (numTokens === 1) or multi-vector. Scoring / pooling
- * is the consumer's concern (see the `toVector` util for the single-vector
- * common case).
+ * Module for text embeddings. `forward` returns a single pooled `Float32Array`
+ * for standard models, or the per-token `EmbeddingResult` for `multiVector`
+ * (late-interaction) models. The native runner always produces the raw
+ * [numTokens, embeddingDim] matrix; the reduction to a single vector happens
+ * here so the common single-vector API stays `Float32Array`.
  * @category Typescript API
  */
 export class TextEmbeddingsModule extends BaseModule {
   private prompts?: EmbeddingPrompts;
+  private multiVector: boolean;
 
-  private constructor(nativeModule: unknown, prompts?: EmbeddingPrompts) {
+  private constructor(
+    nativeModule: unknown,
+    prompts: EmbeddingPrompts | undefined,
+    multiVector: boolean
+  ) {
     super();
     this.nativeModule = nativeModule;
     this.prompts = prompts;
+    this.multiVector = multiVector;
   }
 
   /**
    * Creates a text embeddings instance for a built-in model.
-   * @param namedSources - The model + tokenizer sources.
+   * @param namedSources - The model config (+ optional prompts / multiVector).
    * @param onDownloadProgress - Optional download progress callback (0..1).
    */
   static async fromModelName(
-    namedSources: AnyTextEmbeddingsModel,
+    namedSources: TextEmbeddingsModel,
     onDownloadProgress: (progress: number) => void = () => {}
   ): Promise<TextEmbeddingsModule> {
     try {
@@ -49,7 +56,8 @@ export class TextEmbeddingsModule extends BaseModule {
       }
       return new TextEmbeddingsModule(
         await global.loadTextEmbeddings(modelPath, tokenizerPath),
-        namedSources.prompts
+        namedSources.prompts,
+        namedSources.multiVector ?? false
       );
     } catch (error) {
       Logger.error('Load failed:', error);
@@ -78,23 +86,29 @@ export class TextEmbeddingsModule extends BaseModule {
   }
 
   /**
-   * Embed text. Returns the raw [numTokens, embeddingDim] result.
+   * Embed text. Standard models return the single pooled `Float32Array`;
+   * `multiVector` models return the per-token `EmbeddingResult`.
    * @param input - The text to embed.
-   * @param role - Optional 'query' | 'document'; prepends the model's prompt
-   *   for that role when configured (no-op otherwise).
+   * @param role - 'query' | 'document'; prepends the model's prompt for that
+   *   role when configured (no-op otherwise).
    */
   async forward(
     input: string,
     role?: EmbeddingRole
-  ): Promise<EmbeddingResult> {
+  ): Promise<Float32Array | EmbeddingResult> {
     if (this.nativeModule == null)
       throw new RnExecutorchError(RnExecutorchErrorCode.ModuleNotLoaded);
     const prefix = (role && this.prompts?.[role]) || '';
     const res = await this.nativeModule.generate(prefix + input);
     // res.dataPtr is already a Float32Array view over the owned native buffer
-    // (built at the JSI boundary), so use it directly — no extra copy.
+    // (built at the JSI boundary).
+    const vectors = res.dataPtr as Float32Array;
+    if (!this.multiVector) {
+      // Pooled models output [1, embeddingDim]; return that single row.
+      return vectors.subarray(0, res.embeddingDim);
+    }
     return {
-      vectors: res.dataPtr as Float32Array,
+      vectors,
       numTokens: res.numTokens,
       embeddingDim: res.embeddingDim,
       tokenIds: res.tokenIds,
diff --git a/packages/react-native-executorch/src/types/textEmbeddings.ts b/packages/react-native-executorch/src/types/textEmbeddings.ts
index 47e056794f..c013cb818b 100644
--- a/packages/react-native-executorch/src/types/textEmbeddings.ts
+++ b/packages/react-native-executorch/src/types/textEmbeddings.ts
@@ -17,17 +17,16 @@ export type TextEmbeddingsModelName =
   | 'lfm2-5-colbert-350m';
 
 /**
- * Raw text embedding output: a [numTokens, embeddingDim] fp32 matrix (row-
- * major) plus the input token ids. Single-vector (pooled) models give
- * numTokens === 1 — use `toVector` for that common case. Multi-vector (late-
- * interaction, e.g. ColBERT) models give the full per-token sequence; scoring
- * (e.g. MaxSim) is the consumer's concern.
+ * Per-token (multi-vector) embedding output for late-interaction models (e.g.
+ * ColBERT): a [numTokens, embeddingDim] fp32 matrix (row-major) plus the input
+ * token ids. Standard models return a single pooled `Float32Array` from
+ * `forward` instead; only `multiVector` models yield this.
  * @category Types
  */
 export interface EmbeddingResult {
   /** Flat [numTokens * embeddingDim] fp32 vectors (row-major). */
   vectors: Float32Array;
-  /** Number of token rows (1 for pooled models). */
+  /** Number of token rows. */
   numTokens: number;
   /** Per-token vector dimension. */
   embeddingDim: number;
@@ -54,44 +53,52 @@ export interface EmbeddingPrompts {
   document: string;
 }
 
-/** A standard (symmetric) embedding model — `forward(text)`, no role. */
+/**
+ * A text embeddings model config. Two optional flags drive `forward`:
+ * - `prompts` present  -> `forward` REQUIRES a `role` (auto-prepends the prompt)
+ * - `multiVector` true -> `forward` returns the per-token `EmbeddingResult`;
+ *                         otherwise it returns a single pooled `Float32Array`.
+ * @category Types
+ */
 export interface TextEmbeddingsModel {
   modelName: TextEmbeddingsModelName;
   modelSource: ResourceSource;
   tokenizerSource: ResourceSource;
-  prompts?: undefined;
+  prompts?: EmbeddingPrompts;
+  multiVector?: boolean;
 }
 
 /**
- * An asymmetric model with query/document prompts — `forward(text, role)` with
- * role REQUIRED.
+ * `forward`'s resolved value type, computed from the model config:
+ * - `multiVector: true` -> the per-token `EmbeddingResult`.
+ * - otherwise            -> a single pooled `Float32Array`.
  */
-export interface PromptedTextEmbeddingsModel {
-  modelName: TextEmbeddingsModelName;
-  modelSource: ResourceSource;
-  tokenizerSource: ResourceSource;
-  prompts: EmbeddingPrompts;
-}
-
-export type AnyTextEmbeddingsModel =
-  | TextEmbeddingsModel
-  | PromptedTextEmbeddingsModel;
+export type ForwardReturn<M extends TextEmbeddingsModel> =
+  M extends { multiVector: true } ? EmbeddingResult : Float32Array;
 
 /**
- * `forward`'s signature, discriminated by the model: prompted models require a
- * `role` argument; standard models take none.
+ * `forward`'s signature, computed from the model config:
+ * - A model that DEFINITELY has prompts -> `role` is REQUIRED.
+ * - A model that definitely has NO prompts (`prompts?: undefined`) -> no role.
+ * - Otherwise (prompts optional / unknown, e.g. a heterogeneous model list) ->
+ *   `role` is OPTIONAL.
  */
-export type ForwardFn<M extends AnyTextEmbeddingsModel> =
-  M extends PromptedTextEmbeddingsModel
-    ? (input: string, role: EmbeddingRole) => Promise<EmbeddingResult>
-    : (input: string) => Promise<EmbeddingResult>;
+export type ForwardFn<M extends TextEmbeddingsModel> = M extends {
+  prompts: EmbeddingPrompts;
+}
+  ? (input: string, role: EmbeddingRole) => Promise<ForwardReturn<M>>
+  : undefined extends M['prompts']
+    ? M['prompts'] extends undefined
+      ? (input: string) => Promise<ForwardReturn<M>>
+      : (input: string, role?: EmbeddingRole) => Promise<ForwardReturn<M>>
+    : (input: string) => Promise<ForwardReturn<M>>;
 
 /**
  * Props for the useTextEmbeddings hook.
  * @category Types
  */
 export interface TextEmbeddingsProps<
-  M extends AnyTextEmbeddingsModel = AnyTextEmbeddingsModel,
+  M extends TextEmbeddingsModel = TextEmbeddingsModel,
 > {
   model: M;
   preventLoad?: boolean;
@@ -102,7 +109,7 @@ export interface TextEmbeddingsProps<
  * @category Types
  */
 export interface TextEmbeddingsType<
-  M extends AnyTextEmbeddingsModel = AnyTextEmbeddingsModel,
+  M extends TextEmbeddingsModel = TextEmbeddingsModel,
 > {
   error: null | RnExecutorchError;
   isReady: boolean;
@@ -110,10 +117,9 @@ export interface TextEmbeddingsType<
   downloadProgress: number;
 
   /**
-   * Embed text into a [numTokens, embeddingDim] result. Pooled models return
-   * numTokens === 1 (use `toVector`); multi-vector models return the full
-   * per-token sequence. Models with prompts require a `role`
-   * ('query' | 'document'); standard models take none.
+   * Embed text. Standard models return a single pooled `Float32Array`;
+   * `multiVector` models return the per-token `EmbeddingResult`. Models with
+   * `prompts` require a `role` ('query' | 'document').
    */
   forward: ForwardFn<M>;
 }

From e12fb039e08a018784d9341dc6d4ac37b88bdd3d Mon Sep 17 00:00:00 2001
From: Norbert Klockiewicz <Nklockiewicz12@gmail.com>
Date: Mon, 22 Jun 2026 15:07:12 +0200
Subject: [PATCH 04/14] refactor: move skiplist to model config, MaxSim scoring
 to app

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 apps/text-embeddings/app/colbert/index.tsx    | 15 +++----
 apps/text-embeddings/utils/math.ts            | 34 +++++++++++++++
 .../src/constants/modelRegistry.ts            | 10 +++++
 .../src/types/textEmbeddings.ts               |  7 ++++
 .../src/utils/textEmbeddings.ts               | 42 -------------------
 5 files changed, 56 insertions(+), 52 deletions(-)

diff --git a/apps/text-embeddings/app/colbert/index.tsx b/apps/text-embeddings/app/colbert/index.tsx
index d686168f43..5136aad9f1 100644
--- a/apps/text-embeddings/app/colbert/index.tsx
+++ b/apps/text-embeddings/app/colbert/index.tsx
@@ -15,23 +15,18 @@ import { useIsFocused } from 'expo-router';
 import {
   models,
   useTextEmbeddings,
-  maxSim,
   EmbeddingResult,
 } from 'react-native-executorch';
 import ColorPalette from '../../colors';
 import ErrorBanner from '../../components/ErrorBanner';
+import { maxSim } from '../../utils/math';
 
 const colbertModel = models.text_embedding.lfm2_5_colbert_350m();
 
-// The library auto-applies the model's [Q]/[D] prompts via forward(text, role).
-// Late-interaction MaxSim is a shipped util; the document skiplist (punctuation
-// token ids excluded from scoring) is the consumer's choice — these are the
-// LFM2.5-ColBERT skiplist ids.
-const SKIPLIST = [
-  510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524,
-  535, 536, 537, 538, 539, 540, 541, 568, 569, 570, 571, 572, 573, 600, 601,
-  602, 603,
-];
+// The library auto-applies the model's [Q]/[D] prompts via forward(text, role)
+// and ships the document skiplist on the model config; we just pass it to the
+// shipped MaxSim util.
+const SKIPLIST = colbertModel.skiplistIds ?? [];
 
 const CORPUS: string[] = [
   'The forecast says heavy showers this afternoon.',
diff --git a/apps/text-embeddings/utils/math.ts b/apps/text-embeddings/utils/math.ts
index 50c70d1f92..997d3f46fb 100644
--- a/apps/text-embeddings/utils/math.ts
+++ b/apps/text-embeddings/utils/math.ts
@@ -1,6 +1,7 @@
 import {
   RnExecutorchError,
   RnExecutorchErrorCode,
+  EmbeddingResult,
 } from 'react-native-executorch';
 
 export const dotProduct = (a: Float32Array, b: Float32Array) => {
@@ -17,3 +18,36 @@ export const dotProduct = (a: Float32Array, b: Float32Array) => {
   }
   return sum;
 };
+
+/**
+ * ColBERT late-interaction score between a query and a document encoding:
+ *   score = Σ_q max_d ( q · d )
+ * For each query token, the max dot over non-skiplist doc tokens, summed.
+ * Per-token vectors are L2-normalized by the graph, so dot == cosine. Scoring
+ * is the consumer's concern (the library just yields the per-token vectors),
+ * so this lives in the app alongside dotProduct.
+ */
+export const maxSim = (
+  query: EmbeddingResult,
+  doc: EmbeddingResult,
+  skiplistIds: number[] = []
+) => {
+  const dim = query.embeddingDim;
+  const skip = new Set(skiplistIds);
+  let score = 0;
+  for (let qi = 0; qi < query.numTokens; qi++) {
+    const qOff = qi * dim;
+    let best = -Infinity;
+    for (let di = 0; di < doc.numTokens; di++) {
+      if (skip.has(doc.tokenIds[di]!)) continue;
+      const dOff = di * dim;
+      let dot = 0;
+      for (let k = 0; k < dim; k++) {
+        dot += (query.vectors[qOff + k] ?? 0) * (doc.vectors[dOff + k] ?? 0);
+      }
+      if (dot > best) best = dot;
+    }
+    if (best !== -Infinity) score += best;
+  }
+  return score;
+};
diff --git a/packages/react-native-executorch/src/constants/modelRegistry.ts b/packages/react-native-executorch/src/constants/modelRegistry.ts
index c2e3a2a21d..f57c178b5e 100644
--- a/packages/react-native-executorch/src/constants/modelRegistry.ts
+++ b/packages/react-native-executorch/src/constants/modelRegistry.ts
@@ -287,6 +287,14 @@ const LFM2_5_EMBEDDING_350M_VARIANTS = {
 // LFM2.5-ColBERT is a plain text-embedding model from the library's POV: it
 // returns per-token vectors. Late-interaction scoring (MaxSim / skiplist) is
 // the consumer's concern; the library only auto-applies the role prompts.
+// Document punctuation token ids excluded from MaxSim (ColBERT skiplist),
+// derived from the model's config_sentence_transformers.json skiplist_words.
+const LFM_COLBERT_SKIPLIST = [
+  510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524,
+  535, 536, 537, 538, 539, 540, 541, 568, 569, 570, 571, 572, 573, 600, 601,
+  602, 603,
+];
+
 const LFM2_5_COLBERT_350M_VARIANTS = {
   mlx: {
     base: {
@@ -295,6 +303,7 @@ const LFM2_5_COLBERT_350M_VARIANTS = {
       tokenizerSource: M.LFM2_5_COLBERT_350M_TOKENIZER,
       prompts: LFM_COLBERT_PROMPTS,
       multiVector: true as const,
+      skiplistIds: LFM_COLBERT_SKIPLIST,
     },
   },
   xnnpack: {
@@ -304,6 +313,7 @@ const LFM2_5_COLBERT_350M_VARIANTS = {
       tokenizerSource: M.LFM2_5_COLBERT_350M_TOKENIZER,
       prompts: LFM_COLBERT_PROMPTS,
       multiVector: true as const,
+      skiplistIds: LFM_COLBERT_SKIPLIST,
     },
   },
 };
diff --git a/packages/react-native-executorch/src/types/textEmbeddings.ts b/packages/react-native-executorch/src/types/textEmbeddings.ts
index c013cb818b..2f42d71e9d 100644
--- a/packages/react-native-executorch/src/types/textEmbeddings.ts
+++ b/packages/react-native-executorch/src/types/textEmbeddings.ts
@@ -66,6 +66,13 @@ export interface TextEmbeddingsModel {
   tokenizerSource: ResourceSource;
   prompts?: EmbeddingPrompts;
   multiVector?: boolean;
+  /**
+   * Document token ids to exclude from late-interaction scoring (e.g. ColBERT's
+   * punctuation skiplist). Derived from the model's training config, so it's
+   * shipped here rather than reconstructed by the consumer, who passes it to
+   * their own MaxSim scoring.
+   */
+  skiplistIds?: number[];
 }
 
 /**
diff --git a/packages/react-native-executorch/src/utils/textEmbeddings.ts b/packages/react-native-executorch/src/utils/textEmbeddings.ts
index da10d9aa08..e9be7cf774 100644
--- a/packages/react-native-executorch/src/utils/textEmbeddings.ts
+++ b/packages/react-native-executorch/src/utils/textEmbeddings.ts
@@ -35,45 +35,3 @@ export function getTokenVectors(result: EmbeddingResult): Float32Array[] {
   }
   return rows;
 }
-
-/**
- * Late-interaction MaxSim score between a query and a document encoding:
- *
- *   score = Σ_q  max_d ( q · d )
- *
- * For each query token, takes the max dot product over all (non-skiplist)
- * document tokens, then sums across query tokens. Per-token vectors are
- * L2-normalized by the graph, so a dot product is a cosine.
- *
- * `skiplistIds` (e.g. punctuation token ids) are excluded from the document
- * side, matching ColBERT's document skiplist. Pass `[]` to score every token.
- *
- * @category Utils
- */
-export function maxSim(
-  query: EmbeddingResult,
-  doc: EmbeddingResult,
-  skiplistIds: number[] = []
-): number {
-  const dim = query.embeddingDim;
-  const q = query.vectors;
-  const d = doc.vectors;
-  const skip = new Set(skiplistIds);
-
-  let score = 0;
-  for (let qi = 0; qi < query.numTokens; qi++) {
-    const qOff = qi * dim;
-    let best = -Infinity;
-    for (let di = 0; di < doc.numTokens; di++) {
-      if (skip.has(doc.tokenIds[di]!)) continue;
-      const dOff = di * dim;
-      let dot = 0;
-      for (let k = 0; k < dim; k++) {
-        dot += (q[qOff + k] ?? 0) * (d[dOff + k] ?? 0);
-      }
-      if (dot > best) best = dot;
-    }
-    if (best !== -Infinity) score += best;
-  }
-  return score;
-}

From d551b5f7adc5ed27b9e4bb06489c54352c0cd11f Mon Sep 17 00:00:00 2001
From: Norbert Klockiewicz <Nklockiewicz12@gmail.com>
Date: Mon, 22 Jun 2026 16:33:01 +0200
Subject: [PATCH 05/14] refactor(example): merge ColBERT search into text
 embeddings screen

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 apps/text-embeddings/app/_layout.tsx          |   8 -
 apps/text-embeddings/app/colbert/index.tsx    | 284 ------------------
 .../app/text-embeddings/index.tsx             |  43 ++-
 3 files changed, 33 insertions(+), 302 deletions(-)
 delete mode 100644 apps/text-embeddings/app/colbert/index.tsx

diff --git a/apps/text-embeddings/app/_layout.tsx b/apps/text-embeddings/app/_layout.tsx
index 57acb26eb2..bb8e1deeb8 100644
--- a/apps/text-embeddings/app/_layout.tsx
+++ b/apps/text-embeddings/app/_layout.tsx
@@ -109,14 +109,6 @@ export default function _layout() {
             headerTitleStyle: { color: ColorPalette.primary },
           }}
         />
-        <Drawer.Screen
-          name="colbert/index"
-          options={{
-            drawerLabel: 'ColBERT search',
-            title: 'ColBERT search',
-            headerTitleStyle: { color: ColorPalette.primary },
-          }}
-        />
       </Drawer>
     </GeneratingContext>
   );
diff --git a/apps/text-embeddings/app/colbert/index.tsx b/apps/text-embeddings/app/colbert/index.tsx
deleted file mode 100644
index 5136aad9f1..0000000000
--- a/apps/text-embeddings/app/colbert/index.tsx
+++ /dev/null
@@ -1,284 +0,0 @@
-import { useEffect, useState } from 'react';
-import {
-  StyleSheet,
-  Text,
-  TextInput,
-  TouchableOpacity,
-  View,
-  SafeAreaView,
-  ScrollView,
-  KeyboardAvoidingView,
-  Platform,
-} from 'react-native';
-import { Ionicons } from '@expo/vector-icons';
-import { useIsFocused } from 'expo-router';
-import {
-  models,
-  useTextEmbeddings,
-  EmbeddingResult,
-} from 'react-native-executorch';
-import ColorPalette from '../../colors';
-import ErrorBanner from '../../components/ErrorBanner';
-import { maxSim } from '../../utils/math';
-
-const colbertModel = models.text_embedding.lfm2_5_colbert_350m();
-
-// The library auto-applies the model's [Q]/[D] prompts via forward(text, role)
-// and ships the document skiplist on the model config; we just pass it to the
-// shipped MaxSim util.
-const SKIPLIST = colbertModel.skiplistIds ?? [];
-
-const CORPUS: string[] = [
-  'The forecast says heavy showers this afternoon.',
-  "It's so sunny outside today!",
-  'The home team scored in the final minute to win the match.',
-  'Fans packed the stadium for the championship game.',
-  'Simmer the tomatoes with garlic before adding the pasta.',
-  'He whisked the eggs and folded in the melted chocolate.',
-  'The new phone has a faster chip and a brighter screen.',
-  'The flight to Tokyo was delayed by three hours.',
-  'We hiked along the coast and camped near the cliffs.',
-];
-
-const EXAMPLE_QUERIES: string[] = [
-  "What's the weather like?",
-  'Who won the match?',
-  'How do I cook dinner?',
-  'Tell me about the latest technology',
-];
-
-type Ranked = { sentence: string; score: number };
-
-export default function ColbertScreenWrapper() {
-  return useIsFocused() ? <ColbertScreen /> : null;
-}
-
-function ColbertScreen() {
-  const model = useTextEmbeddings({ model: colbertModel });
-  const [error, setError] = useState<string | null>(null);
-  const [query, setQuery] = useState('');
-  const [docEncs, setDocEncs] = useState<
-    { sentence: string; enc: EmbeddingResult }[]
-  >([]);
-  const [results, setResults] = useState<Ranked[]>([]);
-  const [indexing, setIndexing] = useState(false);
-  const [encodeTime, setEncodeTime] = useState<number | null>(null);
-
-  useEffect(
-    () => {
-      let cancelled = false;
-      const indexCorpus = async () => {
-        if (!model.isReady) return;
-        setIndexing(true);
-        setResults([]);
-        try {
-          const encs = [];
-          for (const sentence of CORPUS) {
-            const enc = await model.forward(sentence, 'document');
-            if (cancelled) return;
-            encs.push({ sentence, enc });
-          }
-          setDocEncs(encs);
-        } catch (e) {
-          setError(e instanceof Error ? e.message : String(e));
-        } finally {
-          if (!cancelled) setIndexing(false);
-        }
-      };
-      indexCorpus();
-      return () => {
-        cancelled = true;
-      };
-    },
-    // eslint-disable-next-line react-hooks/exhaustive-deps
-    [model.isReady]
-  );
-
-  const runSearch = async (queryText: string = query) => {
-    const q = queryText.trim();
-    if (!model.isReady || !q || docEncs.length === 0) return;
-    setQuery(queryText);
-    try {
-      const start = Date.now();
-      const qEnc = await model.forward(q, 'query');
-      setEncodeTime(Date.now() - start);
-      const ranked = docEncs
-        .map(({ sentence, enc }) => ({
-          sentence,
-          score: maxSim(qEnc, enc, SKIPLIST),
-        }))
-        .sort((a, b) => b.score - a.score);
-      setResults(ranked);
-    } catch (e) {
-      setError(e instanceof Error ? e.message : String(e));
-    }
-  };
-
-  const ready = model.isReady && !indexing && docEncs.length > 0;
-  const canSearch = ready && !!query.trim();
-
-  const statusText = model.error
-    ? `Error: ${model.error}`
-    : !model.isReady
-      ? `Loading model ${(model.downloadProgress * 100).toFixed(0)}%`
-      : indexing
-        ? 'Indexing corpus…'
-        : 'Ready';
-
-  return (
-    <SafeAreaView style={styles.container}>
-      <KeyboardAvoidingView
-        style={styles.flex}
-        behavior={Platform.OS === 'ios' ? 'padding' : undefined}
-      >
-        <ScrollView contentContainerStyle={styles.scroll}>
-          <Text style={styles.heading}>ColBERT Late-Interaction Search</Text>
-          <Text style={styles.status}>{statusText}</Text>
-          <ErrorBanner message={error} onDismiss={() => setError(null)} />
-
-          <View style={styles.card}>
-            <Text style={styles.sectionTitle}>
-              Search the corpus ({CORPUS.length} sentences)
-            </Text>
-            <Text style={styles.hint}>
-              Per-token vectors scored with MaxSim. Tap an example or type a
-              query.
-            </Text>
-            <View style={styles.chipRow}>
-              {EXAMPLE_QUERIES.map((q) => (
-                <TouchableOpacity
-                  key={q}
-                  style={[styles.chip, !ready && styles.chipDisabled]}
-                  disabled={!ready}
-                  onPress={() => runSearch(q)}
-                >
-                  <Text style={styles.chipText}>{q}</Text>
-                </TouchableOpacity>
-              ))}
-            </View>
-            <TextInput
-              placeholder="Type a search query..."
-              placeholderTextColor="#94A3B8"
-              style={styles.input}
-              value={query}
-              onChangeText={setQuery}
-              onSubmitEditing={() => runSearch()}
-              returnKeyType="search"
-            />
-            <TouchableOpacity
-              onPress={() => runSearch()}
-              style={[styles.button, !canSearch && styles.buttonDisabled]}
-              disabled={!canSearch}
-            >
-              <Ionicons
-                name="search"
-                size={16}
-                color={!canSearch ? 'gray' : 'white'}
-              />
-              <Text style={[styles.buttonText, !canSearch && styles.buttonTextDisabled]}>
-                {indexing ? 'Indexing…' : 'Search'}
-              </Text>
-            </TouchableOpacity>
-            {encodeTime !== null && (
-              <Text style={styles.stats}>Query encoded in {encodeTime} ms</Text>
-            )}
-          </View>
-
-          {results.length > 0 && (
-            <View style={styles.card}>
-              <Text style={styles.sectionTitle}>Results</Text>
-              {results.map((r, i) => (
-                <View key={i} style={styles.resultRow}>
-                  <View style={styles.resultHeader}>
-                    <Text style={styles.resultText}>{r.sentence}</Text>
-                    <Text style={styles.resultScore}>{r.score.toFixed(2)}</Text>
-                  </View>
-                  <View style={styles.barTrack}>
-                    <View
-                      style={[
-                        styles.barFill,
-                        {
-                          width: `${Math.round(
-                            (results[0].score > 0 ? r.score / results[0].score : 0) * 100
-                          )}%`,
-                        },
-                        i === 0 && styles.barFillTop,
-                      ]}
-                    />
-                  </View>
-                </View>
-              ))}
-            </View>
-          )}
-        </ScrollView>
-      </KeyboardAvoidingView>
-    </SafeAreaView>
-  );
-}
-
-const styles = StyleSheet.create({
-  container: { flex: 1, backgroundColor: '#F8FAFC' },
-  flex: { flex: 1 },
-  scroll: { padding: 20 },
-  heading: { fontSize: 22, fontWeight: '500', marginBottom: 8, color: '#0F172A' },
-  status: { fontSize: 14, color: '#64748B', marginBottom: 12 },
-  card: {
-    backgroundColor: '#fff',
-    padding: 16,
-    borderRadius: 16,
-    borderColor: '#E2E8F0',
-    borderWidth: 2,
-    marginBottom: 20,
-  },
-  sectionTitle: { fontSize: 16, fontWeight: '500', marginBottom: 8, color: '#1E293B' },
-  hint: { fontSize: 13, color: '#64748B', marginBottom: 12, lineHeight: 18 },
-  chipRow: { flexDirection: 'row', flexWrap: 'wrap', gap: 8, marginBottom: 12 },
-  chip: {
-    backgroundColor: '#EEF2FF',
-    borderColor: '#C7D2FE',
-    borderWidth: 1,
-    borderRadius: 16,
-    paddingHorizontal: 12,
-    paddingVertical: 6,
-  },
-  chipDisabled: { opacity: 0.4 },
-  chipText: { fontSize: 13, color: 'navy' },
-  input: {
-    backgroundColor: '#F1F5F9',
-    borderRadius: 10,
-    padding: 10,
-    marginBottom: 10,
-    fontSize: 16,
-    color: '#0F172A',
-    minHeight: 40,
-  },
-  button: {
-    backgroundColor: 'navy',
-    borderRadius: 10,
-    paddingVertical: 12,
-    flexDirection: 'row',
-    alignItems: 'center',
-    justifyContent: 'center',
-  },
-  buttonDisabled: { backgroundColor: '#f0f0f0' },
-  buttonText: { color: '#fff', fontWeight: '500', marginLeft: 6 },
-  buttonTextDisabled: { color: 'gray' },
-  stats: { fontSize: 13, color: '#64748B', marginTop: 8, textAlign: 'center' },
-  resultRow: { marginBottom: 14 },
-  resultHeader: {
-    flexDirection: 'row',
-    justifyContent: 'space-between',
-    marginBottom: 6,
-    gap: 8,
-  },
-  resultText: { flex: 1, fontSize: 14, color: '#334155' },
-  resultScore: {
-    fontSize: 14,
-    fontWeight: '600',
-    color: '#0F172A',
-    fontVariant: ['tabular-nums'],
-  },
-  barTrack: { height: 8, borderRadius: 4, backgroundColor: '#E2E8F0', overflow: 'hidden' },
-  barFill: { height: '100%', borderRadius: 4, backgroundColor: '#94A3B8' },
-  barFillTop: { backgroundColor: 'navy' },
-});
diff --git a/apps/text-embeddings/app/text-embeddings/index.tsx b/apps/text-embeddings/app/text-embeddings/index.tsx
index 8cb6777843..c2e3d14e29 100644
--- a/apps/text-embeddings/app/text-embeddings/index.tsx
+++ b/apps/text-embeddings/app/text-embeddings/index.tsx
@@ -16,12 +16,15 @@ import {
   models,
   useTextEmbeddings,
   TextEmbeddingsProps,
+  EmbeddingResult,
 } from 'react-native-executorch';
 const textEmbedding = models.text_embedding;
 
-// Single-vector (pooled) models: forward() returns a Float32Array directly.
-// The multi-vector ColBERT model has its own screen.
+// forward() returns a Float32Array for pooled (single-vector) models and an
+// EmbeddingResult for multi-vector (late-interaction) models. We store the raw
+// return for the whole corpus and pick the scorer per model below.
 type TextEmbeddingModel = TextEmbeddingsProps['model'];
+type Encoding = Float32Array | EmbeddingResult;
 
 const MODELS: { label: string; value: TextEmbeddingModel }[] = [
   { label: 'MiniLM L6', value: textEmbedding.all_minilm_l6_v2() },
@@ -53,6 +56,10 @@ const MODELS: { label: string; value: TextEmbeddingModel }[] = [
     label: 'LFM2.5 Embedding MLX',
     value: textEmbedding.lfm2_5_embedding_350m({ backend: 'mlx' }),
   },
+  {
+    label: 'LFM2.5 ColBERT (late-interaction)',
+    value: textEmbedding.lfm2_5_colbert_350m(),
+  },
 ];
 
 // A multi-topic corpus so semantic ranking is visible: a weather query should
@@ -83,7 +90,7 @@ const EXAMPLE_QUERIES: string[] = [
   'Where did they travel?',
 ];
 import { useIsFocused } from 'expo-router';
-import { dotProduct } from '../../utils/math';
+import { dotProduct, maxSim } from '../../utils/math';
 import ErrorBanner from '../../components/ErrorBanner';
 
 export default function TextEmbeddingsScreenWrapper() {
@@ -101,9 +108,15 @@ function TextEmbeddingsScreen() {
   const model = useTextEmbeddings({ model: selectedModel });
   const [error, setError] = useState<string | null>(null);
 
+  // ColBERT-style models score per-token vectors with MaxSim and exclude
+  // punctuation tokens; pooled models score the single vector with a dot
+  // product. Both are driven off the selected model's config.
+  const isMultiVector = !!selectedModel.multiVector;
+  const skiplistIds = selectedModel.skiplistIds ?? [];
+
   const [query, setQuery] = useState('');
   const [corpusEmbeddings, setCorpusEmbeddings] = useState<
-    { sentence: string; embedding: Float32Array }[]
+    { sentence: string; embedding: Encoding }[]
   >([]);
   const [results, setResults] = useState<RankedResult[]>([]);
   const [embeddingTime, setEmbeddingTime] = useState<number | null>(null);
@@ -122,8 +135,8 @@ function TextEmbeddingsScreen() {
           const embedded = [];
           for (const sentence of CORPUS) {
             // forward(_, 'document') auto-applies the model's document prompt
-            // (a no-op for models without one). Single-vector models return
-            // a Float32Array directly.
+            // (a no-op for models without one). Pooled models return a
+            // Float32Array, multi-vector models an EmbeddingResult.
             const embedding = await model.forward(sentence, 'document');
             if (cancelled) return;
             embedded.push({ sentence, embedding });
@@ -155,12 +168,21 @@ function TextEmbeddingsScreen() {
     setQuery(queryText);
     try {
       const start = Date.now();
-      const queryEmbedding = await model.forward(q, 'query');
+      const queryEmbedding = (await model.forward(q, 'query')) as Encoding;
       setEmbeddingTime(Date.now() - start);
       const ranked = corpusEmbeddings
         .map(({ sentence, embedding }) => ({
           sentence,
-          similarity: dotProduct(queryEmbedding, embedding),
+          similarity: isMultiVector
+            ? maxSim(
+                queryEmbedding as EmbeddingResult,
+                embedding as EmbeddingResult,
+                skiplistIds
+              )
+            : dotProduct(
+                queryEmbedding as Float32Array,
+                embedding as Float32Array
+              ),
         }))
         .sort((a, b) => b.similarity - a.similarity);
       setResults(ranked);
@@ -210,8 +232,9 @@ function TextEmbeddingsScreen() {
               Search the corpus ({CORPUS.length} sentences)
             </Text>
             <Text style={styles.hint}>
-              Ranks every sentence by meaning. Ask a full question — tap an
-              example or type your own.
+              {isMultiVector
+                ? 'Ranks per-token vectors with MaxSim (late interaction). Ask a full question — tap an example or type your own.'
+                : 'Ranks every sentence by meaning. Ask a full question — tap an example or type your own.'}
             </Text>
             <View style={styles.chipRow}>
               {EXAMPLE_QUERIES.map((q) => (

From b91153064ca041e99295b9b0d6511979e8716ba3 Mon Sep 17 00:00:00 2001
From: Norbert Klockiewicz <Nklockiewicz12@gmail.com>
Date: Mon, 22 Jun 2026 17:19:07 +0200
Subject: [PATCH 06/14] refactor: drop empty BaseEmbeddings layer, rename
 skipList, trim comments

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../app/text-embeddings/index.tsx             | 41 ++++---------------
 apps/text-embeddings/utils/math.ts            | 12 +-----
 .../common/rnexecutorch/TokenizerModule.h     |  6 ---
 .../host_objects/JsiConversions.h             | 13 ++----
 .../models/embeddings/BaseEmbeddings.cpp      |  9 ----
 .../models/embeddings/BaseEmbeddings.h        | 13 ------
 .../models/embeddings/text/TextEmbeddings.cpp |  2 +-
 .../models/embeddings/text/TextEmbeddings.h   |  4 +-
 .../models/text_to_image/Encoder.cpp          |  2 -
 .../common/rnexecutorch/tests/CMakeLists.txt  |  3 --
 .../src/constants/modelRegistry.ts            | 14 ++-----
 .../src/constants/modelUrls.ts                | 11 +----
 .../src/types/textEmbeddings.ts               | 11 +++--
 13 files changed, 29 insertions(+), 112 deletions(-)
 delete mode 100644 packages/react-native-executorch/common/rnexecutorch/models/embeddings/BaseEmbeddings.cpp
 delete mode 100644 packages/react-native-executorch/common/rnexecutorch/models/embeddings/BaseEmbeddings.h

diff --git a/apps/text-embeddings/app/text-embeddings/index.tsx b/apps/text-embeddings/app/text-embeddings/index.tsx
index c2e3d14e29..2c62a22922 100644
--- a/apps/text-embeddings/app/text-embeddings/index.tsx
+++ b/apps/text-embeddings/app/text-embeddings/index.tsx
@@ -5,7 +5,6 @@ import {
   TextInput,
   TouchableOpacity,
   View,
-  SafeAreaView,
   ScrollView,
   KeyboardAvoidingView,
   Platform,
@@ -18,11 +17,13 @@ import {
   TextEmbeddingsProps,
   EmbeddingResult,
 } from 'react-native-executorch';
+import { useIsFocused } from 'expo-router';
+import { dotProduct, maxSim } from '../../utils/math';
+import ErrorBanner from '../../components/ErrorBanner';
+import { SafeAreaView } from 'react-native-safe-area-context';
+
 const textEmbedding = models.text_embedding;
 
-// forward() returns a Float32Array for pooled (single-vector) models and an
-// EmbeddingResult for multi-vector (late-interaction) models. We store the raw
-// return for the whole corpus and pick the scorer per model below.
 type TextEmbeddingModel = TextEmbeddingsProps['model'];
 type Encoding = Float32Array | EmbeddingResult;
 
@@ -62,9 +63,6 @@ const MODELS: { label: string; value: TextEmbeddingModel }[] = [
   },
 ];
 
-// A multi-topic corpus so semantic ranking is visible: a weather query should
-// float the weather lines to the top and push sports/cooking/tech down, even
-// with no shared keywords.
 const CORPUS: string[] = [
   'The forecast says heavy showers this afternoon.',
   "It's so sunny outside today!",
@@ -80,8 +78,6 @@ const CORPUS: string[] = [
   'We hiked along the coast and camped near the cliffs.',
 ];
 
-// Tap-to-run example queries. Natural-language questions — how these models
-// are trained to be queried — give the cleanest separation.
 const EXAMPLE_QUERIES: string[] = [
   "What's the weather like?",
   'Who won the match?',
@@ -89,9 +85,6 @@ const EXAMPLE_QUERIES: string[] = [
   'How do I cook dinner?',
   'Where did they travel?',
 ];
-import { useIsFocused } from 'expo-router';
-import { dotProduct, maxSim } from '../../utils/math';
-import ErrorBanner from '../../components/ErrorBanner';
 
 export default function TextEmbeddingsScreenWrapper() {
   const isFocused = useIsFocused();
@@ -108,11 +101,8 @@ function TextEmbeddingsScreen() {
   const model = useTextEmbeddings({ model: selectedModel });
   const [error, setError] = useState<string | null>(null);
 
-  // ColBERT-style models score per-token vectors with MaxSim and exclude
-  // punctuation tokens; pooled models score the single vector with a dot
-  // product. Both are driven off the selected model's config.
   const isMultiVector = !!selectedModel.multiVector;
-  const skiplistIds = selectedModel.skiplistIds ?? [];
+  const skipListIds = selectedModel.skipListIds ?? [];
 
   const [query, setQuery] = useState('');
   const [corpusEmbeddings, setCorpusEmbeddings] = useState<
@@ -122,8 +112,6 @@ function TextEmbeddingsScreen() {
   const [embeddingTime, setEmbeddingTime] = useState<number | null>(null);
   const [indexing, setIndexing] = useState(false);
 
-  // Embed the whole corpus once the model is ready (re-runs on model change so
-  // prefixes / weights match the active model).
   useEffect(
     () => {
       let cancelled = false;
@@ -134,17 +122,11 @@ function TextEmbeddingsScreen() {
         try {
           const embedded = [];
           for (const sentence of CORPUS) {
-            // forward(_, 'document') auto-applies the model's document prompt
-            // (a no-op for models without one). Pooled models return a
-            // Float32Array, multi-vector models an EmbeddingResult.
             const embedding = await model.forward(sentence, 'document');
             if (cancelled) return;
             embedded.push({ sentence, embedding });
           }
           setCorpusEmbeddings(embedded);
-        } catch {
-          // A transient "Model not loaded" can fire while the hook swaps
-          // models; the effect re-runs once the new model is ready.
         } finally {
           if (!cancelled) setIndexing(false);
         }
@@ -154,10 +136,7 @@ function TextEmbeddingsScreen() {
         cancelled = true;
       };
     },
-    // Re-index when the model becomes ready OR the selected model changes, so
-    // the corpus is embedded by the active model. The "Model not loaded" race
-    // is handled by the isReady gate plus clearing the corpus on switch;
-    // switching sets isReady false→true so the re-run sees the new model.
+
     // eslint-disable-next-line react-hooks/exhaustive-deps
     [model.isReady, selectedModel]
   );
@@ -177,7 +156,7 @@ function TextEmbeddingsScreen() {
             ? maxSim(
                 queryEmbedding as EmbeddingResult,
                 embedding as EmbeddingResult,
-                skiplistIds
+                skipListIds
               )
             : dotProduct(
                 queryEmbedding as Float32Array,
@@ -201,8 +180,6 @@ function TextEmbeddingsScreen() {
     return model.isGenerating ? 'Generating...' : 'Model is ready';
   };
 
-  // Chips/examples just need a ready, indexed model; the Search button also
-  // needs a non-empty typed query.
   const ready = model.isReady && !indexing && corpusEmbeddings.length > 0;
   const canSearch = ready && !!query.trim();
 
@@ -306,8 +283,6 @@ function TextEmbeddingsScreen() {
   );
 }
 
-// One ranked result with a similarity bar. The bar is scaled relative to the
-// top hit so the ranking is visually obvious; the raw cosine is shown too.
 function ResultRow({
   sentence,
   similarity,
diff --git a/apps/text-embeddings/utils/math.ts b/apps/text-embeddings/utils/math.ts
index 997d3f46fb..44248e1658 100644
--- a/apps/text-embeddings/utils/math.ts
+++ b/apps/text-embeddings/utils/math.ts
@@ -19,21 +19,13 @@ export const dotProduct = (a: Float32Array, b: Float32Array) => {
   return sum;
 };
 
-/**
- * ColBERT late-interaction score between a query and a document encoding:
- *   score = Σ_q max_d ( q · d )
- * For each query token, the max dot over non-skiplist doc tokens, summed.
- * Per-token vectors are L2-normalized by the graph, so dot == cosine. Scoring
- * is the consumer's concern (the library just yields the per-token vectors),
- * so this lives in the app alongside dotProduct.
- */
 export const maxSim = (
   query: EmbeddingResult,
   doc: EmbeddingResult,
-  skiplistIds: number[] = []
+  skipListIds: number[] = []
 ) => {
   const dim = query.embeddingDim;
-  const skip = new Set(skiplistIds);
+  const skip = new Set(skipListIds);
   let score = 0;
   for (let qi = 0; qi < query.numTokens; qi++) {
     const qOff = qi * dim;
diff --git a/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.h b/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.h
index 09877dfc65..0e1356f121 100644
--- a/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.h
+++ b/packages/react-native-executorch/common/rnexecutorch/TokenizerModule.h
@@ -13,10 +13,6 @@ class TokenizerModule {
                            std::shared_ptr<react::CallInvoker> callInvoker);
   [[nodiscard("Registered non-void function")]] std::vector<uint64_t>
   encode(std::string s) const;
-  // Like encode, but applies the tokenizer.json post_processor (e.g.
-  // TemplateProcessing that prepends BOS). Needed by models whose pooling
-  // depends on the BOS/CLS token (e.g. CLS-pooled text embeddings). Not JS-
-  // bound; encode() keeps its single-arg signature for the JS API.
   [[nodiscard("Registered non-void function")]] std::vector<uint64_t>
   encodeWithSpecialTokens(std::string s) const;
   [[nodiscard("Registered non-void function")]] std::string
@@ -30,8 +26,6 @@ class TokenizerModule {
   std::size_t getMemoryLowerBound() const noexcept;
 
 private:
-  // Shared encode implementation. bos/eos act as an add-special-tokens flag
-  // (not a literal count) when the tokenizer.json defines a post_processor.
   std::vector<uint64_t> encodeImpl(const std::string &s, int8_t bos,
                                    int8_t eos) const;
 
diff --git a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h
index 8e211f0028..fdc87cd9af 100644
--- a/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h
+++ b/packages/react-native-executorch/common/rnexecutorch/host_objects/JsiConversions.h
@@ -708,20 +708,15 @@ getJsiValue(const models::style_transfer::PixelDataResult &result,
   return obj;
 }
 
-// Text embedding output: a [numTokens, embeddingDim] fp32 matrix + input token
-// ids. Pooled models give numTokens == 1; multi-vector give the full sequence.
-// The TS layer reduces to a single vector or keeps the matrix per model config.
-inline jsi::Value
-getJsiValue(const models::embeddings::EmbeddingResult &result,
-            jsi::Runtime &runtime) {
+inline jsi::Value getJsiValue(const models::embeddings::EmbeddingResult &result,
+                              jsi::Runtime &runtime) {
   jsi::Object obj(runtime);
 
   auto arrayBuffer = jsi::ArrayBuffer(runtime, result.dataPtr);
   auto float32ArrayCtor =
       runtime.global().getPropertyAsFunction(runtime, "Float32Array");
-  auto float32Array =
-      float32ArrayCtor.callAsConstructor(runtime, arrayBuffer)
-          .getObject(runtime);
+  auto float32Array = float32ArrayCtor.callAsConstructor(runtime, arrayBuffer)
+                          .getObject(runtime);
   obj.setProperty(runtime, "dataPtr", float32Array);
 
   obj.setProperty(runtime, "numTokens", jsi::Value(result.numTokens));
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/BaseEmbeddings.cpp b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/BaseEmbeddings.cpp
deleted file mode 100644
index e777be6704..0000000000
--- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/BaseEmbeddings.cpp
+++ /dev/null
@@ -1,9 +0,0 @@
-#include "BaseEmbeddings.h"
-
-namespace rnexecutorch::models::embeddings {
-
-BaseEmbeddings::BaseEmbeddings(const std::string &modelSource,
-                               std::shared_ptr<react::CallInvoker> callInvoker)
-    : BaseModel(modelSource, callInvoker) {}
-
-} // namespace rnexecutorch::models::embeddings
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/BaseEmbeddings.h b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/BaseEmbeddings.h
deleted file mode 100644
index 4b37a3fe93..0000000000
--- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/BaseEmbeddings.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#pragma once
-
-#include <rnexecutorch/models/BaseModel.h>
-
-namespace rnexecutorch::models::embeddings {
-
-class BaseEmbeddings : public BaseModel {
-public:
-  BaseEmbeddings(const std::string &modelSource,
-                 std::shared_ptr<react::CallInvoker> callInvoker);
-};
-
-}; // namespace rnexecutorch::models::embeddings
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp
index 26f3157690..d80c4fb4fe 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp
@@ -11,7 +11,7 @@ using namespace executorch::extension;
 TextEmbeddings::TextEmbeddings(const std::string &modelSource,
                                const std::string &tokenizerSource,
                                std::shared_ptr<react::CallInvoker> callInvoker)
-    : BaseEmbeddings(modelSource, callInvoker),
+    : BaseModel(modelSource, callInvoker),
       tokenizer(
           std::make_unique<TokenizerModule>(tokenizerSource, callInvoker)) {}
 
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h
index cb6059b96e..da51e4d26e 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h
+++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h
@@ -3,7 +3,7 @@
 #include "rnexecutorch/metaprogramming/ConstructorHelpers.h"
 #include <mutex>
 #include <rnexecutorch/TokenizerModule.h>
-#include <rnexecutorch/models/embeddings/BaseEmbeddings.h>
+#include <rnexecutorch/models/BaseModel.h>
 #include <rnexecutorch/models/embeddings/Types.h>
 
 namespace rnexecutorch {
@@ -14,7 +14,7 @@ struct TokenIdsWithAttentionMask {
   std::vector<int64_t> attentionMask;
 };
 
-class TextEmbeddings final : public BaseEmbeddings {
+class TextEmbeddings final : public BaseModel {
 public:
   TextEmbeddings(const std::string &modelSource,
                  const std::string &tokenizerSource,
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/Encoder.cpp b/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/Encoder.cpp
index 6abbccb9c6..3bf5fa2206 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/Encoder.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/models/text_to_image/Encoder.cpp
@@ -16,8 +16,6 @@ Encoder::Encoder(const std::string &tokenizerSource,
           encoderSource, tokenizerSource, callInvoker)) {}
 
 std::vector<float> Encoder::generate(std::string input) {
-  // TextEmbeddings returns the raw [numTokens, embeddingDim] matrix; this
-  // encoder pools/uses the flat fp32 buffer directly (dataPtr).
   std::shared_ptr<OwningArrayBuffer> embeddingsText =
       encoder->generate(input).dataPtr;
   std::shared_ptr<OwningArrayBuffer> embeddingsUncond =
diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt b/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt
index 5f9d7287a5..a901cd56fc 100644
--- a/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt
+++ b/packages/react-native-executorch/common/rnexecutorch/tests/CMakeLists.txt
@@ -218,7 +218,6 @@ add_rn_test(ObjectDetectionTests integration/ObjectDetectionTest.cpp
 add_rn_test(ImageEmbeddingsTests integration/ImageEmbeddingsTest.cpp
     SOURCES
         ${RNEXECUTORCH_DIR}/models/embeddings/image/ImageEmbeddings.cpp
-        ${RNEXECUTORCH_DIR}/models/embeddings/BaseEmbeddings.cpp
         ${RNEXECUTORCH_DIR}/models/VisionModel.cpp
         ${RNEXECUTORCH_DIR}/utils/FrameProcessor.cpp
         ${RNEXECUTORCH_DIR}/utils/FrameExtractor.cpp
@@ -230,7 +229,6 @@ add_rn_test(ImageEmbeddingsTests integration/ImageEmbeddingsTest.cpp
 add_rn_test(TextEmbeddingsTests integration/TextEmbeddingsTest.cpp
     SOURCES
         ${RNEXECUTORCH_DIR}/models/embeddings/text/TextEmbeddings.cpp
-        ${RNEXECUTORCH_DIR}/models/embeddings/BaseEmbeddings.cpp
         ${TOKENIZER_SOURCES}
     LIBS tokenizers_deps
 )
@@ -306,7 +304,6 @@ add_rn_test(TextToImageTests integration/TextToImageTest.cpp
         ${RNEXECUTORCH_DIR}/models/text_to_image/Decoder.cpp
         ${RNEXECUTORCH_DIR}/models/text_to_image/Scheduler.cpp
         ${RNEXECUTORCH_DIR}/models/embeddings/text/TextEmbeddings.cpp
-        ${RNEXECUTORCH_DIR}/models/embeddings/BaseEmbeddings.cpp
         ${TOKENIZER_SOURCES}
     LIBS tokenizers_deps
 )
diff --git a/packages/react-native-executorch/src/constants/modelRegistry.ts b/packages/react-native-executorch/src/constants/modelRegistry.ts
index f57c178b5e..4c36c6a1fa 100644
--- a/packages/react-native-executorch/src/constants/modelRegistry.ts
+++ b/packages/react-native-executorch/src/constants/modelRegistry.ts
@@ -284,12 +284,7 @@ const LFM2_5_EMBEDDING_350M_VARIANTS = {
   },
 };
 
-// LFM2.5-ColBERT is a plain text-embedding model from the library's POV: it
-// returns per-token vectors. Late-interaction scoring (MaxSim / skiplist) is
-// the consumer's concern; the library only auto-applies the role prompts.
-// Document punctuation token ids excluded from MaxSim (ColBERT skiplist),
-// derived from the model's config_sentence_transformers.json skiplist_words.
-const LFM_COLBERT_SKIPLIST = [
+const LFM_COLBERT_SKIP_LIST = [
   510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524,
   535, 536, 537, 538, 539, 540, 541, 568, 569, 570, 571, 572, 573, 600, 601,
   602, 603,
@@ -303,7 +298,7 @@ const LFM2_5_COLBERT_350M_VARIANTS = {
       tokenizerSource: M.LFM2_5_COLBERT_350M_TOKENIZER,
       prompts: LFM_COLBERT_PROMPTS,
       multiVector: true as const,
-      skiplistIds: LFM_COLBERT_SKIPLIST,
+      skipListIds: LFM_COLBERT_SKIP_LIST,
     },
   },
   xnnpack: {
@@ -313,7 +308,7 @@ const LFM2_5_COLBERT_350M_VARIANTS = {
       tokenizerSource: M.LFM2_5_COLBERT_350M_TOKENIZER,
       prompts: LFM_COLBERT_PROMPTS,
       multiVector: true as const,
-      skiplistIds: LFM_COLBERT_SKIPLIST,
+      skipListIds: LFM_COLBERT_SKIP_LIST,
     },
   },
 };
@@ -804,9 +799,6 @@ export const models = {
       ios: 'mlx',
       android: 'xnnpack',
     }),
-    // ColBERT (late-interaction): forward() returns per-token vectors. Scoring
-    // (markers / MaxSim / skiplist) is the consumer's concern — see the
-    // colbert example screen for a reference implementation.
     lfm2_5_colbert_350m: variant(LFM2_5_COLBERT_350M_VARIANTS, {
       ios: 'mlx',
       android: 'xnnpack',
diff --git a/packages/react-native-executorch/src/constants/modelUrls.ts b/packages/react-native-executorch/src/constants/modelUrls.ts
index 8fdebb1a6d..bd6cddf4a3 100644
--- a/packages/react-native-executorch/src/constants/modelUrls.ts
+++ b/packages/react-native-executorch/src/constants/modelUrls.ts
@@ -1195,21 +1195,14 @@ export const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_8DA4W_MODEL = `${URL_PREFIX}-d
 export const DISTILUSE_BASE_MULTILINGUAL_CASED_V2_TOKENIZER = `${URL_PREFIX}-distiluse-base-multilingual-cased-v2/${PREVIOUS_VERSION_TAG}/tokenizer.json`;
 const PARAPHRASE_MULTILINGUAL_MINILM_L12_V2_QUANTIZED_MODEL = `${URL_PREFIX}-paraphrase-multilingual-MiniLM-L12-v2/${PREVIOUS_VERSION_TAG}/xnnpack/paraphrase_multilingual_minilm_l12_v2_xnnpack_8da4w.pte`;
 const PARAPHRASE_MULTILINGUAL_MINILM_L12_V2_TOKENIZER = `${URL_PREFIX}-paraphrase-multilingual-MiniLM-L12-v2/${PREVIOUS_VERSION_TAG}/tokenizer.json`;
-const CLIP_VIT_BASE_PATCH32_TEXT_MODEL = `${URL_PREFIX}-clip-vit-base-patch32/${PREVIOUS_VERSION_TAG}/xnnpack/clip_vit_base_patch32_text_xnnpack_fp32.pte`;
-const CLIP_VIT_BASE_PATCH32_TEXT_TOKENIZER = `${URL_PREFIX}-clip-vit-base-patch32/${PREVIOUS_VERSION_TAG}/tokenizer.json`;
-// LFM2.5-Embedding-350M: XNNPACK 8da4w (Android/CPU), MLX int4 bf16 (iOS GPU,
-// physical device only). The exported graph bakes in CLS pooling + L2 norm.
-// Requires the runner to add the BOS special token (CLS-pooled at index 0).
 export const LFM2_5_EMBEDDING_350M_XNNPACK_MODEL = `${URL_PREFIX}-lfm2.5-embedding-350m/${PREVIOUS_VERSION_TAG}/xnnpack/lfm_2_5_embedding_350m_xnnpack_8da4w.pte`;
 export const LFM2_5_EMBEDDING_350M_MLX_MODEL = `${URL_PREFIX}-lfm2.5-embedding-350m/${PREVIOUS_VERSION_TAG}/mlx/lfm_2_5_embedding_350m_mlx_int4.pte`;
 export const LFM2_5_EMBEDDING_350M_TOKENIZER = `${URL_PREFIX}-lfm2.5-embedding-350m/${PREVIOUS_VERSION_TAG}/tokenizer.json`;
-// LFM2.5-ColBERT-350M: late-interaction multi-vector retriever (per-token
-// [S,128]). Same bidirectional backbone as the embedding model + a Linear
-// 1024->128 head. forward() returns per-token vectors; late-interaction
-// scoring (MaxSim) is the consumer's concern (see the colbert example).
 export const LFM2_5_COLBERT_350M_XNNPACK_MODEL = `${URL_PREFIX}-lfm2.5-colbert-350m/${PREVIOUS_VERSION_TAG}/xnnpack/lfm_2_5_colbert_350m_xnnpack_8da4w.pte`;
 export const LFM2_5_COLBERT_350M_MLX_MODEL = `${URL_PREFIX}-lfm2.5-colbert-350m/${PREVIOUS_VERSION_TAG}/mlx/lfm_2_5_colbert_350m_mlx_int4.pte`;
 export const LFM2_5_COLBERT_350M_TOKENIZER = `${URL_PREFIX}-lfm2.5-colbert-350m/${PREVIOUS_VERSION_TAG}/tokenizer.json`;
+const CLIP_VIT_BASE_PATCH32_TEXT_MODEL = `${URL_PREFIX}-clip-vit-base-patch32/${PREVIOUS_VERSION_TAG}/xnnpack/clip_vit_base_patch32_text_xnnpack_fp32.pte`;
+const CLIP_VIT_BASE_PATCH32_TEXT_TOKENIZER = `${URL_PREFIX}-clip-vit-base-patch32/${PREVIOUS_VERSION_TAG}/tokenizer.json`;
 
 /**
  * @category Models - Text Embeddings
diff --git a/packages/react-native-executorch/src/types/textEmbeddings.ts b/packages/react-native-executorch/src/types/textEmbeddings.ts
index 2f42d71e9d..1b056a1f7b 100644
--- a/packages/react-native-executorch/src/types/textEmbeddings.ts
+++ b/packages/react-native-executorch/src/types/textEmbeddings.ts
@@ -68,11 +68,11 @@ export interface TextEmbeddingsModel {
   multiVector?: boolean;
   /**
    * Document token ids to exclude from late-interaction scoring (e.g. ColBERT's
-   * punctuation skiplist). Derived from the model's training config, so it's
+   * punctuation skipList). Derived from the model's training config, so it's
    * shipped here rather than reconstructed by the consumer, who passes it to
    * their own MaxSim scoring.
    */
-  skiplistIds?: number[];
+  skipListIds?: number[];
 }
 
 /**
@@ -80,8 +80,11 @@ export interface TextEmbeddingsModel {
  * - return type: `EmbeddingResult` if `multiVector`, else `Float32Array`.
  * - role arg: required if the model has `prompts`, else absent.
  */
-export type ForwardReturn<M extends TextEmbeddingsModel> =
-  M extends { multiVector: true } ? EmbeddingResult : Float32Array;
+export type ForwardReturn<M extends TextEmbeddingsModel> = M extends {
+  multiVector: true;
+}
+  ? EmbeddingResult
+  : Float32Array;
 
 /**
  * `forward`'s signature, computed from the model config:

From 9691184b595c9fede86d8fdf472f8963b9e791d4 Mon Sep 17 00:00:00 2001
From: Norbert Klockiewicz <Nklockiewicz12@gmail.com>
Date: Mon, 22 Jun 2026 17:42:29 +0200
Subject: [PATCH 07/14] refactor: extract TextEmbeddings::buildResult, validate
 output rank

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../models/embeddings/text/TextEmbeddings.cpp | 51 +++++++++++--------
 .../models/embeddings/text/TextEmbeddings.h   |  2 +
 2 files changed, 31 insertions(+), 22 deletions(-)

diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp
index d80c4fb4fe..6e5982c2a5 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp
@@ -60,34 +60,41 @@ EmbeddingResult TextEmbeddings::generate(const std::string input) {
   auto forwardResult = BaseModel::forward({tokenIds, attnMask});
   CHECK_OK_OR_THROW_FORWARD_ERROR(forwardResult);
 
-  // Output is [1, numTokens, embeddingDim] (numTokens == 1 for pooled models,
-  // == sequence length for multi-vector models). Return the raw matrix + the
-  // input ids; the TS layer reduces to a single vector or keeps the matrix.
-  auto out = forwardResult->at(0).toTensor();
-  auto sizes = out.sizes();
+  return buildResult(forwardResult->at(0).toTensor(),
+                     std::move(preprocessed.inputIds));
+}
 
-  EmbeddingResult result;
-  result.dataPtr = std::make_shared<OwningArrayBuffer>(out.const_data_ptr(),
-                                                       out.nbytes());
-  result.numTokens = static_cast<int32_t>(sizes[sizes.size() - 2]);
-  result.embeddingDim = static_cast<int32_t>(sizes[sizes.size() - 1]);
-  result.tokenIds = std::move(preprocessed.inputIds);
+// Output is [1, numTokens, embeddingDim] (numTokens == 1 for pooled models,
+// == sequence length for multi-vector models). Multi-vector consumers index
+// tokenIds[i] per output row (e.g. skiplist masking), so numTokens must match
+// the input token count or that alignment silently breaks.
+EmbeddingResult
+TextEmbeddings::buildResult(const executorch::aten::Tensor &output,
+                            std::vector<int64_t> tokenIds) {
+  auto sizes = output.sizes();
+  if (sizes.size() < 2) {
+    throw RnExecutorchError(RnExecutorchErrorCode::InvalidModelOutput,
+                            "Embedding output must be at least 2D, got rank " +
+                                std::to_string(sizes.size()));
+  }
 
-  // Invariant for multi-vector models: one output row per input token, so
-  // numTokens (from the output tensor) must equal tokenIds.size() (from the
-  // input). Consumers index tokenIds[i] per output row (e.g. skiplist masking),
-  // which silently breaks if the graph ever pads/truncates the sequence.
-  // (Pooled models legitimately collapse to numTokens == 1.)
-  if (result.numTokens != 1 &&
-      result.numTokens != static_cast<int32_t>(result.tokenIds.size())) {
+  const auto numTokens = static_cast<int32_t>(sizes[sizes.size() - 2]);
+  const auto inputTokens = static_cast<int32_t>(tokenIds.size());
+  if (numTokens != 1 && numTokens != inputTokens) {
     throw RnExecutorchError(
         RnExecutorchErrorCode::InvalidModelOutput,
-        "Embedding output rows (" + std::to_string(result.numTokens) +
-            ") != input tokens (" +
-            std::to_string(result.tokenIds.size()) +
+        "Embedding output rows (" + std::to_string(numTokens) +
+            ") != input tokens (" + std::to_string(inputTokens) +
             "); per-token tokenIds alignment is broken.");
   }
-  return result;
+
+  return EmbeddingResult{
+      .dataPtr = std::make_shared<OwningArrayBuffer>(output.const_data_ptr(),
+                                                     output.nbytes()),
+      .numTokens = numTokens,
+      .embeddingDim = static_cast<int32_t>(sizes[sizes.size() - 1]),
+      .tokenIds = std::move(tokenIds),
+  };
 }
 
 } // namespace rnexecutorch::models::embeddings
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h
index da51e4d26e..02cfefde4d 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h
+++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h
@@ -31,6 +31,8 @@ class TextEmbeddings final : public BaseModel {
   mutable std::mutex inference_mutex_;
   std::vector<std::vector<int32_t>> inputShapes;
   TokenIdsWithAttentionMask preprocess(const std::string &input);
+  static EmbeddingResult buildResult(const executorch::aten::Tensor &output,
+                                     std::vector<int64_t> tokenIds);
   std::unique_ptr<TokenizerModule> tokenizer;
 };
 } // namespace models::embeddings

From 8e494c4128f2f5585ba0c4e6fce99f311ff23049 Mon Sep 17 00:00:00 2001
From: Norbert Klockiewicz <Nklockiewicz12@gmail.com>
Date: Tue, 23 Jun 2026 09:54:04 +0200
Subject: [PATCH 08/14] refactor: dedup LFM model configs, drop deleted util
 export, trim comments

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../models/embeddings/text/TextEmbeddings.cpp |  7 ---
 .../models/embeddings/text/TextEmbeddings.h   |  4 --
 .../src/constants/modelRegistry.ts            | 40 +++++++-------
 .../useTextEmbeddings.ts                      | 11 ++--
 packages/react-native-executorch/src/index.ts |  1 -
 .../TextEmbeddingsModule.ts                   | 16 ++----
 .../src/types/textEmbeddings.ts               | 55 ++++++++++++-------
 .../src/utils/textEmbeddings.ts               | 37 -------------
 8 files changed, 67 insertions(+), 104 deletions(-)
 delete mode 100644 packages/react-native-executorch/src/utils/textEmbeddings.ts

diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp
index 6e5982c2a5..52a10b6e40 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.cpp
@@ -16,9 +16,6 @@ TextEmbeddings::TextEmbeddings(const std::string &modelSource,
           std::make_unique<TokenizerModule>(tokenizerSource, callInvoker)) {}
 
 TokenIdsWithAttentionMask TextEmbeddings::preprocess(const std::string &input) {
-  // Apply the tokenizer's post_processor so declared special tokens (e.g. a
-  // BOS prepended via TemplateProcessing) are added. CLS-pooled embedding
-  // models read position 0, so a missing BOS corrupts the pooled vector.
   auto inputIds = tokenizer->encodeWithSpecialTokens(input);
   // Tokenizers-cpp return tokens as int32, but text embedding models require
   // int64 as input
@@ -64,10 +61,6 @@ EmbeddingResult TextEmbeddings::generate(const std::string input) {
                      std::move(preprocessed.inputIds));
 }
 
-// Output is [1, numTokens, embeddingDim] (numTokens == 1 for pooled models,
-// == sequence length for multi-vector models). Multi-vector consumers index
-// tokenIds[i] per output row (e.g. skiplist masking), so numTokens must match
-// the input token count or that alignment silently breaks.
 EmbeddingResult
 TextEmbeddings::buildResult(const executorch::aten::Tensor &output,
                             std::vector<int64_t> tokenIds) {
diff --git a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h
index 02cfefde4d..587f697bd4 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h
+++ b/packages/react-native-executorch/common/rnexecutorch/models/embeddings/text/TextEmbeddings.h
@@ -19,10 +19,6 @@ class TextEmbeddings final : public BaseModel {
   TextEmbeddings(const std::string &modelSource,
                  const std::string &tokenizerSource,
                  std::shared_ptr<react::CallInvoker> callInvoker);
-  // Returns the raw [numTokens, embeddingDim] output. Pooled models give
-  // numTokens == 1; multi-vector (late-interaction) models give the full
-  // sequence. The TS layer reduces to a single vector or keeps the matrix
-  // based on the model's config.
   [[nodiscard("Registered non-void function")]] EmbeddingResult
   generate(const std::string input);
   void unload() noexcept;
diff --git a/packages/react-native-executorch/src/constants/modelRegistry.ts b/packages/react-native-executorch/src/constants/modelRegistry.ts
index 4c36c6a1fa..16ec6964e2 100644
--- a/packages/react-native-executorch/src/constants/modelRegistry.ts
+++ b/packages/react-native-executorch/src/constants/modelRegistry.ts
@@ -260,26 +260,26 @@ const GEMMA4_E2B_MM_VARIANTS = {
   },
 };
 
-// Asymmetric query/document prompts the LFM models are trained with.
-// forward(text, role) auto-prepends these.
 const LFM_EMBEDDING_PROMPTS = { query: 'query: ', document: 'document: ' };
-const LFM_COLBERT_PROMPTS = { query: '[Q] ', document: '[D] ' };
+
+const LFM2_5_EMBEDDING_350M_CONFIG = {
+  modelName: 'lfm2-5-embedding-350m' as const,
+  tokenizerSource: M.LFM2_5_EMBEDDING_350M_TOKENIZER,
+  prompts: LFM_EMBEDDING_PROMPTS,
+  multiVector: false as const,
+};
 
 const LFM2_5_EMBEDDING_350M_VARIANTS = {
   mlx: {
     base: {
-      modelName: 'lfm2-5-embedding-350m' as const,
+      ...LFM2_5_EMBEDDING_350M_CONFIG,
       modelSource: M.LFM2_5_EMBEDDING_350M_MLX_MODEL,
-      tokenizerSource: M.LFM2_5_EMBEDDING_350M_TOKENIZER,
-      prompts: LFM_EMBEDDING_PROMPTS,
     },
   },
   xnnpack: {
     base: {
-      modelName: 'lfm2-5-embedding-350m' as const,
+      ...LFM2_5_EMBEDDING_350M_CONFIG,
       modelSource: M.LFM2_5_EMBEDDING_350M_XNNPACK_MODEL,
-      tokenizerSource: M.LFM2_5_EMBEDDING_350M_TOKENIZER,
-      prompts: LFM_EMBEDDING_PROMPTS,
     },
   },
 };
@@ -290,25 +290,27 @@ const LFM_COLBERT_SKIP_LIST = [
   602, 603,
 ];
 
+const LFM_COLBERT_PROMPTS = { query: '[Q] ', document: '[D] ' };
+
+const LFM2_5_COLBERT_350M_CONFIG = {
+  modelName: 'lfm2-5-colbert-350m' as const,
+  tokenizerSource: M.LFM2_5_COLBERT_350M_TOKENIZER,
+  prompts: LFM_COLBERT_PROMPTS,
+  multiVector: true as const,
+  skipListIds: LFM_COLBERT_SKIP_LIST,
+};
+
 const LFM2_5_COLBERT_350M_VARIANTS = {
   mlx: {
     base: {
-      modelName: 'lfm2-5-colbert-350m' as const,
+      ...LFM2_5_COLBERT_350M_CONFIG,
       modelSource: M.LFM2_5_COLBERT_350M_MLX_MODEL,
-      tokenizerSource: M.LFM2_5_COLBERT_350M_TOKENIZER,
-      prompts: LFM_COLBERT_PROMPTS,
-      multiVector: true as const,
-      skipListIds: LFM_COLBERT_SKIP_LIST,
     },
   },
   xnnpack: {
     base: {
-      modelName: 'lfm2-5-colbert-350m' as const,
+      ...LFM2_5_COLBERT_350M_CONFIG,
       modelSource: M.LFM2_5_COLBERT_350M_XNNPACK_MODEL,
-      tokenizerSource: M.LFM2_5_COLBERT_350M_TOKENIZER,
-      prompts: LFM_COLBERT_PROMPTS,
-      multiVector: true as const,
-      skipListIds: LFM_COLBERT_SKIP_LIST,
     },
   },
 };
diff --git a/packages/react-native-executorch/src/hooks/natural_language_processing/useTextEmbeddings.ts b/packages/react-native-executorch/src/hooks/natural_language_processing/useTextEmbeddings.ts
index 2f100b8cbb..9e3fa7f0e4 100644
--- a/packages/react-native-executorch/src/hooks/natural_language_processing/useTextEmbeddings.ts
+++ b/packages/react-native-executorch/src/hooks/natural_language_processing/useTextEmbeddings.ts
@@ -9,12 +9,13 @@ import {
 } from '../../types/textEmbeddings';
 
 /**
- * React hook for a Text Embeddings model.
+ * React hook for managing a Text Embeddings model instance.
  * @category Hooks
- * @param TextEmbeddingsProps - `model` source + optional `preventLoad`.
- * @returns Ready to use embeddings model. `forward` returns the raw
- *   [numTokens, embeddingDim] result; use `toVector` for a single vector.
- *   Models with prompts require a `role` ('query' | 'document') on `forward`.
+ * @param TextEmbeddingsProps - Configuration object containing `model` source and optional `preventLoad` flag.
+ * @returns Ready to use Text Embeddings model. `forward` returns a
+ *   `Float32Array` for pooled models and an `EmbeddingResult` (per-token
+ *   vectors) for multi-vector models. Models with prompts require a `role`
+ *   ('query' | 'document') on `forward`.
  */
 export const useTextEmbeddings = <M extends TextEmbeddingsModel>({
   model,
diff --git a/packages/react-native-executorch/src/index.ts b/packages/react-native-executorch/src/index.ts
index 34cdf97d8d..1f190d41f5 100644
--- a/packages/react-native-executorch/src/index.ts
+++ b/packages/react-native-executorch/src/index.ts
@@ -212,7 +212,6 @@ export * from './utils/ResourceFetcher';
 export * from './utils/ResourceFetcherUtils';
 export * from './utils/BaseResourceFetcherClass';
 export * from './utils/llm';
-export * from './utils/textEmbeddings';
 export * from './common/Logger';
 export * from './utils/llms/context_strategy';
 export * from './utils/segmentAnythingPrompts';
diff --git a/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts b/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts
index abb620e981..e192de0664 100644
--- a/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts
+++ b/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts
@@ -13,11 +13,7 @@ import { parseUnknownError, RnExecutorchError } from '../../errors/errorUtils';
 import { Logger } from '../../common/Logger';
 
 /**
- * Module for text embeddings. `forward` returns a single pooled `Float32Array`
- * for standard models, or the per-token `EmbeddingResult` for `multiVector`
- * (late-interaction) models. The native runner always produces the raw
- * [numTokens, embeddingDim] matrix; the reduction to a single vector happens
- * here so the common single-vector API stays `Float32Array`.
+ * Module for managing a Text Embeddings model instance.
  * @category Typescript API
  */
 export class TextEmbeddingsModule extends BaseModule {
@@ -86,11 +82,12 @@ export class TextEmbeddingsModule extends BaseModule {
   }
 
   /**
-   * Embed text. Standard models return the single pooled `Float32Array`;
-   * `multiVector` models return the per-token `EmbeddingResult`.
+   * Embed text into a pooled `Float32Array`, or a per-token `EmbeddingResult`
+   * for `multiVector` models.
    * @param input - The text to embed.
    * @param role - 'query' | 'document'; prepends the model's prompt for that
-   *   role when configured (no-op otherwise).
+   *   role when configured.
+   * @returns A `Float32Array` for pooled models, an `EmbeddingResult` otherwise.
    */
   async forward(
     input: string,
@@ -100,11 +97,8 @@ export class TextEmbeddingsModule extends BaseModule {
       throw new RnExecutorchError(RnExecutorchErrorCode.ModuleNotLoaded);
     const prefix = (role && this.prompts?.[role]) || '';
     const res = await this.nativeModule.generate(prefix + input);
-    // res.dataPtr is already a Float32Array view over the owned native buffer
-    // (built at the JSI boundary).
     const vectors = res.dataPtr as Float32Array;
     if (!this.multiVector) {
-      // Pooled models output [1, embeddingDim]; return that single row.
       return vectors.subarray(0, res.embeddingDim);
     }
     return {
diff --git a/packages/react-native-executorch/src/types/textEmbeddings.ts b/packages/react-native-executorch/src/types/textEmbeddings.ts
index 1b056a1f7b..9f24cbcf1d 100644
--- a/packages/react-native-executorch/src/types/textEmbeddings.ts
+++ b/packages/react-native-executorch/src/types/textEmbeddings.ts
@@ -18,9 +18,8 @@ export type TextEmbeddingsModelName =
 
 /**
  * Per-token (multi-vector) embedding output for late-interaction models (e.g.
- * ColBERT): a [numTokens, embeddingDim] fp32 matrix (row-major) plus the input
- * token ids. Standard models return a single pooled `Float32Array` from
- * `forward` instead; only `multiVector` models yield this.
+ * ColBERT). Only `multiVector` models yield this; standard models return a
+ * pooled `Float32Array` from `forward` instead.
  * @category Types
  */
 export interface EmbeddingResult {
@@ -44,8 +43,7 @@ export type EmbeddingRole = 'query' | 'document';
 
 /**
  * Asymmetric prompts a model is trained with. When a model config carries
- * these, `forward` REQUIRES a `role` so the matching prompt is always applied
- * (forgetting it would silently embed raw text and wreck asymmetric retrieval).
+ * these, `forward` requires a `role` so the matching prompt is always applied.
  * @category Types
  */
 export interface EmbeddingPrompts {
@@ -55,9 +53,8 @@ export interface EmbeddingPrompts {
 
 /**
  * A text embeddings model config. Two optional flags drive `forward`:
- * - `prompts` present  -> `forward` REQUIRES a `role` (auto-prepends the prompt)
- * - `multiVector` true -> `forward` returns the per-token `EmbeddingResult`;
- *                         otherwise it returns a single pooled `Float32Array`.
+ * `prompts` makes a `role` argument required, and `multiVector` makes it return
+ * a per-token `EmbeddingResult` instead of a pooled `Float32Array`.
  * @category Types
  */
 export interface TextEmbeddingsModel {
@@ -76,9 +73,8 @@ export interface TextEmbeddingsModel {
 }
 
 /**
- * `forward`'s signature, computed from the model config:
- * - return type: `EmbeddingResult` if `multiVector`, else `Float32Array`.
- * - role arg: required if the model has `prompts`, else absent.
+ * `forward`'s return type: `EmbeddingResult` for `multiVector` models,
+ * `Float32Array` otherwise.
  */
 export type ForwardReturn<M extends TextEmbeddingsModel> = M extends {
   multiVector: true;
@@ -87,11 +83,9 @@ export type ForwardReturn<M extends TextEmbeddingsModel> = M extends {
   : Float32Array;
 
 /**
- * `forward`'s signature, computed from the model config:
- * - A model that DEFINITELY has prompts -> `role` is REQUIRED.
- * - A model that definitely has NO prompts (`prompts?: undefined`) -> no role.
- * - Otherwise (prompts optional / unknown, e.g. a heterogeneous model list) ->
- *   `role` is OPTIONAL.
+ * `forward`'s signature, computed from the model config: `role` is required
+ * when the model has `prompts`, omitted when it has none, and optional when
+ * unknown (e.g. a heterogeneous model list).
  */
 export type ForwardFn<M extends TextEmbeddingsModel> = M extends {
   prompts: EmbeddingPrompts;
@@ -106,6 +100,14 @@ export type ForwardFn<M extends TextEmbeddingsModel> = M extends {
 /**
  * Props for the useTextEmbeddings hook.
  * @category Types
+ * @property {object} model - An object containing the model configuration.
+ * @property {TextEmbeddingsModelName} model.modelName - Unique name identifying the model.
+ * @property {ResourceSource} model.modelSource - The source of the text embeddings model binary.
+ * @property {ResourceSource} model.tokenizerSource - The source of the tokenizer JSON file.
+ * @property {EmbeddingPrompts} [model.prompts] - Optional asymmetric prompts for query/document roles.
+ * @property {boolean} [model.multiVector] - Optional flag indicating if the model returns per-token embeddings.
+ * @property {number[]} [model.skipListIds] - Optional array of token IDs to skip during scoring.
+ * @property {boolean} [preventLoad] - Boolean that can prevent automatic model loading (and downloading the data if you load it for the first time) after running the hook.
  */
 export interface TextEmbeddingsProps<
   M extends TextEmbeddingsModel = TextEmbeddingsModel,
@@ -121,15 +123,28 @@ export interface TextEmbeddingsProps<
 export interface TextEmbeddingsType<
   M extends TextEmbeddingsModel = TextEmbeddingsModel,
 > {
+  /**
+   * Contains the error if the model failed to load or inference failed; `null` otherwise.
+   */
   error: null | RnExecutorchError;
+  /**
+   * Indicates whether the embeddings model has successfully loaded and is ready for inference.
+   */
   isReady: boolean;
+  /**
+   * Indicates whether the model is currently generating embeddings.
+   */
   isGenerating: boolean;
+  /**
+   * Tracks the progress of the model download process (value between 0 and 1).
+   */
   downloadProgress: number;
-
   /**
-   * Embed text. Standard models return a single pooled `Float32Array`;
-   * `multiVector` models return the per-token `EmbeddingResult`. Models with
-   * `prompts` require a `role` ('query' | 'document').
+   * Runs the text embeddings model on the provided input string.
+   * @param input - The text string to embed.
+   * @param role - Role ('query' | 'document') whose prompt is prepended to the input. Required if the model has `prompts`, omitted if it has none.
+   * @returns A promise resolving to a Float32Array or EmbeddingResult containing the vector embeddings.
+   * @throws {RnExecutorchError} If the model is not loaded or is currently processing another request.
    */
   forward: ForwardFn<M>;
 }
diff --git a/packages/react-native-executorch/src/utils/textEmbeddings.ts b/packages/react-native-executorch/src/utils/textEmbeddings.ts
deleted file mode 100644
index e9be7cf774..0000000000
--- a/packages/react-native-executorch/src/utils/textEmbeddings.ts
+++ /dev/null
@@ -1,37 +0,0 @@
-import { EmbeddingResult } from '../types/textEmbeddings';
-
-/**
- * Get the single pooled embedding vector from a result. Convenience for the
- * common single-vector case: the exported graph pools + L2-normalizes to a
- * [1, embeddingDim] output, so this returns row 0.
- *
- * For multi-vector (late-interaction) models, prefer the full per-token
- * vectors (`getTokenVectors`); row 0 alone is not a meaningful sentence
- * embedding there.
- *
- * @category Utils
- */
-export function toVector(result: EmbeddingResult): Float32Array {
-  return result.vectors.slice(0, result.embeddingDim);
-}
-
-/**
- * Split a result's flat `vectors` buffer into per-token rows
- * (`numTokens` arrays of length `embeddingDim`). Useful for inspecting or
- * storing individual token vectors (e.g. a multi-vector vector DB).
- *
- * The rows are zero-copy `subarray` VIEWS over `result.vectors` — valid only
- * while that buffer is alive and not mutated. Copy them (e.g. `new
- * Float32Array(row)`) before storing beyond the result's lifetime. (`toVector`
- * by contrast returns an independent copy.)
- *
- * @category Utils
- */
-export function getTokenVectors(result: EmbeddingResult): Float32Array[] {
-  const { vectors, numTokens, embeddingDim } = result;
-  const rows: Float32Array[] = [];
-  for (let i = 0; i < numTokens; i++) {
-    rows.push(vectors.subarray(i * embeddingDim, (i + 1) * embeddingDim));
-  }
-  return rows;
-}

From c8e7769b2d2e573a2efb50efd92eca9000b6ccd1 Mon Sep 17 00:00:00 2001
From: Norbert Klockiewicz <Nklockiewicz12@gmail.com>
Date: Tue, 23 Jun 2026 10:31:13 +0200
Subject: [PATCH 09/14] docs: document text embeddings prompts, multi-vector &
 ColBERT MaxSim

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../useTextEmbeddings.md                      | 68 ++++++++++++++-
 .../useTextEmbeddings.md                      | 68 ++++++++++++++-
 .../classes/TextEmbeddingsModule.md           | 83 +++++++++----------
 .../functions/useTextEmbeddings.md            | 19 +++--
 .../interfaces/EmbeddingPrompts.md            | 22 +++++
 .../interfaces/EmbeddingResult.md             | 47 +++++++++++
 .../interfaces/TextEmbeddingsModel.md         | 60 ++++++++++++++
 .../interfaces/TextEmbeddingsProps.md         | 36 +++-----
 .../interfaces/TextEmbeddingsType.md          | 66 ++++++++-------
 .../type-aliases/EmbeddingRole.md             |  9 ++
 .../type-aliases/ForwardFn.md                 | 15 ++++
 .../type-aliases/ForwardReturn.md             | 14 ++++
 .../type-aliases/TextEmbeddingsModelName.md   |  2 +-
 13 files changed, 402 insertions(+), 107 deletions(-)
 create mode 100644 docs/versioned_docs/version-0.9.x/06-api-reference/interfaces/EmbeddingPrompts.md
 create mode 100644 docs/versioned_docs/version-0.9.x/06-api-reference/interfaces/EmbeddingResult.md
 create mode 100644 docs/versioned_docs/version-0.9.x/06-api-reference/interfaces/TextEmbeddingsModel.md
 create mode 100644 docs/versioned_docs/version-0.9.x/06-api-reference/type-aliases/EmbeddingRole.md
 create mode 100644 docs/versioned_docs/version-0.9.x/06-api-reference/type-aliases/ForwardFn.md
 create mode 100644 docs/versioned_docs/version-0.9.x/06-api-reference/type-aliases/ForwardReturn.md

diff --git a/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md b/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md
index b9ba8c41b9..fc0faff7e0 100644
--- a/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md
+++ b/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md
@@ -60,7 +60,21 @@ You need more details? Check the following resources:
 
 ## Running the model
 
-To run the model, you can use the [`forward`](../../06-api-reference/interfaces/TextEmbeddingsType.md#forward) method. It accepts one argument, which is a string representing the text you want to embed. The function returns a promise, which can resolve either to an error or an array of numbers representing the embedding.
+To run the model, you can use the [`forward`](../../06-api-reference/interfaces/TextEmbeddingsType.md#forward) method. It accepts the text to embed and, for models trained with asymmetric prompts, a required `role` (`'query'` or `'document'`). The returned promise resolves to a type that depends on the model:
+
+- **Pooled models** (the default, e.g. MiniLM, MPNet, LFM2.5-Embedding) resolve to a single `Float32Array` — one normalized vector for the whole input.
+- **Multi-vector models** (`multiVector: true`, e.g. LFM2.5-ColBERT) resolve to an [`EmbeddingResult`](../../06-api-reference/interfaces/EmbeddingResult.md) with the per-token vectors (`vectors`, `numTokens`, `embeddingDim`, `tokenIds`).
+
+### Asymmetric prompts (`role`)
+
+Some retrieval models are trained to embed queries and documents with different prefixes (e.g. LFM2.5 uses `query: `/`document: `, ColBERT uses `[Q] `/`[D] `). For these models the model config carries the prompts and `forward` requires a `role`:
+
+```typescript
+const queryEmbedding = await model.forward('What is the weather?', 'query');
+const docEmbedding = await model.forward('It is sunny today.', 'document');
+```
+
+The matching prompt is prepended automatically; for models without prompts the `role` argument is absent.
 
 ## Example
 
@@ -112,6 +126,8 @@ function App() {
 | [distiluse-base-multilingual-cased-v2](https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2)   | 50+ languages |    126     |         512          | Multilingual DistilBERT with a 768→512 projection head. Recommended when broader language coverage matters more than the exact English quality of MiniLM/MPNet.                                                                                                                                                                                                                                                                  |
 | [paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) | 50+ languages |    126     |         384          | Multilingual MiniLM-L12 distilled from paraphrase-multilingual-mpnet-base-v2. Compact (≈118 M params) sentence encoder for cross-lingual semantic similarity and retrieval across 50+ languages.                                                                                                                                                                                                                                 |
 | [clip-vit-base-patch32-text](https://huggingface.co/openai/clip-vit-base-patch32)                                           |    English    |     74     |         512          | CLIP (Contrastive Language-Image Pre-Training) is a neural network trained on a variety of (image, text) pairs. CLIP allows to embed images and text into the same vector space. This allows to find similar images as well as to implement image search. This is the text encoder part of the CLIP model. To embed images checkout [clip-vit-base-patch32-image](../02-computer-vision/useImageEmbeddings.md#supported-models). |
+| [LFM2.5-Embedding-350M](https://huggingface.co/LiquidAI/LFM2.5-Embedding-350M)                                              |  Multilingual |    512     |         1024         | Dense bi-encoder from Liquid AI with CLS pooling. Trained with asymmetric `query: `/`document: ` prompts, so `forward` requires a `role`. On iOS it runs on the GPU via the MLX backend (physical device only); Android uses XNNPACK.                                                                                                                                                                                            |
+| [LFM2.5-ColBERT-350M](https://huggingface.co/LiquidAI/LFM2.5-ColBERT-350M)                                                  |  Multilingual |    512     |     128 (per token)  | Late-interaction (multi-vector) retriever from Liquid AI: a `Linear(1024→128)` head emits one normalized vector per token. `forward` returns an `EmbeddingResult`; score query/document pairs with MaxSim (see below). Uses `[Q] `/`[D] ` role prompts.                                                                                                                                                                          |
 
 **`Max Tokens`** - The maximum number of tokens that can be processed by the model. If the input text exceeds this limit, it will be truncated.
 
@@ -120,3 +136,53 @@ function App() {
 :::note
 For the supported models, the returned embedding vector is normalized, meaning that its length is equal to 1. This allows for easier comparison of vectors using cosine similarity, just calculate the dot product of two vectors to get the cosine similarity score.
 :::
+
+## Late interaction (multi-vector models)
+
+Multi-vector models such as LFM2.5-ColBERT do not pool the sequence into a single vector. Instead, `forward` returns an [`EmbeddingResult`](../../06-api-reference/interfaces/EmbeddingResult.md) holding one normalized vector per token. You score a query against a document with **MaxSim**: for every query-token vector, take its highest dot product against the document-token vectors, then sum those maxima.
+
+The library is a pure embedder — it gives you the per-token vectors and the model's punctuation `skipListIds`, but scoring is your concern (so it can run wherever you store the vectors). A reference `maxSim` implementation:
+
+```typescript
+import { models, useTextEmbeddings, EmbeddingResult } from 'react-native-executorch';
+
+const colbert = models.text_embedding.lfm2_5_colbert_350m();
+const skipListIds = colbert.skipListIds ?? [];
+
+const maxSim = (
+  query: EmbeddingResult,
+  doc: EmbeddingResult,
+  skip: number[] = []
+) => {
+  const dim = query.embeddingDim;
+  const skipped = new Set(skip);
+  let score = 0;
+  for (let qi = 0; qi < query.numTokens; qi++) {
+    const qOff = qi * dim;
+    let best = -Infinity;
+    for (let di = 0; di < doc.numTokens; di++) {
+      if (skipped.has(doc.tokenIds[di])) continue;
+      const dOff = di * dim;
+      let dot = 0;
+      for (let k = 0; k < dim; k++) {
+        dot += query.vectors[qOff + k] * doc.vectors[dOff + k];
+      }
+      if (dot > best) best = dot;
+    }
+    if (best !== -Infinity) score += best;
+  }
+  return score;
+};
+
+function App() {
+  const model = useTextEmbeddings({ model: colbert });
+  // Call from an event handler or effect once `model.isReady` is true.
+  const scorePair = async () => {
+    const query = await model.forward('What is the weather?', 'query');
+    const doc = await model.forward('It is sunny today.', 'document');
+    return maxSim(query, doc, skipListIds);
+  };
+}
+```
+
+The `skipListIds` shipped on the model config are the punctuation token ids excluded from scoring (derived from the model's training config). Per-token vectors are L2-normalized by the graph, so the dot product equals cosine similarity.
diff --git a/docs/versioned_docs/version-0.9.x/03-hooks/01-natural-language-processing/useTextEmbeddings.md b/docs/versioned_docs/version-0.9.x/03-hooks/01-natural-language-processing/useTextEmbeddings.md
index 3e23a88630..67bc356e70 100644
--- a/docs/versioned_docs/version-0.9.x/03-hooks/01-natural-language-processing/useTextEmbeddings.md
+++ b/docs/versioned_docs/version-0.9.x/03-hooks/01-natural-language-processing/useTextEmbeddings.md
@@ -60,7 +60,21 @@ You need more details? Check the following resources:
 
 ## Running the model
 
-To run the model, you can use the [`forward`](../../06-api-reference/interfaces/TextEmbeddingsType.md#forward) method. It accepts one argument, which is a string representing the text you want to embed. The function returns a promise, which can resolve either to an error or an array of numbers representing the embedding.
+To run the model, you can use the [`forward`](../../06-api-reference/interfaces/TextEmbeddingsType.md#forward) method. It accepts the text to embed and, for models trained with asymmetric prompts, a required `role` (`'query'` or `'document'`). The returned promise resolves to a type that depends on the model:
+
+- **Pooled models** (the default, e.g. MiniLM, MPNet, LFM2.5-Embedding) resolve to a single `Float32Array` — one normalized vector for the whole input.
+- **Multi-vector models** (`multiVector: true`, e.g. LFM2.5-ColBERT) resolve to an [`EmbeddingResult`](../../06-api-reference/interfaces/EmbeddingResult.md) with the per-token vectors (`vectors`, `numTokens`, `embeddingDim`, `tokenIds`).
+
+### Asymmetric prompts (`role`)
+
+Some retrieval models are trained to embed queries and documents with different prefixes (e.g. LFM2.5 uses `query: `/`document: `, ColBERT uses `[Q] `/`[D] `). For these models the model config carries the prompts and `forward` requires a `role`:
+
+```typescript
+const queryEmbedding = await model.forward('What is the weather?', 'query');
+const docEmbedding = await model.forward('It is sunny today.', 'document');
+```
+
+The matching prompt is prepended automatically; for models without prompts the `role` argument is absent.
 
 ## Example
 
@@ -112,6 +126,8 @@ function App() {
 | [distiluse-base-multilingual-cased-v2](https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2)   | 50+ languages |    126     |         512          | Multilingual DistilBERT with a 768→512 projection head. Recommended when broader language coverage matters more than the exact English quality of MiniLM/MPNet.                                                                                                                                                                                                                                                                  |
 | [paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) | 50+ languages |    126     |         384          | Multilingual MiniLM-L12 distilled from paraphrase-multilingual-mpnet-base-v2. Compact (≈118 M params) sentence encoder for cross-lingual semantic similarity and retrieval across 50+ languages.                                                                                                                                                                                                                                 |
 | [clip-vit-base-patch32-text](https://huggingface.co/openai/clip-vit-base-patch32)                                           |    English    |     74     |         512          | CLIP (Contrastive Language-Image Pre-Training) is a neural network trained on a variety of (image, text) pairs. CLIP allows to embed images and text into the same vector space. This allows to find similar images as well as to implement image search. This is the text encoder part of the CLIP model. To embed images checkout [clip-vit-base-patch32-image](../02-computer-vision/useImageEmbeddings.md#supported-models). |
+| [LFM2.5-Embedding-350M](https://huggingface.co/LiquidAI/LFM2.5-Embedding-350M)                                              |  Multilingual |    512     |         1024         | Dense bi-encoder from Liquid AI with CLS pooling. Trained with asymmetric `query: `/`document: ` prompts, so `forward` requires a `role`. On iOS it runs on the GPU via the MLX backend (physical device only); Android uses XNNPACK.                                                                                                                                                                                            |
+| [LFM2.5-ColBERT-350M](https://huggingface.co/LiquidAI/LFM2.5-ColBERT-350M)                                                  |  Multilingual |    512     |     128 (per token)  | Late-interaction (multi-vector) retriever from Liquid AI: a `Linear(1024→128)` head emits one normalized vector per token. `forward` returns an `EmbeddingResult`; score query/document pairs with MaxSim (see below). Uses `[Q] `/`[D] ` role prompts.                                                                                                                                                                          |
 
 **`Max Tokens`** - The maximum number of tokens that can be processed by the model. If the input text exceeds this limit, it will be truncated.
 
@@ -120,3 +136,53 @@ function App() {
 :::note
 For the supported models, the returned embedding vector is normalized, meaning that its length is equal to 1. This allows for easier comparison of vectors using cosine similarity, just calculate the dot product of two vectors to get the cosine similarity score.
 :::
+
+## Late interaction (multi-vector models)
+
+Multi-vector models such as LFM2.5-ColBERT do not pool the sequence into a single vector. Instead, `forward` returns an [`EmbeddingResult`](../../06-api-reference/interfaces/EmbeddingResult.md) holding one normalized vector per token. You score a query against a document with **MaxSim**: for every query-token vector, take its highest dot product against the document-token vectors, then sum those maxima.
+
+The library is a pure embedder — it gives you the per-token vectors and the model's punctuation `skipListIds`, but scoring is your concern (so it can run wherever you store the vectors). A reference `maxSim` implementation:
+
+```typescript
+import { models, useTextEmbeddings, EmbeddingResult } from 'react-native-executorch';
+
+const colbert = models.text_embedding.lfm2_5_colbert_350m();
+const skipListIds = colbert.skipListIds ?? [];
+
+const maxSim = (
+  query: EmbeddingResult,
+  doc: EmbeddingResult,
+  skip: number[] = []
+) => {
+  const dim = query.embeddingDim;
+  const skipped = new Set(skip);
+  let score = 0;
+  for (let qi = 0; qi < query.numTokens; qi++) {
+    const qOff = qi * dim;
+    let best = -Infinity;
+    for (let di = 0; di < doc.numTokens; di++) {
+      if (skipped.has(doc.tokenIds[di])) continue;
+      const dOff = di * dim;
+      let dot = 0;
+      for (let k = 0; k < dim; k++) {
+        dot += query.vectors[qOff + k] * doc.vectors[dOff + k];
+      }
+      if (dot > best) best = dot;
+    }
+    if (best !== -Infinity) score += best;
+  }
+  return score;
+};
+
+function App() {
+  const model = useTextEmbeddings({ model: colbert });
+  // Call from an event handler or effect once `model.isReady` is true.
+  const scorePair = async () => {
+    const query = await model.forward('What is the weather?', 'query');
+    const doc = await model.forward('It is sunny today.', 'document');
+    return maxSim(query, doc, skipListIds);
+  };
+}
+```
+
+The `skipListIds` shipped on the model config are the punctuation token ids excluded from scoring (derived from the model's training config). Per-token vectors are L2-normalized by the graph, so the dot product equals cosine similarity.
diff --git a/docs/versioned_docs/version-0.9.x/06-api-reference/classes/TextEmbeddingsModule.md b/docs/versioned_docs/version-0.9.x/06-api-reference/classes/TextEmbeddingsModule.md
index 2c6141349e..9bce0bfdff 100644
--- a/docs/versioned_docs/version-0.9.x/06-api-reference/classes/TextEmbeddingsModule.md
+++ b/docs/versioned_docs/version-0.9.x/06-api-reference/classes/TextEmbeddingsModule.md
@@ -1,8 +1,8 @@
 # Class: TextEmbeddingsModule
 
-Defined in: [modules/natural_language_processing/TextEmbeddingsModule.ts:13](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts#L13)
+Defined in: [modules/natural\_language\_processing/TextEmbeddingsModule.ts:19](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts#L19)
 
-Module for generating text embeddings from input text.
+Module for managing a Text Embeddings model instance.
 
 ## Extends
 
@@ -23,14 +23,12 @@ making it worklet-compatible and safe to call from VisionCamera's
 frame processor thread.
 
 **Performance characteristics:**
-
 - **Zero-copy path**: When using `frame.getNativeBuffer()` from VisionCamera v5,
   frame data is accessed directly without copying (fastest, recommended).
 - **Copy path**: When using `frame.toArrayBuffer()`, pixel data is copied
   from native to JS, then accessed from native code (slower, fallback).
 
 **Usage with VisionCamera:**
-
 ```typescript
 const frameOutput = useFrameOutput({
   pixelFormat: 'rgb',
@@ -39,16 +37,12 @@ const frameOutput = useFrameOutput({
     // Zero-copy approach (recommended)
     const nativeBuffer = frame.getNativeBuffer();
     const result = model.generateFromFrame(
-      {
-        nativeBuffer: nativeBuffer.pointer,
-        width: frame.width,
-        height: frame.height,
-      },
+      { nativeBuffer: nativeBuffer.pointer, width: frame.width, height: frame.height },
       ...args
     );
     nativeBuffer.release();
     frame.dispose();
-  },
+  }
 });
 ```
 
@@ -80,7 +74,7 @@ Model-specific output (e.g., detections, classifications, embeddings)
 
 `BaseModule.generateFromFrame`
 
----
+***
 
 ### nativeModule
 
@@ -116,15 +110,16 @@ Always call this method when you're done with a model to prevent memory leaks.
 
 `BaseModule.delete`
 
----
+***
 
 ### forward()
 
-> **forward**(`input`): `Promise`\<`Float32Array`\<`ArrayBufferLike`\>\>
+> **forward**(`input`, `role?`): `Promise`\<`Float32Array`\<`ArrayBufferLike`\> \| [`EmbeddingResult`](../interfaces/EmbeddingResult.md)\>
 
-Defined in: [modules/natural_language_processing/TextEmbeddingsModule.ts:82](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts#L82)
+Defined in: [modules/natural\_language\_processing/TextEmbeddingsModule.ts:101](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts#L101)
 
-Executes the model's forward pass to generate an embedding for the provided text.
+Embed text into a pooled `Float32Array`, or a per-token `EmbeddingResult`
+for `multiVector` models.
 
 #### Parameters
 
@@ -132,15 +127,26 @@ Executes the model's forward pass to generate an embedding for the provided text
 
 `string`
 
-The text string to embed.
+The text to embed.
+
+##### role?
+
+[`EmbeddingRole`](../type-aliases/EmbeddingRole.md)
+
+Optional role ('query' | 'document') for models with
+  asymmetric prompts; prepends the model's prompt for that role.
 
 #### Returns
 
-`Promise`\<`Float32Array`\<`ArrayBufferLike`\>\>
+`Promise`\<`Float32Array`\<`ArrayBufferLike`\> \| [`EmbeddingResult`](../interfaces/EmbeddingResult.md)\>
 
-A Promise resolving to a `Float32Array` containing the embedding vector.
+A `Float32Array` for pooled models, an `EmbeddingResult` otherwise.
 
----
+#### Throws
+
+If the model is not loaded.
+
+***
 
 ### forwardET()
 
@@ -171,7 +177,7 @@ Array of output tensors.
 
 `BaseModule.forwardET`
 
----
+***
 
 ### getInputShape()
 
@@ -205,16 +211,18 @@ The input shape as an array of numbers.
 
 `BaseModule.getInputShape`
 
----
+***
 
 ### fromCustomModel()
 
 > `static` **fromCustomModel**(`modelSource`, `tokenizerSource`, `onDownloadProgress?`): `Promise`\<`TextEmbeddingsModule`\>
 
-Defined in: [modules/natural_language_processing/TextEmbeddingsModule.ts:62](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts#L62)
+Defined in: [modules/natural\_language\_processing/TextEmbeddingsModule.ts:77](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts#L77)
 
-Creates a text embeddings instance with a user-provided model binary and tokenizer.
-Use this when working with a custom-exported model that is not one of the built-in presets.
+Creates a text embeddings instance with a user-provided model binary.
+Use this when working with a custom-exported embeddings model. Internally
+uses `'custom'` as the model name. Note that prompts, multi-vector output,
+and skipLists are model-config features and are not configured here.
 
 #### Parameters
 
@@ -242,18 +250,13 @@ Optional callback to monitor download progress, receiving a value between 0 and
 
 A Promise resolving to a `TextEmbeddingsModule` instance.
 
-#### Remarks
-
-The native model contract for this method is not formally defined and may change
-between releases. Refer to the native source code for the current expected tensor interface.
-
----
+***
 
 ### fromModelName()
 
 > `static` **fromModelName**(`namedSources`, `onDownloadProgress?`): `Promise`\<`TextEmbeddingsModule`\>
 
-Defined in: [modules/natural_language_processing/TextEmbeddingsModule.ts:25](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts#L25)
+Defined in: [modules/natural\_language\_processing/TextEmbeddingsModule.ts:42](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts#L42)
 
 Creates a text embeddings instance for a built-in model.
 
@@ -261,25 +264,17 @@ Creates a text embeddings instance for a built-in model.
 
 ##### namedSources
 
-An object specifying which built-in model to load and where to fetch it from.
+[`TextEmbeddingsModel`](../interfaces/TextEmbeddingsModel.md)
 
-###### modelName
-
-[`TextEmbeddingsModelName`](../type-aliases/TextEmbeddingsModelName.md)
-
-###### modelSource
-
-[`ResourceSource`](../type-aliases/ResourceSource.md)
-
-###### tokenizerSource
-
-[`ResourceSource`](../type-aliases/ResourceSource.md)
+An object specifying the model name, model source,
+  tokenizer source, and optional `prompts` / `multiVector` / `skipListIds`.
 
 ##### onDownloadProgress?
 
 (`progress`) => `void`
 
-Optional callback to monitor download progress, receiving a value between 0 and 1.
+Optional callback to monitor download progress,
+  receiving a value between 0 and 1.
 
 #### Returns
 
diff --git a/docs/versioned_docs/version-0.9.x/06-api-reference/functions/useTextEmbeddings.md b/docs/versioned_docs/version-0.9.x/06-api-reference/functions/useTextEmbeddings.md
index 6bc23e5219..b5de9d57b1 100644
--- a/docs/versioned_docs/version-0.9.x/06-api-reference/functions/useTextEmbeddings.md
+++ b/docs/versioned_docs/version-0.9.x/06-api-reference/functions/useTextEmbeddings.md
@@ -1,21 +1,30 @@
 # Function: useTextEmbeddings()
 
-> **useTextEmbeddings**(`TextEmbeddingsProps`): [`TextEmbeddingsType`](../interfaces/TextEmbeddingsType.md)
+> **useTextEmbeddings**\<`M`\>(`TextEmbeddingsProps`): [`TextEmbeddingsType`](../interfaces/TextEmbeddingsType.md)\<`M`\>
 
-Defined in: [hooks/natural_language_processing/useTextEmbeddings.ts:14](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/hooks/natural_language_processing/useTextEmbeddings.ts#L14)
+Defined in: [hooks/natural\_language\_processing/useTextEmbeddings.ts:20](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/hooks/natural_language_processing/useTextEmbeddings.ts#L20)
 
 React hook for managing a Text Embeddings model instance.
 
+## Type Parameters
+
+### M
+
+`M` *extends* [`TextEmbeddingsModel`](../interfaces/TextEmbeddingsModel.md)
+
 ## Parameters
 
 ### TextEmbeddingsProps
 
-[`TextEmbeddingsProps`](../interfaces/TextEmbeddingsProps.md)
+[`TextEmbeddingsProps`](../interfaces/TextEmbeddingsProps.md)\<`M`\>
 
 Configuration object containing `model` source and optional `preventLoad` flag.
 
 ## Returns
 
-[`TextEmbeddingsType`](../interfaces/TextEmbeddingsType.md)
+[`TextEmbeddingsType`](../interfaces/TextEmbeddingsType.md)\<`M`\>
 
-Ready to use Text Embeddings model.
+Ready to use Text Embeddings model. `forward` returns a
+  `Float32Array` for pooled models and an `EmbeddingResult` (per-token
+  vectors) for multi-vector models. Models with prompts require a `role`
+  ('query' | 'document') on `forward`.
diff --git a/docs/versioned_docs/version-0.9.x/06-api-reference/interfaces/EmbeddingPrompts.md b/docs/versioned_docs/version-0.9.x/06-api-reference/interfaces/EmbeddingPrompts.md
new file mode 100644
index 0000000000..0244afadd1
--- /dev/null
+++ b/docs/versioned_docs/version-0.9.x/06-api-reference/interfaces/EmbeddingPrompts.md
@@ -0,0 +1,22 @@
+# Interface: EmbeddingPrompts
+
+Defined in: [types/textEmbeddings.ts:49](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L49)
+
+Asymmetric prompts a model is trained with. When a model config carries
+these, `forward` requires a `role` so the matching prompt is always applied.
+
+## Properties
+
+### document
+
+> **document**: `string`
+
+Defined in: [types/textEmbeddings.ts:51](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L51)
+
+***
+
+### query
+
+> **query**: `string`
+
+Defined in: [types/textEmbeddings.ts:50](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L50)
diff --git a/docs/versioned_docs/version-0.9.x/06-api-reference/interfaces/EmbeddingResult.md b/docs/versioned_docs/version-0.9.x/06-api-reference/interfaces/EmbeddingResult.md
new file mode 100644
index 0000000000..e02bd77aa4
--- /dev/null
+++ b/docs/versioned_docs/version-0.9.x/06-api-reference/interfaces/EmbeddingResult.md
@@ -0,0 +1,47 @@
+# Interface: EmbeddingResult
+
+Defined in: [types/textEmbeddings.ts:25](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L25)
+
+Per-token (multi-vector) embedding output for late-interaction models (e.g.
+ColBERT). Only `multiVector` models yield this; standard models return a
+pooled `Float32Array` from `forward` instead.
+
+## Properties
+
+### embeddingDim
+
+> **embeddingDim**: `number`
+
+Defined in: [types/textEmbeddings.ts:31](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L31)
+
+Per-token vector dimension.
+
+***
+
+### numTokens
+
+> **numTokens**: `number`
+
+Defined in: [types/textEmbeddings.ts:29](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L29)
+
+Number of token rows.
+
+***
+
+### tokenIds
+
+> **tokenIds**: `number`[]
+
+Defined in: [types/textEmbeddings.ts:33](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L33)
+
+Input token ids per row.
+
+***
+
+### vectors
+
+> **vectors**: `Float32Array`
+
+Defined in: [types/textEmbeddings.ts:27](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L27)
+
+Flat [numTokens * embeddingDim] fp32 vectors (row-major).
diff --git a/docs/versioned_docs/version-0.9.x/06-api-reference/interfaces/TextEmbeddingsModel.md b/docs/versioned_docs/version-0.9.x/06-api-reference/interfaces/TextEmbeddingsModel.md
new file mode 100644
index 0000000000..6bd254a93a
--- /dev/null
+++ b/docs/versioned_docs/version-0.9.x/06-api-reference/interfaces/TextEmbeddingsModel.md
@@ -0,0 +1,60 @@
+# Interface: TextEmbeddingsModel
+
+Defined in: [types/textEmbeddings.ts:60](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L60)
+
+A text embeddings model config. Two optional flags drive `forward`:
+`prompts` makes a `role` argument required, and `multiVector` makes it return
+a per-token `EmbeddingResult` instead of a pooled `Float32Array`.
+
+## Properties
+
+### modelName
+
+> **modelName**: [`TextEmbeddingsModelName`](../type-aliases/TextEmbeddingsModelName.md)
+
+Defined in: [types/textEmbeddings.ts:61](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L61)
+
+***
+
+### modelSource
+
+> **modelSource**: [`ResourceSource`](../type-aliases/ResourceSource.md)
+
+Defined in: [types/textEmbeddings.ts:62](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L62)
+
+***
+
+### multiVector?
+
+> `optional` **multiVector**: `boolean`
+
+Defined in: [types/textEmbeddings.ts:65](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L65)
+
+***
+
+### prompts?
+
+> `optional` **prompts**: [`EmbeddingPrompts`](EmbeddingPrompts.md)
+
+Defined in: [types/textEmbeddings.ts:64](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L64)
+
+***
+
+### skipListIds?
+
+> `optional` **skipListIds**: `number`[]
+
+Defined in: [types/textEmbeddings.ts:72](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L72)
+
+Document token ids to exclude from late-interaction scoring (e.g. ColBERT's
+punctuation skipList). Derived from the model's training config, so it's
+shipped here rather than reconstructed by the consumer, who passes it to
+their own MaxSim scoring.
+
+***
+
+### tokenizerSource
+
+> **tokenizerSource**: [`ResourceSource`](../type-aliases/ResourceSource.md)
+
+Defined in: [types/textEmbeddings.ts:63](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L63)
diff --git a/docs/versioned_docs/version-0.9.x/06-api-reference/interfaces/TextEmbeddingsProps.md b/docs/versioned_docs/version-0.9.x/06-api-reference/interfaces/TextEmbeddingsProps.md
index 1581b79edb..4556bd9dbd 100644
--- a/docs/versioned_docs/version-0.9.x/06-api-reference/interfaces/TextEmbeddingsProps.md
+++ b/docs/versioned_docs/version-0.9.x/06-api-reference/interfaces/TextEmbeddingsProps.md
@@ -1,43 +1,31 @@
-# Interface: TextEmbeddingsProps
+# Interface: TextEmbeddingsProps\<M\>
 
-Defined in: [types/textEmbeddings.ts:26](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L26)
+Defined in: [types/textEmbeddings.ts:112](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L112)
 
 Props for the useTextEmbeddings hook.
 
-## Properties
-
-### model
-
-> **model**: `object`
-
-Defined in: [types/textEmbeddings.ts:27](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L27)
-
-An object containing the model configuration.
-
-#### modelName
-
-> **modelName**: [`TextEmbeddingsModelName`](../type-aliases/TextEmbeddingsModelName.md)
+## Type Parameters
 
-The unique name of the text embeddings model.
+### M
 
-#### modelSource
+`M` *extends* [`TextEmbeddingsModel`](TextEmbeddingsModel.md) = [`TextEmbeddingsModel`](TextEmbeddingsModel.md)
 
-> **modelSource**: [`ResourceSource`](../type-aliases/ResourceSource.md)
+## Properties
 
-The source of the text embeddings model binary.
+### model
 
-#### tokenizerSource
+> **model**: `M`
 
-> **tokenizerSource**: [`ResourceSource`](../type-aliases/ResourceSource.md)
+Defined in: [types/textEmbeddings.ts:115](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L115)
 
-The source of the tokenizer JSON file.
+An object containing the model configuration.
 
----
+***
 
 ### preventLoad?
 
 > `optional` **preventLoad**: `boolean`
 
-Defined in: [types/textEmbeddings.ts:41](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L41)
+Defined in: [types/textEmbeddings.ts:116](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L116)
 
 Boolean that can prevent automatic model loading (and downloading the data if you load it for the first time) after running the hook.
diff --git a/docs/versioned_docs/version-0.9.x/06-api-reference/interfaces/TextEmbeddingsType.md b/docs/versioned_docs/version-0.9.x/06-api-reference/interfaces/TextEmbeddingsType.md
index 78c267daf8..5f4b9c90dc 100644
--- a/docs/versioned_docs/version-0.9.x/06-api-reference/interfaces/TextEmbeddingsType.md
+++ b/docs/versioned_docs/version-0.9.x/06-api-reference/interfaces/TextEmbeddingsType.md
@@ -1,8 +1,14 @@
-# Interface: TextEmbeddingsType
+# Interface: TextEmbeddingsType\<M\>
 
-Defined in: [types/textEmbeddings.ts:48](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L48)
+Defined in: [types/textEmbeddings.ts:123](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L123)
 
-React hook state and methods for managing a Text Embeddings model instance.
+React hook state and methods for a Text Embeddings model instance.
+
+## Type Parameters
+
+### M
+
+`M` *extends* [`TextEmbeddingsModel`](TextEmbeddingsModel.md) = [`TextEmbeddingsModel`](TextEmbeddingsModel.md)
 
 ## Properties
 
@@ -10,64 +16,62 @@ React hook state and methods for managing a Text Embeddings model instance.
 
 > **downloadProgress**: `number`
 
-Defined in: [types/textEmbeddings.ts:67](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L67)
+Defined in: [types/textEmbeddings.ts:141](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L141)
 
 Tracks the progress of the model download process (value between 0 and 1).
 
----
+***
 
 ### error
 
 > **error**: [`RnExecutorchError`](../classes/RnExecutorchError.md) \| `null`
 
-Defined in: [types/textEmbeddings.ts:52](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L52)
+Defined in: [types/textEmbeddings.ts:129](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L129)
 
 Contains the error message if the model failed to load or during inference.
 
----
+***
 
-### isGenerating
+### forward
 
-> **isGenerating**: `boolean`
-
-Defined in: [types/textEmbeddings.ts:62](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L62)
+> **forward**: [`ForwardFn`](../type-aliases/ForwardFn.md)\<`M`\>
 
-Indicates whether the model is currently generating embeddings.
+Defined in: [types/textEmbeddings.ts:149](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L149)
 
----
+Runs the text embeddings model on the provided input string.
 
-### isReady
+#### Param
 
-> **isReady**: `boolean`
+The text string to embed.
 
-Defined in: [types/textEmbeddings.ts:57](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L57)
+#### Param
 
-Indicates whether the embeddings model has successfully loaded and is ready for inference.
+Optional role for models with asymmetric prompts. Required if the model has `prompts`.
 
-## Methods
+#### Returns
 
-### forward()
+A promise resolving to a Float32Array or EmbeddingResult containing the vector embeddings.
 
-> **forward**(`input`): `Promise`\<`Float32Array`\<`ArrayBufferLike`\>\>
+#### Throws
 
-Defined in: [types/textEmbeddings.ts:75](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L75)
+If the model is not loaded or is currently processing another request.
 
-Runs the text embeddings model on the provided input string.
+***
 
-#### Parameters
+### isGenerating
 
-##### input
+> **isGenerating**: `boolean`
 
-`string`
+Defined in: [types/textEmbeddings.ts:137](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L137)
 
-The text string to embed.
+Indicates whether the model is currently generating embeddings.
 
-#### Returns
+***
 
-`Promise`\<`Float32Array`\<`ArrayBufferLike`\>\>
+### isReady
 
-A promise resolving to a Float32Array containing the vector embeddings.
+> **isReady**: `boolean`
 
-#### Throws
+Defined in: [types/textEmbeddings.ts:133](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L133)
 
-If the model is not loaded or is currently processing another request.
+Indicates whether the embeddings model has successfully loaded and is ready for inference.
diff --git a/docs/versioned_docs/version-0.9.x/06-api-reference/type-aliases/EmbeddingRole.md b/docs/versioned_docs/version-0.9.x/06-api-reference/type-aliases/EmbeddingRole.md
new file mode 100644
index 0000000000..16d869dd78
--- /dev/null
+++ b/docs/versioned_docs/version-0.9.x/06-api-reference/type-aliases/EmbeddingRole.md
@@ -0,0 +1,9 @@
+# Type Alias: EmbeddingRole
+
+> **EmbeddingRole** = `"query"` \| `"document"`
+
+Defined in: [types/textEmbeddings.ts:42](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L42)
+
+Role for `forward`. Some models are trained with asymmetric query/document
+prompts (e.g. LFM2.5 uses `query: `/`document: `, ColBERT uses `[Q] `/`[D] `).
+Passing a role auto-prepends the model's configured prompt for that role.
diff --git a/docs/versioned_docs/version-0.9.x/06-api-reference/type-aliases/ForwardFn.md b/docs/versioned_docs/version-0.9.x/06-api-reference/type-aliases/ForwardFn.md
new file mode 100644
index 0000000000..13e311b31e
--- /dev/null
+++ b/docs/versioned_docs/version-0.9.x/06-api-reference/type-aliases/ForwardFn.md
@@ -0,0 +1,15 @@
+# Type Alias: ForwardFn\<M\>
+
+> **ForwardFn**\<`M`\> = `M` *extends* `object` ? (`input`, `role`) => `Promise`\<[`ForwardReturn`](ForwardReturn.md)\<`M`\>\> : `undefined` *extends* `M`\[`"prompts"`\] ? `M`\[`"prompts"`\] *extends* `undefined` ? (`input`) => `Promise`\<[`ForwardReturn`](ForwardReturn.md)\<`M`\>\> : (`input`, `role?`) => `Promise`\<[`ForwardReturn`](ForwardReturn.md)\<`M`\>\> : (`input`) => `Promise`\<[`ForwardReturn`](ForwardReturn.md)\<`M`\>\>
+
+Defined in: [types/textEmbeddings.ts:90](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L90)
+
+`forward`'s signature, computed from the model config: `role` is required
+when the model has `prompts`, omitted when it has none, and optional when
+unknown (e.g. a heterogeneous model list).
+
+## Type Parameters
+
+### M
+
+`M` *extends* [`TextEmbeddingsModel`](../interfaces/TextEmbeddingsModel.md)
diff --git a/docs/versioned_docs/version-0.9.x/06-api-reference/type-aliases/ForwardReturn.md b/docs/versioned_docs/version-0.9.x/06-api-reference/type-aliases/ForwardReturn.md
new file mode 100644
index 0000000000..8ee72147b8
--- /dev/null
+++ b/docs/versioned_docs/version-0.9.x/06-api-reference/type-aliases/ForwardReturn.md
@@ -0,0 +1,14 @@
+# Type Alias: ForwardReturn\<M\>
+
+> **ForwardReturn**\<`M`\> = `M` *extends* `object` ? [`EmbeddingResult`](../interfaces/EmbeddingResult.md) : `Float32Array`
+
+Defined in: [types/textEmbeddings.ts:79](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L79)
+
+`forward`'s return type: `EmbeddingResult` for `multiVector` models,
+`Float32Array` otherwise.
+
+## Type Parameters
+
+### M
+
+`M` *extends* [`TextEmbeddingsModel`](../interfaces/TextEmbeddingsModel.md)
diff --git a/docs/versioned_docs/version-0.9.x/06-api-reference/type-aliases/TextEmbeddingsModelName.md b/docs/versioned_docs/version-0.9.x/06-api-reference/type-aliases/TextEmbeddingsModelName.md
index 4d419240ce..54abdf4901 100644
--- a/docs/versioned_docs/version-0.9.x/06-api-reference/type-aliases/TextEmbeddingsModelName.md
+++ b/docs/versioned_docs/version-0.9.x/06-api-reference/type-aliases/TextEmbeddingsModelName.md
@@ -1,6 +1,6 @@
 # Type Alias: TextEmbeddingsModelName
 
-> **TextEmbeddingsModelName** = `"all-minilm-l6-v2"` \| `"all-mpnet-base-v2"` \| `"multi-qa-minilm-l6-cos-v1"` \| `"multi-qa-mpnet-base-dot-v1"` \| `"distiluse-base-multilingual-cased-v2-8da4w"` \| `"paraphrase-multilingual-minilm-l12-v2-quantized"` \| `"clip-vit-base-patch32-text"`
+> **TextEmbeddingsModelName** = `"all-minilm-l6-v2"` \| `"all-mpnet-base-v2"` \| `"multi-qa-minilm-l6-cos-v1"` \| `"multi-qa-mpnet-base-dot-v1"` \| `"distiluse-base-multilingual-cased-v2-8da4w"` \| `"paraphrase-multilingual-minilm-l12-v2-quantized"` \| `"clip-vit-base-patch32-text"` \| `"lfm2-5-embedding-350m"` \| `"lfm2-5-colbert-350m"`
 
 Defined in: [types/textEmbeddings.ts:8](https://github.com/software-mansion/react-native-executorch/blob/0e95b8934cc7318c1b30a8e534444a8b50d25230/packages/react-native-executorch/src/types/textEmbeddings.ts#L8)
 

From a593082b5f8999d35e08f4e83a21ebe3cd536fee Mon Sep 17 00:00:00 2001
From: Norbert Klockiewicz <Nklockiewicz12@gmail.com>
Date: Tue, 23 Jun 2026 10:36:00 +0200
Subject: [PATCH 10/14] docs: align TextEmbeddingsModule JSDoc with LLMModule
 convention

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../TextEmbeddingsModule.ts                   | 23 +++++++++++++------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts b/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts
index e192de0664..d3b8f890b9 100644
--- a/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts
+++ b/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts
@@ -33,8 +33,11 @@ export class TextEmbeddingsModule extends BaseModule {
 
   /**
    * Creates a text embeddings instance for a built-in model.
-   * @param namedSources - The model config (+ optional prompts / multiVector).
-   * @param onDownloadProgress - Optional download progress callback (0..1).
+   * @param namedSources - An object specifying the model name, model source,
+   *   tokenizer source, and optional `prompts` / `multiVector` / `skipListIds`.
+   * @param onDownloadProgress - Optional callback to monitor download progress,
+   *   receiving a value between 0 and 1.
+   * @returns A Promise resolving to a `TextEmbeddingsModule` instance.
    */
   static async fromModelName(
     namedSources: TextEmbeddingsModel,
@@ -62,9 +65,14 @@ export class TextEmbeddingsModule extends BaseModule {
   }
 
   /**
-   * Creates a text embeddings instance from a custom model binary + tokenizer.
-   * @remarks The native tensor contract is not formally guaranteed across
-   * releases.
+   * Creates a text embeddings instance with a user-provided model binary.
+   * Use this when working with a custom-exported embeddings model. Internally
+   * uses `'custom'` as the model name. Note that `prompts`, `multiVector`,
+   * and `skipListIds` are model-config fields and are not configured here.
+   * @param modelSource - A fetchable resource pointing to the model binary.
+   * @param tokenizerSource - A fetchable resource pointing to the tokenizer file.
+   * @param onDownloadProgress - Optional callback to monitor download progress, receiving a value between 0 and 1.
+   * @returns A Promise resolving to a `TextEmbeddingsModule` instance.
    */
   static fromCustomModel(
     modelSource: ResourceSource,
@@ -85,9 +93,10 @@ export class TextEmbeddingsModule extends BaseModule {
    * Embed text into a pooled `Float32Array`, or a per-token `EmbeddingResult`
    * for `multiVector` models.
    * @param input - The text to embed.
-   * @param role - 'query' | 'document'; prepends the model's prompt for that
-   *   role when configured.
+   * @param role - Optional role ('query' | 'document') for models with
+   *   asymmetric prompts; prepends the model's prompt for that role.
    * @returns A `Float32Array` for pooled models, an `EmbeddingResult` otherwise.
+   * @throws {RnExecutorchError} If the model is not loaded.
    */
   async forward(
     input: string,

From f470d20db9b55395a6368a0c2347aeed8632351b Mon Sep 17 00:00:00 2001
From: Norbert Klockiewicz <Nklockiewicz12@gmail.com>
Date: Tue, 23 Jun 2026 10:42:50 +0200
Subject: [PATCH 11/14] test: assert EmbeddingResult metadata + tokenIds;
 clarify role JSDoc

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../tests/integration/TextEmbeddingsTest.cpp  | 33 +++++++++++++++++++
 .../TextEmbeddingsModule.ts                   |  6 ++--
 2 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/packages/react-native-executorch/common/rnexecutorch/tests/integration/TextEmbeddingsTest.cpp b/packages/react-native-executorch/common/rnexecutorch/tests/integration/TextEmbeddingsTest.cpp
index 0e0cc846b5..cf7d6c4804 100644
--- a/packages/react-native-executorch/common/rnexecutorch/tests/integration/TextEmbeddingsTest.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/tests/integration/TextEmbeddingsTest.cpp
@@ -142,6 +142,39 @@ TEST(TextEmbeddingsGenerateTests, SimilarTextProducesSimilarEmbeddings) {
   EXPECT_GT(dotProduct, 0.5f);
 }
 
+TEST(TextEmbeddingsGenerateTests, PooledResultMetadataIsConsistent) {
+  TextEmbeddings model(kValidTextEmbeddingsModelPath,
+                       kValidTextEmbeddingsTokenizerPath, nullptr);
+  auto result = model.generate("A pooled embedding has a single row.");
+
+  EXPECT_EQ(result.numTokens, 1);
+  EXPECT_EQ(result.embeddingDim,
+            static_cast<int32_t>(kMiniLmEmbeddingDimensions));
+  EXPECT_EQ(result.dataPtr->size(),
+            static_cast<size_t>(result.numTokens) * result.embeddingDim *
+                sizeof(float));
+}
+
+TEST(TextEmbeddingsGenerateTests, TokenIdsIncludeSpecialTokens) {
+  TextEmbeddings model(kValidTextEmbeddingsModelPath,
+                       kValidTextEmbeddingsTokenizerPath, nullptr);
+  auto result = model.generate("Hello");
+
+  // The tokenizer post_processor wraps the input as [CLS] ... [SEP], so even a
+  // single word yields more than one token id.
+  EXPECT_GT(result.tokenIds.size(), 1u);
+}
+
+TEST(TextEmbeddingsGenerateTests, TokenIdsGrowWithInputLength) {
+  TextEmbeddings model(kValidTextEmbeddingsModelPath,
+                       kValidTextEmbeddingsTokenizerPath, nullptr);
+  auto shortResult = model.generate("Hi");
+  auto longResult =
+      model.generate("This sentence is considerably longer than the other.");
+
+  EXPECT_GT(longResult.tokenIds.size(), shortResult.tokenIds.size());
+}
+
 TEST(TextEmbeddingsInheritedTests, GetInputShapeWorks) {
   TextEmbeddings model(kValidTextEmbeddingsModelPath,
                        kValidTextEmbeddingsTokenizerPath, nullptr);
diff --git a/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts b/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts
index d3b8f890b9..b9e2e866d1 100644
--- a/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts
+++ b/packages/react-native-executorch/src/modules/natural_language_processing/TextEmbeddingsModule.ts
@@ -93,8 +93,10 @@ export class TextEmbeddingsModule extends BaseModule {
    * Embed text into a pooled `Float32Array`, or a per-token `EmbeddingResult`
    * for `multiVector` models.
    * @param input - The text to embed.
-   * @param role - Optional role ('query' | 'document') for models with
-   *   asymmetric prompts; prepends the model's prompt for that role.
+   * @param role - Role ('query' | 'document') for models with asymmetric
+   *   prompts; the matching prompt is prepended. The `useTextEmbeddings` types
+   *   require it for prompted models and omit it for the rest; at the module
+   *   level it is optional and a no-op when the model has no prompts.
    * @returns A `Float32Array` for pooled models, an `EmbeddingResult` otherwise.
    * @throws {RnExecutorchError} If the model is not loaded.
    */

From 93ee6984ac438710430b43a229fd245fabe68231 Mon Sep 17 00:00:00 2001
From: Norbert Klockiewicz <Nklockiewicz12@gmail.com>
Date: Tue, 23 Jun 2026 10:54:30 +0200
Subject: [PATCH 12/14] fix: remove leftover merge-conflict marker from
 modelRegistry

---
 .../react-native-executorch/src/constants/modelRegistry.ts     | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/packages/react-native-executorch/src/constants/modelRegistry.ts b/packages/react-native-executorch/src/constants/modelRegistry.ts
index 472524877c..3e502ad9cf 100644
--- a/packages/react-native-executorch/src/constants/modelRegistry.ts
+++ b/packages/react-native-executorch/src/constants/modelRegistry.ts
@@ -313,7 +313,8 @@ const LFM2_5_COLBERT_350M_VARIANTS = {
       modelSource: M.LFM2_5_COLBERT_350M_XNNPACK_MODEL,
     },
   },
-=======
+};
+
 const LFM2_5_350M_VARIANTS = {
   mlx: { base: { ...M.LFM2_5_350M, modelSource: M.LFM2_5_350M_MLX_MODEL } },
   xnnpack: { base: M.LFM2_5_350M, quant: M.LFM2_5_350M_QUANTIZED },

From 34e4c09d17f9f9636a4c44d2b8ec8b48f16397e1 Mon Sep 17 00:00:00 2001
From: Norbert Klockiewicz <Nklockiewicz12@gmail.com>
Date: Tue, 23 Jun 2026 11:09:24 +0200
Subject: [PATCH 13/14] docs: list new model-config fields + correct forward
 return type

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../useTextEmbeddings.md                          |  8 +++++++-
 .../TextEmbeddingsModule.md                       | 15 +++++++++++----
 .../useTextEmbeddings.md                          |  8 +++++++-
 .../TextEmbeddingsModule.md                       | 15 +++++++++++----
 4 files changed, 36 insertions(+), 10 deletions(-)

diff --git a/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md b/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md
index fc0faff7e0..464e64620e 100644
--- a/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md
+++ b/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md
@@ -45,7 +45,13 @@ try {
 
 `useTextEmbeddings` takes [`TextEmbeddingsProps`](../../06-api-reference/interfaces/TextEmbeddingsProps.md) that consists of:
 
-- `model` of type `object` containing the [model source](../../06-api-reference/interfaces/TextEmbeddingsProps.md#modelsource) and [tokenizer source](../../06-api-reference/interfaces/TextEmbeddingsProps.md#tokenizersource).
+- `model` of type `object` ([`TextEmbeddingsModel`](../../06-api-reference/interfaces/TextEmbeddingsModel.md)) containing:
+  - `modelName` - Unique name identifying the model.
+  - `modelSource` - Location of the used model.
+  - `tokenizerSource` - Location of the used tokenizer.
+  - `prompts` _(optional)_ - Asymmetric `query`/`document` prompts the model is trained with. When present, `forward` requires a `role` and prepends the matching prompt.
+  - `multiVector` _(optional)_ - When `true`, `forward` returns the per-token [`EmbeddingResult`](../../06-api-reference/interfaces/EmbeddingResult.md) instead of a single pooled `Float32Array`.
+  - `skipListIds` _(optional)_ - Token ids to exclude from late-interaction (MaxSim) scoring.
 - An optional flag [`preventLoad`](../../06-api-reference/interfaces/TextEmbeddingsProps.md#preventload) which prevents auto-loading of the model.
 
 You need more details? Check the following resources:
diff --git a/docs/docs/04-typescript-api/01-natural-language-processing/TextEmbeddingsModule.md b/docs/docs/04-typescript-api/01-natural-language-processing/TextEmbeddingsModule.md
index aa563c213d..b4cd478b0e 100644
--- a/docs/docs/04-typescript-api/01-natural-language-processing/TextEmbeddingsModule.md
+++ b/docs/docs/04-typescript-api/01-natural-language-processing/TextEmbeddingsModule.md
@@ -30,13 +30,20 @@ All methods of `TextEmbeddingsModule` are explained in details here: [`TextEmbed
 
 Use the static [`fromModelName`](../../06-api-reference/classes/TextEmbeddingsModule.md#frommodelname) factory method. It accepts a model config object (e.g. `ALL_MINILM_L6_V2`) containing:
 
-- [`modelSource`](../../06-api-reference/classes/TextEmbeddingsModule.md#modelsource) - Location of the used model.
-- [`tokenizerSource`](../../06-api-reference/classes/TextEmbeddingsModule.md#tokenizersource) - Location of the used tokenizer.
+- `modelName` - Unique name identifying the model.
+- `modelSource` - Location of the used model.
+- `tokenizerSource` - Location of the used tokenizer.
+- `prompts` _(optional)_ - Asymmetric `query`/`document` prompts the model is trained with. When present, `forward` requires a `role` and prepends the matching prompt.
+- `multiVector` _(optional)_ - When `true`, `forward` returns the per-token `EmbeddingResult` instead of a single pooled `Float32Array`.
+- `skipListIds` _(optional)_ - Token ids to exclude from late-interaction (MaxSim) scoring.
 
-And an optional `onDownloadProgress` callback. It returns a promise resolving to a `TextEmbeddingsModule` instance.
+And an optional `onDownloadProgress` callback (receiving a value between 0 and 1). It returns a promise resolving to a `TextEmbeddingsModule` instance.
 
 For more information on loading resources, take a look at [loading models](../../01-fundamentals/02-loading-models.md) page.
 
 ## Running the model
 
-To run the model, you can use the [`forward`](../../06-api-reference/classes/TextEmbeddingsModule.md#forward) method. It accepts one argument, which is the text you want to embed. The method returns a promise, which can resolve either to an error or an array of numbers representing the embedding.
+To run the model, use the [`forward`](../../06-api-reference/classes/TextEmbeddingsModule.md#forward) method. It accepts the text to embed and a `role` (`'query' | 'document'`), which is required for models with asymmetric prompts and must be omitted for models without them. The method returns a promise resolving to:
+
+- a `Float32Array` — a single pooled vector — for standard models, or
+- an [`EmbeddingResult`](../../06-api-reference/interfaces/EmbeddingResult.md) with the per-token vectors for `multiVector` models.
diff --git a/docs/versioned_docs/version-0.9.x/03-hooks/01-natural-language-processing/useTextEmbeddings.md b/docs/versioned_docs/version-0.9.x/03-hooks/01-natural-language-processing/useTextEmbeddings.md
index 67bc356e70..c235a89ab4 100644
--- a/docs/versioned_docs/version-0.9.x/03-hooks/01-natural-language-processing/useTextEmbeddings.md
+++ b/docs/versioned_docs/version-0.9.x/03-hooks/01-natural-language-processing/useTextEmbeddings.md
@@ -45,7 +45,13 @@ try {
 
 `useTextEmbeddings` takes [`TextEmbeddingsProps`](../../06-api-reference/interfaces/TextEmbeddingsProps.md) that consists of:
 
-- `model` of type `object` containing the [model source](../../06-api-reference/interfaces/TextEmbeddingsProps.md#modelsource) and [tokenizer source](../../06-api-reference/interfaces/TextEmbeddingsProps.md#tokenizersource).
+- `model` of type `object` ([`TextEmbeddingsModel`](../../06-api-reference/interfaces/TextEmbeddingsModel.md)) containing:
+  - `modelName` - Unique name identifying the model.
+  - `modelSource` - Location of the used model.
+  - `tokenizerSource` - Location of the used tokenizer.
+  - `prompts` _(optional)_ - Asymmetric `query`/`document` prompts the model is trained with. When present, `forward` requires a `role` and prepends the matching prompt.
+  - `multiVector` _(optional)_ - When `true`, `forward` returns the per-token [`EmbeddingResult`](../../06-api-reference/interfaces/EmbeddingResult.md) instead of a single pooled `Float32Array`.
+  - `skipListIds` _(optional)_ - Token ids to exclude from late-interaction (MaxSim) scoring.
 - An optional flag [`preventLoad`](../../06-api-reference/interfaces/TextEmbeddingsProps.md#preventload) which prevents auto-loading of the model.
 
 You need more details? Check the following resources:
diff --git a/docs/versioned_docs/version-0.9.x/04-typescript-api/01-natural-language-processing/TextEmbeddingsModule.md b/docs/versioned_docs/version-0.9.x/04-typescript-api/01-natural-language-processing/TextEmbeddingsModule.md
index aa563c213d..b4cd478b0e 100644
--- a/docs/versioned_docs/version-0.9.x/04-typescript-api/01-natural-language-processing/TextEmbeddingsModule.md
+++ b/docs/versioned_docs/version-0.9.x/04-typescript-api/01-natural-language-processing/TextEmbeddingsModule.md
@@ -30,13 +30,20 @@ All methods of `TextEmbeddingsModule` are explained in details here: [`TextEmbed
 
 Use the static [`fromModelName`](../../06-api-reference/classes/TextEmbeddingsModule.md#frommodelname) factory method. It accepts a model config object (e.g. `ALL_MINILM_L6_V2`) containing:
 
-- [`modelSource`](../../06-api-reference/classes/TextEmbeddingsModule.md#modelsource) - Location of the used model.
-- [`tokenizerSource`](../../06-api-reference/classes/TextEmbeddingsModule.md#tokenizersource) - Location of the used tokenizer.
+- `modelName` - Unique name identifying the model.
+- `modelSource` - Location of the used model.
+- `tokenizerSource` - Location of the used tokenizer.
+- `prompts` _(optional)_ - Asymmetric `query`/`document` prompts the model is trained with. When present, `forward` requires a `role` and prepends the matching prompt.
+- `multiVector` _(optional)_ - When `true`, `forward` returns the per-token `EmbeddingResult` instead of a single pooled `Float32Array`.
+- `skipListIds` _(optional)_ - Token ids to exclude from late-interaction (MaxSim) scoring.
 
-And an optional `onDownloadProgress` callback. It returns a promise resolving to a `TextEmbeddingsModule` instance.
+And an optional `onDownloadProgress` callback (receiving a value between 0 and 1). It returns a promise resolving to a `TextEmbeddingsModule` instance.
 
 For more information on loading resources, take a look at [loading models](../../01-fundamentals/02-loading-models.md) page.
 
 ## Running the model
 
-To run the model, you can use the [`forward`](../../06-api-reference/classes/TextEmbeddingsModule.md#forward) method. It accepts one argument, which is the text you want to embed. The method returns a promise, which can resolve either to an error or an array of numbers representing the embedding.
+To run the model, use the [`forward`](../../06-api-reference/classes/TextEmbeddingsModule.md#forward) method. It accepts the text to embed and a `role` (`'query' | 'document'`), which is required for models with asymmetric prompts and must be omitted for models without them. The method returns a promise resolving to:
+
+- a `Float32Array` — a single pooled vector — for standard models, or
+- an [`EmbeddingResult`](../../06-api-reference/interfaces/EmbeddingResult.md) with the per-token vectors for `multiVector` models.

From e3ab741c43bbf06d589d27ce0f97e7703a685f19 Mon Sep 17 00:00:00 2001
From: Norbert Klockiewicz <Nklockiewicz12@gmail.com>
Date: Wed, 24 Jun 2026 10:52:35 +0200
Subject: [PATCH 14/14] address review: expose maxSim/dotProduct helpers, move
 colbert constants

Promote maxSim and dotProduct from the demo app into the library as
exported text-embedding helpers; move LFM ColBERT prompts/skiplist into
constants/textEmbeddings/colbert.ts; collapse the demo's LFM2.5 entries
to platform-optimal accessors; doc examples now import the helpers; link
the Liquid retrievers blog and add nums to the cspell wordlist.

Pre-commit skipped: types hook fails on a pre-existing, unrelated error
in expo-resource-fetcher (missing RnExecutorchErrorCode, not in this diff).
---
 .cspell-wordlist.txt                          |  1 +
 .../app/clip-embeddings/index.tsx             |  2 +-
 .../app/text-embeddings/index.tsx             | 11 ++---
 .../useTextEmbeddings.md                      | 43 ++++---------------
 .../useTextEmbeddings.md                      | 43 ++++---------------
 .../src/constants/modelRegistry.ts            | 12 ++----
 .../src/constants/textEmbeddings/colbert.ts   |  7 +++
 packages/react-native-executorch/src/index.ts |  1 +
 .../src/utils/textEmbeddings.ts               | 10 ++---
 9 files changed, 40 insertions(+), 90 deletions(-)
 create mode 100644 packages/react-native-executorch/src/constants/textEmbeddings/colbert.ts
 rename apps/text-embeddings/utils/math.ts => packages/react-native-executorch/src/utils/textEmbeddings.ts (82%)

diff --git a/.cspell-wordlist.txt b/.cspell-wordlist.txt
index 3837e96955..8d801d3850 100644
--- a/.cspell-wordlist.txt
+++ b/.cspell-wordlist.txt
@@ -215,3 +215,4 @@ MATEUSZ
 BLAZEFACE
 Blazeface
 blazeface
+nums
diff --git a/apps/text-embeddings/app/clip-embeddings/index.tsx b/apps/text-embeddings/app/clip-embeddings/index.tsx
index 02a8a9c656..affe3c2955 100644
--- a/apps/text-embeddings/app/clip-embeddings/index.tsx
+++ b/apps/text-embeddings/app/clip-embeddings/index.tsx
@@ -17,6 +17,7 @@ import {
   useTextEmbeddings,
   useImageEmbeddings,
   ImageEmbeddingsProps,
+  dotProduct,
 } from 'react-native-executorch';
 
 type ImageEmbeddingModel = ImageEmbeddingsProps['model'];
@@ -35,7 +36,6 @@ const IMAGE_MODELS: { label: string; value: ImageEmbeddingModel }[] = [
 ];
 import { launchImageLibrary } from 'react-native-image-picker';
 import { useIsFocused } from 'expo-router';
-import { dotProduct } from '../../utils/math';
 import { ModelPicker } from '../../components/ModelPicker';
 
 const DEFAULT_LABELS = [
diff --git a/apps/text-embeddings/app/text-embeddings/index.tsx b/apps/text-embeddings/app/text-embeddings/index.tsx
index 2c62a22922..fb4711e837 100644
--- a/apps/text-embeddings/app/text-embeddings/index.tsx
+++ b/apps/text-embeddings/app/text-embeddings/index.tsx
@@ -16,9 +16,10 @@ import {
   useTextEmbeddings,
   TextEmbeddingsProps,
   EmbeddingResult,
+  dotProduct,
+  maxSim,
 } from 'react-native-executorch';
 import { useIsFocused } from 'expo-router';
-import { dotProduct, maxSim } from '../../utils/math';
 import ErrorBanner from '../../components/ErrorBanner';
 import { SafeAreaView } from 'react-native-safe-area-context';
 
@@ -50,12 +51,8 @@ const MODELS: { label: string; value: TextEmbeddingModel }[] = [
     value: textEmbedding.paraphrase_multilingual_minilm_l12_v2(),
   },
   {
-    label: 'LFM2.5 Embedding XNNPACK',
-    value: textEmbedding.lfm2_5_embedding_350m({ backend: 'xnnpack' }),
-  },
-  {
-    label: 'LFM2.5 Embedding MLX',
-    value: textEmbedding.lfm2_5_embedding_350m({ backend: 'mlx' }),
+    label: 'LFM2.5 Embedding',
+    value: textEmbedding.lfm2_5_embedding_350m(),
   },
   {
     label: 'LFM2.5 ColBERT (late-interaction)',
diff --git a/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md b/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md
index 464e64620e..439de17560 100644
--- a/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md
+++ b/docs/docs/03-hooks/01-natural-language-processing/useTextEmbeddings.md
@@ -71,6 +71,8 @@ To run the model, you can use the [`forward`](../../06-api-reference/interfaces/
 - **Pooled models** (the default, e.g. MiniLM, MPNet, LFM2.5-Embedding) resolve to a single `Float32Array` — one normalized vector for the whole input.
 - **Multi-vector models** (`multiVector: true`, e.g. LFM2.5-ColBERT) resolve to an [`EmbeddingResult`](../../06-api-reference/interfaces/EmbeddingResult.md) with the per-token vectors (`vectors`, `numTokens`, `embeddingDim`, `tokenIds`).
 
+For background on why a dense bi-encoder pools to one vector while a late-interaction model keeps per-token vectors, see Liquid AI's [LFM2.5 Retrievers blog post](https://www.liquid.ai/blog/lfm2-5-retrievers).
+
 ### Asymmetric prompts (`role`)
 
 Some retrieval models are trained to embed queries and documents with different prefixes (e.g. LFM2.5 uses `query: `/`document: `, ColBERT uses `[Q] `/`[D] `). For these models the model config carries the prompts and `forward` requires a `role`:
@@ -85,11 +87,9 @@ The matching prompt is prepended automatically; for models without prompts the `
 ## Example
 
 ```typescript
-import { models, useTextEmbeddings } from 'react-native-executorch';
-const dotProduct = (a: number[], b: number[]) =>
-  a.reduce((sum, val, i) => sum + val * b[i], 0);
+import { models, useTextEmbeddings, dotProduct } from 'react-native-executorch';
 
-const cosineSimilarity = (a: number[], b: number[]) => {
+const cosineSimilarity = (a: Float32Array, b: Float32Array) => {
   const dot = dotProduct(a, b);
   const normA = Math.sqrt(dotProduct(a, a));
   const normB = Math.sqrt(dotProduct(b, b));
@@ -132,8 +132,8 @@ function App() {
 | [distiluse-base-multilingual-cased-v2](https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2)   | 50+ languages |    126     |         512          | Multilingual DistilBERT with a 768→512 projection head. Recommended when broader language coverage matters more than the exact English quality of MiniLM/MPNet.                                                                                                                                                                                                                                                                  |
 | [paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) | 50+ languages |    126     |         384          | Multilingual MiniLM-L12 distilled from paraphrase-multilingual-mpnet-base-v2. Compact (≈118 M params) sentence encoder for cross-lingual semantic similarity and retrieval across 50+ languages.                                                                                                                                                                                                                                 |
 | [clip-vit-base-patch32-text](https://huggingface.co/openai/clip-vit-base-patch32)                                           |    English    |     74     |         512          | CLIP (Contrastive Language-Image Pre-Training) is a neural network trained on a variety of (image, text) pairs. CLIP allows to embed images and text into the same vector space. This allows to find similar images as well as to implement image search. This is the text encoder part of the CLIP model. To embed images checkout [clip-vit-base-patch32-image](../02-computer-vision/useImageEmbeddings.md#supported-models). |
-| [LFM2.5-Embedding-350M](https://huggingface.co/LiquidAI/LFM2.5-Embedding-350M)                                              |  Multilingual |    512     |         1024         | Dense bi-encoder from Liquid AI with CLS pooling. Trained with asymmetric `query: `/`document: ` prompts, so `forward` requires a `role`. On iOS it runs on the GPU via the MLX backend (physical device only); Android uses XNNPACK.                                                                                                                                                                                            |
-| [LFM2.5-ColBERT-350M](https://huggingface.co/LiquidAI/LFM2.5-ColBERT-350M)                                                  |  Multilingual |    512     |     128 (per token)  | Late-interaction (multi-vector) retriever from Liquid AI: a `Linear(1024→128)` head emits one normalized vector per token. `forward` returns an `EmbeddingResult`; score query/document pairs with MaxSim (see below). Uses `[Q] `/`[D] ` role prompts.                                                                                                                                                                          |
+| [LFM2.5-Embedding-350M](https://huggingface.co/LiquidAI/LFM2.5-Embedding-350M)                                              | Multilingual  |    512     |         1024         | Dense bi-encoder from Liquid AI with CLS pooling. Trained with asymmetric `query: `/`document: ` prompts, so `forward` requires a `role`. On iOS it runs on the GPU via the MLX backend (physical device only); Android uses XNNPACK.                                                                                                                                                                                            |
+| [LFM2.5-ColBERT-350M](https://huggingface.co/LiquidAI/LFM2.5-ColBERT-350M)                                                  | Multilingual  |    512     |   128 (per token)    | Late-interaction (multi-vector) retriever from Liquid AI: a `Linear(1024→128)` head emits one normalized vector per token. `forward` returns an `EmbeddingResult`; score query/document pairs with MaxSim (see below). Uses `[Q] `/`[D] ` role prompts.                                                                                                                                                                          |
 
 **`Max Tokens`** - The maximum number of tokens that can be processed by the model. If the input text exceeds this limit, it will be truncated.
 
@@ -145,41 +145,16 @@ For the supported models, the returned embedding vector is normalized, meaning t
 
 ## Late interaction (multi-vector models)
 
-Multi-vector models such as LFM2.5-ColBERT do not pool the sequence into a single vector. Instead, `forward` returns an [`EmbeddingResult`](../../06-api-reference/interfaces/EmbeddingResult.md) holding one normalized vector per token. You score a query against a document with **MaxSim**: for every query-token vector, take its highest dot product against the document-token vectors, then sum those maxima.
+Multi-vector models such as LFM2.5-ColBERT do not pool the sequence into a single vector. Instead, `forward` returns an [`EmbeddingResult`](../../06-api-reference/interfaces/EmbeddingResult.md) holding one normalized vector per token. You score a query against a document with **MaxSim**: for every query-token vector, take its highest dot product against the document-token vectors, then sum those maxima. The model also ships a `skipListIds` array — the punctuation token ids excluded from scoring.
 
-The library is a pure embedder — it gives you the per-token vectors and the model's punctuation `skipListIds`, but scoring is your concern (so it can run wherever you store the vectors). A reference `maxSim` implementation:
+The library ships a `maxSim` helper (and a `dotProduct` helper for pooled models), so you can score directly without reimplementing it:
 
 ```typescript
-import { models, useTextEmbeddings, EmbeddingResult } from 'react-native-executorch';
+import { models, useTextEmbeddings, maxSim } from 'react-native-executorch';
 
 const colbert = models.text_embedding.lfm2_5_colbert_350m();
 const skipListIds = colbert.skipListIds ?? [];
 
-const maxSim = (
-  query: EmbeddingResult,
-  doc: EmbeddingResult,
-  skip: number[] = []
-) => {
-  const dim = query.embeddingDim;
-  const skipped = new Set(skip);
-  let score = 0;
-  for (let qi = 0; qi < query.numTokens; qi++) {
-    const qOff = qi * dim;
-    let best = -Infinity;
-    for (let di = 0; di < doc.numTokens; di++) {
-      if (skipped.has(doc.tokenIds[di])) continue;
-      const dOff = di * dim;
-      let dot = 0;
-      for (let k = 0; k < dim; k++) {
-        dot += query.vectors[qOff + k] * doc.vectors[dOff + k];
-      }
-      if (dot > best) best = dot;
-    }
-    if (best !== -Infinity) score += best;
-  }
-  return score;
-};
-
 function App() {
   const model = useTextEmbeddings({ model: colbert });
 
diff --git a/docs/versioned_docs/version-0.9.x/03-hooks/01-natural-language-processing/useTextEmbeddings.md b/docs/versioned_docs/version-0.9.x/03-hooks/01-natural-language-processing/useTextEmbeddings.md
index c235a89ab4..304969034b 100644
--- a/docs/versioned_docs/version-0.9.x/03-hooks/01-natural-language-processing/useTextEmbeddings.md
+++ b/docs/versioned_docs/version-0.9.x/03-hooks/01-natural-language-processing/useTextEmbeddings.md
@@ -71,6 +71,8 @@ To run the model, you can use the [`forward`](../../06-api-reference/interfaces/
 - **Pooled models** (the default, e.g. MiniLM, MPNet, LFM2.5-Embedding) resolve to a single `Float32Array` — one normalized vector for the whole input.
 - **Multi-vector models** (`multiVector: true`, e.g. LFM2.5-ColBERT) resolve to an [`EmbeddingResult`](../../06-api-reference/interfaces/EmbeddingResult.md) with the per-token vectors (`vectors`, `numTokens`, `embeddingDim`, `tokenIds`).
 
+For background on why a dense bi-encoder pools to one vector while a late-interaction model keeps per-token vectors, see Liquid AI's [LFM2.5 Retrievers blog post](https://www.liquid.ai/blog/lfm2-5-retrievers).
+
 ### Asymmetric prompts (`role`)
 
 Some retrieval models are trained to embed queries and documents with different prefixes (e.g. LFM2.5 uses `query: `/`document: `, ColBERT uses `[Q] `/`[D] `). For these models the model config carries the prompts and `forward` requires a `role`:
@@ -85,11 +87,9 @@ The matching prompt is prepended automatically; for models without prompts the `
 ## Example
 
 ```typescript
-import { models, useTextEmbeddings } from 'react-native-executorch';
-const dotProduct = (a: number[], b: number[]) =>
-  a.reduce((sum, val, i) => sum + val * b[i], 0);
+import { models, useTextEmbeddings, dotProduct } from 'react-native-executorch';
 
-const cosineSimilarity = (a: number[], b: number[]) => {
+const cosineSimilarity = (a: Float32Array, b: Float32Array) => {
   const dot = dotProduct(a, b);
   const normA = Math.sqrt(dotProduct(a, a));
   const normB = Math.sqrt(dotProduct(b, b));
@@ -132,8 +132,8 @@ function App() {
 | [distiluse-base-multilingual-cased-v2](https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2)   | 50+ languages |    126     |         512          | Multilingual DistilBERT with a 768→512 projection head. Recommended when broader language coverage matters more than the exact English quality of MiniLM/MPNet.                                                                                                                                                                                                                                                                  |
 | [paraphrase-multilingual-MiniLM-L12-v2](https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2) | 50+ languages |    126     |         384          | Multilingual MiniLM-L12 distilled from paraphrase-multilingual-mpnet-base-v2. Compact (≈118 M params) sentence encoder for cross-lingual semantic similarity and retrieval across 50+ languages.                                                                                                                                                                                                                                 |
 | [clip-vit-base-patch32-text](https://huggingface.co/openai/clip-vit-base-patch32)                                           |    English    |     74     |         512          | CLIP (Contrastive Language-Image Pre-Training) is a neural network trained on a variety of (image, text) pairs. CLIP allows to embed images and text into the same vector space. This allows to find similar images as well as to implement image search. This is the text encoder part of the CLIP model. To embed images checkout [clip-vit-base-patch32-image](../02-computer-vision/useImageEmbeddings.md#supported-models). |
-| [LFM2.5-Embedding-350M](https://huggingface.co/LiquidAI/LFM2.5-Embedding-350M)                                              |  Multilingual |    512     |         1024         | Dense bi-encoder from Liquid AI with CLS pooling. Trained with asymmetric `query: `/`document: ` prompts, so `forward` requires a `role`. On iOS it runs on the GPU via the MLX backend (physical device only); Android uses XNNPACK.                                                                                                                                                                                            |
-| [LFM2.5-ColBERT-350M](https://huggingface.co/LiquidAI/LFM2.5-ColBERT-350M)                                                  |  Multilingual |    512     |     128 (per token)  | Late-interaction (multi-vector) retriever from Liquid AI: a `Linear(1024→128)` head emits one normalized vector per token. `forward` returns an `EmbeddingResult`; score query/document pairs with MaxSim (see below). Uses `[Q] `/`[D] ` role prompts.                                                                                                                                                                          |
+| [LFM2.5-Embedding-350M](https://huggingface.co/LiquidAI/LFM2.5-Embedding-350M)                                              | Multilingual  |    512     |         1024         | Dense bi-encoder from Liquid AI with CLS pooling. Trained with asymmetric `query: `/`document: ` prompts, so `forward` requires a `role`. On iOS it runs on the GPU via the MLX backend (physical device only); Android uses XNNPACK.                                                                                                                                                                                            |
+| [LFM2.5-ColBERT-350M](https://huggingface.co/LiquidAI/LFM2.5-ColBERT-350M)                                                  | Multilingual  |    512     |   128 (per token)    | Late-interaction (multi-vector) retriever from Liquid AI: a `Linear(1024→128)` head emits one normalized vector per token. `forward` returns an `EmbeddingResult`; score query/document pairs with MaxSim (see below). Uses `[Q] `/`[D] ` role prompts.                                                                                                                                                                          |
 
 **`Max Tokens`** - The maximum number of tokens that can be processed by the model. If the input text exceeds this limit, it will be truncated.
 
@@ -145,41 +145,16 @@ For the supported models, the returned embedding vector is normalized, meaning t
 
 ## Late interaction (multi-vector models)
 
-Multi-vector models such as LFM2.5-ColBERT do not pool the sequence into a single vector. Instead, `forward` returns an [`EmbeddingResult`](../../06-api-reference/interfaces/EmbeddingResult.md) holding one normalized vector per token. You score a query against a document with **MaxSim**: for every query-token vector, take its highest dot product against the document-token vectors, then sum those maxima.
+Multi-vector models such as LFM2.5-ColBERT do not pool the sequence into a single vector. Instead, `forward` returns an [`EmbeddingResult`](../../06-api-reference/interfaces/EmbeddingResult.md) holding one normalized vector per token. You score a query against a document with **MaxSim**: for every query-token vector, take its highest dot product against the document-token vectors, then sum those maxima. The model also ships a `skipListIds` array — the punctuation token ids excluded from scoring.
 
-The library is a pure embedder — it gives you the per-token vectors and the model's punctuation `skipListIds`, but scoring is your concern (so it can run wherever you store the vectors). A reference `maxSim` implementation:
+The library ships a `maxSim` helper (and a `dotProduct` helper for pooled models), so you can score directly without reimplementing it:
 
 ```typescript
-import { models, useTextEmbeddings, EmbeddingResult } from 'react-native-executorch';
+import { models, useTextEmbeddings, maxSim } from 'react-native-executorch';
 
 const colbert = models.text_embedding.lfm2_5_colbert_350m();
 const skipListIds = colbert.skipListIds ?? [];
 
-const maxSim = (
-  query: EmbeddingResult,
-  doc: EmbeddingResult,
-  skip: number[] = []
-) => {
-  const dim = query.embeddingDim;
-  const skipped = new Set(skip);
-  let score = 0;
-  for (let qi = 0; qi < query.numTokens; qi++) {
-    const qOff = qi * dim;
-    let best = -Infinity;
-    for (let di = 0; di < doc.numTokens; di++) {
-      if (skipped.has(doc.tokenIds[di])) continue;
-      const dOff = di * dim;
-      let dot = 0;
-      for (let k = 0; k < dim; k++) {
-        dot += query.vectors[qOff + k] * doc.vectors[dOff + k];
-      }
-      if (dot > best) best = dot;
-    }
-    if (best !== -Infinity) score += best;
-  }
-  return score;
-};
-
 function App() {
   const model = useTextEmbeddings({ model: colbert });
 
diff --git a/packages/react-native-executorch/src/constants/modelRegistry.ts b/packages/react-native-executorch/src/constants/modelRegistry.ts
index 3e502ad9cf..5cbc3d981b 100644
--- a/packages/react-native-executorch/src/constants/modelRegistry.ts
+++ b/packages/react-native-executorch/src/constants/modelRegistry.ts
@@ -3,6 +3,10 @@ import { isEmulatorSync } from 'react-native-device-info';
 import * as M from './modelUrls';
 import * as OCR from './ocr/models';
 import { symbols } from './ocr/symbols';
+import {
+  LFM_COLBERT_PROMPTS,
+  LFM_COLBERT_SKIP_LIST,
+} from './textEmbeddings/colbert';
 import {
   KOKORO_AMERICAN_ENGLISH_FEMALE_HEART,
   KOKORO_AMERICAN_ENGLISH_FEMALE_RIVER,
@@ -284,14 +288,6 @@ const LFM2_5_EMBEDDING_350M_VARIANTS = {
   },
 };
 
-const LFM_COLBERT_SKIP_LIST = [
-  510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524,
-  535, 536, 537, 538, 539, 540, 541, 568, 569, 570, 571, 572, 573, 600, 601,
-  602, 603,
-];
-
-const LFM_COLBERT_PROMPTS = { query: '[Q] ', document: '[D] ' };
-
 const LFM2_5_COLBERT_350M_CONFIG = {
   modelName: 'lfm2-5-colbert-350m' as const,
   tokenizerSource: M.LFM2_5_COLBERT_350M_TOKENIZER,
diff --git a/packages/react-native-executorch/src/constants/textEmbeddings/colbert.ts b/packages/react-native-executorch/src/constants/textEmbeddings/colbert.ts
new file mode 100644
index 0000000000..9f60f5d87b
--- /dev/null
+++ b/packages/react-native-executorch/src/constants/textEmbeddings/colbert.ts
@@ -0,0 +1,7 @@
+export const LFM_COLBERT_SKIP_LIST = [
+  510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524,
+  535, 536, 537, 538, 539, 540, 541, 568, 569, 570, 571, 572, 573, 600, 601,
+  602, 603,
+];
+
+export const LFM_COLBERT_PROMPTS = { query: '[Q] ', document: '[D] ' };
diff --git a/packages/react-native-executorch/src/index.ts b/packages/react-native-executorch/src/index.ts
index 1f190d41f5..ba7ac384f9 100644
--- a/packages/react-native-executorch/src/index.ts
+++ b/packages/react-native-executorch/src/index.ts
@@ -215,6 +215,7 @@ export * from './utils/llm';
 export * from './common/Logger';
 export * from './utils/llms/context_strategy';
 export * from './utils/segmentAnythingPrompts';
+export * from './utils/textEmbeddings';
 
 // types
 export * from './types/objectDetection';
diff --git a/apps/text-embeddings/utils/math.ts b/packages/react-native-executorch/src/utils/textEmbeddings.ts
similarity index 82%
rename from apps/text-embeddings/utils/math.ts
rename to packages/react-native-executorch/src/utils/textEmbeddings.ts
index 44248e1658..1dd241661c 100644
--- a/apps/text-embeddings/utils/math.ts
+++ b/packages/react-native-executorch/src/utils/textEmbeddings.ts
@@ -1,8 +1,6 @@
-import {
-  RnExecutorchError,
-  RnExecutorchErrorCode,
-  EmbeddingResult,
-} from 'react-native-executorch';
+import { EmbeddingResult } from '../types/textEmbeddings';
+import { RnExecutorchError } from '../errors/errorUtils';
+import { RnExecutorchErrorCode } from '../errors/ErrorCodes';
 
 export const dotProduct = (a: Float32Array, b: Float32Array) => {
   if (a.length !== b.length) {
@@ -14,7 +12,7 @@ export const dotProduct = (a: Float32Array, b: Float32Array) => {
 
   let sum = 0;
   for (let i = 0; i < a.length; i++) {
-    sum += a[i] * b[i];
+    sum += (a[i] ?? 0) * (b[i] ?? 0);
   }
   return sum;
 };