bartolli · zm2231 · Mar 21, 2026 · Mar 22, 2026 · Mar 22, 2026 · Mar 23, 2026
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -101,6 +101,7 @@ tree-sitter-lua = "0.5.0"
 tree-sitter-clojure-orchard = "0.2.5"
 glob = "0.3.3"
 async-trait = "0.1.89"
+reqwest = { version = "0.12", default-features = false, features = ["json", "rustls-tls"] }
 sysinfo = "0.38.4"
 indexmap = { version = "2.13.0", features = ["serde"] }
 

diff --git a/src/cli/commands/index_parallel.rs b/src/cli/commands/index_parallel.rs
@@ -7,9 +7,10 @@ use std::path::{Path, PathBuf};
 use std::sync::{Arc, Mutex};
 
 use crate::config::Settings;
+use crate::indexing::facade::{build_embedding_backend, resolve_remote_model_name};
 use crate::indexing::pipeline::{IncrementalStats, Phase2Stats, Pipeline, PipelineConfig};
 use crate::io::status_line::{ProgressBar, ProgressBarOptions, ProgressBarStyle};
-use crate::semantic::SimpleSemanticSearch;
+use crate::semantic::{EmbeddingBackend, SemanticSearchError, SimpleSemanticSearch};
 use crate::storage::DocumentIndex;
 
 /// Arguments for the index-parallel command.
@@ -71,8 +72,10 @@ pub fn run(args: IndexParallelArgs, settings: &Settings) {
         }
     };
 
-    // Create semantic search (for embeddings)
-    let semantic = create_semantic_search(settings, &semantic_path);
+    // Create semantic search (for storing/loading/searching embeddings)
+    // and a separate embedding backend for generating new embeddings.
+    let (semantic, embedding_backend) =
+        create_semantic_search(settings, &semantic_path);
 
     // Create pipeline
     let settings_arc = Arc::new(settings.clone());
@@ -102,7 +105,7 @@ pub fn run(args: IndexParallelArgs, settings: &Settings) {
 
         tracing::info!(target: "pipeline", "Indexing directory ({mode}): {}", path.display());
 
-        match pipeline.index_incremental(path, Arc::clone(&index), semantic.clone(), None, force) {
+        match pipeline.index_incremental(path, Arc::clone(&index), semantic.clone(), embedding_backend.clone(), force) {
             Ok(stats) => {
                 display_incremental_stats(&stats, progress);
             }
@@ -119,42 +122,103 @@ pub fn run(args: IndexParallelArgs, settings: &Settings) {
     }
 }
 
-/// Create semantic search instance if enabled in settings.
+/// Create semantic search instance and embedding backend if enabled in settings.
+///
+/// Returns `(semantic, backend)` where:
+/// - `semantic` stores/loads/searches the embedding vectors
+/// - `backend` generates new embeddings (local fastembed pool or remote HTTP)
 fn create_semantic_search(
     settings: &Settings,
     semantic_path: &Path,
-) -> Option<Arc<Mutex<SimpleSemanticSearch>>> {
+) -> (Option<Arc<Mutex<SimpleSemanticSearch>>>, Option<Arc<EmbeddingBackend>>) {
     if !settings.semantic_search.enabled {
         tracing::debug!(target: "pipeline", "Semantic search disabled");
-        return None;
+        return (None, None);
     }
 
+    let is_remote = std::env::var("CODANNA_EMBED_URL").is_ok()
+        || settings.semantic_search.remote_url.is_some();
+
+    // Build embedding backend (local pool or remote HTTP)
+    let backend = match build_embedding_backend(&settings.semantic_search) {
+        Ok(b) => Arc::new(b),
+        Err(e) => {
+            tracing::warn!(target: "pipeline", "Failed to initialize embedding backend: {e}");
+            return (None, None);
+        }
+    };
+
     let model = &settings.semantic_search.model;
 
-    // Try to load existing embeddings first
-    if semantic_path.exists() {
-        match SimpleSemanticSearch::load(semantic_path) {
-            Ok(semantic) => {
+    // Load existing embeddings or create fresh instance.
+    // After loading, verify dimensions match the backend so we don't silently
+    // drop all new embeddings during an incremental run after a backend switch.
+    let semantic = if semantic_path.exists() {
+        // In remote mode load without initialising a local fastembed model
+        let load_result = if is_remote {
+            SimpleSemanticSearch::load_remote(semantic_path)
+        } else {
+            SimpleSemanticSearch::load(semantic_path)
+        };
+        match load_result {
+            Ok(s) => {
+                let index_dim = s.dimensions();
+                let backend_dim = backend.dimensions();
+                if index_dim != backend_dim {
+                    tracing::error!(
+                        target: "pipeline",
+                        "Semantic index dimension mismatch: index has {index_dim}d but backend produces {backend_dim}d. \
+                         Re-index with: codanna index-parallel <path> --force"
+                    );
+                    std::process::exit(1);
+                }
+                let index_is_remote = s.is_remote_index();
+                if index_is_remote != is_remote {
+                    tracing::warn!(
+                        target: "pipeline",
+                        "Backend kind changed (index={}, current={}). \
+                         Embedding spaces may differ — similarity scores could be inaccurate. \
+                         Re-index with --force to fix.",
+                        if index_is_remote { "remote" } else { "local" },
+                        if is_remote { "remote" } else { "local" },
+                    );
+                }
                 tracing::debug!(target: "pipeline", "Loaded existing embeddings from {}", semantic_path.display());
-                return Some(Arc::new(Mutex::new(semantic)));
+                Some(Arc::new(Mutex::new(s)))
+            }
+            Err(SemanticSearchError::DimensionMismatch { suggestion, .. }) => {
+                // Incompatible existing index — cannot continue silently as stored
+                // vectors are structurally wrong for this backend.
+                tracing::error!(target: "pipeline", "Semantic index incompatible: {suggestion}");
+                std::process::exit(1);
             }
             Err(e) => {
-                tracing::warn!(target: "pipeline", "Failed to load embeddings: {e}");
+                tracing::warn!(target: "pipeline", "Failed to load embeddings, continuing without semantic search: {e}");
+                None
             }
         }
-    }
-
-    // Create new semantic search instance
-    match SimpleSemanticSearch::from_model_name(model) {
-        Ok(semantic) => {
-            tracing::debug!(target: "pipeline", "Created new semantic search with model: {model}");
-            Some(Arc::new(Mutex::new(semantic)))
-        }
-        Err(e) => {
-            tracing::warn!(target: "pipeline", "Failed to initialize semantic search: {e}");
-            None
+    } else {
+        let new_result = if is_remote {
+            Ok(SimpleSemanticSearch::new_empty(
+                backend.dimensions(),
+                &resolve_remote_model_name(&settings.semantic_search),
+            ))
+        } else {
+            SimpleSemanticSearch::from_model_name(model)
+        };
+        match new_result {
+            Ok(s) => {
+                tracing::debug!(target: "pipeline", "Created new semantic search with model: {model}");
+                Some(Arc::new(Mutex::new(s)))
+            }
+            Err(e) => {
+                tracing::warn!(target: "pipeline", "Failed to initialize semantic search: {e}");
+                None
+            }
         }
-    }
+    };
+
+    (semantic, Some(backend))
 }
 
 fn display_incremental_stats(stats: &IncrementalStats, with_progress: bool) {

diff --git a/src/config.rs b/src/config.rs
@@ -205,6 +205,22 @@ pub struct SemanticSearchConfig {
     /// Number of parallel embedding model instances
     #[serde(default = "default_embedding_threads")]
     pub embedding_threads: usize,
+
+    /// Remote embedding server URL (OpenAI-compatible, e.g. http://host:8100).
+    /// When set, local fastembed is bypassed and this endpoint is used instead.
+    /// Overridable via the CODANNA_EMBED_URL env var.
+    #[serde(default)]
+    pub remote_url: Option<String>,
+
+    /// Model name to send to the remote embedding server.
+    /// Overridable via the CODANNA_EMBED_MODEL env var.
+    #[serde(default)]
+    pub remote_model: Option<String>,
+
+    /// Output dimension of the remote embedding model.
+    /// Required when remote_url is set. Overridable via the CODANNA_EMBED_DIM env var.
+    #[serde(default)]
+    pub remote_dim: Option<usize>,
 }
 
 #[derive(Debug, Deserialize, Serialize, Clone)]
@@ -424,6 +440,9 @@ impl Default for SemanticSearchConfig {
             model: default_embedding_model(),
             threshold: default_similarity_threshold(),
             embedding_threads: default_embedding_threads(),
+            remote_url: None,
+            remote_model: None,
+            remote_dim: None,
         }
     }
 }

diff --git a/src/documents/store.rs b/src/documents/store.rs
@@ -226,11 +226,14 @@ fn highlight_keywords(text: &str, query: &str) -> String {
     let mut merged: Vec<(usize, usize)> = Vec::new();
     for (start, end) in matches {
         if let Some(last) = merged.last_mut() {
-            // Check if adjacent: only spaces/tabs between (no newlines)
-            let between = &text[last.1..start];
-            let is_adjacent = start <= last.1 || between.chars().all(|c| c == ' ' || c == '\t');
+            // Check overlap first — slicing text[last.1..start] panics when start < last.1
+            let is_adjacent = if start <= last.1 {
+                true // overlapping, merge unconditionally
+            } else {
+                // Adjacent: only spaces/tabs between ranges (no newlines)
+                text[last.1..start].chars().all(|c| c == ' ' || c == '\t')
+            };
             if is_adjacent {
-                // Merge: extend the previous range
                 last.1 = last.1.max(end);
                 continue;
             }