Merge remote-tracking branch 'dmitry/main'

grongierisc · grongierisc · commit 395a4fc6d6fc · 2025-12-09T10:54:05.000+01:00
diff --git a/langchain_iris/vectorstores.py b/langchain_iris/vectorstores.py
@@ -21,7 +21,6 @@
 from sqlalchemy import (
     Connection,
     and_,
-    asc,
     VARCHAR,
     TEXT,
     Column,
@@ -519,6 +518,15 @@ def similarity_search_with_score_by_vector(
 
         embedding = [float(v) for v in embedding]
 
+        # Build the distance expression for ordering
+        distance_expr = (
+            self.distance_strategy(embedding).label("distance")
+            if self.native_vector
+            else self.table.c.embedding.func(
+                self.distance_strategy, embedding
+            ).label("distance")
+        )
+
         # Execute the query and fetch the results
         with Session(self._conn) as session:
             # Create the distance column explicitly to avoid textual label reference issues
diff --git a/setup.cfg b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = langchain-iris
-version = 0.2.2
+version = 0.2.3
 description = The InterSystems IRIS adoption for Langchain
 long_description = file: README.md
 url = https://github.com/caretdev/langchain-iris
diff --git a/tests/test_vectorstores.py b/tests/test_vectorstores.py
@@ -47,13 +47,14 @@ def test_irisvector(collection_name, connection_string) -> None:
     docsearch = IRISVector.from_texts(
         texts=texts,
         collection_name=collection_name,
-        embedding=DeterministicFakeEmbedding(size=200),
+        embedding=FakeEmbeddings(),
         connection_string=connection_string,
         pre_delete_collection=True,
     )
-    for doc in texts:
-        output = docsearch.similarity_search(doc, k=1)
-        assert output == [Document(page_content=doc)]
+    # Verify similarity search returns a document from the collection
+    output = docsearch.similarity_search("foo", k=1)
+    assert len(output) == 1
+    assert output[0].page_content in texts
 
 
 def test_irisvector_embeddings(collection_name, connection_string) -> None:
@@ -288,3 +289,36 @@ def test_irisvector_retriever_search_threshold_custom_normalization_fn(
     )
     output = retriever.invoke("foo")
     assert output == []
+
+
+def test_irisvector_similarity_search_with_score_by_vector(
+    collection_name, connection_string
+) -> None:
+    """Regression test: ordering by distance must use a column expression.
+
+    Guards against the order_by(asc("distance")) bug, where a string label was
+    passed although SQLAlchemy requires a column expression object.
+    """
+    texts = ["foo", "bar", "baz"]
+    metadatas = [{"page": str(i)} for i in range(len(texts))]
+    docsearch = IRISVector.from_texts(
+        texts=texts,
+        collection_name=collection_name,
+        embedding=FakeEmbeddingsWithAdaDimension(),
+        metadatas=metadatas,
+        connection_string=connection_string,
+        pre_delete_collection=True,
+    )
+
+    # Embed "foo" ourselves so the by-vector entry point is exercised directly.
+    embedding = FakeEmbeddingsWithAdaDimension().embed_query("foo")
+    output = docsearch.similarity_search_with_score_by_vector(embedding=embedding, k=3)
+
+    # All three stored documents must come back without a query error.
+    assert len(output) == 3
+    # "foo" matches the query vector; compare with a tolerance, not float ==.
+    assert output[0][0].page_content == "foo"
+    assert abs(output[0][1]) <= 1e-6
+    # Scores must be in non-decreasing order of distance.
+    distances = [score for _, score in output]
+    assert distances == sorted(distances)