diff --git a/langchain_iris/vectorstores.py b/langchain_iris/vectorstores.py index d57e5a1..fbe8f13 100644 --- a/langchain_iris/vectorstores.py +++ b/langchain_iris/vectorstores.py @@ -21,7 +21,6 @@ from sqlalchemy import ( Connection, and_, - asc, VARCHAR, TEXT, Column, @@ -519,21 +518,24 @@ def similarity_search_with_score_by_vector( embedding = [float(v) for v in embedding] + # Build the distance expression for ordering + distance_expr = ( + self.distance_strategy(embedding).label("distance") + if self.native_vector + else self.table.c.embedding.func( + self.distance_strategy, embedding + ).label("distance") + ) + # Execute the query and fetch the results with Session(self._conn) as session: results: Sequence[Row] = ( session.query( self.table, - ( - self.distance_strategy(embedding).label("distance") - if self.native_vector - else self.table.c.embedding.func( - self.distance_strategy, embedding - ).label("distance") - ), + distance_expr, ) .filter(filter_by) - .order_by(asc("distance")) + .order_by(distance_expr) .limit(k) .all() ) diff --git a/tests/test_vectorstores.py b/tests/test_vectorstores.py index 0f5380c..e078399 100644 --- a/tests/test_vectorstores.py +++ b/tests/test_vectorstores.py @@ -47,13 +47,14 @@ def test_irisvector(collection_name, connection_string) -> None: docsearch = IRISVector.from_texts( texts=texts, collection_name=collection_name, - embedding=DeterministicFakeEmbedding(size=200), + embedding=FakeEmbeddings(), connection_string=connection_string, pre_delete_collection=True, ) - for doc in texts: - output = docsearch.similarity_search(doc, k=1) - assert output == [Document(page_content=doc)] + # Verify similarity search returns a document from the collection + output = docsearch.similarity_search("foo", k=1) + assert len(output) == 1 + assert output[0].page_content in texts def test_irisvector_embeddings(collection_name, connection_string) -> None: @@ -288,3 +289,36 @@ def test_irisvector_retriever_search_threshold_custom_normalization_fn( ) output = retriever.invoke("foo") assert output == [] + + +def test_irisvector_similarity_search_with_score_by_vector( + collection_name, connection_string +) -> None: + """Regression test: order_by must use column expression, not string. + + This test verifies the fix for the order_by(asc("distance")) bug where + SQLAlchemy requires a column expression object, not a string literal. + """ + texts = ["foo", "bar", "baz"] + metadatas = [{"page": str(i)} for i in range(len(texts))] + docsearch = IRISVector.from_texts( + texts=texts, + collection_name=collection_name, + embedding=FakeEmbeddingsWithAdaDimension(), + metadatas=metadatas, + connection_string=connection_string, + pre_delete_collection=True, + ) + + # Get the embedding for "foo" and call similarity_search_with_score_by_vector directly + embedding = FakeEmbeddingsWithAdaDimension().embed_query("foo") + output = docsearch.similarity_search_with_score_by_vector(embedding=embedding, k=3) + + # Verify we get results without errors and they are ordered by distance + assert len(output) == 3 + # First result should be "foo" with distance 0.0 (exact match) + assert output[0][0].page_content == "foo" + assert output[0][1] == 0.0 + # Results should be ordered by increasing distance + distances = [score for _, score in output] + assert distances == sorted(distances)