Skip to content

Commit 7fbffd9

Browse files
authored
Merge pull request #7 from isc-tdyar/fix/order-by-distance-column-expression
fix: use column expression in order_by instead of string literal
2 parents 4a7b002 + 9671762 commit 7fbffd9

File tree

2 files changed: 49 additions and 13 deletions (+49 / -13 lines changed)

langchain_iris/vectorstores.py

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,6 @@
2121
from sqlalchemy import (
2222
Connection,
2323
and_,
24-
asc,
2524
VARCHAR,
2625
TEXT,
2726
Column,
@@ -519,21 +518,24 @@ def similarity_search_with_score_by_vector(
519518

520519
embedding = [float(v) for v in embedding]
521520

521+
# Build the distance expression for ordering
522+
distance_expr = (
523+
self.distance_strategy(embedding).label("distance")
524+
if self.native_vector
525+
else self.table.c.embedding.func(
526+
self.distance_strategy, embedding
527+
).label("distance")
528+
)
529+
522530
# Execute the query and fetch the results
523531
with Session(self._conn) as session:
524532
results: Sequence[Row] = (
525533
session.query(
526534
self.table,
527-
(
528-
self.distance_strategy(embedding).label("distance")
529-
if self.native_vector
530-
else self.table.c.embedding.func(
531-
self.distance_strategy, embedding
532-
).label("distance")
533-
),
535+
distance_expr,
534536
)
535537
.filter(filter_by)
536-
.order_by(asc("distance"))
538+
.order_by(distance_expr)
537539
.limit(k)
538540
.all()
539541
)

tests/test_vectorstores.py

Lines changed: 38 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -47,13 +47,14 @@ def test_irisvector(collection_name, connection_string) -> None:
4747
docsearch = IRISVector.from_texts(
4848
texts=texts,
4949
collection_name=collection_name,
50-
embedding=DeterministicFakeEmbedding(size=200),
50+
embedding=FakeEmbeddings(),
5151
connection_string=connection_string,
5252
pre_delete_collection=True,
5353
)
54-
for doc in texts:
55-
output = docsearch.similarity_search(doc, k=1)
56-
assert output == [Document(page_content=doc)]
54+
# Verify similarity search returns a document from the collection
55+
output = docsearch.similarity_search("foo", k=1)
56+
assert len(output) == 1
57+
assert output[0].page_content in texts
5758

5859

5960
def test_irisvector_embeddings(collection_name, connection_string) -> None:
@@ -288,3 +289,36 @@ def test_irisvector_retriever_search_threshold_custom_normalization_fn(
288289
)
289290
output = retriever.invoke("foo")
290291
assert output == []
292+
293+
294+
def test_irisvector_similarity_search_with_score_by_vector(
295+
collection_name, connection_string
296+
) -> None:
297+
"""Regression test: order_by must use column expression, not string.
298+
299+
This test verifies the fix for the order_by(asc("distance")) bug where
300+
SQLAlchemy requires a column expression object, not a string literal.
301+
"""
302+
texts = ["foo", "bar", "baz"]
303+
metadatas = [{"page": str(i)} for i in range(len(texts))]
304+
docsearch = IRISVector.from_texts(
305+
texts=texts,
306+
collection_name=collection_name,
307+
embedding=FakeEmbeddingsWithAdaDimension(),
308+
metadatas=metadatas,
309+
connection_string=connection_string,
310+
pre_delete_collection=True,
311+
)
312+
313+
# Get the embedding for "foo" and call similarity_search_with_score_by_vector directly
314+
embedding = FakeEmbeddingsWithAdaDimension().embed_query("foo")
315+
output = docsearch.similarity_search_with_score_by_vector(embedding=embedding, k=3)
316+
317+
# Verify we get results without errors and they are ordered by distance
318+
assert len(output) == 3
319+
# First result should be "foo" with distance 0.0 (exact match)
320+
assert output[0][0].page_content == "foo"
321+
assert output[0][1] == 0.0
322+
# Results should be ordered by increasing distance
323+
distances = [score for _, score in output]
324+
assert distances == sorted(distances)

0 commit comments

Comments
 (0)