Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 11 additions & 9 deletions langchain_iris/vectorstores.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
from sqlalchemy import (
Connection,
and_,
asc,
VARCHAR,
TEXT,
Column,
Expand Down Expand Up @@ -519,21 +518,24 @@ def similarity_search_with_score_by_vector(

embedding = [float(v) for v in embedding]

# Build the distance expression for ordering
distance_expr = (
self.distance_strategy(embedding).label("distance")
if self.native_vector
else self.table.c.embedding.func(
self.distance_strategy, embedding
).label("distance")
)

# Execute the query and fetch the results
with Session(self._conn) as session:
results: Sequence[Row] = (
session.query(
self.table,
(
self.distance_strategy(embedding).label("distance")
if self.native_vector
else self.table.c.embedding.func(
self.distance_strategy, embedding
).label("distance")
),
distance_expr,
)
.filter(filter_by)
.order_by(asc("distance"))
.order_by(distance_expr)
.limit(k)
.all()
)
Expand Down
42 changes: 38 additions & 4 deletions tests/test_vectorstores.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,14 @@ def test_irisvector(collection_name, connection_string) -> None:
docsearch = IRISVector.from_texts(
texts=texts,
collection_name=collection_name,
embedding=DeterministicFakeEmbedding(size=200),
embedding=FakeEmbeddings(),
connection_string=connection_string,
pre_delete_collection=True,
)
for doc in texts:
output = docsearch.similarity_search(doc, k=1)
assert output == [Document(page_content=doc)]
# Verify similarity search returns a document from the collection
output = docsearch.similarity_search("foo", k=1)
assert len(output) == 1
assert output[0].page_content in texts


def test_irisvector_embeddings(collection_name, connection_string) -> None:
Expand Down Expand Up @@ -288,3 +289,36 @@ def test_irisvector_retriever_search_threshold_custom_normalization_fn(
)
output = retriever.invoke("foo")
assert output == []


def test_irisvector_similarity_search_with_score_by_vector(
    collection_name, connection_string
) -> None:
    """Regression test: order_by must use a column expression, not a string.

    Verifies the fix for the ``order_by(asc("distance"))`` bug, where
    SQLAlchemy requires a column expression object rather than a string
    literal. The store is queried directly through
    ``similarity_search_with_score_by_vector`` so that the ordering clause
    is exercised without going through the text-based search helpers.

    Args:
        collection_name: Collection name forwarded to ``IRISVector.from_texts``.
        connection_string: Connection string forwarded to
            ``IRISVector.from_texts``.
    """
    # Local import keeps this test self-contained regardless of the module's
    # top-level imports.
    import math

    texts = ["foo", "bar", "baz"]
    metadatas = [{"page": str(i)} for i in range(len(texts))]
    docsearch = IRISVector.from_texts(
        texts=texts,
        collection_name=collection_name,
        embedding=FakeEmbeddingsWithAdaDimension(),
        metadatas=metadatas,
        connection_string=connection_string,
        pre_delete_collection=True,
    )

    # Embed "foo" ourselves and hit the by-vector entry point directly.
    embedding = FakeEmbeddingsWithAdaDimension().embed_query("foo")
    output = docsearch.similarity_search_with_score_by_vector(
        embedding=embedding, k=3
    )

    # The query must succeed and return every stored document.
    assert len(output) == 3

    # The closest hit should be "foo" itself, at (numerically) zero distance.
    # A tolerance is used instead of ``== 0.0`` because the distance is a
    # floating-point value computed by the database.
    assert output[0][0].page_content == "foo"
    assert math.isclose(output[0][1], 0.0, abs_tol=1e-6)

    # Results should be ordered by increasing distance.
    distances = [score for _, score in output]
    assert distances == sorted(distances)