Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/ci-jina.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ jobs:

strategy:
fail-fast: false
max-parallel: 1
matrix:
os: [ubuntu-latest, macos-latest]
python-version: ["3.10", "3.11", "3.12", "3.13"]
Expand Down
2 changes: 1 addition & 1 deletion packages/embcli-jina/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "embcli-jina"
version = "0.1.3-dev"
version = "0.1.4-dev"
description = "jina plugin for embcli"
readme = "README.md"
authors = [{ name = "Tomoko Uchida", email = "tomoko.uchida.1111@gmail.com" }]
Expand Down
3 changes: 2 additions & 1 deletion packages/embcli-jina/src/embcli_jina/jina.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import httpx
from embcli_core.models import EmbeddingModel, ModelOption, ModelOptionType

TIMEOUT_SEC = 3 # Default timeout for embedding requests
COLBERT_TIMEOUT_SEC = 5 # Timeout for ColBERT model requests


Expand Down Expand Up @@ -81,7 +82,7 @@ def _embed_one_batch(self, input: list[str], **kwargs) -> Iterator[list[float] |
if "embedding_type" in kwargs:
data["embedding_type"] = kwargs["embedding_type"]

timeout = COLBERT_TIMEOUT_SEC if self.model_id == "jina-colbert-v2" else None
timeout = COLBERT_TIMEOUT_SEC if self.model_id == "jina-colbert-v2" else TIMEOUT_SEC
response = httpx.post(self.endpoint, headers=headers, json=data, timeout=timeout)
response.raise_for_status()
for item in response.json().get("data", []):
Expand Down
4 changes: 3 additions & 1 deletion packages/embcli-jina/src/embcli_jina/jina_multimodal.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import httpx
from embcli_core.models import Modality, ModelOption, ModelOptionType, MultimodalEmbeddingModel

TIMEOUT_SEC = 3 # Default timeout for embedding requests


def image_to_base64(image_path: str) -> str:
"""Encodes an image file to a base64 string."""
Expand Down Expand Up @@ -74,7 +76,7 @@ def _embed_one_batch_multimodal(
if "embedding_type" in kwargs:
data["embedding_type"] = kwargs["embedding_type"]

response = httpx.post(self.endpoint, headers=headers, json=data)
response = httpx.post(self.endpoint, headers=headers, json=data, timeout=TIMEOUT_SEC)
response.raise_for_status()
for item in response.json().get("data", []):
if "embedding" in item:
Expand Down
10 changes: 6 additions & 4 deletions packages/embcli-jina/tests/embcli_jina/test_cli_embed.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,26 +14,28 @@

@skip_if_no_api_key
def test_embed_command_text(plugin_manager, mocker):
mocker.patch("embcli_jina.jina.TIMEOUT_SEC", 30)
mocker.patch("embcli_core.cli._pm", plugin_manager)
runner = CliRunner()
result = runner.invoke(embed, ["--model", "jina-v4", "flying cat"])
result = runner.invoke(embed, ["--model", "jina-v3", "flying cat"])
assert result.exit_code == 0

embeddings = json.loads(result.output)
assert isinstance(embeddings, list)
assert len(embeddings) == 2048
assert len(embeddings) == 1024
assert all(isinstance(val, float) for val in embeddings)


@skip_if_no_api_key
def test_embed_command_image(plugin_manager, mocker):
mocker.patch("embcli_jina.jina_multimodal.TIMEOUT_SEC", 60)
mocker.patch("embcli_core.cli._pm", plugin_manager)
runner = CliRunner()
image_path = files("tests.embcli_jina").joinpath("flying_cat.jpeg")
result = runner.invoke(embed, ["--model", "jina-v4", "--image", str(image_path)])
result = runner.invoke(embed, ["--model", "jina-clip-v2", "--image", str(image_path)])
assert result.exit_code == 0

embeddings = json.loads(result.output)
assert isinstance(embeddings, list)
assert len(embeddings) == 2048
assert len(embeddings) == 1024
assert all(isinstance(val, float) for val in embeddings)
11 changes: 9 additions & 2 deletions packages/embcli-jina/tests/embcli_jina/test_jina.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ def test_factory_create_invalid_model():


@skip_if_no_api_key
def test_embed_one_batch_yields_embeddings(jina_models):
def test_embed_one_batch_yields_embeddings(jina_models, mocker):
mocker.patch("embcli_jina.jina.TIMEOUT_SEC", 30)
mocker.patch("embcli_jina.jina.COLBERT_TIMEOUT_SEC", 30)
for model in jina_models:
print(f"Testing model: {model.model_id}")
input_data = ["hello", "world"]
Expand All @@ -43,6 +45,7 @@ def test_embed_one_batch_yields_embeddings(jina_models):

@skip_if_no_api_key
def test_embed_batch_with_options(jina_models, mocker):
mocker.patch("embcli_jina.jina.TIMEOUT_SEC", 30)
mocker.patch("embcli_jina.jina.COLBERT_TIMEOUT_SEC", 30)
input_data = ["hello", "world"]
for model in jina_models:
Expand All @@ -68,7 +71,9 @@ def test_embed_batch_with_options(jina_models, mocker):


@skip_if_no_api_key
def test_embed_batch_embedding_types(jina_models):
def test_embed_batch_embedding_types(jina_models, mocker):
mocker.patch("embcli_jina.jina.TIMEOUT_SEC", 30)
mocker.patch("embcli_jina.jina.COLBERT_TIMEOUT_SEC", 30)
input_data = ["hello", "world"]
for model in jina_models:
# Test binary embedding type
Expand All @@ -90,6 +95,7 @@ def test_embed_batch_embedding_types(jina_models):

@skip_if_no_api_key
def test_embed_batch_for_ingest(jina_models, mocker):
mocker.patch("embcli_jina.jina.TIMEOUT_SEC", 30)
mocker.patch("embcli_jina.jina.COLBERT_TIMEOUT_SEC", 30)
for model in jina_models:
input_data = ["hello", "world"]
Expand All @@ -106,6 +112,7 @@ def test_embed_batch_for_ingest(jina_models, mocker):

@skip_if_no_api_key
def test_embed_for_search(jina_models, mocker):
mocker.patch("embcli_jina.jina.TIMEOUT_SEC", 30)
mocker.patch("embcli_jina.jina.COLBERT_TIMEOUT_SEC", 30)
for model in jina_models:
input = "hello world"
Expand Down
12 changes: 8 additions & 4 deletions packages/embcli-jina/tests/embcli_jina/test_jina_multimodal.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ def test_factory_create_invalid_model():


@skip_if_no_api_key
def test_embed_one_batch_multimodal(jina_multimodal_models):
def test_embed_one_batch_multimodal(jina_multimodal_models, mocker):
mocker.patch("embcli_jina.jina_multimodal.TIMEOUT_SEC", 60)
for model in jina_multimodal_models:
print(f"Testing model: {model.model_id}")
input_data = ["hello", "world"]
Expand All @@ -42,7 +43,8 @@ def test_embed_one_batch_multimodal(jina_multimodal_models):


@skip_if_no_api_key
def test_embed_one_batch_multimodal_image(jina_multimodal_models):
def test_embed_one_batch_multimodal_image(jina_multimodal_models, mocker):
mocker.patch("embcli_jina.jina_multimodal.TIMEOUT_SEC", 60)
for model in jina_multimodal_models:
image_paths = [
files("tests.embcli_jina").joinpath("flying_cat.jpeg"),
Expand All @@ -61,7 +63,8 @@ def test_embed_one_batch_multimodal_image(jina_multimodal_models):


@skip_if_no_api_key
def test_embed_batch_with_options(jina_multimodal_models):
def test_embed_batch_with_options(jina_multimodal_models, mocker):
mocker.patch("embcli_jina.jina_multimodal.TIMEOUT_SEC", 30)
input_data = ["hello", "world"]
for model in jina_multimodal_models:
options = {"task": "retrieval.query", "dimensions": 512}
Expand All @@ -74,7 +77,8 @@ def test_embed_batch_with_options(jina_multimodal_models):


@skip_if_no_api_key
def test_embed_batch_embedding_types(jina_multimodal_models):
def test_embed_batch_embedding_types(jina_multimodal_models, mocker):
mocker.patch("embcli_jina.jina_multimodal.TIMEOUT_SEC", 30)
input_data = ["hello", "world"]
for model in jina_multimodal_models:
# Test binary embedding type
Expand Down
2 changes: 1 addition & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading