mocobeta · mocobeta · Aug 2, 2025 · Aug 2, 2025
diff --git a/.github/workflows/ci-jina.yml b/.github/workflows/ci-jina.yml
@@ -47,6 +47,6 @@ jobs:
       env:
         JINA_API_KEY: ${{ secrets.JINA_API_KEY }}
         RUN_JINA_TESTS: "1"
-        RUN_JINA_CLIP_TESTS: "1"
+        RUN_JINA_MULTIMODAL_TESTS: "1"
       run: |
         uv run --package embcli-jina pytest packages/embcli-jina/tests
diff --git a/packages/embcli-jina/README.md b/packages/embcli-jina/README.md
@@ -46,12 +46,15 @@ JinaEmbeddingModel
     * dimensions (int) - The number of dimensions the resulting output embeddings should have. Only supported in jina-embeddings-v3 and jina-colbert-v2.
     * input_type (str) - The type of input to the model. Supported types: 'query', 'document'. Only supported in jina-colbert-v2.
     * embedding_type (str) - The type of embeddings to return. Options include 'float', 'binary', 'ubinary'. Default is 'float'.
-JinaClipModel
+JinaMultiModalModel
     Vendor: jina
     Models:
+    * jina-embeddings-v4 (aliases: jina-v4)
     * jina-clip-v2 (aliases: )
     Model Options:
-    * task (str) - Downstream task for which the embeddings are used. Supported tasks: 'retrieval.query', 'retrieval.passage'.
+    * task (str) - Downstream task for which the embeddings are used. Supported tasks: 'retrieval.query', 'retrieval.passage', 'text-matching', 'code.query', 'code.passage'.
+    * late_chunking (bool) - Whether late chunking is applied. Only supported in jina-embeddings-v4.
+    * truncate (bool) - When enabled, the model will automatically drop the tail that extends beyond the maximum context length allowed by the model instead of throwing an error. Only supported in jina-embeddings-v4.
     * dimensions (int) - The number of dimensions the resulting output embeddings should have.
     * embedding_type (str) - The type of embeddings to return. Options include 'float', 'binary', 'ubinary'. Default is 'float'.
 
@@ -64,9 +67,9 @@ emb embed -m jina-v3 "Embeddings are essential for semantic search and RAG apps.
 # get an embedding for an input text by jina-embeddings-v3 model with embedding_type=binary.
 emb embed -m jina-v3 "Embeddings are essential for semantic search and RAG apps." -o embedding_type binary
 
-# get an embedding for an image input by jina-clip-v2 model.
+# get an embedding for an image input by jina-embeddings-v4 model.
 # assume you have an image file named `gingercat.jpeg` in the current directory.
-emb embed -m jina-clip-v2 --image gingercat.jpeg
+emb embed -m jina-v4 --image gingercat.jpeg
 
 # calculate similarity score between two texts by jina-embeddings-v3 model. The default metric is cosine similarity.
 emb simscore -m jina-v3 "The cat drifts toward sleep." "Sleep dances in the cat's eyes."

diff --git a/packages/embcli-jina/pyproject.toml b/packages/embcli-jina/pyproject.toml
@@ -34,4 +34,4 @@ build-backend = "hatchling.build"
 
 [project.entry-points."embcli"]
 jina = "embcli_jina.jina"
-jina-clip = "embcli_jina.jina_clip"
+jina-clip = "embcli_jina.jina_multimodal"
diff --git a/.../embcli-jina/src/embcli_jina/jina_clip.py → ...i-jina/src/embcli_jina/jina_multimodal.py b/.../embcli-jina/src/embcli_jina/jina_clip.py → ...i-jina/src/embcli_jina/jina_multimodal.py
@@ -14,15 +14,25 @@ def image_to_base64(image_path: str) -> str:
     return data.decode("utf-8")
 
 
-class JinaClipModel(MultimodalEmbeddingModel):
+class JinaMultiModalModel(MultimodalEmbeddingModel):
     vendor = "jina"
     default_batch_size = 100
-    model_aliases = [("jina-clip-v2", [])]
+    model_aliases = [("jina-embeddings-v4", ["jina-v4"]), ("jina-clip-v2", [])]
     valid_options = [
         ModelOption(
             "task",
             ModelOptionType.STR,
-            "Downstream task for which the embeddings are used. Supported tasks: 'retrieval.query', 'retrieval.passage'.",  # noqa: E501
+            "Downstream task for which the embeddings are used. Supported tasks: 'retrieval.query', 'retrieval.passage', 'text-matching', 'code.query', 'code.passage'.",  # noqa: E501
+        ),
+        ModelOption(
+            "late_chunking",
+            ModelOptionType.BOOL,
+            "Whether if the late chunking is applied. Only supported in jina-embeddings-v4.",
+        ),
+        ModelOption(
+            "truncate",
+            ModelOptionType.BOOL,
+            "When enabled, the model will automatically drop the tail that extends beyond the maximum context length allowed by the model instead of throwing an error. Only supported in jina-embeddings-v4.",  # noqa: E501
         ),
         ModelOption(
             "dimensions",
@@ -87,9 +97,9 @@ def embed_for_search(self, input, **kwargs):
 @embcli_core.hookimpl
 def embedding_model():
     def create(model_id: str, **kwargs):
-        model_ids = [alias[0] for alias in JinaClipModel.model_aliases]
+        model_ids = [alias[0] for alias in JinaMultiModalModel.model_aliases]
         if model_id not in model_ids:
             raise ValueError(f"Model ID {model_id} is not supported.")
-        return JinaClipModel(model_id, **kwargs)
+        return JinaMultiModalModel(model_id, **kwargs)
 
-    return JinaClipModel, create
+    return JinaMultiModalModel, create
diff --git a/packages/embcli-jina/tests/embcli_jina/conftest.py b/packages/embcli-jina/tests/embcli_jina/conftest.py
@@ -1,7 +1,7 @@
 import pytest
-from embcli_jina import jina, jina_clip
+from embcli_jina import jina, jina_multimodal
 from embcli_jina.jina import JinaEmbeddingModel
-from embcli_jina.jina_clip import JinaClipModel
+from embcli_jina.jina_multimodal import JinaMultiModalModel
 
 
 @pytest.fixture
@@ -11,9 +11,9 @@ def jina_models():
 
 
 @pytest.fixture
-def jina_clip_models():
-    model_ids = [alias[0] for alias in JinaClipModel.model_aliases]
-    return [JinaClipModel(model_id) for model_id in model_ids]
+def jina_multimodal_models():
+    model_ids = [alias[0] for alias in JinaMultiModalModel.model_aliases]
+    return [JinaMultiModalModel(model_id) for model_id in model_ids]
 
 
 @pytest.fixture
@@ -25,5 +25,5 @@ def plugin_manager():
     pm = pluggy.PluginManager("embcli")
     pm.add_hookspecs(hookspecs)
     pm.register(jina)
-    pm.register(jina_clip)
+    pm.register(jina_multimodal)
     return pm
diff --git a/packages/embcli-jina/tests/embcli_jina/test_cli_embed.py b/packages/embcli-jina/tests/embcli_jina/test_cli_embed.py
@@ -7,21 +7,21 @@
 from embcli_core.cli import embed
 
 skip_if_no_api_key = pytest.mark.skipif(
-    not os.environ.get("JINA_API_KEY") or not os.environ.get("RUN_JINA_CLIP_TESTS") == "1",
-    reason="JINA_API_KEY and RUN_JINA_CLIP_TESTS environment variables not set",
+    not os.environ.get("JINA_API_KEY") or not os.environ.get("RUN_JINA_MULTIMODAL_TESTS") == "1",
+    reason="JINA_API_KEY and RUN_JINA_MULTIMODAL_TESTS environment variables not set",
 )
 
 
 @skip_if_no_api_key
 def test_embed_command_text(plugin_manager, mocker):
     mocker.patch("embcli_core.cli._pm", plugin_manager)
     runner = CliRunner()
-    result = runner.invoke(embed, ["--model", "jina-clip-v2", "flying cat"])
+    result = runner.invoke(embed, ["--model", "jina-v4", "flying cat"])
     assert result.exit_code == 0
 
     embeddings = json.loads(result.output)
     assert isinstance(embeddings, list)
-    assert len(embeddings) == 1024
+    assert len(embeddings) == 2048
     assert all(isinstance(val, float) for val in embeddings)
 
 
@@ -30,10 +30,10 @@ def test_embed_command_image(plugin_manager, mocker):
     mocker.patch("embcli_core.cli._pm", plugin_manager)
     runner = CliRunner()
     image_path = files("tests.embcli_jina").joinpath("flying_cat.jpeg")
-    result = runner.invoke(embed, ["--model", "jina-clip-v2", "--image", str(image_path)])
+    result = runner.invoke(embed, ["--model", "jina-v4", "--image", str(image_path)])
     assert result.exit_code == 0
 
     embeddings = json.loads(result.output)
     assert isinstance(embeddings, list)
-    assert len(embeddings) == 1024
+    assert len(embeddings) == 2048
     assert all(isinstance(val, float) for val in embeddings)
diff --git a/...-jina/tests/embcli_jina/test_jina_clip.py → ...tests/embcli_jina/test_jina_multimodal.py b/...-jina/tests/embcli_jina/test_jina_clip.py → ...tests/embcli_jina/test_jina_multimodal.py
@@ -3,19 +3,19 @@
 
 import pytest
 from embcli_core.models import Modality
-from embcli_jina.jina_clip import JinaClipModel, embedding_model
+from embcli_jina.jina_multimodal import JinaMultiModalModel, embedding_model
 
 skip_if_no_api_key = pytest.mark.skipif(
-    not os.environ.get("JINA_API_KEY") or not os.environ.get("RUN_JINA_CLIP_TESTS") == "1",
-    reason="JINA_API_KEY and RUN_JINA_CLIP_TESTS environment variables not set",
+    not os.environ.get("JINA_API_KEY") or not os.environ.get("RUN_JINA_MULTIMODAL_TESTS") == "1",
+    reason="JINA_API_KEY and RUN_JINA_MULTIMODAL_TESTS environment variables not set",
 )
 
 
 @skip_if_no_api_key
 def test_factory_create_valid_model():
     _, create = embedding_model()
     model = create("jina-clip-v2")
-    assert isinstance(model, JinaClipModel)
+    assert isinstance(model, JinaMultiModalModel)
     assert model.model_id == "jina-clip-v2"
     assert model.endpoint == "https://api.jina.ai/v1/embeddings"
 
@@ -28,8 +28,8 @@ def test_factory_create_invalid_model():
 
 
 @skip_if_no_api_key
-def test_embed_one_batch_multimodal(jina_clip_models):
-    for model in jina_clip_models:
+def test_embed_one_batch_multimodal(jina_multimodal_models):
+    for model in jina_multimodal_models:
         print(f"Testing model: {model.model_id}")
         input_data = ["hello", "world"]
 
@@ -42,8 +42,8 @@ def test_embed_one_batch_multimodal(jina_clip_models):
 
 
 @skip_if_no_api_key
-def test_embed_one_batch_multimodal_image(jina_clip_models):
-    for model in jina_clip_models:
+def test_embed_one_batch_multimodal_image(jina_multimodal_models):
+    for model in jina_multimodal_models:
         image_paths = [
             files("tests.embcli_jina").joinpath("flying_cat.jpeg"),
             files("tests.embcli_jina").joinpath("sleepy_sheep.jpeg"),
@@ -54,13 +54,16 @@ def test_embed_one_batch_multimodal_image(jina_clip_models):
         for emb in embeddings:
             assert isinstance(emb, list)
             assert all(isinstance(x, float) for x in emb)
-            assert len(emb) == 1024
+            if model.model_id == "jina-clip-v2":
+                assert len(emb) == 1024
+            elif model.model_id == "jina-clip-v4":
+                assert len(emb) == 2048
 
 
 @skip_if_no_api_key
-def test_embed_batch_with_options(jina_clip_models):
+def test_embed_batch_with_options(jina_multimodal_models):
     input_data = ["hello", "world"]
-    for model in jina_clip_models:
+    for model in jina_multimodal_models:
         options = {"task": "retrieval.query", "dimensions": 512}
         embeddings = list(model.embed_batch(input_data, None, **options))
         assert len(embeddings) == len(input_data)
@@ -71,16 +74,15 @@ def test_embed_batch_with_options(jina_clip_models):
 
 
 @skip_if_no_api_key
-def test_embed_batch_embedding_types(jina_clip_models):
+def test_embed_batch_embedding_types(jina_multimodal_models):
     input_data = ["hello", "world"]
-    for model in jina_clip_models:
+    for model in jina_multimodal_models:
         # Test binary embedding type
         options = {"embedding_type": "binary"}
         embeddings = list(model.embed_batch(input_data, None, **options))
         assert len(embeddings) == len(input_data)
         for emb in embeddings:
             assert isinstance(emb, list)
-            assert all(isinstance(x, int) for x in emb)
             assert all(-128 <= x <= 127 for x in emb)
 
         # Test ubinary embedding type
@@ -89,5 +91,4 @@ def test_embed_batch_embedding_types(jina_clip_models):
         assert len(embeddings) == len(input_data)
         for emb in embeddings:
             assert isinstance(emb, list)
-            assert all(isinstance(x, int) for x in emb)
             assert all(0 <= x <= 255 for x in emb)