diff --git a/python/flink_agents/integrations/chat_models/anthropic/tests/test_anthropic_chat_model.py b/python/flink_agents/integrations/chat_models/anthropic/tests/test_anthropic_chat_model.py index 9741e054a..1aeb0ce54 100644 --- a/python/flink_agents/integrations/chat_models/anthropic/tests/test_anthropic_chat_model.py +++ b/python/flink_agents/integrations/chat_models/anthropic/tests/test_anthropic_chat_model.py @@ -30,6 +30,8 @@ AnthropicChatModelSetup, ) +pytestmark = pytest.mark.integration + test_model = os.environ.get("TEST_MODEL") api_key = os.environ.get("TEST_API_KEY") diff --git a/python/flink_agents/integrations/chat_models/azure/tests/test_azure_openai_chat_model.py b/python/flink_agents/integrations/chat_models/azure/tests/test_azure_openai_chat_model.py index ce69d42ec..983bbb47f 100644 --- a/python/flink_agents/integrations/chat_models/azure/tests/test_azure_openai_chat_model.py +++ b/python/flink_agents/integrations/chat_models/azure/tests/test_azure_openai_chat_model.py @@ -30,6 +30,8 @@ from flink_agents.plan.function import PythonFunction from flink_agents.plan.tools.function_tool import FunctionTool +pytestmark = pytest.mark.integration + test_deployment = os.environ.get("TEST_AZURE_DEPLOYMENT") api_key = os.environ.get("AZURE_OPENAI_API_KEY") azure_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT") diff --git a/python/flink_agents/integrations/chat_models/openai/tests/test_openai_chat_model.py b/python/flink_agents/integrations/chat_models/openai/tests/test_openai_chat_model.py index 7ccb6c225..ff4d2bb39 100644 --- a/python/flink_agents/integrations/chat_models/openai/tests/test_openai_chat_model.py +++ b/python/flink_agents/integrations/chat_models/openai/tests/test_openai_chat_model.py @@ -31,6 +31,8 @@ from flink_agents.plan.function import PythonFunction from flink_agents.plan.tools.function_tool import FunctionTool +pytestmark = pytest.mark.integration + test_model = os.environ.get("TEST_MODEL") api_key = os.environ.get("TEST_API_KEY") 
api_base_url = os.environ.get("TEST_API_BASE_URL") diff --git a/python/flink_agents/integrations/chat_models/tests/test_ollama_chat_model.py b/python/flink_agents/integrations/chat_models/tests/test_ollama_chat_model.py index 6a2a47117..27f07f340 100644 --- a/python/flink_agents/integrations/chat_models/tests/test_ollama_chat_model.py +++ b/python/flink_agents/integrations/chat_models/tests/test_ollama_chat_model.py @@ -34,6 +34,8 @@ from flink_agents.plan.function import PythonFunction from flink_agents.plan.tools.function_tool import FunctionTool +pytestmark = pytest.mark.integration + test_model = os.environ.get("OLLAMA_CHAT_MODEL", "qwen3:1.7b") current_dir = Path(__file__).parent diff --git a/python/flink_agents/integrations/chat_models/tests/test_tongyi_chat_model.py b/python/flink_agents/integrations/chat_models/tests/test_tongyi_chat_model.py index c33a792ce..997422513 100644 --- a/python/flink_agents/integrations/chat_models/tests/test_tongyi_chat_model.py +++ b/python/flink_agents/integrations/chat_models/tests/test_tongyi_chat_model.py @@ -32,6 +32,8 @@ from flink_agents.plan.function import PythonFunction from flink_agents.plan.tools.function_tool import FunctionTool +pytestmark = pytest.mark.integration + test_model = os.environ.get("TONGYI_CHAT_MODEL", "qwen-plus") api_key_available = "DASHSCOPE_API_KEY" in os.environ diff --git a/python/flink_agents/integrations/embedding_models/local/tests/test_ollama_embedding_model.py b/python/flink_agents/integrations/embedding_models/local/tests/test_ollama_embedding_model.py index b770eea3f..dd71c14ca 100644 --- a/python/flink_agents/integrations/embedding_models/local/tests/test_ollama_embedding_model.py +++ b/python/flink_agents/integrations/embedding_models/local/tests/test_ollama_embedding_model.py @@ -31,6 +31,8 @@ OllamaEmbeddingModelSetup, ) +pytestmark = pytest.mark.integration + test_model = os.environ.get("OLLAMA_EMBEDDING_MODEL", "all-minilm:22m") current_dir = Path(__file__).parent diff --git 
a/python/flink_agents/integrations/embedding_models/tests/test_openai_embedding_model.py b/python/flink_agents/integrations/embedding_models/tests/test_openai_embedding_model.py index e76cc3faa..49907340f 100644 --- a/python/flink_agents/integrations/embedding_models/tests/test_openai_embedding_model.py +++ b/python/flink_agents/integrations/embedding_models/tests/test_openai_embedding_model.py @@ -27,6 +27,8 @@ OpenAIEmbeddingModelSetup, ) +pytestmark = pytest.mark.integration + test_model = os.environ.get("TEST_EMBEDDING_MODEL", "text-embedding-3-small") api_key = os.environ.get("TEST_API_KEY") diff --git a/python/flink_agents/integrations/embedding_models/tests/test_tongyi_embedding_model.py b/python/flink_agents/integrations/embedding_models/tests/test_tongyi_embedding_model.py index 75e477067..b60c75596 100644 --- a/python/flink_agents/integrations/embedding_models/tests/test_tongyi_embedding_model.py +++ b/python/flink_agents/integrations/embedding_models/tests/test_tongyi_embedding_model.py @@ -29,6 +29,8 @@ TongyiEmbeddingModelSetup, ) +pytestmark = pytest.mark.integration + test_model = os.environ.get("TONGYI_EMBEDDING_MODEL", "text-embedding-v4") api_key_available = "DASHSCOPE_API_KEY" in os.environ diff --git a/python/flink_agents/integrations/vector_stores/chroma/tests/test_chroma_vector_store.py b/python/flink_agents/integrations/vector_stores/chroma/tests/test_chroma_vector_store.py index 754798cc8..135ded8b5 100644 --- a/python/flink_agents/integrations/vector_stores/chroma/tests/test_chroma_vector_store.py +++ b/python/flink_agents/integrations/vector_stores/chroma/tests/test_chroma_vector_store.py @@ -42,6 +42,8 @@ _translate_filters_to_chroma_where, ) +pytestmark = pytest.mark.integration + api_key = os.environ.get("TEST_API_KEY") tenant = os.environ.get("TEST_TENANT") database = os.environ.get("TEST_DATABASE") diff --git a/python/flink_agents/integrations/vector_stores/mem0/tests/test_mem0_vector_store.py 
b/python/flink_agents/integrations/vector_stores/mem0/tests/test_mem0_vector_store.py index 357d0352b..27253b964 100644 --- a/python/flink_agents/integrations/vector_stores/mem0/tests/test_mem0_vector_store.py +++ b/python/flink_agents/integrations/vector_stores/mem0/tests/test_mem0_vector_store.py @@ -62,9 +62,12 @@ def _doc( Mem0VectorStore, ) -pytestmark = pytest.mark.skipif( - not _backend_available, reason="mem0 / chromadb is not available" -) +pytestmark = [ + pytest.mark.skipif( + not _backend_available, reason="mem0 / chromadb is not available" + ), + pytest.mark.integration, +] # --------------------------------------------------------------------------- diff --git a/python/pyproject.toml b/python/pyproject.toml index cbd0d5b69..2d2d8d41c 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -202,3 +202,10 @@ strict = true [tool.ruff.format] docstring-code-format = true + +[tool.pytest.ini_options] +markers = [ + "integration: tests that require live external services (Ollama, DashScope, OpenAI, Azure, Anthropic, Chroma, mem0). Deselect with -m 'not integration'.", +] +# Enforce marker registration via the CLI flag: the strict_markers ini key needs pytest >= 9, while --strict-markers also works on older releases. +addopts = ["--strict-markers"] diff --git a/tools/ut.sh b/tools/ut.sh index 8b9de711a..a40303e4b 100755 --- a/tools/ut.sh +++ b/tools/ut.sh @@ -229,6 +229,7 @@ python_tests() { if $run_e2e; then # There will be an individual build step before run e2e test for including java dist uv pip install apache-flink~=${version}.0 + # Arm 1: existing e2e tests (directory-based selector). uv run --no-sync pytest flink_agents \ -s \ -k "e2e_tests_integration" \ @@ -236,15 +237,32 @@ python_tests() { --reruns-delay 5 \ -o log_cli=true \ -o log_cli_level=${LOG_LEVEL:-CRITICAL} + rc1=$? + # Arm 2: integration-marked tests (registered in pyproject.toml). 
+ # Trap exit code 5 (no tests collected) as failure to defend + # against -m selector typos that --strict-markers does not catch. 
+ uv run --no-sync pytest flink_agents \ + -s \ + -m "integration" \ + -o log_cli=true \ + -o log_cli_level=${LOG_LEVEL:-CRITICAL} + rc2=$? + if [ $rc2 -eq 5 ]; then rc2=1; fi + # Logical-OR aggregation: any nonzero exit on either arm yields testcode=1. + # Side effect: pytest exit 5 (no tests collected) becomes failure on BOTH + # arms, not just arm 2 — which is the correct semantics (zero collection + # on either arm indicates a selector regression). + testcode=$((rc1 || rc2)) else uv sync --extra test uv pip install apache-flink~=${version}.0 uv run --no-sync pytest flink_agents \ -k "not e2e_tests" \ + -m "not integration" \ -o log_cli=true \ - -o log_cli_level=${LOG_LEVEL:-CRITICAL} + -o log_cli_level=${LOG_LEVEL:-CRITICAL} + testcode=$? fi - testcode=$? else if $verbose; then echo "uv not found, falling back to pip" @@ -262,10 +280,20 @@ python_tests() { fi if $run_e2e; then pytest flink_agents -k "e2e_tests_integration" --reruns 2 --reruns-delay 5 -o log_cli=true -o log_cli_level=${LOG_LEVEL:-OFF} + rc1=$? + # Arm 2: integration-marked tests; trap exit code 5 as failure. + pytest flink_agents -m "integration" -o log_cli=true -o log_cli_level=${LOG_LEVEL:-OFF} + rc2=$? + if [ $rc2 -eq 5 ]; then rc2=1; fi + # Logical-OR aggregation: any nonzero exit on either arm yields testcode=1. + # Side effect: pytest exit 5 (no tests collected) becomes failure on BOTH + # arms, not just arm 2 — which is the correct semantics (zero collection + # on either arm indicates a selector regression). + testcode=$((rc1 || rc2)) else - pytest flink_agents -k "not e2e_tests" -o log_cli=true -o log_cli_level=${LOG_LEVEL:-OFF} + pytest flink_agents -k "not e2e_tests" -m "not integration" -o log_cli=true -o log_cli_level=${LOG_LEVEL:-OFF} + testcode=$? fi - testcode=$? fi # Handle pytest exit codes