Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
AnthropicChatModelSetup,
)

pytestmark = pytest.mark.integration

test_model = os.environ.get("TEST_MODEL")
api_key = os.environ.get("TEST_API_KEY")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
from flink_agents.plan.function import PythonFunction
from flink_agents.plan.tools.function_tool import FunctionTool

pytestmark = pytest.mark.integration

test_deployment = os.environ.get("TEST_AZURE_DEPLOYMENT")
api_key = os.environ.get("AZURE_OPENAI_API_KEY")
azure_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
from flink_agents.plan.function import PythonFunction
from flink_agents.plan.tools.function_tool import FunctionTool

pytestmark = pytest.mark.integration

test_model = os.environ.get("TEST_MODEL")
api_key = os.environ.get("TEST_API_KEY")
api_base_url = os.environ.get("TEST_API_BASE_URL")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
from flink_agents.plan.function import PythonFunction
from flink_agents.plan.tools.function_tool import FunctionTool

pytestmark = pytest.mark.integration

test_model = os.environ.get("OLLAMA_CHAT_MODEL", "qwen3:1.7b")
current_dir = Path(__file__).parent

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
from flink_agents.plan.function import PythonFunction
from flink_agents.plan.tools.function_tool import FunctionTool

pytestmark = pytest.mark.integration

test_model = os.environ.get("TONGYI_CHAT_MODEL", "qwen-plus")
api_key_available = "DASHSCOPE_API_KEY" in os.environ

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
OllamaEmbeddingModelSetup,
)

pytestmark = pytest.mark.integration

test_model = os.environ.get("OLLAMA_EMBEDDING_MODEL", "all-minilm:22m")
current_dir = Path(__file__).parent

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
OpenAIEmbeddingModelSetup,
)

pytestmark = pytest.mark.integration

test_model = os.environ.get("TEST_EMBEDDING_MODEL", "text-embedding-3-small")
api_key = os.environ.get("TEST_API_KEY")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
TongyiEmbeddingModelSetup,
)

pytestmark = pytest.mark.integration

test_model = os.environ.get("TONGYI_EMBEDDING_MODEL", "text-embedding-v4")
api_key_available = "DASHSCOPE_API_KEY" in os.environ

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@
_translate_filters_to_chroma_where,
)

pytestmark = pytest.mark.integration

api_key = os.environ.get("TEST_API_KEY")
tenant = os.environ.get("TEST_TENANT")
database = os.environ.get("TEST_DATABASE")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,12 @@ def _doc(
Mem0VectorStore,
)

pytestmark = pytest.mark.skipif(
not _backend_available, reason="mem0 / chromadb is not available"
)
pytestmark = [
pytest.mark.skipif(
not _backend_available, reason="mem0 / chromadb is not available"
),
pytest.mark.integration,
]


# ---------------------------------------------------------------------------
Expand Down
6 changes: 6 additions & 0 deletions python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -202,3 +202,9 @@ strict = true

[tool.ruff.format]
docstring-code-format = true

[tool.pytest.ini_options]
markers = [
"integration: tests that require live external services (Ollama, DashScope, OpenAI, Azure, Anthropic, Chroma, mem0). Deselect with -m 'not integration'.",
]
strict_markers = true
36 changes: 32 additions & 4 deletions tools/ut.sh
Original file line number Diff line number Diff line change
Expand Up @@ -229,22 +229,40 @@ python_tests() {
if $run_e2e; then
# There will be an individual build step before run e2e test for including java dist
uv pip install apache-flink~=${version}.0
# Arm 1: existing e2e tests (directory-based selector).
uv run --no-sync pytest flink_agents \
-s \
-k "e2e_tests_integration" \
--reruns 2 \
--reruns-delay 5 \
-o log_cli=true \
-o log_cli_level=${LOG_LEVEL:-CRITICAL}
rc1=$?
# Arm 2: integration-marked tests (registered in pyproject.toml).
# Trap exit code 5 (no tests collected) as failure to defend
# against -m selector typos that --strict-markers does not catch.
uv run --no-sync pytest flink_agents \
-s \
-m "integration" \
-o log_cli=true \
-o log_cli_level=${LOG_LEVEL:-CRITICAL}
rc2=$?
if [ $rc2 -eq 5 ]; then rc2=1; fi
# Logical-OR aggregation: any nonzero exit on either arm yields testcode=1.
# Side effect: pytest exit 5 (no tests collected) becomes failure on BOTH
# arms, not just arm 2 — which is the correct semantics (zero collection
# on either arm indicates a selector regression).
testcode=$((rc1 || rc2))
else
uv sync --extra test
uv pip install apache-flink~=${version}.0
uv run --no-sync pytest flink_agents \
-k "not e2e_tests" \
-m "not integration" \
-o log_cli=true \
-o log_cli_level=${LOG_LEVEL:-CRITICAL}
-o log_cli_level=${LOG_LEVEL:-CRITICAL}
testcode=$?
fi
testcode=$?
else
if $verbose; then
echo "uv not found, falling back to pip"
Expand All @@ -262,10 +280,20 @@ python_tests() {
fi
if $run_e2e; then
pytest flink_agents -k "e2e_tests_integration" --reruns 2 --reruns-delay 5 -o log_cli=true -o log_cli_level=${LOG_LEVEL:-OFF}
rc1=$?
# Arm 2: integration-marked tests; trap exit code 5 as failure.
pytest flink_agents -m "integration" -o log_cli=true -o log_cli_level=${LOG_LEVEL:-OFF}
rc2=$?
if [ $rc2 -eq 5 ]; then rc2=1; fi
# Logical-OR aggregation: any nonzero exit on either arm yields testcode=1.
# Side effect: pytest exit 5 (no tests collected) becomes failure on BOTH
# arms, not just arm 2 — which is the correct semantics (zero collection
# on either arm indicates a selector regression).
testcode=$((rc1 || rc2))
else
pytest flink_agents -k "not e2e_tests" -o log_cli=true -o log_cli_level=${LOG_LEVEL:-OFF}
pytest flink_agents -k "not e2e_tests" -m "not integration" -o log_cli=true -o log_cli_level=${LOG_LEVEL:-OFF}
testcode=$?
fi
testcode=$?
fi

# Handle pytest exit codes
Expand Down
Loading