
Commit 633551c

add test stage
1 parent a920a85 commit 633551c

2 files changed: +48 -1 lines changed


test/suites/E2E/test_evaluator.py

Lines changed: 44 additions & 0 deletions
@@ -0,0 +1,44 @@
+import dataclasses
+
+import pytest
+from common.capture_utils import export_vars
+from common.config_utils import config_utils as config_instance
+from common.uc_eval.task import DocQaEvalTask
+from common.uc_eval.utils.data_class import EvalConfig, ModelConfig
+
+
+@pytest.fixture(scope="session")
+def model_config() -> ModelConfig:
+    cfg = config_instance.get_config("models") or {}
+    field_name = [field.name for field in dataclasses.fields(ModelConfig)]
+    kwargs = {k: v for k, v in cfg.items() if k in field_name and v is not None}
+    return ModelConfig(**kwargs)
+
+
+doc_qa_eval_cases = [
+    pytest.param(
+        EvalConfig(
+            data_type="doc_qa",
+            dataset_file_path="common/uc_eval/datasets/doc_qa/demo.jsonl",
+            enable_prefix_cache=False,
+            parallel_num=1,
+            benchmark_mode="evaluate",
+            metrics=["accuracy", "bootstrap-accuracy", "f1-score"],
+            eval_class="common.uc_eval.utils.metric:Includes",
+        ),
+        id="doc-qa-complete-recalculate-evaluate",
+    )
+]
+
+
+@pytest.mark.feature("eval_test")
+@pytest.mark.stage(2)
+@pytest.mark.parametrize("eval_config", doc_qa_eval_cases)
+@export_vars
+def test_doc_qa_perf(
+    eval_config: EvalConfig, model_config: ModelConfig, request: pytest.FixtureRequest
+):
+    file_save_path = config_instance.get_config("reports").get("base_dir")
+    task = DocQaEvalTask(model_config, eval_config, file_save_path)
+    result = task.run()
+    return {"_name": request.node.callspec.id, "_data": result}

test/suites/E2E/test_uc_performance.py

Lines changed: 4 additions & 1 deletion
@@ -203,6 +203,7 @@ def model_config() -> ModelConfig:
 
 
 @pytest.mark.feature("perf_test")
+@pytest.mark.stage(2)
 @pytest.mark.parametrize("perf_config", sync_perf_cases)
 @export_vars
 def test_sync_perf(
@@ -211,7 +212,7 @@ def test_sync_perf(
     file_save_path = config_instance.get_config("reports").get("base_dir")
     task = SyntheticPerfTask(model_config, perf_config, file_save_path)
     result = task.run()
-    return {"_name": request.node.callspec.id, "_data": result}
+    return {"_name": request.node.callspec.id, "_proj": result}
 
 
 multiturn_dialogue_perf_cases = [
@@ -229,6 +230,7 @@ def test_sync_perf(
 
 
 @pytest.mark.feature("perf_test")
+@pytest.mark.stage(2)
 @pytest.mark.parametrize("perf_config", multiturn_dialogue_perf_cases)
 @export_vars
 def test_multiturn_dialogue_perf(
@@ -255,6 +257,7 @@ def test_multiturn_dialogue_perf(
 
 
 @pytest.mark.feature("perf_test")
+@pytest.mark.stage(2)
 @pytest.mark.parametrize("perf_config", doc_qa_perf_cases)
 @export_vars
 def test_doc_qa_perf(
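With markers registered along the lines of the sketch above, the stage-2 cases added in this commit could be selected with an invocation such as pytest test/suites/E2E --stage 2 (again assuming the hypothetical --stage option); plain -m filtering only matches marker names like feature or stage, not the stage number passed as a marker argument.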
