Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1103,7 +1103,7 @@ def _build_internal_log_attributes(
# Create a copy of the base log attributes
internal_log_attributes: Dict[str, str] = log_attributes.copy()
# Add threshold if present
if event_data.get("threshold"):
if event_data.get("threshold") is not None:
internal_log_attributes["gen_ai.evaluation.threshold"] = str(event_data["threshold"])

# Add testing criteria details if present
Expand Down Expand Up @@ -2030,6 +2030,11 @@ def _extract_testing_criteria_metadata(
"metrics": metrics,
"is_inverse": is_inverse,
}
# Propagate pass_threshold from evaluator config so result events can include it
if evaluator_config and criteria_name in evaluator_config:
pass_threshold = evaluator_config[criteria_name].get("_pass_threshold")
if pass_threshold is not None:
testing_criteria_metadata[criteria_name]["pass_threshold"] = pass_threshold

return testing_criteria_metadata

Expand Down Expand Up @@ -2503,6 +2508,14 @@ def _process_criteria_metrics(
# Extract metric values
result_per_metric = _extract_metric_values(criteria_name, criteria_type, metrics, expected_metrics, logger)

# Inject threshold from evaluator config when not present in raw results
# (e.g., PythonGrader/code evaluators don't emit a threshold column)
config_threshold = testing_criteria_metadata.get(criteria_name, {}).get("pass_threshold")
if config_threshold is not None:
for metric_values in result_per_metric.values():
if _is_none_or_nan(metric_values.get("threshold")):
metric_values["threshold"] = config_threshold

# Convert to result objects
results = []
top_sample = {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,7 @@ def _get_graders_and_column_mappings(
def _build_schema_tree_from_paths(
paths: List[str],
force_leaf_type: str = "string",
leaf_type_map: Optional[Dict[str, str]] = None,
) -> Dict[str, Any]:
"""
Build a nested JSON schema (object) from a list of dot-delimited paths.
Expand Down Expand Up @@ -629,33 +630,40 @@ def _build_schema_tree_from_paths(
:param force_leaf_type: The JSON Schema ``type`` value to assign to every leaf node
produced from the supplied paths. Defaults to ``"string"``.
:type force_leaf_type: str
:param leaf_type_map: Optional mapping from leaf path to JSON Schema type. When
provided, overrides ``force_leaf_type`` for any path present in this map.
:type leaf_type_map: Optional[Dict[str, str]]
:return: A JSON Schema fragment describing the hierarchical structure implied by
the input paths. The returned schema root always has ``type: object`` with
recursively nested ``properties`` / ``required`` keys.
:rtype: Dict[str, Any]
"""
# Build tree where each node: {"__children__": { segment: node, ... }, "__leaf__": bool }
root: Dict[str, Any] = {"__children__": {}, "__leaf__": False}
# Build tree where each node: {"__children__": { segment: node, ... }, "__leaf__": bool, "__path__": str }
root: Dict[str, Any] = {"__children__": {}, "__leaf__": False, "__path__": ""}

def insert(path: str):
parts = [p for p in path.split(".") if p]
node = root
for i, part in enumerate(parts):
children = node["__children__"]
if part not in children:
children[part] = {"__children__": {}, "__leaf__": False}
children[part] = {"__children__": {}, "__leaf__": False, "__path__": ""}
node = children[part]
if i == len(parts) - 1:
node["__leaf__"] = True
node["__path__"] = path

for p in paths:
insert(p)

_leaf_types = leaf_type_map or {}

def to_schema(node: Dict[str, Any]) -> Dict[str, Any]:
children = node["__children__"]
if not children:
# Leaf node
return {"type": force_leaf_type}
# Leaf node — use per-leaf type if available, else force_leaf_type
leaf_type = _leaf_types.get(node["__path__"], force_leaf_type)
return {"type": leaf_type}
props = {}
required = []
for name, child in children.items():
Expand Down Expand Up @@ -715,8 +723,24 @@ def _generate_data_source_config(input_data_df: pd.DataFrame, column_mapping: Di
props = data_source_config["item_schema"]["properties"]
req = data_source_config["item_schema"]["required"]
for key in column_mapping.keys():
if key in input_data_df and len(input_data_df[key]) > 0 and isinstance(input_data_df[key].iloc[0], list):
sample = None
if key in input_data_df:
for candidate in input_data_df[key]:
# Skip null-like scalar values (None, NaN, pd.NA, NaT, etc.)
if isinstance(candidate, (list, dict)):
sample = candidate
break
try:
if candidate is not None and not pd.isna(candidate):
sample = candidate
break
except (TypeError, ValueError):
sample = candidate
break
if isinstance(sample, list):
props[key] = {"type": "array"}
elif isinstance(sample, dict):
props[key] = {"type": "object"}
else:
props[key] = {"type": "string"}
req.append(key)
Expand Down Expand Up @@ -754,7 +778,24 @@ def _generate_data_source_config(input_data_df: pd.DataFrame, column_mapping: Di
LOGGER.info(f"AOAI: Effective paths after stripping wrapper: {effective_paths}")

LOGGER.info(f"AOAI: Building nested schema from {len(effective_paths)} effective paths...")
nested_schema = _build_schema_tree_from_paths(effective_paths, force_leaf_type="string")

# Infer leaf types from the DataFrame so nested schemas also get array/object types
leaf_type_map: Dict[str, str] = {}
for ref_path, eff_path in zip(referenced_paths, effective_paths if strip_wrapper else referenced_paths):
if ref_path in input_data_df:
for candidate in input_data_df[ref_path]:
if isinstance(candidate, (list, dict)):
leaf_type_map[eff_path] = "array" if isinstance(candidate, list) else "object"
break
try:
if candidate is not None and not pd.isna(candidate):
break
except (TypeError, ValueError):
break

nested_schema = _build_schema_tree_from_paths(
effective_paths, force_leaf_type="string", leaf_type_map=leaf_type_map
)

LOGGER.info(f"AOAI: Nested schema generated successfully with type '{nested_schema.get('type')}'")
return {
Expand Down Expand Up @@ -816,9 +857,9 @@ def _convert_value(val: Any) -> Any:
if isinstance(val, bool):
return val
# Align numerics with legacy text-only JSONL payloads by turning them into strings.
if isinstance(val, (int, float, list)):
if isinstance(val, (int, float)):
return str(val)
if isinstance(val, (dict)):
if isinstance(val, (list, dict)):
return val
return str(val)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,26 @@ def test_mixed_depth_paths(self):
assert nested["type"] == "object"
assert "field" in nested["properties"]

def test_leaf_type_map_overrides_force_leaf_type(self):
    """leaf_type_map entries win over force_leaf_type; unmapped paths keep the default."""
    schema = _build_schema_tree_from_paths(
        ["query", "tags", "metadata"],
        force_leaf_type="string",
        leaf_type_map={"tags": "array", "metadata": "object"},
    )

    props = schema["properties"]
    # 'query' has no override, so it falls back to the forced default type.
    assert props["query"]["type"] == "string"
    # Mapped leaves take their per-path types.
    assert props["tags"]["type"] == "array"
    assert props["metadata"]["type"] == "object"

def test_leaf_type_map_nested_paths(self):
    """Overrides keyed by the full dotted path apply to nested leaf nodes."""
    schema = _build_schema_tree_from_paths(
        ["context.tags", "context.query"],
        force_leaf_type="string",
        leaf_type_map={"context.tags": "array"},
    )

    context_props = schema["properties"]["context"]["properties"]
    # Only the overridden dotted path becomes an array; siblings stay string.
    assert context_props["tags"]["type"] == "array"
    assert context_props["query"]["type"] == "string"


@pytest.mark.unittest
class TestGenerateDataSourceConfig:
Expand Down Expand Up @@ -297,6 +317,102 @@ def test_single_nested_path(self, flat_test_data):
# After wrapper stripping, should see context
assert "context" in schema["properties"]

def test_flat_schema_infers_list_and_dict_types(self, flat_test_data):
    """Flat schema generation should infer array/object types from sample values."""
    flat_test_data["tags"] = [["tag1", "tag2"], ["tag3"], []]
    flat_test_data["metadata"] = [{"key": "val"}, {"key2": "val2"}, {}]
    flat_test_data["score"] = [95, 87, 92]

    mapping = {
        "query": "${data.query}",
        "tags": "${data.tags}",
        "metadata": "${data.metadata}",
        "score": "${data.score}",
    }
    properties = _generate_data_source_config(flat_test_data, mapping)["item_schema"]["properties"]

    # str -> "string", list -> "array", dict -> "object"; numerics are
    # stringified by _convert_value, so their schema type is "string" too.
    expected_types = {
        "query": "string",
        "tags": "array",
        "metadata": "object",
        "score": "string",
    }
    for column, json_type in expected_types.items():
        assert properties[column]["type"] == json_type

def test_flat_schema_skips_none_nan_for_type_inference(self):
    """Type inference should ignore null-like rows and use the first real value."""
    import numpy as np

    frame = pd.DataFrame(
        {
            "tags": [None, ["tag1", "tag2"], ["tag3"]],
            "metadata": [np.nan, {"key": "val"}, {}],
            "query": [None, None, "hello"],
        }
    )
    mapping = {
        "tags": "${data.tags}",
        "metadata": "${data.metadata}",
        "query": "${data.query}",
    }

    props = _generate_data_source_config(frame, mapping)["item_schema"]["properties"]

    # Row 0 holds None/NaN for both columns, so the row-1 values drive the types.
    assert props["tags"]["type"] == "array"
    assert props["metadata"]["type"] == "object"
    # A column whose sampled value is still null-like defaults to string.
    assert props["query"]["type"] == "string"

def test_flat_schema_skips_pd_na_for_type_inference(self):
    """The pd.NA sentinel must also be skipped when sampling for types."""
    frame = pd.DataFrame(
        {
            "tags": [pd.NA, ["tag1", "tag2"], ["tag3"]],
            "query": ["hello", "world", "test"],
        }
    )
    mapping = {
        "tags": "${data.tags}",
        "query": "${data.query}",
    }

    props = _generate_data_source_config(frame, mapping)["item_schema"]["properties"]

    # pd.NA in row 0 is bypassed; the list in row 1 yields an array type.
    assert props["tags"]["type"] == "array"
    assert props["query"]["type"] == "string"

def test_nested_schema_infers_list_and_dict_leaf_types(self):
    """Nested (dotted-path) schemas should carry inferred per-leaf types."""
    frame = pd.DataFrame(
        [
            {
                "item.query": "hello",
                "item.tags": ["tag1", "tag2"],
                "item.metadata": {"key": "val"},
            }
        ]
    )
    mapping = {
        "query": "${data.item.query}",
        "tags": "${data.item.tags}",
        "metadata": "${data.item.metadata}",
    }

    schema = _generate_data_source_config(frame, mapping)["item_schema"]

    # After the 'item.' wrapper is stripped, each leaf keeps its inferred type.
    leaf_types = {name: schema["properties"][name]["type"] for name in ("query", "tags", "metadata")}
    assert leaf_types == {"query": "string", "tags": "array", "metadata": "object"}


@pytest.mark.unittest
class TestGetDataSource:
Expand Down Expand Up @@ -437,7 +553,7 @@ def test_data_source_with_item_column_and_nested_values(self, nested_item_keywor
# Ensure we did not accidentally nest another 'item' key inside the wrapper
assert "item" not in item_payload
assert item_payload["sample"]["output_text"] == "someoutput"
assert item_payload["sample"]["output_items"] == "['item1', 'item2']"
assert item_payload["sample"]["output_items"] == ["item1", "item2"]

def test_data_source_with_item_sample_column_and_nested_values(self, nested_item_sample_keyword_data):
"""Ensure rows that already have an 'item' column keep nested dicts intact."""
Expand All @@ -464,7 +580,7 @@ def test_data_source_with_item_sample_column_and_nested_values(self, nested_item
# Ensure we did not accidentally nest another 'item' key inside the wrapper
assert "item" not in item_payload
assert item_payload["sample"]["output_text"] == "someoutput"
assert item_payload["sample"]["output_items"] == "['item1', 'item2']"
assert item_payload["sample"]["output_items"] == ["item1", "item2"]

def test_data_source_with_sample_output_metadata(self, flat_sample_output_data):
"""Ensure flat rows that include dotted sample metadata remain accessible."""
Expand All @@ -485,7 +601,7 @@ def test_data_source_with_sample_output_metadata(self, flat_sample_output_data):
assert row["test"]["test_string"] == "baking cakes is fun!"
# sample.output_text should follow the row through normalization without being stringified
assert row["sample.output_text"] == "someoutput"
assert row["sample.output_items"] == "['item1', 'item2']"
assert row["sample.output_items"] == ["item1", "item2"]

def test_data_source_with_numeric_values(self, flat_test_data):
"""Test data source generation converts numeric values to strings."""
Expand All @@ -504,6 +620,35 @@ def test_data_source_with_numeric_values(self, flat_test_data):
assert isinstance(content[0][WRAPPER_KEY]["score"], str)
assert isinstance(content[0][WRAPPER_KEY]["confidence"], str)

def test_data_source_with_list_and_dict_values(self, flat_test_data):
    """Lists and dicts must pass through to the data source without stringification."""
    flat_test_data["tags"] = [["tag1", "tag2"], ["tag3"], []]
    flat_test_data["metadata"] = [{"key": "val"}, {"key2": "val2"}, {}]

    mapping = {
        "query": "${data.query}",
        "tags": "${data.tags}",
        "metadata": "${data.metadata}",
    }
    content = _get_data_source(flat_test_data, mapping)["source"]["content"]

    first_row = content[0][WRAPPER_KEY]
    last_row = content[2][WRAPPER_KEY]

    # Populated and empty lists both survive as real list objects.
    assert first_row["tags"] == ["tag1", "tag2"]
    assert isinstance(first_row["tags"], list)
    assert last_row["tags"] == []
    assert isinstance(last_row["tags"], list)

    # Populated and empty dicts both survive as real dict objects.
    assert first_row["metadata"] == {"key": "val"}
    assert isinstance(first_row["metadata"], dict)
    assert last_row["metadata"] == {}
    assert isinstance(last_row["metadata"], dict)

def test_empty_dataframe(self):
"""Test data source generation with empty dataframe."""
empty_df = pd.DataFrame()
Expand Down Expand Up @@ -600,3 +745,33 @@ def test_nested_schema_and_data_alignment(self, nested_test_data):
assert "query" in item
assert "context" in item
assert "company" in item["context"]

def test_flat_schema_and_data_alignment_with_list_and_dict(self, flat_test_data):
    """Declared schema types must agree with the actual payload types per column."""
    flat_test_data["tags"] = [["tag1", "tag2"], ["tag3"], []]
    flat_test_data["metadata"] = [{"key": "val"}, {"key2": "val2"}, {}]

    mapping = {
        "query": "${data.query}",
        "tags": "${data.tags}",
        "metadata": "${data.metadata}",
    }
    schema_props = _generate_data_source_config(flat_test_data, mapping)["item_schema"]["properties"]
    content = _get_data_source(flat_test_data, mapping)["source"]["content"]

    first_item = content[0][WRAPPER_KEY]

    # "array" in the schema pairs with a list in the payload.
    assert schema_props["tags"]["type"] == "array"
    assert isinstance(first_item["tags"], list)

    # "object" in the schema pairs with a dict in the payload.
    assert schema_props["metadata"]["type"] == "object"
    assert isinstance(first_item["metadata"], dict)

    # Rows holding empty collections keep the same container types.
    empty_item = content[2][WRAPPER_KEY]
    assert isinstance(empty_item["tags"], list)
    assert isinstance(empty_item["metadata"], dict)
Loading