Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1103,7 +1103,7 @@ def _build_internal_log_attributes(
# Create a copy of the base log attributes
internal_log_attributes: Dict[str, str] = log_attributes.copy()
# Add threshold if present
if event_data.get("threshold"):
if event_data.get("threshold") is not None:
internal_log_attributes["gen_ai.evaluation.threshold"] = str(event_data["threshold"])

# Add testing criteria details if present
Expand Down Expand Up @@ -2030,6 +2030,11 @@ def _extract_testing_criteria_metadata(
"metrics": metrics,
"is_inverse": is_inverse,
}
# Propagate pass_threshold from evaluator config so result events can include it
if evaluator_config and criteria_name in evaluator_config:
pass_threshold = evaluator_config[criteria_name].get("_pass_threshold")
if pass_threshold is not None:
testing_criteria_metadata[criteria_name]["pass_threshold"] = pass_threshold

return testing_criteria_metadata

Expand Down Expand Up @@ -2503,6 +2508,14 @@ def _process_criteria_metrics(
# Extract metric values
result_per_metric = _extract_metric_values(criteria_name, criteria_type, metrics, expected_metrics, logger)

# Inject threshold from evaluator config when not present in raw results
# (e.g., PythonGrader/code evaluators don't emit a threshold column)
config_threshold = testing_criteria_metadata.get(criteria_name, {}).get("pass_threshold")
if config_threshold is not None:
for metric_values in result_per_metric.values():
if _is_none_or_nan(metric_values.get("threshold")):
metric_values["threshold"] = config_threshold

# Convert to result objects
results = []
top_sample = {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,7 @@ def _get_graders_and_column_mappings(
def _build_schema_tree_from_paths(
paths: List[str],
force_leaf_type: str = "string",
leaf_type_map: Optional[Dict[str, str]] = None,
) -> Dict[str, Any]:
"""
Build a nested JSON schema (object) from a list of dot-delimited paths.
Expand Down Expand Up @@ -629,33 +630,40 @@ def _build_schema_tree_from_paths(
:param force_leaf_type: The JSON Schema ``type`` value to assign to every leaf node
produced from the supplied paths. Defaults to ``"string"``.
:type force_leaf_type: str
:param leaf_type_map: Optional mapping from leaf path to JSON Schema type. When
provided, overrides ``force_leaf_type`` for any path present in this map.
:type leaf_type_map: Optional[Dict[str, str]]
:return: A JSON Schema fragment describing the hierarchical structure implied by
the input paths. The returned schema root always has ``type: object`` with
recursively nested ``properties`` / ``required`` keys.
:rtype: Dict[str, Any]
"""
# Build tree where each node: {"__children__": { segment: node, ... }, "__leaf__": bool }
root: Dict[str, Any] = {"__children__": {}, "__leaf__": False}
# Build tree where each node: {"__children__": { segment: node, ... }, "__leaf__": bool, "__path__": str }
root: Dict[str, Any] = {"__children__": {}, "__leaf__": False, "__path__": ""}

def insert(path: str):
parts = [p for p in path.split(".") if p]
node = root
for i, part in enumerate(parts):
children = node["__children__"]
if part not in children:
children[part] = {"__children__": {}, "__leaf__": False}
children[part] = {"__children__": {}, "__leaf__": False, "__path__": ""}
node = children[part]
if i == len(parts) - 1:
node["__leaf__"] = True
node["__path__"] = path

for p in paths:
insert(p)

_leaf_types = leaf_type_map or {}

def to_schema(node: Dict[str, Any]) -> Dict[str, Any]:
children = node["__children__"]
if not children:
# Leaf node
return {"type": force_leaf_type}
# Leaf node — use per-leaf type if available, else force_leaf_type
leaf_type = _leaf_types.get(node["__path__"], force_leaf_type)
return {"type": leaf_type}
props = {}
required = []
for name, child in children.items():
Expand Down Expand Up @@ -715,8 +723,24 @@ def _generate_data_source_config(input_data_df: pd.DataFrame, column_mapping: Di
props = data_source_config["item_schema"]["properties"]
req = data_source_config["item_schema"]["required"]
for key in column_mapping.keys():
if key in input_data_df and len(input_data_df[key]) > 0 and isinstance(input_data_df[key].iloc[0], list):
sample = None
if key in input_data_df:
for candidate in input_data_df[key]:
# Skip null-like scalar values (None, NaN, pd.NA, NaT, etc.)
if isinstance(candidate, (list, dict)):
sample = candidate
break
try:
if candidate is not None and not pd.isna(candidate):
sample = candidate
break
except (TypeError, ValueError):
sample = candidate
break
if isinstance(sample, list):
props[key] = {"type": "array"}
elif isinstance(sample, dict):
props[key] = {"type": "object"}
else:
props[key] = {"type": "string"}
req.append(key)
Expand Down Expand Up @@ -754,7 +778,24 @@ def _generate_data_source_config(input_data_df: pd.DataFrame, column_mapping: Di
LOGGER.info(f"AOAI: Effective paths after stripping wrapper: {effective_paths}")

LOGGER.info(f"AOAI: Building nested schema from {len(effective_paths)} effective paths...")
nested_schema = _build_schema_tree_from_paths(effective_paths, force_leaf_type="string")

# Infer leaf types from the DataFrame so nested schemas also get array/object types
leaf_type_map: Dict[str, str] = {}
for ref_path, eff_path in zip(referenced_paths, effective_paths if strip_wrapper else referenced_paths):
if ref_path in input_data_df:
for candidate in input_data_df[ref_path]:
if isinstance(candidate, (list, dict)):
leaf_type_map[eff_path] = "array" if isinstance(candidate, list) else "object"
break
try:
if candidate is not None and not pd.isna(candidate):
break
except (TypeError, ValueError):
break

nested_schema = _build_schema_tree_from_paths(
effective_paths, force_leaf_type="string", leaf_type_map=leaf_type_map
)

LOGGER.info(f"AOAI: Nested schema generated successfully with type '{nested_schema.get('type')}'")
return {
Expand Down Expand Up @@ -816,9 +857,9 @@ def _convert_value(val: Any) -> Any:
if isinstance(val, bool):
return val
# Align numerics with legacy text-only JSONL payloads by turning them into strings.
if isinstance(val, (int, float, list)):
if isinstance(val, (int, float)):
return str(val)
if isinstance(val, (dict)):
if isinstance(val, (list, dict)):
return val
return str(val)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,26 @@ def test_mixed_depth_paths(self):
assert nested["type"] == "object"
assert "field" in nested["properties"]

def test_leaf_type_map_overrides_force_leaf_type(self):
    """leaf_type_map entries win over force_leaf_type; unmapped paths keep the default."""
    schema = _build_schema_tree_from_paths(
        ["query", "tags", "metadata"],
        force_leaf_type="string",
        leaf_type_map={"tags": "array", "metadata": "object"},
    )

    props = schema["properties"]
    # 'query' has no override, so it falls back to the forced default type.
    assert props["query"]["type"] == "string"
    # Mapped leaves take their per-path types.
    assert props["tags"]["type"] == "array"
    assert props["metadata"]["type"] == "object"

def test_leaf_type_map_nested_paths(self):
    """Overrides keyed by the full dotted path apply to nested leaf nodes."""
    schema = _build_schema_tree_from_paths(
        ["context.tags", "context.query"],
        force_leaf_type="string",
        leaf_type_map={"context.tags": "array"},
    )

    context_props = schema["properties"]["context"]["properties"]
    # Only the overridden dotted path becomes an array; siblings stay string.
    assert context_props["tags"]["type"] == "array"
    assert context_props["query"]["type"] == "string"


@pytest.mark.unittest
class TestGenerateDataSourceConfig:
Expand Down Expand Up @@ -297,6 +317,102 @@ def test_single_nested_path(self, flat_test_data):
# After wrapper stripping, should see context
assert "context" in schema["properties"]

def test_flat_schema_infers_list_and_dict_types(self, flat_test_data):
    """Flat schema generation should infer array/object types from sample values."""
    flat_test_data["tags"] = [["tag1", "tag2"], ["tag3"], []]
    flat_test_data["metadata"] = [{"key": "val"}, {"key2": "val2"}, {}]
    flat_test_data["score"] = [95, 87, 92]

    mapping = {
        "query": "${data.query}",
        "tags": "${data.tags}",
        "metadata": "${data.metadata}",
        "score": "${data.score}",
    }
    properties = _generate_data_source_config(flat_test_data, mapping)["item_schema"]["properties"]

    # str -> "string", list -> "array", dict -> "object"; numerics are
    # stringified by _convert_value, so their schema type is "string" too.
    expected_types = {
        "query": "string",
        "tags": "array",
        "metadata": "object",
        "score": "string",
    }
    for column, json_type in expected_types.items():
        assert properties[column]["type"] == json_type

def test_flat_schema_skips_none_nan_for_type_inference(self):
    """Type inference should ignore null-like rows and use the first real value."""
    import numpy as np

    frame = pd.DataFrame(
        {
            "tags": [None, ["tag1", "tag2"], ["tag3"]],
            "metadata": [np.nan, {"key": "val"}, {}],
            "query": [None, None, "hello"],
        }
    )
    mapping = {
        "tags": "${data.tags}",
        "metadata": "${data.metadata}",
        "query": "${data.query}",
    }

    props = _generate_data_source_config(frame, mapping)["item_schema"]["properties"]

    # Row 0 holds None/NaN for both columns, so the row-1 values drive the types.
    assert props["tags"]["type"] == "array"
    assert props["metadata"]["type"] == "object"
    # A column whose sampled value is still null-like defaults to string.
    assert props["query"]["type"] == "string"

def test_flat_schema_skips_pd_na_for_type_inference(self):
    """The pd.NA sentinel must also be skipped when sampling for types."""
    frame = pd.DataFrame(
        {
            "tags": [pd.NA, ["tag1", "tag2"], ["tag3"]],
            "query": ["hello", "world", "test"],
        }
    )
    mapping = {
        "tags": "${data.tags}",
        "query": "${data.query}",
    }

    props = _generate_data_source_config(frame, mapping)["item_schema"]["properties"]

    # pd.NA in row 0 is bypassed; the list in row 1 yields an array type.
    assert props["tags"]["type"] == "array"
    assert props["query"]["type"] == "string"

def test_nested_schema_infers_list_and_dict_leaf_types(self):
    """Nested (dotted-path) schemas should carry inferred per-leaf types."""
    frame = pd.DataFrame(
        [
            {
                "item.query": "hello",
                "item.tags": ["tag1", "tag2"],
                "item.metadata": {"key": "val"},
            }
        ]
    )
    mapping = {
        "query": "${data.item.query}",
        "tags": "${data.item.tags}",
        "metadata": "${data.item.metadata}",
    }

    schema = _generate_data_source_config(frame, mapping)["item_schema"]

    # After the 'item.' wrapper is stripped, each leaf keeps its inferred type.
    leaf_types = {name: schema["properties"][name]["type"] for name in ("query", "tags", "metadata")}
    assert leaf_types == {"query": "string", "tags": "array", "metadata": "object"}


@pytest.mark.unittest
class TestGetDataSource:
Expand Down Expand Up @@ -437,7 +553,7 @@ def test_data_source_with_item_column_and_nested_values(self, nested_item_keywor
# Ensure we did not accidentally nest another 'item' key inside the wrapper
assert "item" not in item_payload
assert item_payload["sample"]["output_text"] == "someoutput"
assert item_payload["sample"]["output_items"] == "['item1', 'item2']"
assert item_payload["sample"]["output_items"] == ["item1", "item2"]

def test_data_source_with_item_sample_column_and_nested_values(self, nested_item_sample_keyword_data):
"""Ensure rows that already have an 'item' column keep nested dicts intact."""
Expand All @@ -464,7 +580,7 @@ def test_data_source_with_item_sample_column_and_nested_values(self, nested_item
# Ensure we did not accidentally nest another 'item' key inside the wrapper
assert "item" not in item_payload
assert item_payload["sample"]["output_text"] == "someoutput"
assert item_payload["sample"]["output_items"] == "['item1', 'item2']"
assert item_payload["sample"]["output_items"] == ["item1", "item2"]

def test_data_source_with_sample_output_metadata(self, flat_sample_output_data):
"""Ensure flat rows that include dotted sample metadata remain accessible."""
Expand All @@ -485,7 +601,7 @@ def test_data_source_with_sample_output_metadata(self, flat_sample_output_data):
assert row["test"]["test_string"] == "baking cakes is fun!"
# sample.output_text should follow the row through normalization without being stringified
assert row["sample.output_text"] == "someoutput"
assert row["sample.output_items"] == "['item1', 'item2']"
assert row["sample.output_items"] == ["item1", "item2"]

def test_data_source_with_numeric_values(self, flat_test_data):
"""Test data source generation converts numeric values to strings."""
Expand All @@ -504,6 +620,35 @@ def test_data_source_with_numeric_values(self, flat_test_data):
assert isinstance(content[0][WRAPPER_KEY]["score"], str)
assert isinstance(content[0][WRAPPER_KEY]["confidence"], str)

def test_data_source_with_list_and_dict_values(self, flat_test_data):
    """Lists and dicts must pass through to the data source without stringification."""
    flat_test_data["tags"] = [["tag1", "tag2"], ["tag3"], []]
    flat_test_data["metadata"] = [{"key": "val"}, {"key2": "val2"}, {}]

    mapping = {
        "query": "${data.query}",
        "tags": "${data.tags}",
        "metadata": "${data.metadata}",
    }
    content = _get_data_source(flat_test_data, mapping)["source"]["content"]

    first_row = content[0][WRAPPER_KEY]
    last_row = content[2][WRAPPER_KEY]

    # Populated and empty lists both survive as real list objects.
    assert first_row["tags"] == ["tag1", "tag2"]
    assert isinstance(first_row["tags"], list)
    assert last_row["tags"] == []
    assert isinstance(last_row["tags"], list)

    # Populated and empty dicts both survive as real dict objects.
    assert first_row["metadata"] == {"key": "val"}
    assert isinstance(first_row["metadata"], dict)
    assert last_row["metadata"] == {}
    assert isinstance(last_row["metadata"], dict)

def test_empty_dataframe(self):
"""Test data source generation with empty dataframe."""
empty_df = pd.DataFrame()
Expand Down Expand Up @@ -600,3 +745,33 @@ def test_nested_schema_and_data_alignment(self, nested_test_data):
assert "query" in item
assert "context" in item
assert "company" in item["context"]

def test_flat_schema_and_data_alignment_with_list_and_dict(self, flat_test_data):
    """Declared schema types must agree with the actual payload types per column."""
    flat_test_data["tags"] = [["tag1", "tag2"], ["tag3"], []]
    flat_test_data["metadata"] = [{"key": "val"}, {"key2": "val2"}, {}]

    mapping = {
        "query": "${data.query}",
        "tags": "${data.tags}",
        "metadata": "${data.metadata}",
    }
    schema_props = _generate_data_source_config(flat_test_data, mapping)["item_schema"]["properties"]
    content = _get_data_source(flat_test_data, mapping)["source"]["content"]

    first_item = content[0][WRAPPER_KEY]

    # "array" in the schema pairs with a list in the payload.
    assert schema_props["tags"]["type"] == "array"
    assert isinstance(first_item["tags"], list)

    # "object" in the schema pairs with a dict in the payload.
    assert schema_props["metadata"]["type"] == "object"
    assert isinstance(first_item["metadata"], dict)

    # Rows holding empty collections keep the same container types.
    empty_item = content[2][WRAPPER_KEY]
    assert isinstance(empty_item["tags"], list)
    assert isinstance(empty_item["metadata"], dict)
Loading