Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/source/workflows/code_examples.rst
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ This pattern is also extremely useful for performing optimization over complex o
}
)

physical_properties_predictor = AutoMLModel(
physical_properties_predictor = AutoMLPredictor(
name = 'physical properties model',
inputs = [
wheat_flour_quantity,
Expand Down
8 changes: 4 additions & 4 deletions docs/source/workflows/predictors.rst
Original file line number Diff line number Diff line change
Expand Up @@ -251,16 +251,16 @@ The following example demonstrates how to use a :class:`~citrine.informatics.pre
ml_predictor = AutoMLPredictor(
name='ML Model for Density',
description='Predict the density, given molecular features of the solvent',
inputs = features,
output = [output_desc]
inputs=features,
outputs=[output_desc]
)

# use a graph predictor to wrap together the featurizer and the machine learning model
graph_predictor = GraphPredictor(
name='Density from solvent molecular structure',
description='Predict the density from the solvent molecular structure using molecular structure features.',
predictors = [featurizer, ml_predictor],
training_data = [GemTableDataSource(table_id=training_data_table_uid, table_version=training_data_table_version)] # training data shared by all sub-predictors
predictors=[featurizer, ml_predictor],
training_data=[GemTableDataSource(table_id=training_data_table_uid, table_version=training_data_table_version)] # training data shared by all sub-predictors
)

# register or update predictor by name
Expand Down
85 changes: 48 additions & 37 deletions src/citrine/informatics/predictor_evaluator.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
from citrine._serialization import properties
from citrine._serialization.polymorphic_serializable import PolymorphicSerializable
from citrine._serialization.serializable import Serializable
from citrine.informatics.predictor_evaluation_metrics import PredictorEvaluationMetric
from citrine.informatics.data_sources import DataSource
from citrine.informatics.predictor_evaluation_metrics import PredictorEvaluationMetric

__all__ = ['PredictorEvaluator',
'CrossValidationEvaluator',
'HoldoutSetEvaluator'
]
__all__ = ["PredictorEvaluator", "CrossValidationEvaluator", "HoldoutSetEvaluator"]


class PredictorEvaluator(PolymorphicSerializable["PredictorEvaluator"]):
Expand All @@ -18,21 +15,23 @@ def get_type(cls, data) -> type[Serializable]:
"""Return the subtype."""
return {
"CrossValidationEvaluator": CrossValidationEvaluator,
"HoldoutSetEvaluator": HoldoutSetEvaluator
"HoldoutSetEvaluator": HoldoutSetEvaluator,
}[data["type"]]

def __eq__(self, other):
if isinstance(other, Serializable):
self_dict = self.dump()
other_dict = other.dump()

self_dict['responses'] = set(self_dict.get('responses', []))
self_dict['metrics'] = frozenset(
frozenset((k, v) for k, v in dct.items()) for dct in self_dict.get('metrics', [])
self_dict["responses"] = set(self_dict.get("responses", []))
self_dict["metrics"] = frozenset(
frozenset((k, v) for k, v in dct.items())
for dct in self_dict.get("metrics", [])
)
other_dict['responses'] = set(other_dict.get('responses', []))
other_dict['metrics'] = frozenset(
frozenset((k, v) for k, v in dct.items()) for dct in other_dict.get('metrics', [])
other_dict["responses"] = set(other_dict.get("responses", []))
other_dict["metrics"] = frozenset(
frozenset((k, v) for k, v in dct.items())
for dct in other_dict.get("metrics", [])
)

return self_dict == other_dict
Expand All @@ -55,13 +54,15 @@ def name(self) -> str:

A name is required by all evaluators because it is used as the top-level key
in the results returned by a
:class:`citrine.informatics.workflows.PredictorEvaluationWorkflow`.
:class:`citrine.informatics.executions.predictor_evaluation.PredictorEvaluation`.
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This corrected class reference is the actual issue in this file.

As such, the names of all evaluators within a single workflow must be unique.
"""
raise NotImplementedError # pragma: no cover


class CrossValidationEvaluator(Serializable["CrossValidationEvaluator"], PredictorEvaluator):
class CrossValidationEvaluator(
Serializable["CrossValidationEvaluator"], PredictorEvaluator
):
Comment on lines +63 to +65
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like Claude is breaking lines at 80 characters rather than at our configured limit of 99.

"""Evaluate a predictor via cross validation.

Performs cross-validation on requested predictor responses and computes the requested metrics
Expand Down Expand Up @@ -103,21 +104,27 @@ class CrossValidationEvaluator(Serializable["CrossValidationEvaluator"], Predict
_responses = properties.Set(properties.String, "responses")
n_folds = properties.Integer("n_folds")
n_trials = properties.Integer("n_trials")
_metrics = properties.Optional(properties.Set(properties.Object(PredictorEvaluationMetric)),
"metrics")
ignore_when_grouping = properties.Optional(properties.Set(properties.String),
"ignore_when_grouping")
typ = properties.String("type", default="CrossValidationEvaluator", deserializable=False)

def __init__(self,
name: str,
*,
description: str = "",
responses: set[str],
n_folds: int = 5,
n_trials: int = 3,
metrics: set[PredictorEvaluationMetric] | None = None,
ignore_when_grouping: set[str] | None = None):
_metrics = properties.Optional(
properties.Set(properties.Object(PredictorEvaluationMetric)), "metrics"
)
ignore_when_grouping = properties.Optional(
properties.Set(properties.String), "ignore_when_grouping"
)
typ = properties.String(
"type", default="CrossValidationEvaluator", deserializable=False
)

def __init__(
self,
name: str,
*,
description: str = "",
responses: set[str],
n_folds: int = 5,
n_trials: int = 3,
metrics: set[PredictorEvaluationMetric] | None = None,
ignore_when_grouping: set[str] | None = None,
):
self.name: str = name
self.description: str = description
self._responses: set[str] = responses
Expand Down Expand Up @@ -161,16 +168,20 @@ class HoldoutSetEvaluator(Serializable["HoldoutSetEvaluator"], PredictorEvaluato
description = properties.String("description")
_responses = properties.Set(properties.String, "responses")
data_source = properties.Object(DataSource, "data_source")
_metrics = properties.Optional(properties.Set(properties.Object(PredictorEvaluationMetric)),
"metrics")
_metrics = properties.Optional(
properties.Set(properties.Object(PredictorEvaluationMetric)), "metrics"
)
typ = properties.String("type", default="HoldoutSetEvaluator", deserializable=False)

def __init__(self,
name: str, *,
description: str = "",
responses: set[str],
data_source: DataSource,
metrics: set[PredictorEvaluationMetric] | None = None):
def __init__(
self,
name: str,
*,
description: str = "",
responses: set[str],
data_source: DataSource,
metrics: set[PredictorEvaluationMetric] | None = None,
):
self.name: str = name
self.description: str = description
self._responses: set[str] = responses
Expand Down
Loading