Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ on:
paths:
# Changes to source code
- hobj/**
- test/**.py
# Changes to workflows
- .github/workflows/ci.yml
# Changes to project/dependency metadata
Expand Down
690 changes: 690 additions & 0 deletions examples/documentation.ipynb

Large diffs are not rendered by default.

66 changes: 36 additions & 30 deletions examples/score_example_models.ipynb

Large diffs are not rendered by default.

42 changes: 24 additions & 18 deletions examples/view_experiment1_behavior.ipynb

Large diffs are not rendered by default.

29 changes: 16 additions & 13 deletions examples/view_experiment2_behavior.ipynb

Large diffs are not rendered by default.

30 changes: 28 additions & 2 deletions hobj/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,28 @@
import hobj.benchmarks as benchmarks
import hobj.learning_models as learning_models
from hobj.benchmarks.mut_oneshot_benchmark import MutatorOneshotBenchmark
from hobj.benchmarks.mut_highvar_benchmark import MutatorHighVarBenchmark

from hobj.data_loaders.behavior import load_highvar_behavior, load_oneshot_behavior

from hobj.data_loaders.images import (
load_image,
load_imageset_meta_highvar,
load_imageset_meta_oneshot,
load_imageset_meta_warmup,
load_imageset_meta_catch,
)

__all__ = [
# Raw behavior loaders
"load_highvar_behavior",
"load_oneshot_behavior",
# Image meta loaders:
"load_imageset_meta_highvar",
"load_imageset_meta_oneshot",
"load_imageset_meta_warmup",
"load_imageset_meta_catch",
# Image loader:
"load_image",
# Benchmarks:
"MutatorHighVarBenchmark",
"MutatorOneshotBenchmark",
]
2 changes: 0 additions & 2 deletions hobj/benchmarks/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +0,0 @@
from hobj.benchmarks.mut_highvar_benchmark import MutatorHighVarBenchmark
from hobj.benchmarks.mut_oneshot_benchmark import MutatorOneshotBenchmark
122 changes: 73 additions & 49 deletions hobj/benchmarks/binary_classification/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,71 +7,84 @@
from tqdm import tqdm

from hobj.benchmarks.binary_classification.estimator import LearningCurveStatistics
from hobj.benchmarks.binary_classification.simulation import BinaryClassificationSubtask, BinaryClassificationSubtaskResult
from hobj.benchmarks.binary_classification.simulation import (
BinaryClassificationSubtask,
BinaryClassificationSubtaskResult,
)
from hobj.learning_models import BinaryLearningModel
from hobj.stats.ci import estimate_basic_bootstrap_CI


# %% Models for configuring a LearningCurveBenchmark:
class TargetSubtaskData(pydantic.BaseModel):
    """Pair a subtask with the target results recorded on it.

    Validation rejects any result whose ``perf_seq`` length differs from
    ``subtask.ntrials``.
    """

    # The subtask which generated the associated results
    subtask: BinaryClassificationSubtask
    # [session, trial] boolean matrix of performance
    results: List[BinaryClassificationSubtaskResult]

    # Allow field types that pydantic has no built-in schema for.
    model_config = dict(arbitrary_types_allowed=True)

    @pydantic.model_validator(mode="after")
    def validate_results(self) -> "TargetSubtaskData":
        """Check that every result has exactly ``subtask.ntrials`` trials.

        :return: the validated instance, unchanged.
        :raises ValueError: if any result's ``perf_seq`` has a different length.
        """
        expected_ntrials = self.subtask.ntrials
        for result in self.results:
            observed_ntrials = len(result.perf_seq)
            if observed_ntrials != expected_ntrials:
                # Report the observed *count*; the message previously embedded
                # the whole performance sequence where a number was intended.
                raise ValueError(
                    f"Expected {expected_ntrials} trials, but got {observed_ntrials} trials"
                )

        return self


class LearningCurveBenchmarkConfig(pydantic.BaseModel):
    """Configuration for a ``LearningCurveBenchmark``.

    All configured subtasks must share one trial count. ``ntrials`` may be
    omitted, in which case it is filled in from the subtasks; if given, it
    must equal the subtasks' trial count.
    """

    subtask_name_to_data: Dict[str, "TargetSubtaskData"] = pydantic.Field(
        default_factory=dict,
        description="A dictionary of subtask_name -> TargetSubtaskConfig",
    )
    # Both counts are constrained to be at least 2 by the field definitions.
    num_simulations_per_subtask: int = pydantic.Field(ge=2)
    num_bootstrap_samples: int = pydantic.Field(ge=2)
    # NOTE(review): presumably selects resampling over workers when
    # bootstrapping; the consumer of this flag is not visible here.
    bootstrap_by_worker: bool
    ntrials: Optional[int] = pydantic.Field(default=None)

    @pydantic.model_validator(mode="after")
    def ensure_rectangular(self) -> "LearningCurveBenchmarkConfig":
        """Verify a common trial count across subtasks and reconcile ``ntrials``.

        :return: the validated instance, with ``ntrials`` populated.
        :raises ValueError: if the subtasks do not share exactly one trial
            count (this includes an empty ``subtask_name_to_data``), or if an
            explicitly supplied ``ntrials`` disagrees with that count.
        """
        ntrials_observed = {
            data.subtask.ntrials for data in self.subtask_name_to_data.values()
        }

        if len(ntrials_observed) != 1:
            raise ValueError(
                f"Expected all subtasks to have the same number of trials, but got {ntrials_observed}"
            )

        # Extract the single value once. Popping it in the comparison and
        # again in the error message would hit an empty set and raise
        # KeyError instead of the intended ValueError.
        (expected_ntrials,) = ntrials_observed

        if self.ntrials is None:
            self.ntrials = expected_ntrials
        elif self.ntrials != expected_ntrials:
            raise ValueError(
                f"Expected ntrials to be {expected_ntrials}, but got {self.ntrials}"
            )
        return self


# %%
class LearningCurveBenchmark:

def __init__(
self,
config: LearningCurveBenchmarkConfig,
self,
config: LearningCurveBenchmarkConfig,
):
self.config = config

# Attach properties
self.subtask_names = sorted(config.subtask_name_to_data.keys())
self.subtask_name_to_subtask: Dict[str, BinaryClassificationSubtask] = {
name: config.subtask_name_to_data[name].subtask for name in self.subtask_names
name: config.subtask_name_to_data[name].subtask
for name in self.subtask_names
}

self.subtask_name_to_results: Dict[str, List[BinaryClassificationSubtaskResult]] = {}
self.subtask_name_to_results: Dict[
str, List[BinaryClassificationSubtaskResult]
] = {}
self._target_data = {}

for name in self.subtask_names:
Expand All @@ -82,8 +95,12 @@ def __init__(
for result in results:
worker_id = result.worker_id
if worker_id in self._target_data[name]:
raise ValueError(f"Worker {worker_id} has already been seen for subtask {name}")
self._target_data[name][result.worker_id] = list([bool(v) for v in result.perf_seq])
raise ValueError(
f"Worker {worker_id} has already been seen for subtask {name}"
)
self._target_data[name][result.worker_id] = list(
[bool(v) for v in result.perf_seq]
)

self._target_statistics = LearningCurveStatistics(
subtask_name_to_results=self.subtask_name_to_results,
Expand All @@ -95,7 +112,6 @@ def __init__(
def target_data(self) -> Dict[str, Dict[str, List[bool]]]:
return self._target_data


@property
def target_statistics(self) -> LearningCurveStatistics:
"""
Expand All @@ -115,18 +131,20 @@ class LearningCurveBenchmarkResult:
model_statistics: LearningCurveStatistics

def __call__(
self,
learner: BinaryLearningModel,
show_pbar: bool = False
self, learner: BinaryLearningModel, show_pbar: bool = False
) -> LearningCurveBenchmarkResult:
"""
:param learner: LearningModel
:return:
"""

# Get model learning curve statistics:
subtask_name_to_model_results: Dict[str, List[BinaryClassificationSubtaskResult]] = {}
for i_subtask, subtask_name in enumerate(tqdm(self.subtask_names, desc='Subtask simulations:', disable=not show_pbar)):
subtask_name_to_model_results: Dict[
str, List[BinaryClassificationSubtaskResult]
] = {}
for i_subtask, subtask_name in enumerate(
tqdm(self.subtask_names, desc="Subtask simulations:", disable=not show_pbar)
):
# Get [simulation, trial] boolean performance matrix for the model
subtask_results = self.simulate_model_behavior(
subtask=self.subtask_name_to_subtask[subtask_name],
Expand All @@ -149,7 +167,7 @@ def __call__(
model_varhat_phat=model_statistics.varhat_phat,
target_phat=self.target_statistics.phat,
target_varhat_phat=self.target_statistics.varhat_phat,
condition_dims=('subtask', 'trial'),
condition_dims=("subtask", "trial"),
fit_lapse_rate=True,
)

Expand All @@ -159,7 +177,7 @@ def __call__(
model_varhat_phat=model_statistics.boot_varhat_phat,
target_phat=self.target_statistics.boot_phat,
target_varhat_phat=self.target_statistics.boot_varhat_phat,
condition_dims=('subtask', 'trial'),
condition_dims=("subtask", "trial"),
fit_lapse_rate=True,
)

Expand All @@ -182,9 +200,9 @@ def __call__(

@staticmethod
def simulate_model_behavior(
subtask: BinaryClassificationSubtask,
learner: BinaryLearningModel,
nsimulations: int,
subtask: BinaryClassificationSubtask,
learner: BinaryLearningModel,
nsimulations: int,
) -> List[BinaryClassificationSubtaskResult]:
"""
Returns a [nsimulations, ntrials] matrix of model performance on the subtask.
Expand Down Expand Up @@ -213,34 +231,33 @@ def simulate_model_behavior(

@classmethod
def _compare_learning_curves(
    cls,
    model_phat: xr.DataArray,
    model_varhat_phat: xr.DataArray,
    target_phat: xr.DataArray,
    target_varhat_phat: xr.DataArray,
    condition_dims: Tuple[str, ...],
    fit_lapse_rate: bool,
) -> Tuple[Union[np.ndarray, np.generic], Union[xr.DataArray, None]]:
    """Score the discrepancy between model and target learning curves.

    The score is the mean squared difference between the two ``phat``
    arrays over ``condition_dims``, minus the mean of each variance
    estimate over the same dims.

    :param model_phat: model performance estimates.
    :param model_varhat_phat: variance estimates for ``model_phat``.
    :param target_phat: target performance estimates.
    :param target_varhat_phat: variance estimates for ``target_phat``.
    :param condition_dims: dimension names to average over.
    :param fit_lapse_rate: if True, fit a lapse rate via
        ``cls._fit_lapse_rate`` and apply it to the model estimates first.
    :return: ``(msen, lapse_rate)``; ``lapse_rate`` is ``None`` when
        ``fit_lapse_rate`` is False.
    """
    lapse_rate = None
    if fit_lapse_rate:
        lapse_rate = cls._fit_lapse_rate(
            pmodel=model_phat,
            ptarget=target_phat,
            condition_dims=condition_dims,
        )
        # Mix the model curve with 0.5 at the fitted lapse rate; the
        # variance estimate is scaled by the squared mixing weight.
        keep_weight = 1 - lapse_rate
        model_phat = model_phat * keep_weight + 0.5 * lapse_rate
        model_varhat_phat = model_varhat_phat * keep_weight ** 2

    mean_squared_diff = np.square(model_phat - target_phat).mean(condition_dims)
    mean_model_var = model_varhat_phat.mean(condition_dims)
    mean_target_var = target_varhat_phat.mean(condition_dims)

    msen = mean_squared_diff - mean_model_var - mean_target_var
    return msen, lapse_rate

@staticmethod
def _fit_lapse_rate(
pmodel: xr.DataArray,
ptarget: xr.DataArray,
condition_dims: Tuple[str, ...]
pmodel: xr.DataArray, ptarget: xr.DataArray, condition_dims: Tuple[str, ...]
) -> Union[np.ndarray, np.generic]:
"""
Fits a "lapse rate" parameter (gamma), which takes on values between [0, 1]. It may be interpreted
Expand All @@ -257,8 +274,15 @@ def _fit_lapse_rate(
"""

nway = 2
numerator = -(2 * pmodel / nway - 2 * np.square(pmodel) + 2 * pmodel * ptarget - 2 * ptarget / nway).sum(dim=condition_dims)
denominator = (2 / (nway ** 2) - 4 * pmodel / nway + 2 * (pmodel ** 2)).sum(dim=condition_dims)
numerator = -(
2 * pmodel / nway
- 2 * np.square(pmodel)
+ 2 * pmodel * ptarget
- 2 * ptarget / nway
).sum(dim=condition_dims)
denominator = (2 / (nway**2) - 4 * pmodel / nway + 2 * (pmodel**2)).sum(
dim=condition_dims
)
gamma_star = numerator / denominator
gamma_star = np.clip(gamma_star, 0, 1)
return gamma_star
Loading
Loading