Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 13 additions & 7 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,27 @@ on:
- hobj/**
# Changes to workflows
- .github/workflows/ci.yml
# Changes to pyproject.toml
# Changes to project/dependency metadata
- 'pyproject.toml'
- 'uv.lock'
jobs:
unit_tests:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.12"]
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.12'
cache: 'pip'
cache-dependency-path: setup.py # See https://github.com/actions/setup-python/blob/main/docs/advanced-usage.md#caching-packages
- name: Install hobj
run: pip3 install -e .
python-version: ${{ matrix.python-version }}
- name: Set up uv
uses: astral-sh/setup-uv@v5
with:
enable-cache: true
- name: Install dependencies
run: uv sync --locked --dev
- name: Run pytests
run: pytest -s
run: uv run pytest -s
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
__pycache__/
*.egg-info/
dist/
/data/
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,6 @@ check:
uv run ty check && \
uv run ruff check && \
uv run ruff format --check

test:
uv run pytest tests
1 change: 0 additions & 1 deletion examples/dev.py

This file was deleted.

5 changes: 3 additions & 2 deletions hobj/benchmarks/binary_classification/benchmark.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple, Union

import numpy as np
import pydantic
import xarray as xr
from dataclasses import dataclass
from tqdm import tqdm
from typing import List, Dict, Union, Tuple, Optional

from hobj.benchmarks.binary_classification.estimator import LearningCurveStatistics
from hobj.benchmarks.binary_classification.simulation import BinaryClassificationSubtask, BinaryClassificationSubtaskResult
Expand Down
7 changes: 4 additions & 3 deletions hobj/benchmarks/binary_classification/estimator.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from dataclasses import dataclass
from typing import Dict, List, Tuple

import numpy as np
import xarray as xr
from dataclasses import dataclass
from typing import List, Dict, Tuple

from hobj.benchmarks.binary_classification.simulation import BinaryClassificationSubtaskResult
from hobj.stats import binomial as binomial_funcs
Expand Down Expand Up @@ -193,4 +194,4 @@ def _get_bootstrap_resamples_by_session(
return LearningCurveStatistics.BootstrapSamples(
boot_k=boot_k,
boot_n=boot_n,
)
)
12 changes: 6 additions & 6 deletions hobj/benchmarks/binary_classification/simulation.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from typing import List, Optional, Union

import numpy as np
import pydantic
from typing import List, Union, Optional

from hobj.types import ImageId
from hobj.learning_models import BinaryLearningModel
from mref import ImageRef


# %%
Expand All @@ -26,14 +26,14 @@ class BinaryClassificationSubtask(pydantic.BaseModel):
frozen=True
)

classA: List[ImageRef]
classB: List[ImageRef]
classA: List[ImageId]
classB: List[ImageId]
ntrials: int = pydantic.Field(description='The number of trials in the subtask.', gt=0)
replace: bool = pydantic.Field(description='Whether to show stimulus images with replacement or not.')

@pydantic.field_validator('classA', 'classB', mode='after')
@classmethod
def sort_image_refs(cls, value: List[ImageRef]) -> List[ImageRef]:
def sort_image_refs(cls, value: List[ImageId]) -> List[ImageId]:
return sorted(value)

@pydantic.model_validator(mode='after')
Expand Down
7 changes: 4 additions & 3 deletions hobj/benchmarks/generalization/benchmark.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from dataclasses import dataclass
from typing import List, Tuple

import numpy as np
import pydantic
import xarray as xr
from dataclasses import dataclass
from tqdm import tqdm
from typing import List, Tuple

from hobj.benchmarks.generalization.estimator import GeneralizationStatistics
from hobj.benchmarks.generalization.simulator import GeneralizationSubtask, GeneralizationSessionResult
from hobj.benchmarks.generalization.simulator import GeneralizationSessionResult, GeneralizationSubtask
from hobj.learning_models import BinaryLearningModel
from hobj.stats.ci import estimate_basic_bootstrap_CI

Expand Down
5 changes: 3 additions & 2 deletions hobj/benchmarks/generalization/estimator.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import numpy as np
import warnings
import xarray as xr
from typing import List

import numpy as np
import xarray as xr

from hobj.benchmarks.generalization.simulator import GeneralizationSessionResult
from hobj.stats import binomial as binomial_funcs

Expand Down
19 changes: 9 additions & 10 deletions hobj/benchmarks/generalization/simulator.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
from typing import List, Union, Dict, Optional
import collections
from typing import Dict, List, Optional, Union

import numpy as np
import pydantic

from mref import ImageRef
from hobj.learning_models import BinaryLearningModel
import collections

from hobj.types import ImageId

# %%
class GeneralizationSessionResult(pydantic.BaseModel):
Expand Down Expand Up @@ -37,15 +36,15 @@ class GeneralizationSubtask(pydantic.BaseModel):
frozen=True
)

support_imageA: ImageRef
support_imageB: ImageRef
test_imagesA: List[ImageRef]
test_imagesB: List[ImageRef]
image_ref_to_transformation: Dict[ImageRef, str]
support_imageA: ImageId
support_imageB: ImageId
test_imagesA: List[ImageId]
test_imagesB: List[ImageId]
image_ref_to_transformation: Dict[ImageId, str]

@pydantic.field_validator('test_imagesA', 'test_imagesB', mode='after')
@classmethod
def sort_image_refs(cls, value: List[ImageRef]) -> List[ImageRef]:
def sort_image_refs(cls, value: List[ImageId]) -> List[ImageId]:
return sorted(value)

@pydantic.model_validator(mode='after')
Expand Down
32 changes: 22 additions & 10 deletions hobj/benchmarks/make_model.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,30 @@
"""
This module provides an alternative interface for instantiating a linear learning model.
"""
from hobj.learning_models.linear import LinearLearner, RepresentationalModel
import hobj.learning_models.linear.update_rules as update_rules
from typing import Literal, Dict
import mref
from functools import lru_cache
from typing import Literal

import numpy as np
from typing import List

import hobj.learning_models.update_rules as update_rules
from hobj.learning_models import LinearLearner, RepresentationalModel
from hobj.types import ImageId


# %%
@lru_cache(maxsize=1)
def _get_calibration_image_ids() -> list[ImageId]:
"""
Returns the ImageIds of the warmup images that are used for calibrating the features of the linear learner.
Caches the result to avoid redundant computation.
"""
raise NotImplementedError


# %%
def make_linear_learner_from_features(
ref_to_features: Dict[mref.ImageRef, np.ndarray],
calibration_images: List[mref.ImageRef],
features: np.ndarray,
image_ids: list[ImageId],
update_rule_name: Literal[
'Prototype',
'Square',
Expand All @@ -28,13 +40,13 @@ def make_linear_learner_from_features(
"""
Instantiates a linear learning model from precomputed features.
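:param features: np.ndarray, the precomputed features; features[i] is the feature vector for image_ids[i].
:param image_ids: list[ImageId], the image ids corresponding, in order, to the entries of features.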
:param calibration_images: List[mref.ImageRef], the images that will be used to calibrate the features (i.e. for mean centering and ensuring they fit within a unit ball).
:param update_rule_name: str, the name of the update rule to use.
:param alpha: float, the learning rate.
:return: LinearLearner
"""

f_calibration = np.array([ref_to_features[ref] for ref in calibration_images])
ref_to_features = {ref: features[i] for i, ref in enumerate(image_ids)}
f_calibration = np.array([ref_to_features[ref] for ref in _get_calibration_image_ids()])
mu_calibration = np.mean(f_calibration, axis=0)
norms_calibration = np.linalg.norm(f_calibration - mu_calibration, axis=1)
norm_cutoff = np.quantile(norms_calibration, 0.999) # Will clip the rest
Expand All @@ -55,4 +67,4 @@ def make_linear_learner_from_features(
image_ref_to_features=ref_to_calibrated_features
),
update_rule=update_rule_name(alpha=alpha)
)
)
15 changes: 8 additions & 7 deletions hobj/benchmarks/mut_highvar_benchmark.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from typing import Dict, List

import numpy as np
from typing import List, Dict

from hobj.benchmarks.binary_classification.benchmark import LearningCurveBenchmark, LearningCurveBenchmarkConfig, TargetSubtaskData
from hobj.benchmarks.binary_classification.simulation import BinaryClassificationSubtask, BinaryClassificationSubtaskResult
from hobj.data.behavior import load_highvar_behavior
from hobj.data.images import MutatorHighVarImageset
from hobj.data_loaders.behavior import load_highvar_behavior
from hobj.data_loaders.images import MutatorHighVarImageset


# %%
Expand All @@ -21,7 +22,7 @@ def __init__(self):

# Normalize data for benchmark:
sha256_to_category = {
ref.sha256: imageset.get_annotation(image_ref=ref).category for ref in imageset.image_refs
ref.sha256: imageset.get_annotation(image_id=ref).category for ref in imageset.image_ids
}

subtask_name_to_results = {}
Expand All @@ -46,8 +47,8 @@ def __init__(self):
# Instantiate the subtask if it does not exist:
if subtask_name not in subtask_name_to_subtask:
subtask = BinaryClassificationSubtask(
classA=imageset.category_to_image_refs[cat0],
classB=imageset.category_to_image_refs[cat1],
classA=imageset.category_to_image_ids[cat0],
classB=imageset.category_to_image_ids[cat1],
ntrials=100,
replace=False,
)
Expand Down Expand Up @@ -87,4 +88,4 @@ def __init__(self):

if __name__ == '__main__':
experiment = MutatorHighVarBenchmark()
print(sorted(experiment.config.subtask_name_to_data.keys()))
print(sorted(experiment.config.subtask_name_to_data.keys()))
28 changes: 14 additions & 14 deletions hobj/benchmarks/mut_oneshot_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@
# Coercing human data
from typing import Dict, List

from hobj.benchmarks.generalization.benchmark import GeneralizationBenchmarkConfig, GeneralizationBenchmark, GeneralizationSessionResult
from hobj.benchmarks.generalization.benchmark import GeneralizationBenchmark, GeneralizationBenchmarkConfig, GeneralizationSessionResult
from hobj.benchmarks.generalization.estimator import GeneralizationStatistics
from hobj.benchmarks.generalization.simulator import GeneralizationSubtask
from hobj.data.behavior import load_oneshot_behavior
from hobj.data.images import MutatorOneShotImageset
from mref import ImageRef
from hobj.data_loaders.behavior import load_oneshot_behavior
from hobj.data_loaders.images import MutatorOneShotImageset

from hobj.types import ImageId

# %%

Expand Down Expand Up @@ -96,24 +96,24 @@ def __init__(self):

# Map image refs to transformation ids
image_ref_to_transformation_id = {}
cat_to_support_image: Dict[str, ImageRef] = {}
cat_to_test_images: Dict[str, List[ImageRef]] = {}
cat_to_support_image: Dict[str, ImageId] = {}
cat_to_test_images: Dict[str, List[ImageId]] = {}

for ref in imageset.image_refs:
annotation = imageset.get_annotation(image_ref=ref)
for image_id in imageset.image_ids:
annotation = imageset.get_annotation(image_id=image_id)
transformation_id = f"{annotation.transformation} | {annotation.transformation_level}"
image_ref_to_transformation_id[ref] = transformation_id
image_ref_to_transformation_id[image_id] = transformation_id

if annotation.transformation == 'original':
if annotation.category not in cat_to_support_image:
cat_to_support_image[annotation.category] = ref
cat_to_support_image[annotation.category] = image_id
else:
raise ValueError(f"Multiple support images for category {annotation.category}")
else:
if annotation.category not in cat_to_test_images:
cat_to_test_images[annotation.category] = []

cat_to_test_images[annotation.category].append(ref)
cat_to_test_images[annotation.category].append(image_id)

# Assemble subtask simulators
subtasks = []
Expand Down Expand Up @@ -148,8 +148,8 @@ def __init__(self):
observed_categories = set()

for i_trial, sha in enumerate(session.stimulus_sha256_seq):
ref = ImageRef(sha256=sha)
annotation = imageset.get_annotation(image_ref=ref)
image_id = sha
annotation = imageset.get_annotation(image_id=image_id)

# Add stimulus category to observed categories
observed_categories.add(annotation.category)
Expand All @@ -166,7 +166,7 @@ def __init__(self):
ncatch += 1
else:
assert annotation.transformation != 'original'
transformation_id = image_ref_to_transformation_id[ref]
transformation_id = image_ref_to_transformation_id[image_id]

# Keep only benchmarked transformations
if transformation_id in self.transformation_ids:
Expand Down
4 changes: 0 additions & 4 deletions hobj/data/images/__init__.py

This file was deleted.

Empty file.
Loading
Loading