From 1f8f9c1c4725d983400c9c91318c9feb81981c3b Mon Sep 17 00:00:00 2001 From: himjl Date: Tue, 24 Mar 2026 17:44:08 -0400 Subject: [PATCH 01/12] fix docstring --- hobj/stats/ci.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hobj/stats/ci.py b/hobj/stats/ci.py index 9a713d4..7ce343c 100644 --- a/hobj/stats/ci.py +++ b/hobj/stats/ci.py @@ -11,7 +11,7 @@ def estimate_basic_bootstrap_CI( Estimates the basic confidence interval for a given point estimate(s) using the bootstrap method. :param alpha: Sets the width of the confidence interval to be 1 - alpha. Must be in the range (0, 1). :param point_estimate: The point estimate(s) for which the confidence interval is to be estimated. - :param bootstrapped_point_estimate: Bootstrap resamples of the point estimate in question. + :param bootstrapped_point_estimates: Bootstrap resamples of the point estimate in question. :return: A tuple containing the lower and upper bounds of the confidence interval. """ From 45c89f14f9b943c4bc921d43f4afb978a9ceb150 Mon Sep 17 00:00:00 2001 From: himjl Date: Tue, 24 Mar 2026 17:48:04 -0400 Subject: [PATCH 02/12] test --- Makefile | 3 +++ hobj/benchmarks/mut_highvar_benchmark.py | 4 ++-- hobj/benchmarks/mut_oneshot_benchmark.py | 2 +- hobj/data/images/__init__.py | 4 ---- hobj/images/__init__.py | 4 ++++ hobj/{data => }/images/imagesets/__init__.py | 0 hobj/{data => }/images/imagesets/highvar.py | 2 +- hobj/{data => }/images/imagesets/oneshot.py | 4 ++-- hobj/{data => }/images/imagesets/probe.py | 2 +- hobj/{data => }/images/imagesets/warmup.py | 2 +- hobj/{data => }/images/template.py | 0 hobj/utils/hash.py | 2 +- site/readme_images/run_make_images.py | 2 +- {hobj/learning_models/tests => tests}/test_dummy_learner.py | 0 14 files changed, 17 insertions(+), 14 deletions(-) delete mode 100644 hobj/data/images/__init__.py create mode 100644 hobj/images/__init__.py rename hobj/{data => }/images/imagesets/__init__.py (100%) rename hobj/{data => }/images/imagesets/highvar.py (97%) rename hobj/{data => }/images/imagesets/oneshot.py (95%) rename hobj/{data => }/images/imagesets/probe.py (92%) rename hobj/{data => }/images/imagesets/warmup.py (93%) rename hobj/{data => }/images/template.py (100%) rename {hobj/learning_models/tests => tests}/test_dummy_learner.py (100%) diff --git a/Makefile b/Makefile index c596605..bf3fd22 100644 --- a/Makefile +++ b/Makefile @@ -6,3 +6,6 @@ check: uv run ty check && \ uv run ruff check && \ uv run ruff format --check + +test: + uv run pytest tests diff --git a/hobj/benchmarks/mut_highvar_benchmark.py b/hobj/benchmarks/mut_highvar_benchmark.py index 2d76f7e..92a544a 100644 --- a/hobj/benchmarks/mut_highvar_benchmark.py +++ b/hobj/benchmarks/mut_highvar_benchmark.py @@ -4,7 +4,7 @@ from hobj.benchmarks.binary_classification.benchmark import LearningCurveBenchmark, LearningCurveBenchmarkConfig, TargetSubtaskData from hobj.benchmarks.binary_classification.simulation import BinaryClassificationSubtask, BinaryClassificationSubtaskResult from hobj.data.behavior import load_highvar_behavior -from hobj.data.images import MutatorHighVarImageset +from hobj.images import MutatorHighVarImageset # %% @@ -87,4 +87,4 @@ def __init__(self): if __name__ == '__main__': experiment = MutatorHighVarBenchmark() - print(sorted(experiment.config.subtask_name_to_data.keys())) \ No newline at end of file + print(sorted(experiment.config.subtask_name_to_data.keys())) diff --git a/hobj/benchmarks/mut_oneshot_benchmark.py b/hobj/benchmarks/mut_oneshot_benchmark.py index 226b1de..a5ad68f 100644 --- a/hobj/benchmarks/mut_oneshot_benchmark.py +++ b/hobj/benchmarks/mut_oneshot_benchmark.py @@ -6,7 +6,7 @@ from hobj.benchmarks.generalization.estimator import GeneralizationStatistics from hobj.benchmarks.generalization.simulator import GeneralizationSubtask from hobj.data.behavior import load_oneshot_behavior -from hobj.data.images import MutatorOneShotImageset +from hobj.images import MutatorOneShotImageset from mref import ImageRef diff --git a/hobj/data/images/__init__.py b/hobj/data/images/__init__.py deleted file mode 100644 index 0fc51d1..0000000 --- a/hobj/data/images/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from hobj.data.images.imagesets.highvar import MutatorHighVarImageset -from hobj.data.images.imagesets.oneshot import MutatorOneShotImageset -from hobj.data.images.imagesets.warmup import MutatorWarmupImageset -from hobj.data.images.imagesets.probe import ProbeImageset \ No newline at end of file diff --git a/hobj/images/__init__.py b/hobj/images/__init__.py new file mode 100644 index 0000000..471101d --- /dev/null +++ b/hobj/images/__init__.py @@ -0,0 +1,4 @@ +from hobj.images.imagesets.highvar import MutatorHighVarImageset +from hobj.images.imagesets.oneshot import MutatorOneShotImageset +from hobj.images.imagesets.warmup import MutatorWarmupImageset +from hobj.images.imagesets.probe import ProbeImageset diff --git a/hobj/data/images/imagesets/__init__.py b/hobj/images/imagesets/__init__.py similarity index 100% rename from hobj/data/images/imagesets/__init__.py rename to hobj/images/imagesets/__init__.py diff --git a/hobj/data/images/imagesets/highvar.py b/hobj/images/imagesets/highvar.py similarity index 97% rename from hobj/data/images/imagesets/highvar.py rename to hobj/images/imagesets/highvar.py index f39778c..c87a4df 100644 --- a/hobj/data/images/imagesets/highvar.py +++ b/hobj/images/imagesets/highvar.py @@ -1,5 +1,5 @@ import mref.media_references -from hobj.data.images.template import Imageset +from hobj.images.template import Imageset import pydantic from typing import Dict, List diff --git a/hobj/data/images/imagesets/oneshot.py b/hobj/images/imagesets/oneshot.py similarity index 95% rename from hobj/data/images/imagesets/oneshot.py rename to hobj/images/imagesets/oneshot.py index 80d1646..e334b1d 100644 --- a/hobj/data/images/imagesets/oneshot.py +++ b/hobj/images/imagesets/oneshot.py @@ -3,7 +3,7 @@ import pydantic -from hobj.data.images.template import Imageset +from hobj.images.template import Imageset class MutatorOneShotAnnotation(pydantic.BaseModel): @@ -50,4 +50,4 @@ class MutatorOneShotImageset(Imageset[MutatorOneShotAnnotation]): if __name__ == '__main__': - imageset = MutatorOneShotImageset() \ No newline at end of file + imageset = MutatorOneShotImageset() diff --git a/hobj/data/images/imagesets/probe.py b/hobj/images/imagesets/probe.py similarity index 92% rename from hobj/data/images/imagesets/probe.py rename to hobj/images/imagesets/probe.py index d9a7a0f..594dc9f 100644 --- a/hobj/data/images/imagesets/probe.py +++ b/hobj/images/imagesets/probe.py @@ -1,5 +1,5 @@ -from hobj.data.images.template import Imageset +from hobj.images.template import Imageset import pydantic from typing import Literal diff --git a/hobj/data/images/imagesets/warmup.py b/hobj/images/imagesets/warmup.py similarity index 93% rename from hobj/data/images/imagesets/warmup.py rename to hobj/images/imagesets/warmup.py index 1a53dc8..73808bb 100644 --- a/hobj/data/images/imagesets/warmup.py +++ b/hobj/images/imagesets/warmup.py @@ -1,6 +1,6 @@ import pydantic -from hobj.data.images.template import Imageset +from hobj.images.template import Imageset diff --git a/hobj/data/images/template.py b/hobj/images/template.py similarity index 100% rename from hobj/data/images/template.py rename to hobj/images/template.py diff --git a/hobj/utils/hash.py b/hobj/utils/hash.py index 7431058..6c6b0fc 100644 --- a/hobj/utils/hash.py +++ b/hobj/utils/hash.py @@ -3,7 +3,7 @@ import PIL.Image -def hash_image(image: PIL.Image) -> str: +def hash_image(image: PIL.Image.Image) -> str: """ Hash an image based on its np.uint8 representation. :param image: diff --git a/site/readme_images/run_make_images.py b/site/readme_images/run_make_images.py index b55fb68..f084ed8 100644 --- a/site/readme_images/run_make_images.py +++ b/site/readme_images/run_make_images.py @@ -2,7 +2,7 @@ # %% import matplotlib.pyplot as plt - from hobj.data.images import MutatorHighVarImageset + from hobj.images import MutatorHighVarImageset from hobj.benchmarks import MutatorHighVarBenchmark import numpy as np diff --git a/hobj/learning_models/tests/test_dummy_learner.py b/tests/test_dummy_learner.py similarity index 100% rename from hobj/learning_models/tests/test_dummy_learner.py rename to tests/test_dummy_learner.py From 1dd8c0e059c264660e2d2ac5f6d7294d516a550d Mon Sep 17 00:00:00 2001 From: himjl Date: Tue, 24 Mar 2026 17:50:43 -0400 Subject: [PATCH 03/12] move --- hobj/images/__init__.py | 8 ++++---- hobj/images/{imagesets => }/highvar.py | 0 hobj/images/imagesets/__init__.py | 0 hobj/images/{imagesets => }/oneshot.py | 0 hobj/images/{imagesets => }/probe.py | 0 hobj/images/{imagesets => }/warmup.py | 0 site/readme_images/run_make_images.py | 2 -- 7 files changed, 4 insertions(+), 6 deletions(-) rename hobj/images/{imagesets => }/highvar.py (100%) delete mode 100644 hobj/images/imagesets/__init__.py rename hobj/images/{imagesets => }/oneshot.py (100%) rename hobj/images/{imagesets => }/probe.py (100%) rename hobj/images/{imagesets => }/warmup.py (100%) diff --git a/hobj/images/__init__.py b/hobj/images/__init__.py index 471101d..bd9a855 100644 --- a/hobj/images/__init__.py +++ b/hobj/images/__init__.py @@ -1,4 +1,4 @@ -from hobj.images.imagesets.highvar import MutatorHighVarImageset -from hobj.images.imagesets.oneshot import MutatorOneShotImageset -from hobj.images.imagesets.warmup import MutatorWarmupImageset -from hobj.images.imagesets.probe import ProbeImageset +from hobj.images.highvar import MutatorHighVarImageset +from hobj.images.oneshot import MutatorOneShotImageset +from hobj.images.warmup import MutatorWarmupImageset +from hobj.images.probe import ProbeImageset diff --git a/hobj/images/imagesets/highvar.py b/hobj/images/highvar.py similarity index 100% rename from hobj/images/imagesets/highvar.py rename to hobj/images/highvar.py diff --git a/hobj/images/imagesets/__init__.py b/hobj/images/imagesets/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/hobj/images/imagesets/oneshot.py b/hobj/images/oneshot.py similarity index 100% rename from hobj/images/imagesets/oneshot.py rename to hobj/images/oneshot.py diff --git a/hobj/images/imagesets/probe.py b/hobj/images/probe.py similarity index 100% rename from hobj/images/imagesets/probe.py rename to hobj/images/probe.py diff --git a/hobj/images/imagesets/warmup.py b/hobj/images/warmup.py similarity index 100% rename from hobj/images/imagesets/warmup.py rename to hobj/images/warmup.py diff --git a/site/readme_images/run_make_images.py b/site/readme_images/run_make_images.py index f084ed8..c7ec90c 100644 --- a/site/readme_images/run_make_images.py +++ b/site/readme_images/run_make_images.py @@ -2,11 +2,9 @@ # %% import matplotlib.pyplot as plt - from hobj.images import MutatorHighVarImageset from hobj.benchmarks import MutatorHighVarBenchmark import numpy as np - imageset = MutatorHighVarImageset() benchmark = MutatorHighVarBenchmark() target_stats = benchmark.target_statistics From 893e7b614eed4daf40c2afc00604b86f61289058 Mon Sep 17 00:00:00 2001 From: himjl Date: Tue, 24 Mar 2026 17:59:55 -0400 Subject: [PATCH 04/12] rename to loaders --- hobj/benchmarks/mut_highvar_benchmark.py | 2 +- hobj/benchmarks/mut_oneshot_benchmark.py | 2 +- hobj/images/template.py | 2 +- hobj/{data => loaders}/__init__.py | 0 hobj/{data => loaders}/behavior.py | 2 +- hobj/{data => loaders}/store.py | 0 {hobj/data/tests => tests}/test_load_behavior.py | 2 +- 7 files changed, 5 insertions(+), 5 deletions(-) rename hobj/{data => loaders}/__init__.py (100%) rename hobj/{data => loaders}/behavior.py (98%) rename hobj/{data => loaders}/store.py (100%) rename {hobj/data/tests => tests}/test_load_behavior.py (71%) diff --git a/hobj/benchmarks/mut_highvar_benchmark.py b/hobj/benchmarks/mut_highvar_benchmark.py index 92a544a..385b5b4 100644 --- a/hobj/benchmarks/mut_highvar_benchmark.py +++ b/hobj/benchmarks/mut_highvar_benchmark.py @@ -3,7 +3,7 @@ from hobj.benchmarks.binary_classification.benchmark import LearningCurveBenchmark, LearningCurveBenchmarkConfig, TargetSubtaskData from hobj.benchmarks.binary_classification.simulation import BinaryClassificationSubtask, BinaryClassificationSubtaskResult -from hobj.data.behavior import load_highvar_behavior +from hobj.loaders.behavior import load_highvar_behavior from hobj.images import MutatorHighVarImageset diff --git a/hobj/benchmarks/mut_oneshot_benchmark.py b/hobj/benchmarks/mut_oneshot_benchmark.py index a5ad68f..211b609 100644 --- a/hobj/benchmarks/mut_oneshot_benchmark.py +++ b/hobj/benchmarks/mut_oneshot_benchmark.py @@ -5,7 +5,7 @@ from hobj.benchmarks.generalization.benchmark import GeneralizationBenchmarkConfig, GeneralizationBenchmark, GeneralizationSessionResult from hobj.benchmarks.generalization.estimator import GeneralizationStatistics from hobj.benchmarks.generalization.simulator import GeneralizationSubtask -from hobj.data.behavior import load_oneshot_behavior +from hobj.loaders.behavior import load_oneshot_behavior from hobj.images import MutatorOneShotImageset from mref import ImageRef diff --git a/hobj/images/template.py b/hobj/images/template.py index f8f93d7..4fd26e5 100644 --- a/hobj/images/template.py +++ b/hobj/images/template.py @@ -9,7 +9,7 @@ from hobj.utils.file_io import unzip_file from mref import ImageRef -from hobj.data.store import default_data_store +from hobj.loaders.store import default_data_store from mref import FileSystemStorage import warnings diff --git a/hobj/data/__init__.py b/hobj/loaders/__init__.py similarity index 100% rename from hobj/data/__init__.py rename to hobj/loaders/__init__.py diff --git a/hobj/data/behavior.py b/hobj/loaders/behavior.py similarity index 98% rename from hobj/data/behavior.py rename to hobj/loaders/behavior.py index d44ade8..615f642 100644 --- a/hobj/data/behavior.py +++ b/hobj/loaders/behavior.py @@ -3,7 +3,7 @@ import pydantic -from hobj.data.store import default_data_store +from hobj.loaders.store import default_data_store __all__ = ['load_highvar_behavior', 'load_oneshot_behavior'] diff --git a/hobj/data/store.py b/hobj/loaders/store.py similarity index 100% rename from hobj/data/store.py rename to hobj/loaders/store.py diff --git a/hobj/data/tests/test_load_behavior.py b/tests/test_load_behavior.py similarity index 71% rename from hobj/data/tests/test_load_behavior.py rename to tests/test_load_behavior.py index c76d948..e7a4741 100644 --- a/hobj/data/tests/test_load_behavior.py +++ b/tests/test_load_behavior.py @@ -1,4 +1,4 @@ -from hobj.data.behavior import load_highvar_behavior, load_oneshot_behavior +from hobj.loaders.behavior import load_highvar_behavior, load_oneshot_behavior def test_load_highvar(): From 65a8a5148b2ff07490953596a2421e4a6986e5fa Mon Sep 17 00:00:00 2001 From: himjl Date: Tue, 24 Mar 2026 18:00:41 -0400 Subject: [PATCH 05/12] dir refactor --- hobj/benchmarks/mut_highvar_benchmark.py | 4 ++-- hobj/benchmarks/mut_oneshot_benchmark.py | 4 ++-- hobj/{loaders => data_loaders}/__init__.py | 0 hobj/{loaders => data_loaders}/behavior.py | 2 +- hobj/data_loaders/images/__init__.py | 4 ++++ hobj/{ => data_loaders}/images/highvar.py | 2 +- hobj/{ => data_loaders}/images/oneshot.py | 2 +- hobj/{ => data_loaders}/images/probe.py | 2 +- hobj/{ => data_loaders}/images/template.py | 2 +- hobj/{ => data_loaders}/images/warmup.py | 2 +- hobj/{loaders => data_loaders}/store.py | 0 hobj/images/__init__.py | 4 ---- tests/test_load_behavior.py | 2 +- 13 files changed, 15 insertions(+), 15 deletions(-) rename hobj/{loaders => data_loaders}/__init__.py (100%) rename hobj/{loaders => data_loaders}/behavior.py (98%) create mode 100644 hobj/data_loaders/images/__init__.py rename hobj/{ => data_loaders}/images/highvar.py (96%) rename hobj/{ => data_loaders}/images/oneshot.py (96%) rename hobj/{ => data_loaders}/images/probe.py (91%) rename hobj/{ => data_loaders}/images/template.py (98%) rename hobj/{ => data_loaders}/images/warmup.py (92%) rename hobj/{loaders => data_loaders}/store.py (100%) delete mode 100644 hobj/images/__init__.py diff --git a/hobj/benchmarks/mut_highvar_benchmark.py b/hobj/benchmarks/mut_highvar_benchmark.py index 385b5b4..91f5409 100644 --- a/hobj/benchmarks/mut_highvar_benchmark.py +++ b/hobj/benchmarks/mut_highvar_benchmark.py @@ -3,8 +3,8 @@ from hobj.benchmarks.binary_classification.benchmark import LearningCurveBenchmark, LearningCurveBenchmarkConfig, TargetSubtaskData from hobj.benchmarks.binary_classification.simulation import BinaryClassificationSubtask, BinaryClassificationSubtaskResult -from hobj.loaders.behavior import load_highvar_behavior -from hobj.images import MutatorHighVarImageset +from hobj.data_loaders.behavior import load_highvar_behavior +from hobj.data_loaders.images import MutatorHighVarImageset # %% diff --git a/hobj/benchmarks/mut_oneshot_benchmark.py b/hobj/benchmarks/mut_oneshot_benchmark.py index 211b609..3a0c0aa 100644 --- a/hobj/benchmarks/mut_oneshot_benchmark.py +++ b/hobj/benchmarks/mut_oneshot_benchmark.py @@ -5,8 +5,8 @@ from hobj.benchmarks.generalization.benchmark import GeneralizationBenchmarkConfig, GeneralizationBenchmark, GeneralizationSessionResult from hobj.benchmarks.generalization.estimator import GeneralizationStatistics from hobj.benchmarks.generalization.simulator import GeneralizationSubtask -from hobj.loaders.behavior import load_oneshot_behavior -from hobj.images import MutatorOneShotImageset +from hobj.data_loaders.behavior import load_oneshot_behavior +from hobj.data_loaders.images import MutatorOneShotImageset from mref import ImageRef diff --git a/hobj/loaders/__init__.py b/hobj/data_loaders/__init__.py similarity index 100% rename from hobj/loaders/__init__.py rename to hobj/data_loaders/__init__.py diff --git a/hobj/loaders/behavior.py b/hobj/data_loaders/behavior.py similarity index 98% rename from hobj/loaders/behavior.py rename to hobj/data_loaders/behavior.py index 615f642..fc36b81 100644 --- a/hobj/loaders/behavior.py +++ b/hobj/data_loaders/behavior.py @@ -3,7 +3,7 @@ import pydantic -from hobj.loaders.store import default_data_store +from hobj.data_loaders.store import default_data_store __all__ = ['load_highvar_behavior', 'load_oneshot_behavior'] diff --git a/hobj/data_loaders/images/__init__.py b/hobj/data_loaders/images/__init__.py new file mode 100644 index 0000000..3c15cf9 --- /dev/null +++ b/hobj/data_loaders/images/__init__.py @@ -0,0 +1,4 @@ +from hobj.data_loaders.images.highvar import MutatorHighVarImageset +from hobj.data_loaders.images.oneshot import MutatorOneShotImageset +from hobj.data_loaders.images.warmup import MutatorWarmupImageset +from hobj.data_loaders.images.probe import ProbeImageset diff --git a/hobj/images/highvar.py b/hobj/data_loaders/images/highvar.py similarity index 96% rename from hobj/images/highvar.py rename to hobj/data_loaders/images/highvar.py index c87a4df..42c5530 100644 --- a/hobj/images/highvar.py +++ b/hobj/data_loaders/images/highvar.py @@ -1,5 +1,5 @@ import mref.media_references -from hobj.images.template import Imageset +from hobj.data_loaders.images.template import Imageset import pydantic from typing import Dict, List diff --git a/hobj/images/oneshot.py b/hobj/data_loaders/images/oneshot.py similarity index 96% rename from hobj/images/oneshot.py rename to hobj/data_loaders/images/oneshot.py index e334b1d..ae253cf 100644 --- a/hobj/images/oneshot.py +++ b/hobj/data_loaders/images/oneshot.py @@ -3,7 +3,7 @@ import pydantic -from hobj.images.template import Imageset +from hobj.data_loaders.images.template import Imageset class MutatorOneShotAnnotation(pydantic.BaseModel): diff --git a/hobj/images/probe.py b/hobj/data_loaders/images/probe.py similarity index 91% rename from hobj/images/probe.py rename to hobj/data_loaders/images/probe.py index 594dc9f..7945099 100644 --- a/hobj/images/probe.py +++ b/hobj/data_loaders/images/probe.py @@ -1,5 +1,5 @@ -from hobj.images.template import Imageset +from hobj.data_loaders.images.template import Imageset import pydantic from typing import Literal diff --git a/hobj/images/template.py b/hobj/data_loaders/images/template.py similarity index 98% rename from hobj/images/template.py rename to hobj/data_loaders/images/template.py index 4fd26e5..91695d2 100644 --- a/hobj/images/template.py +++ b/hobj/data_loaders/images/template.py @@ -9,7 +9,7 @@ from hobj.utils.file_io import unzip_file from mref import ImageRef -from hobj.loaders.store import default_data_store +from hobj.data_loaders.store import default_data_store from mref import FileSystemStorage import warnings diff --git a/hobj/images/warmup.py b/hobj/data_loaders/images/warmup.py similarity index 92% rename from hobj/images/warmup.py rename to hobj/data_loaders/images/warmup.py index 73808bb..ec5a60c 100644 --- a/hobj/images/warmup.py +++ b/hobj/data_loaders/images/warmup.py @@ -1,6 +1,6 @@ import pydantic -from hobj.images.template import Imageset +from hobj.data_loaders.images.template import Imageset diff --git a/hobj/loaders/store.py b/hobj/data_loaders/store.py similarity index 100% rename from hobj/loaders/store.py rename to hobj/data_loaders/store.py diff --git a/hobj/images/__init__.py b/hobj/images/__init__.py deleted file mode 100644 index bd9a855..0000000 --- a/hobj/images/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from hobj.images.highvar import MutatorHighVarImageset -from hobj.images.oneshot import MutatorOneShotImageset -from hobj.images.warmup import MutatorWarmupImageset -from hobj.images.probe import ProbeImageset diff --git a/tests/test_load_behavior.py b/tests/test_load_behavior.py index e7a4741..83b6c8d 100644 --- a/tests/test_load_behavior.py +++ b/tests/test_load_behavior.py @@ -1,4 +1,4 @@ -from hobj.loaders.behavior import load_highvar_behavior, load_oneshot_behavior +from hobj.data_loaders.behavior import load_highvar_behavior, load_oneshot_behavior def test_load_highvar(): From bffd0d26a988bed3e6880f82b6b43cecbde87773 Mon Sep 17 00:00:00 2001 From: himjl Date: Tue, 24 Mar 2026 18:19:48 -0400 Subject: [PATCH 06/12] remove mref from learning models --- hobj/learning_models/__init__.py | 7 ++-- hobj/learning_models/linear/__init__.py | 5 +-- hobj/learning_models/linear/representation.py | 23 ++++++------- hobj/types.py | 1 + pyproject.toml | 1 + uv.lock | 33 +++++++++++++++++++ 6 files changed, 51 insertions(+), 19 deletions(-) create mode 100644 hobj/types.py diff --git a/hobj/learning_models/__init__.py b/hobj/learning_models/__init__.py index 97e597c..cc2a510 100644 --- a/hobj/learning_models/__init__.py +++ b/hobj/learning_models/__init__.py @@ -1,10 +1,9 @@ import typing from abc import ABC, abstractmethod -import PIL.Image import numpy as np -import mref.media_references +from hobj.types import ImageId # %% @@ -22,7 +21,7 @@ def reset_state(self, seed: typing.Union[int, None]) -> None: @abstractmethod def get_response( self, - image: typing.Union[mref.media_references.ImageRef, PIL.Image], + image: ImageId, ) -> typing.Literal[0, 1]: """ This function takes the current stimulus image (given either as a PIL.Image or a ImageRef) and returns one of two possible actions (parameterized by an integer). @@ -54,7 +53,7 @@ def reset_state(self, seed: typing.Union[int, None]) -> None: def get_response( self, - image: typing.Union[mref.media_references.ImageRef, PIL.Image], + image: ImageId, ) -> typing.Literal[0, 1]: action = self.random_generator.integers(2) action = int(action) diff --git a/hobj/learning_models/linear/__init__.py b/hobj/learning_models/linear/__init__.py index 77b934d..cc133b7 100644 --- a/hobj/learning_models/linear/__init__.py +++ b/hobj/learning_models/linear/__init__.py @@ -6,8 +6,9 @@ from hobj.learning_models import BinaryLearningModel from hobj.learning_models.linear.representation import RepresentationalModel from hobj.learning_models.linear.update_rules import UpdateRule +from hobj.types import ImageId - +# %% class LinearLearner(BinaryLearningModel): def __init__( self, @@ -47,7 +48,7 @@ def reset_state(self, seed: int) -> None: def get_response( self, - image: typing.Union[mref.media_references.ImageRef, PIL.Image] + image: ImageId, ) -> typing.Literal[0, 1]: f = self.representational_model.get_features(image=image) diff --git a/hobj/learning_models/linear/representation.py b/hobj/learning_models/linear/representation.py index 846870d..da28183 100644 --- a/hobj/learning_models/linear/representation.py +++ b/hobj/learning_models/linear/representation.py @@ -1,10 +1,11 @@ +from typing import Callable, Dict + import numpy as np -from typing import Union, Dict, Callable -import PIL.Image +from hobj.types import ImageId -from mref import ImageRef +# %% class RepresentationalModel: """ @@ -14,7 +15,7 @@ class RepresentationalModel: def __init__( self, d: int, - image_to_features_func: Callable[[Union[ImageRef, PIL.Image]], np.ndarray], + image_to_features_func: Callable[[ImageId], np.ndarray], ): if not isinstance(d, int): @@ -32,7 +33,7 @@ def d(self) -> int: def get_features( self, - image: Union[ImageRef, PIL.Image] + image: ImageId ) -> np.ndarray: """ Returns a feature vector for the image_url. @@ -52,7 +53,7 @@ def get_features( @classmethod def from_precomputed_features( cls, - image_ref_to_features: Dict[ImageRef, np.ndarray] + image_ref_to_features: Dict[ImageId, np.ndarray] ) -> 'RepresentationalModel': """ @@ -61,12 +62,8 @@ def from_precomputed_features( If get_features is called with an ImageRef (or PIL.Image with an ImageRef) not in image_ref_to_features, a KeyError will be raised. """ - def image_to_features_func(image: Union[ImageRef, PIL.Image]) -> np.ndarray: - if isinstance(image, PIL.Image.Image): - ref = ImageRef.from_image(image) - else: - ref = image - return image_ref_to_features[ref] + def image_to_features_func(image: ImageId) -> np.ndarray: + return image_ref_to_features[image] # Ensure all feature vectors are the same shape d = None @@ -83,4 +80,4 @@ def image_to_features_func(image: Union[ImageRef, PIL.Image]) -> np.ndarray: if not f.shape[0] == d: raise ValueError(f"Expected feature vector to be of shape ({d},), but got {f.shape}") - return cls(d=d, image_to_features_func=image_to_features_func) \ No newline at end of file + return cls(d=d, image_to_features_func=image_to_features_func) diff --git a/hobj/types.py b/hobj/types.py new file mode 100644 index 0000000..0f0952f --- /dev/null +++ b/hobj/types.py @@ -0,0 +1 @@ +type ImageId = str diff --git a/pyproject.toml b/pyproject.toml index 4104969..526a8b8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,7 @@ dependencies = [ "tqdm>=4.67", "pydantic>=2.10", "xarray>=2025.1", + "scipy>=1.17.1", ] [dependency-groups] diff --git a/uv.lock b/uv.lock index f65af68..8d370d2 100644 --- a/uv.lock +++ b/uv.lock @@ -157,6 +157,7 @@ dependencies = [ { name = "pandas" }, { name = "pydantic" }, { name = "requests" }, + { name = "scipy" }, { name = "tqdm" }, { name = "xarray" }, ] @@ -173,6 +174,7 @@ requires-dist = [ { name = "pandas", specifier = ">=2.2" }, { name = "pydantic", specifier = ">=2.10" }, { name = "requests", specifier = ">=2.32" }, + { name = "scipy", specifier = ">=1.17.1" }, { name = "tqdm", specifier = ">=4.67" }, { name = "xarray", specifier = ">=2025.1" }, ] @@ -528,6 +530,37 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1e/db/4254e3eabe8020b458f1a747140d32277ec7a271daf1d235b70dc0b4e6e3/requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6", size = 64738, upload-time = "2025-08-18T20:46:00.542Z" }, ] +[[package]] +name = "scipy" +version = "1.17.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7a/97/5a3609c4f8d58b039179648e62dd220f89864f56f7357f5d4f45c29eb2cc/scipy-1.17.1.tar.gz", hash = "sha256:95d8e012d8cb8816c226aef832200b1d45109ed4464303e997c5b13122b297c0", size = 30573822, upload-time = "2026-02-23T00:26:24.851Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/df/75/b4ce781849931fef6fd529afa6b63711d5a733065722d0c3e2724af9e40a/scipy-1.17.1-cp311-cp311-macosx_10_14_x86_64.whl", hash = "sha256:1f95b894f13729334fb990162e911c9e5dc1ab390c58aa6cbecb389c5b5e28ec", size = 31613675, upload-time = "2026-02-23T00:16:00.13Z" }, + { url = "https://files.pythonhosted.org/packages/f7/58/bccc2861b305abdd1b8663d6130c0b3d7cc22e8d86663edbc8401bfd40d4/scipy-1.17.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:e18f12c6b0bc5a592ed23d3f7b891f68fd7f8241d69b7883769eb5d5dfb52696", size = 28162057, upload-time = "2026-02-23T00:16:09.456Z" }, + { url = "https://files.pythonhosted.org/packages/6d/ee/18146b7757ed4976276b9c9819108adbc73c5aad636e5353e20746b73069/scipy-1.17.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:a3472cfbca0a54177d0faa68f697d8ba4c80bbdc19908c3465556d9f7efce9ee", size = 20334032, upload-time = "2026-02-23T00:16:17.358Z" }, + { url = "https://files.pythonhosted.org/packages/ec/e6/cef1cf3557f0c54954198554a10016b6a03b2ec9e22a4e1df734936bd99c/scipy-1.17.1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:766e0dc5a616d026a3a1cffa379af959671729083882f50307e18175797b3dfd", size = 22709533, upload-time = "2026-02-23T00:16:25.791Z" }, + { url = "https://files.pythonhosted.org/packages/4d/60/8804678875fc59362b0fb759ab3ecce1f09c10a735680318ac30da8cd76b/scipy-1.17.1-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:744b2bf3640d907b79f3fd7874efe432d1cf171ee721243e350f55234b4cec4c", size = 33062057, upload-time = "2026-02-23T00:16:36.931Z" }, + { url = "https://files.pythonhosted.org/packages/09/7d/af933f0f6e0767995b4e2d705a0665e454d1c19402aa7e895de3951ebb04/scipy-1.17.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43af8d1f3bea642559019edfe64e9b11192a8978efbd1539d7bc2aaa23d92de4", size = 35349300, upload-time = "2026-02-23T00:16:49.108Z" }, + { url = "https://files.pythonhosted.org/packages/b4/3d/7ccbbdcbb54c8fdc20d3b6930137c782a163fa626f0aef920349873421ba/scipy-1.17.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cd96a1898c0a47be4520327e01f874acfd61fb48a9420f8aa9f6483412ffa444", size = 35127333, upload-time = "2026-02-23T00:17:01.293Z" }, + { url = "https://files.pythonhosted.org/packages/e8/19/f926cb11c42b15ba08e3a71e376d816ac08614f769b4f47e06c3580c836a/scipy-1.17.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4eb6c25dd62ee8d5edf68a8e1c171dd71c292fdae95d8aeb3dd7d7de4c364082", size = 37741314, upload-time = "2026-02-23T00:17:12.576Z" }, + { url = "https://files.pythonhosted.org/packages/95/da/0d1df507cf574b3f224ccc3d45244c9a1d732c81dcb26b1e8a766ae271a8/scipy-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:d30e57c72013c2a4fe441c2fcb8e77b14e152ad48b5464858e07e2ad9fbfceff", size = 36607512, upload-time = "2026-02-23T00:17:23.424Z" }, + { url = "https://files.pythonhosted.org/packages/68/7f/bdd79ceaad24b671543ffe0ef61ed8e659440eb683b66f033454dcee90eb/scipy-1.17.1-cp311-cp311-win_arm64.whl", hash = "sha256:9ecb4efb1cd6e8c4afea0daa91a87fbddbce1b99d2895d151596716c0b2e859d", size = 24599248, upload-time = "2026-02-23T00:17:34.561Z" }, + { url = "https://files.pythonhosted.org/packages/35/48/b992b488d6f299dbe3f11a20b24d3dda3d46f1a635ede1c46b5b17a7b163/scipy-1.17.1-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:35c3a56d2ef83efc372eaec584314bd0ef2e2f0d2adb21c55e6ad5b344c0dcb8", size = 31610954, upload-time = "2026-02-23T00:17:49.855Z" }, + { url = "https://files.pythonhosted.org/packages/b2/02/cf107b01494c19dc100f1d0b7ac3cc08666e96ba2d64db7626066cee895e/scipy-1.17.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:fcb310ddb270a06114bb64bbe53c94926b943f5b7f0842194d585c65eb4edd76", size = 28172662, upload-time = "2026-02-23T00:18:01.64Z" }, + { url = "https://files.pythonhosted.org/packages/cf/a9/599c28631bad314d219cf9ffd40e985b24d603fc8a2f4ccc5ae8419a535b/scipy-1.17.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:cc90d2e9c7e5c7f1a482c9875007c095c3194b1cfedca3c2f3291cdc2bc7c086", size = 20344366, upload-time = "2026-02-23T00:18:12.015Z" }, + { url = "https://files.pythonhosted.org/packages/35/f5/906eda513271c8deb5af284e5ef0206d17a96239af79f9fa0aebfe0e36b4/scipy-1.17.1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:c80be5ede8f3f8eded4eff73cc99a25c388ce98e555b17d31da05287015ffa5b", size = 22704017, upload-time = "2026-02-23T00:18:21.502Z" }, + { url = "https://files.pythonhosted.org/packages/da/34/16f10e3042d2f1d6b66e0428308ab52224b6a23049cb2f5c1756f713815f/scipy-1.17.1-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e19ebea31758fac5893a2ac360fedd00116cbb7628e650842a6691ba7ca28a21", size = 32927842, upload-time = "2026-02-23T00:18:35.367Z" }, + { url = "https://files.pythonhosted.org/packages/01/8e/1e35281b8ab6d5d72ebe9911edcdffa3f36b04ed9d51dec6dd140396e220/scipy-1.17.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:02ae3b274fde71c5e92ac4d54bc06c42d80e399fec704383dcd99b301df37458", size = 35235890, upload-time = "2026-02-23T00:18:49.188Z" }, + { url = "https://files.pythonhosted.org/packages/c5/5c/9d7f4c88bea6e0d5a4f1bc0506a53a00e9fcb198de372bfe4d3652cef482/scipy-1.17.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8a604bae87c6195d8b1045eddece0514d041604b14f2727bbc2b3020172045eb", size = 35003557, upload-time = "2026-02-23T00:18:54.74Z" }, + { url = "https://files.pythonhosted.org/packages/65/94/7698add8f276dbab7a9de9fb6b0e02fc13ee61d51c7c3f85ac28b65e1239/scipy-1.17.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f590cd684941912d10becc07325a3eeb77886fe981415660d9265c4c418d0bea", size = 37625856, upload-time = "2026-02-23T00:19:00.307Z" }, + { url = "https://files.pythonhosted.org/packages/a2/84/dc08d77fbf3d87d3ee27f6a0c6dcce1de5829a64f2eae85a0ecc1f0daa73/scipy-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:41b71f4a3a4cab9d366cd9065b288efc4d4f3c0b37a91a8e0947fb5bd7f31d87", size = 36549682, upload-time = "2026-02-23T00:19:07.67Z" }, + { url = "https://files.pythonhosted.org/packages/bc/98/fe9ae9ffb3b54b62559f52dedaebe204b408db8109a8c66fdd04869e6424/scipy-1.17.1-cp312-cp312-win_arm64.whl", hash = "sha256:f4115102802df98b2b0db3cce5cb9b92572633a1197c77b7553e5203f284a5b3", size = 24547340, upload-time = "2026-02-23T00:19:12.024Z" }, +] + [[package]] name = "six" version = "1.17.0" From cee7311275c6c06d65b43699d34e4f2d51ab2ef7 Mon Sep 17 00:00:00 2001 From: himjl Date: Tue, 24 Mar 2026 18:20:30 -0400 Subject: [PATCH 07/12] optimize imports --- examples/dev.py | 1 - hobj/benchmarks/binary_classification/benchmark.py | 5 +++-- hobj/benchmarks/binary_classification/estimator.py | 7 ++++--- hobj/benchmarks/binary_classification/simulation.py | 5 +++-- .../tests/test_simulate_subtask.py | 10 ++++++---- hobj/benchmarks/generalization/benchmark.py | 7 ++++--- hobj/benchmarks/generalization/estimator.py | 5 +++-- hobj/benchmarks/generalization/simulator.py | 6 +++--- hobj/benchmarks/make_model.py | 11 ++++++----- hobj/benchmarks/mut_highvar_benchmark.py | 3 ++- hobj/benchmarks/mut_oneshot_benchmark.py | 5 +++-- hobj/data_loaders/behavior.py | 2 +- hobj/data_loaders/images/__init__.py | 2 +- hobj/data_loaders/images/highvar.py | 6 +++--- hobj/data_loaders/images/probe.py | 6 ++++-- hobj/data_loaders/images/template.py | 9 ++++----- hobj/data_loaders/images/warmup.py | 1 - hobj/data_loaders/store.py | 4 ++-- hobj/learning_models/linear/__init__.py | 6 +++--- hobj/learning_models/linear/update_rules.py | 5 +++-- hobj/stats/ci.py | 1 + hobj/utils/hash.py | 3 ++- 22 files changed, 61 insertions(+), 49 deletions(-) delete mode 100644 examples/dev.py diff --git a/examples/dev.py b/examples/dev.py deleted file mode 100644 index 29661e1..0000000 --- a/examples/dev.py +++ /dev/null @@ -1 +0,0 @@ -import hobj diff --git a/hobj/benchmarks/binary_classification/benchmark.py b/hobj/benchmarks/binary_classification/benchmark.py index 8f5b3f6..90820a6 100644 --- a/hobj/benchmarks/binary_classification/benchmark.py +++ b/hobj/benchmarks/binary_classification/benchmark.py @@ -1,9 +1,10 @@ +from dataclasses import dataclass +from typing import Dict, List, Optional, Tuple, Union + import numpy as np import pydantic import xarray as xr -from dataclasses import dataclass from tqdm import tqdm -from typing import List, Dict, Union, Tuple, Optional from hobj.benchmarks.binary_classification.estimator import LearningCurveStatistics from hobj.benchmarks.binary_classification.simulation import BinaryClassificationSubtask, BinaryClassificationSubtaskResult diff --git a/hobj/benchmarks/binary_classification/estimator.py b/hobj/benchmarks/binary_classification/estimator.py index 370f74c..0b80cb7 100644 --- a/hobj/benchmarks/binary_classification/estimator.py +++ b/hobj/benchmarks/binary_classification/estimator.py @@ -1,7 +1,8 @@ +from dataclasses import dataclass +from typing import Dict, List, Tuple + import numpy as np import xarray as xr -from dataclasses import dataclass -from typing import List, Dict, Tuple from hobj.benchmarks.binary_classification.simulation import BinaryClassificationSubtaskResult from hobj.stats import binomial as binomial_funcs @@ -193,4 +194,4 @@ def _get_bootstrap_resamples_by_session( return LearningCurveStatistics.BootstrapSamples( boot_k=boot_k, boot_n=boot_n, - ) \ No newline at end of file + ) diff --git a/hobj/benchmarks/binary_classification/simulation.py b/hobj/benchmarks/binary_classification/simulation.py index 6e364f4..fe5833b 100644 --- a/hobj/benchmarks/binary_classification/simulation.py +++ b/hobj/benchmarks/binary_classification/simulation.py @@ -1,9 +1,10 @@ +from typing import List, Optional, Union + import numpy as np import pydantic -from typing import List, Union, Optional +from mref import ImageRef from hobj.learning_models import BinaryLearningModel -from mref import ImageRef # %% diff --git a/hobj/benchmarks/binary_classification/tests/test_simulate_subtask.py b/hobj/benchmarks/binary_classification/tests/test_simulate_subtask.py index 3503df8..a6c0415 100644 --- a/hobj/benchmarks/binary_classification/tests/test_simulate_subtask.py +++ b/hobj/benchmarks/binary_classification/tests/test_simulate_subtask.py @@ -1,9 +1,11 @@ -from hobj.benchmarks.binary_classification.simulation import BinaryClassificationSubtask -from hobj.learning_models import RandomGuesser -from mref import ImageRef from typing import List + import PIL.Image import numpy as np +from mref import ImageRef + +from hobj.benchmarks.binary_classification.simulation import BinaryClassificationSubtask +from hobj.learning_models import RandomGuesser def create_image_refs(nimages_per_class: int, seed: int) -> List[ImageRef]: @@ -38,4 +40,4 @@ def test_simulate_subtask(): assert len(result.perf_seq) == ntrials -# Todo: test deterministic \ No newline at end of file +# Todo: test deterministic diff --git a/hobj/benchmarks/generalization/benchmark.py b/hobj/benchmarks/generalization/benchmark.py index 3c9e856..69266ac 100644 --- a/hobj/benchmarks/generalization/benchmark.py +++ b/hobj/benchmarks/generalization/benchmark.py @@ -1,12 +1,13 @@ +from dataclasses import dataclass +from typing import List, Tuple + import numpy as np import pydantic import xarray as xr -from dataclasses import dataclass from tqdm import tqdm -from typing import List, Tuple from hobj.benchmarks.generalization.estimator import GeneralizationStatistics -from hobj.benchmarks.generalization.simulator import GeneralizationSubtask, GeneralizationSessionResult +from hobj.benchmarks.generalization.simulator import GeneralizationSessionResult, GeneralizationSubtask from hobj.learning_models import BinaryLearningModel from hobj.stats.ci import estimate_basic_bootstrap_CI diff --git a/hobj/benchmarks/generalization/estimator.py b/hobj/benchmarks/generalization/estimator.py index cc77697..6dbd337 100644 --- a/hobj/benchmarks/generalization/estimator.py +++ b/hobj/benchmarks/generalization/estimator.py @@ -1,8 +1,9 @@ -import numpy as np import warnings -import xarray as xr from typing import List +import numpy as np +import xarray as xr + from hobj.benchmarks.generalization.simulator import GeneralizationSessionResult from hobj.stats import binomial as binomial_funcs diff --git a/hobj/benchmarks/generalization/simulator.py b/hobj/benchmarks/generalization/simulator.py index dc690ad..9234410 100644 --- a/hobj/benchmarks/generalization/simulator.py +++ b/hobj/benchmarks/generalization/simulator.py @@ -1,11 +1,11 @@ -from typing import List, Union, Dict, Optional +import collections +from typing import Dict, List, Optional, Union import numpy as np import pydantic - from mref import ImageRef + from hobj.learning_models import BinaryLearningModel -import collections # %% diff --git a/hobj/benchmarks/make_model.py b/hobj/benchmarks/make_model.py index 2df5f58..4baa090 100644 --- a/hobj/benchmarks/make_model.py +++ b/hobj/benchmarks/make_model.py @@ -1,12 +1,13 @@ """ This module provides an alternative interface for instantiating a linear learning model. """ -from hobj.learning_models.linear import LinearLearner, RepresentationalModel -import hobj.learning_models.linear.update_rules as update_rules -from typing import Literal, Dict +from typing import Dict, List, Literal + import mref import numpy as np -from typing import List + +import hobj.learning_models.linear.update_rules as update_rules +from hobj.learning_models.linear import LinearLearner, RepresentationalModel # %% @@ -55,4 +56,4 @@ def make_linear_learner_from_features( image_ref_to_features=ref_to_calibrated_features ), update_rule=update_rule_name(alpha=alpha) - ) \ No newline at end of file + ) diff --git a/hobj/benchmarks/mut_highvar_benchmark.py b/hobj/benchmarks/mut_highvar_benchmark.py index 91f5409..a57ad5a 100644 --- a/hobj/benchmarks/mut_highvar_benchmark.py +++ b/hobj/benchmarks/mut_highvar_benchmark.py @@ -1,5 +1,6 @@ +from typing import Dict, List + import numpy as np -from typing import List, Dict from hobj.benchmarks.binary_classification.benchmark import LearningCurveBenchmark, LearningCurveBenchmarkConfig, TargetSubtaskData from hobj.benchmarks.binary_classification.simulation import BinaryClassificationSubtask, BinaryClassificationSubtaskResult diff --git a/hobj/benchmarks/mut_oneshot_benchmark.py b/hobj/benchmarks/mut_oneshot_benchmark.py index 3a0c0aa..0329c99 100644 --- a/hobj/benchmarks/mut_oneshot_benchmark.py +++ b/hobj/benchmarks/mut_oneshot_benchmark.py @@ -2,12 +2,13 @@ # Coercing human data from typing import Dict, List -from hobj.benchmarks.generalization.benchmark import GeneralizationBenchmarkConfig, GeneralizationBenchmark, GeneralizationSessionResult +from mref import ImageRef + +from hobj.benchmarks.generalization.benchmark import GeneralizationBenchmark, GeneralizationBenchmarkConfig, GeneralizationSessionResult from hobj.benchmarks.generalization.estimator import GeneralizationStatistics from hobj.benchmarks.generalization.simulator import GeneralizationSubtask from hobj.data_loaders.behavior import load_oneshot_behavior from hobj.data_loaders.images import MutatorOneShotImageset -from mref import ImageRef # %% diff --git a/hobj/data_loaders/behavior.py b/hobj/data_loaders/behavior.py index fc36b81..a26fbdb 100644 --- a/hobj/data_loaders/behavior.py +++ b/hobj/data_loaders/behavior.py @@ -1,5 +1,5 @@ -from typing import List, Literal import datetime +from typing import List, Literal import pydantic diff --git a/hobj/data_loaders/images/__init__.py b/hobj/data_loaders/images/__init__.py index 3c15cf9..1b8c648 100644 --- a/hobj/data_loaders/images/__init__.py +++ b/hobj/data_loaders/images/__init__.py @@ -1,4 +1,4 @@ from hobj.data_loaders.images.highvar import MutatorHighVarImageset from hobj.data_loaders.images.oneshot import MutatorOneShotImageset -from hobj.data_loaders.images.warmup import MutatorWarmupImageset from hobj.data_loaders.images.probe import ProbeImageset +from hobj.data_loaders.images.warmup import MutatorWarmupImageset diff --git a/hobj/data_loaders/images/highvar.py b/hobj/data_loaders/images/highvar.py index 42c5530..aa34156 100644 --- a/hobj/data_loaders/images/highvar.py +++ b/hobj/data_loaders/images/highvar.py @@ -1,9 +1,9 @@ +from typing import Dict, List + import mref.media_references -from hobj.data_loaders.images.template import Imageset import pydantic -from typing import Dict, List - +from hobj.data_loaders.images.template import Imageset class MutatorHighVarAnnotation(pydantic.BaseModel): diff --git a/hobj/data_loaders/images/probe.py b/hobj/data_loaders/images/probe.py index 7945099..473ce35 100644 --- a/hobj/data_loaders/images/probe.py +++ b/hobj/data_loaders/images/probe.py @@ -1,8 +1,10 @@ -from hobj.data_loaders.images.template import Imageset -import pydantic from typing import Literal +import pydantic + +from hobj.data_loaders.images.template import Imageset + class ProbeAnnotation(pydantic.BaseModel): color: Literal['blue', 'orange'] diff --git a/hobj/data_loaders/images/template.py b/hobj/data_loaders/images/template.py index 91695d2..150608a 100644 --- a/hobj/data_loaders/images/template.py +++ b/hobj/data_loaders/images/template.py @@ -1,17 +1,16 @@ import tempfile +import warnings from abc import ABC from pathlib import Path -from typing import Any, Dict, TypeVar, Generic, List +from typing import Any, Dict, Generic, List, TypeVar import PIL.Image import pydantic +from mref import FileSystemStorage, ImageRef from tqdm import tqdm -from hobj.utils.file_io import unzip_file -from mref import ImageRef from hobj.data_loaders.store import default_data_store -from mref import FileSystemStorage -import warnings +from hobj.utils.file_io import unzip_file class ImageManifestEntry(pydantic.BaseModel, ABC): diff --git a/hobj/data_loaders/images/warmup.py b/hobj/data_loaders/images/warmup.py index ec5a60c..3fdd9c9 100644 --- a/hobj/data_loaders/images/warmup.py +++ b/hobj/data_loaders/images/warmup.py @@ -3,7 +3,6 @@ from hobj.data_loaders.images.template import Imageset - class MutatorWarmupAnnotation(pydantic.BaseModel): category: str = pydantic.Field( examples=[ diff --git a/hobj/data_loaders/store.py b/hobj/data_loaders/store.py index c10786a..fcb2419 100644 --- a/hobj/data_loaders/store.py +++ b/hobj/data_loaders/store.py @@ -1,8 +1,8 @@ -import hobj.config +import mref +import hobj.config -import mref __all__ = [ 'default_data_store' ] diff --git a/hobj/learning_models/linear/__init__.py b/hobj/learning_models/linear/__init__.py index cc133b7..ac70b74 100644 --- a/hobj/learning_models/linear/__init__.py +++ b/hobj/learning_models/linear/__init__.py @@ -1,13 +1,13 @@ -import PIL.Image -import numpy as np import typing -import mref.media_references +import numpy as np + from hobj.learning_models import BinaryLearningModel from hobj.learning_models.linear.representation import RepresentationalModel from hobj.learning_models.linear.update_rules import UpdateRule from hobj.types import ImageId + # %% class LinearLearner(BinaryLearningModel): def __init__( diff --git a/hobj/learning_models/linear/update_rules.py b/hobj/learning_models/linear/update_rules.py index 6911f11..ea7b917 100644 --- a/hobj/learning_models/linear/update_rules.py +++ b/hobj/learning_models/linear/update_rules.py @@ -1,8 +1,9 @@ -import numpy as np -import scipy.special from abc import ABC, abstractmethod from typing import Tuple, Union +import numpy as np +import scipy.special + class UpdateRule(ABC): def __init__(self, alpha: float): diff --git a/hobj/stats/ci.py b/hobj/stats/ci.py index 7ce343c..13979a5 100644 --- a/hobj/stats/ci.py +++ b/hobj/stats/ci.py @@ -1,4 +1,5 @@ from typing import Tuple, Union + import numpy as np diff --git a/hobj/utils/hash.py b/hobj/utils/hash.py index 6c6b0fc..f321fbe 100644 --- a/hobj/utils/hash.py +++ b/hobj/utils/hash.py @@ -1,6 +1,7 @@ -import numpy as np import hashlib + import PIL.Image +import numpy as np def hash_image(image: PIL.Image.Image) -> str: From e1328243489c55e374cf5c0a89ddbc55d963e299 Mon Sep 17 00:00:00 2001 From: himjl Date: Tue, 24 Mar 2026 18:21:54 -0400 Subject: [PATCH 08/12] flatten --- hobj/benchmarks/make_model.py | 5 +- hobj/learning_models/__init__.py | 71 +++++++++++++++++ hobj/learning_models/linear/__init__.py | 76 ------------------- .../{linear => }/representation.py | 1 - .../{linear => }/update_rules.py | 0 5 files changed, 74 insertions(+), 79 deletions(-) delete mode 100644 hobj/learning_models/linear/__init__.py rename hobj/learning_models/{linear => }/representation.py (99%) rename hobj/learning_models/{linear => }/update_rules.py (100%) diff --git a/hobj/benchmarks/make_model.py b/hobj/benchmarks/make_model.py index 4baa090..3086be8 100644 --- a/hobj/benchmarks/make_model.py +++ b/hobj/benchmarks/make_model.py @@ -6,8 +6,9 @@ import mref import numpy as np -import hobj.learning_models.linear.update_rules as update_rules -from hobj.learning_models.linear import LinearLearner, RepresentationalModel +import hobj.learning_models.update_rules as update_rules +from hobj.learning_models.linear import RepresentationalModel +from hobj.learning_models import LinearLearner # %% diff --git a/hobj/learning_models/__init__.py b/hobj/learning_models/__init__.py index cc2a510..b409900 100644 --- a/hobj/learning_models/__init__.py +++ b/hobj/learning_models/__init__.py @@ -3,6 +3,9 @@ import numpy as np +from hobj.learning_models.representation import RepresentationalModel +from hobj.learning_models.update_rules import UpdateRule + from hobj.types import ImageId @@ -61,3 +64,71 @@ def get_response( def give_feedback(self, reward: float) -> None: return + + +# %% +class LinearLearner(BinaryLearningModel): + def __init__( + self, + representational_model: RepresentationalModel, + update_rule: UpdateRule, + ): + + self.representational_model = representational_model + self.update_rule = update_rule + + # State variables + self.w = None + self.b = None + self._f_last = None + self._logits_last = None + self._action_last = None + self._generator: np.random.Generator = np.random.default_rng() + + # Initialize state + self.reset_state(seed=0) + return + + def reset_state(self, seed: int) -> None: + """ + :param seed: + :return: + """ + self.update_rule.reset() + self.w = np.zeros((self.representational_model.d, 2)) + self.b = np.zeros((2,)) + self._f_last = None + self._logits_last = None + self._action_last = None + self._generator = np.random.default_rng(seed=seed) + + return + + def get_response( + self, + image: ImageId, + ) -> typing.Literal[0, 1]: + + f = self.representational_model.get_features(image=image) + logits = f @ self.w + self.b + action = self._random_tiebreaking_argmax(logits[0], logits[1]) + + # Update internal state with traces + self._f_last = f + self._logits_last = logits + self._action_last = action + return action + + def give_feedback(self, reward: float) -> None: + delta_w, delta_b = self.update_rule.get_update(x=self._f_last, w=self.w, b=self.b, logits=self._logits_last, action=self._action_last, reward=reward) # [action] + self.w += delta_w + self.b += delta_b + return + + def _random_tiebreaking_argmax(self, logit0, logit1) -> typing.Literal[0, 1]: + if logit0 > logit1: + return 0 + elif logit0 < logit1: + return 1 + else: + return 0 if self._generator.random() < 0.5 else 1 diff --git a/hobj/learning_models/linear/__init__.py b/hobj/learning_models/linear/__init__.py deleted file mode 100644 index ac70b74..0000000 --- a/hobj/learning_models/linear/__init__.py +++ /dev/null @@ -1,76 +0,0 @@ -import typing - -import numpy as np - -from hobj.learning_models import BinaryLearningModel -from hobj.learning_models.linear.representation import RepresentationalModel -from hobj.learning_models.linear.update_rules import UpdateRule -from hobj.types import ImageId - - -# %% -class LinearLearner(BinaryLearningModel): - def __init__( - self, - representational_model: RepresentationalModel, - update_rule: UpdateRule, - ): - - self.representational_model = representational_model - self.update_rule = update_rule - - # State variables - self.w = None - self.b = None - self._f_last = None - self._logits_last = None - self._action_last = None - self._generator: np.random.Generator = np.random.default_rng() - - # Initialize state - self.reset_state(seed=0) - return - - def reset_state(self, seed: int) -> None: - """ - :param seed: - :return: - """ - self.update_rule.reset() - self.w = np.zeros((self.representational_model.d, 2)) - self.b = np.zeros((2,)) - self._f_last = None - self._logits_last = None - self._action_last = None - self._generator = np.random.default_rng(seed=seed) - - return - - def get_response( - self, - image: ImageId, - ) -> typing.Literal[0, 1]: - - f = self.representational_model.get_features(image=image) - logits = f @ self.w + self.b - action = self._random_tiebreaking_argmax(logits[0], logits[1]) - - # Update internal state with traces - self._f_last = f - self._logits_last = logits - self._action_last = action - return action - - def give_feedback(self, reward: float) -> None: - delta_w, delta_b = self.update_rule.get_update(x=self._f_last, w=self.w, b=self.b, logits=self._logits_last, action=self._action_last, reward=reward) # [action] - self.w += delta_w - self.b += delta_b - return - - def _random_tiebreaking_argmax(self, logit0, logit1) -> typing.Literal[0, 1]: - if logit0 > logit1: - return 0 - elif logit0 < logit1: - return 1 - else: - return 0 if self._generator.random() < 0.5 else 1 diff --git a/hobj/learning_models/linear/representation.py b/hobj/learning_models/representation.py similarity index 99% rename from hobj/learning_models/linear/representation.py rename to hobj/learning_models/representation.py index da28183..bd3d1f4 100644 --- a/hobj/learning_models/linear/representation.py +++ b/hobj/learning_models/representation.py @@ -6,7 +6,6 @@ # %% - class RepresentationalModel: """ Class which maps images to feature vectors of shape (d,). diff --git a/hobj/learning_models/linear/update_rules.py b/hobj/learning_models/update_rules.py similarity index 100% rename from hobj/learning_models/linear/update_rules.py rename to hobj/learning_models/update_rules.py From 2699b531aecd8e3e9b3f51560986e659a5296db0 Mon Sep 17 00:00:00 2001 From: himjl Date: Tue, 24 Mar 2026 18:23:45 -0400 Subject: [PATCH 09/12] 3.12 only --- .github/workflows/ci.yml | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b45a188..00db357 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,21 +7,27 @@ on: - hobj/** # Changes to workflows - .github/workflows/ci.yml - # Changes to pyproject.toml + # Changes to project/dependency metadata - 'pyproject.toml' + - 'uv.lock' jobs: unit_tests: runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.12"] steps: - name: Checkout code uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.12' - cache: 'pip' - cache-dependency-path: setup.py # See https://github.com/actions/setup-python/blob/main/docs/advanced-usage.md#caching-packages - - name: Install hobj - run: pip3 install -e . + python-version: ${{ matrix.python-version }} + - name: Set up uv + uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + - name: Install dependencies + run: uv sync --locked --dev - name: Run pytests - run: pytest -s + run: uv run pytest -s From 2520cd510779a3b4b243cb546b2c1e447239a603 Mon Sep 17 00:00:00 2001 From: himjl Date: Tue, 24 Mar 2026 18:26:39 -0400 Subject: [PATCH 10/12] remove mref from tests --- tests/test_dummy_learner.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/tests/test_dummy_learner.py b/tests/test_dummy_learner.py index 9c8656c..f164712 100644 --- a/tests/test_dummy_learner.py +++ b/tests/test_dummy_learner.py @@ -1,24 +1,19 @@ import pytest -import mref.media_references from hobj.learning_models import RandomGuesser @pytest.fixture def dummy_learner() -> RandomGuesser: return RandomGuesser(seed=0) -@pytest.fixture -def test_image() -> mref.media_references.ImageRef: - return mref.media_references.ImageRef(sha256='0' * 64) - -def test_dummy_learner_deterministic(dummy_learner, test_image): +def test_dummy_learner_deterministic(dummy_learner): ntests = 10 actions = [] expected_actions = [1, 1, 1, 0, 0, 0, 0, 0, 0, 1] for i in range(ntests): - a = dummy_learner.get_response(image=test_image) + a = dummy_learner.get_response(image='hi') actions.append(a) - assert actions == expected_actions \ No newline at end of file + assert actions == expected_actions From ee3dd34dfd3755baea88487595c054de56bd0b50 Mon Sep 17 00:00:00 2001 From: himjl Date: Tue, 24 Mar 2026 19:23:45 -0400 Subject: [PATCH 11/12] updated getters for images --- .gitignore | 1 + hobj/benchmarks/make_model.py | 26 ++-- hobj/benchmarks/mut_highvar_benchmark.py | 6 +- hobj/benchmarks/mut_oneshot_benchmark.py | 6 +- hobj/data_loaders/behavior.py | 3 - hobj/data_loaders/images/__init__.py | 1 + hobj/data_loaders/images/highvar.py | 31 ++--- hobj/data_loaders/images/oneshot.py | 5 +- hobj/data_loaders/images/template.py | 156 ++++++++++++----------- hobj/learning_models/__init__.py | 2 - hobj/learning_models/update_rules.py | 1 + 11 files changed, 132 insertions(+), 106 deletions(-) diff --git a/.gitignore b/.gitignore index f511459..ac631e5 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ __pycache__/ *.egg-info/ dist/ +/data/ diff --git a/hobj/benchmarks/make_model.py b/hobj/benchmarks/make_model.py index 3086be8..0593021 100644 --- a/hobj/benchmarks/make_model.py +++ b/hobj/benchmarks/make_model.py @@ -1,20 +1,30 @@ """ This module provides an alternative interface for instantiating a linear learning model. """ -from typing import Dict, List, Literal +from functools import lru_cache +from typing import Literal -import mref import numpy as np import hobj.learning_models.update_rules as update_rules -from hobj.learning_models.linear import RepresentationalModel -from hobj.learning_models import LinearLearner +from hobj.learning_models import LinearLearner, RepresentationalModel +from hobj.types import ImageId + + +# %% +@lru_cache(maxsize=1) +def _get_calibration_image_ids() -> list[ImageId]: + """ + Returns the ImageIds of the warmup images that are used for calibrating the features of the linear learner. + Caches the result to avoid redundant computation. + """ + raise NotImplementedError # %% def make_linear_learner_from_features( - ref_to_features: Dict[mref.ImageRef, np.ndarray], - calibration_images: List[mref.ImageRef], + features: np.ndarray, + image_ids: list[ImageId], update_rule_name: Literal[ 'Prototype', 'Square', @@ -30,13 +40,13 @@ def make_linear_learner_from_features( """ Instantiates a linear learning model from precomputed features. :param ref_to_features: Dict[mref.ImageRef, np.ndarray], the features to use. - :param calibration_images: List[mref.ImageRef], the images that will be used to calibrate the features (i.e. for mean centering and ensuring they fit within a unit ball). :param update_rule_name: str, the name of the update rule to use. :param alpha: float, the learning rate. :return: LinearLearner """ - f_calibration = np.array([ref_to_features[ref] for ref in calibration_images]) + ref_to_features = {ref: features[i] for i, ref in enumerate(image_ids)} + f_calibration = np.array([ref_to_features[ref] for ref in _get_calibration_image_ids()]) mu_calibration = np.mean(f_calibration, axis=0) norms_calibration = np.linalg.norm(f_calibration - mu_calibration, axis=1) norm_cutoff = np.quantile(norms_calibration, 0.999) # Will clip the rest diff --git a/hobj/benchmarks/mut_highvar_benchmark.py b/hobj/benchmarks/mut_highvar_benchmark.py index a57ad5a..5068538 100644 --- a/hobj/benchmarks/mut_highvar_benchmark.py +++ b/hobj/benchmarks/mut_highvar_benchmark.py @@ -22,7 +22,7 @@ def __init__(self): # Normalize data for benchmark: sha256_to_category = { - ref.sha256: imageset.get_annotation(image_ref=ref).category for ref in imageset.image_refs + ref.sha256: imageset.get_annotation(image_id=ref).category for ref in imageset.image_ids } subtask_name_to_results = {} @@ -47,8 +47,8 @@ def __init__(self): # Instantiate the subtask if it does not exist: if subtask_name not in subtask_name_to_subtask: subtask = BinaryClassificationSubtask( - classA=imageset.category_to_image_refs[cat0], - classB=imageset.category_to_image_refs[cat1], + classA=imageset.category_to_image_ids[cat0], + classB=imageset.category_to_image_ids[cat1], ntrials=100, replace=False, ) diff --git a/hobj/benchmarks/mut_oneshot_benchmark.py b/hobj/benchmarks/mut_oneshot_benchmark.py index 0329c99..9004a38 100644 --- a/hobj/benchmarks/mut_oneshot_benchmark.py +++ b/hobj/benchmarks/mut_oneshot_benchmark.py @@ -100,8 +100,8 @@ def __init__(self): cat_to_support_image: Dict[str, ImageRef] = {} cat_to_test_images: Dict[str, List[ImageRef]] = {} - for ref in imageset.image_refs: - annotation = imageset.get_annotation(image_ref=ref) + for ref in imageset.image_ids: + annotation = imageset.get_annotation(image_id=ref) transformation_id = f"{annotation.transformation} | {annotation.transformation_level}" image_ref_to_transformation_id[ref] = transformation_id @@ -150,7 +150,7 @@ def __init__(self): for i_trial, sha in enumerate(session.stimulus_sha256_seq): ref = ImageRef(sha256=sha) - annotation = imageset.get_annotation(image_ref=ref) + annotation = imageset.get_annotation(image_id=ref) # Add stimulus category to observed categories observed_categories.add(annotation.category) diff --git a/hobj/data_loaders/behavior.py b/hobj/data_loaders/behavior.py index a26fbdb..8eb9327 100644 --- a/hobj/data_loaders/behavior.py +++ b/hobj/data_loaders/behavior.py @@ -53,7 +53,6 @@ def validate_lengths(self) -> 'HumanLearningSession': # %% def _load_learning_sessions( dataset_url: str, - redownload: bool ) -> List[HumanLearningSession]: data_store = default_data_store # Download the data: @@ -72,7 +71,6 @@ class LearningSessionDataset(pydantic.BaseModel): # %% Data loaders def load_highvar_behavior( - redownload: bool = False, remove_probe_trials: bool = True ) -> List[HumanLearningSession]: """ @@ -82,7 +80,6 @@ def load_highvar_behavior( sessions = _load_learning_sessions( dataset_url='https://hlbdatasets.s3.us-east-1.amazonaws.com/behavior/mutator-highvar-human-learning-data.json', - redownload=redownload ) if not remove_probe_trials: diff --git a/hobj/data_loaders/images/__init__.py b/hobj/data_loaders/images/__init__.py index 1b8c648..1e11a53 100644 --- a/hobj/data_loaders/images/__init__.py +++ b/hobj/data_loaders/images/__init__.py @@ -2,3 +2,4 @@ from hobj.data_loaders.images.oneshot import MutatorOneShotImageset from hobj.data_loaders.images.probe import ProbeImageset from hobj.data_loaders.images.warmup import MutatorWarmupImageset + diff --git a/hobj/data_loaders/images/highvar.py b/hobj/data_loaders/images/highvar.py index aa34156..ea98517 100644 --- a/hobj/data_loaders/images/highvar.py +++ b/hobj/data_loaders/images/highvar.py @@ -1,10 +1,11 @@ -from typing import Dict, List - -import mref.media_references +from pathlib import Path import pydantic from hobj.data_loaders.images.template import Imageset +from hobj.types import ImageId + +# %% class MutatorHighVarAnnotation(pydantic.BaseModel): category: str = pydantic.Field( @@ -21,24 +22,24 @@ class MutatorHighVarImageset(Imageset[MutatorHighVarAnnotation]): zipped_images_url = 'https://hlbdatasets.s3.us-east-1.amazonaws.com/imagesets/mutator-highvar/MutatorB2000_Subset128_FullVar_Train.zip' annotation_schema = MutatorHighVarAnnotation - def __init__(self): - super().__init__() + def __init__(self, cachedir: Path | None = None, redownload: bool = False): + super().__init__(cachedir=cachedir, redownload=redownload) - self._category_to_image_refs: Dict[str, List[mref.media_references.ImageRef]] = {} + self._category_to_image_ids: dict[str, list[ImageId]] = {} - for ref in self.image_refs: - annotation = self.get_annotation(image_ref=ref) + for ref in self.image_ids: + annotation = self.get_annotation(image_id=ref) category = annotation.category - if category not in self._category_to_image_refs: - self._category_to_image_refs[category] = [] - self._category_to_image_refs[category].append(ref) + if category not in self._category_to_image_ids: + self._category_to_image_ids[category] = [] + self._category_to_image_ids[category].append(ref) - for category in self._category_to_image_refs: - self._category_to_image_refs[category] = sorted(self._category_to_image_refs[category]) + for category in self._category_to_image_ids: + self._category_to_image_ids[category] = sorted(self._category_to_image_ids[category]) @property - def category_to_image_refs(self) -> Dict[str, List[mref.media_references.ImageRef]]: - return self._category_to_image_refs + def category_to_image_ids(self) -> dict[str, list[ImageId]]: + return self._category_to_image_ids if __name__ == '__main__': diff --git a/hobj/data_loaders/images/oneshot.py b/hobj/data_loaders/images/oneshot.py index ae253cf..15c58fd 100644 --- a/hobj/data_loaders/images/oneshot.py +++ b/hobj/data_loaders/images/oneshot.py @@ -1,4 +1,4 @@ - +from pathlib import Path from typing import Literal import pydantic @@ -48,6 +48,9 @@ class MutatorOneShotImageset(Imageset[MutatorOneShotAnnotation]): zipped_images_url = 'https://hlbdatasets.s3.us-east-1.amazonaws.com/imagesets/mutator-oneshot/MutatorB2000_Oneshot64.zip' annotation_schema = MutatorOneShotAnnotation + def __init__(self, cachedir: Path | None = None, redownload: bool = False): + super().__init__(cachedir=cachedir, redownload=redownload) + if __name__ == '__main__': imageset = MutatorOneShotImageset() diff --git a/hobj/data_loaders/images/template.py b/hobj/data_loaders/images/template.py index 150608a..51e527e 100644 --- a/hobj/data_loaders/images/template.py +++ b/hobj/data_loaders/images/template.py @@ -1,18 +1,17 @@ -import tempfile -import warnings +import json from abc import ABC from pathlib import Path from typing import Any, Dict, Generic, List, TypeVar import PIL.Image import pydantic -from mref import FileSystemStorage, ImageRef -from tqdm import tqdm -from hobj.data_loaders.store import default_data_store -from hobj.utils.file_io import unzip_file +from hobj.types import ImageId +from hobj.utils.file_io import download_file, download_json, unzip_file +from hobj.utils.hash import hash_image +# %% class ImageManifestEntry(pydantic.BaseModel, ABC): sha256: str = pydantic.Field(pattern=r'^[a-f0-9]{64}$') relpath: Path = pydantic.Field( @@ -24,7 +23,7 @@ class ImageManifestEntry(pydantic.BaseModel, ABC): class ImageManifest(pydantic.BaseModel): - entries: Dict[str, ImageManifestEntry] = pydantic.Field( + entries: Dict[ImageId, ImageManifestEntry] = pydantic.Field( description='A mapping from a unique image ID to image manifest entries.' ) @@ -46,103 +45,118 @@ class Imageset(Generic[IA], ABC): def __init__( self, - data_store: FileSystemStorage = None, + cachedir: Path | None = None, redownload=False, ): """ - Unwrap the image manifest and save the images to the cache. + Download and materialize the imageset into a local cache directory. """ - if not data_store: - self.data_store: FileSystemStorage = default_data_store + repo_root = Path(__file__).resolve().parents[3] + self.cachedir = (cachedir if cachedir is not None else repo_root / 'data').resolve() + self.cachedir.mkdir(parents=True, exist_ok=True) - # Load the manifest if it is already cached - manifest_data = self.data_store.download_json_from_url(url=self.manifest_url, register=True) # Todo - image_manifest = ImageManifest(**manifest_data) + self._dataset_dir = self.cachedir / self.__class__.__name__ + self._dataset_dir.mkdir(parents=True, exist_ok=True) + self._images_dir = self._dataset_dir / 'images' - self._register_image_urls(manifest=image_manifest, redownload=redownload) + manifest_data = self._load_manifest_json(redownload=redownload) + image_manifest = ImageManifest(**manifest_data) self._manifest = image_manifest + self._ensure_images_present(manifest=image_manifest, redownload=redownload) - self._image_id_to_annotation: Dict[str, IA] = {} - self._image_id_to_sha256: Dict[str, str] = {} - self._sha256_to_image_ids: Dict[str, List[str]] = {} - self._image_refs: List[ImageRef] = [] + self._image_id_to_annotation: Dict[ImageId, IA] = {} + self._image_id_to_sha256: Dict[ImageId, str] = {} + self._image_id_to_relpath: Dict[ImageId, Path] = {} + self._image_ids: List[ImageId] = [] for image_id, entry in image_manifest.entries.items(): - image_ref = ImageRef(sha256=entry.sha256) - self._image_refs.append(image_ref) - self._image_id_to_sha256[image_id] = image_ref.sha256 + self._image_ids.append(image_id) + self._image_id_to_sha256[image_id] = entry.sha256 + self._image_id_to_relpath[image_id] = entry.relpath self._image_id_to_annotation[image_id] = self.annotation_schema(**entry.annotation) - if image_ref.sha256 not in self._sha256_to_image_ids: - self._sha256_to_image_ids[image_ref.sha256] = [] - self._sha256_to_image_ids[image_ref.sha256].append(image_id) - def _register_image_urls(self, manifest: ImageManifest, redownload: bool = False): + def _load_manifest_json(self, redownload: bool) -> dict[str, Any]: + if redownload or not self.manifest_path.exists(): + manifest_data = download_json(self.manifest_url) + self.manifest_path.write_text(json.dumps(manifest_data, indent=2)) + return json.loads(self.manifest_path.read_text()) + + def _ensure_images_present(self, manifest: ImageManifest, redownload: bool = False) -> None: """ - Ensures the entries of the manifest are registered in the data store. + Ensure the images for this imageset exist locally. """ + if redownload or not self._all_images_present(manifest): + self._download_and_extract_images(manifest) + + def _all_images_present(self, manifest: ImageManifest) -> bool: + for entry in manifest.entries.values(): + if not (self._images_dir / entry.relpath).exists(): + return False + return True + + def _download_and_extract_images(self, manifest: ImageManifest) -> None: + self._dataset_dir.mkdir(parents=True, exist_ok=True) + if self._images_dir.exists(): + for path in sorted(self._images_dir.rglob('*'), reverse=True): + if path.is_file(): + path.unlink() + elif path.is_dir(): + path.rmdir() + self._images_dir.rmdir() + + download_file(self.zipped_images_url, self.archive_path) + unzip_file(zip_path=self.archive_path, output_dir=self._images_dir) + self._verify_images(manifest) + + def _verify_images(self, manifest: ImageManifest) -> None: + for image_id, entry in manifest.entries.items(): + image_path = self._images_dir / entry.relpath + if not image_path.exists(): + raise FileNotFoundError(f"Missing image file for {image_id}: {image_path}") + + with PIL.Image.open(image_path) as image_data: + observed_sha256 = hash_image(image_data) + + if observed_sha256 != entry.sha256: + raise ValueError( + f"SHA256 mismatch for image {image_id}: {observed_sha256} != {entry.sha256}" + ) - num_undownloaded_images = 0 - for image_id, manifest_entry in manifest.entries.items(): - ref = ImageRef(sha256=manifest_entry.sha256) - if not self.data_store.check_data_exists(ref=ref): - num_undownloaded_images += 1 - - if num_undownloaded_images == 0: - return - - print(f'Missing {num_undownloaded_images}/{len(manifest.entries)} images for this imageset.') - - # Download the images - zipped_images_path = self.data_store.download_zip_path_from_url(url=self.zipped_images_url, register=True) # todo type correctly - - # Make a tempdir to unzip the images - with tempfile.TemporaryDirectory() as tempdir: - tempdir = Path(tempdir) - unzip_file(zip_path=zipped_images_path, output_dir=tempdir) - - # Register the images - pbar = tqdm(total=len(manifest.entries)) - for image_id, manifest_entry in manifest.entries.items(): - reported_sha256 = manifest_entry.sha256 - relpath = manifest_entry.relpath - image_path = tempdir / relpath - image_data = PIL.Image.open(image_path) - - image_ref = ImageRef.from_image(image=image_data) + @property + def manifest_path(self) -> Path: + return self._dataset_dir / 'manifest.json' - if not image_ref.sha256 == reported_sha256: - raise ValueError(f"SHA256 mismatch for image {manifest_entry}: {image_ref.sha256} != {reported_sha256}") + @property + def archive_path(self) -> Path: + archive_name = Path(self.zipped_images_url).name + return self._dataset_dir / archive_name - # Store image - self.data_store.register_image(image=image_data) - pbar.update(1) + @property + def images_dir(self) -> Path: + return self._images_dir @property - def image_refs(self) -> List[ImageRef]: + def image_ids(self) -> list[ImageId]: """ List of image refs in this imageset. :return: """ - return self._image_refs + return self._image_ids - def get_annotation(self, *, image_ref: ImageRef = None, sha256: str = None, ) -> IA: + def get_annotation(self, *, image_id: ImageId) -> IA: """ Get the annotation for a given image. If an image has multiple annotations, this will throw an error. """ - if sha256 is None: - sha256 = image_ref.sha256 - - image_ids = self._sha256_to_image_ids[sha256] - if len(image_ids) > 1: - warnings.warn(f"Image {sha256} has multiple annotations: {image_ids}. Returning the first one.") - image_id = image_ids[0] entry = self._image_id_to_annotation[image_id] return entry + def get_image_path(self, *, image_id: ImageId) -> Path: + return self.images_dir / self._image_id_to_relpath[image_id] + def __len__(self) -> int: - return len(self.image_refs) + return len(self.image_ids) def __repr__(self): return f"{self.__class__.__name__}({len(self)})" diff --git a/hobj/learning_models/__init__.py b/hobj/learning_models/__init__.py index b409900..ad3411b 100644 --- a/hobj/learning_models/__init__.py +++ b/hobj/learning_models/__init__.py @@ -5,13 +5,11 @@ from hobj.learning_models.representation import RepresentationalModel from hobj.learning_models.update_rules import UpdateRule - from hobj.types import ImageId # %% class BinaryLearningModel(ABC): - @abstractmethod def reset_state(self, seed: typing.Union[int, None]) -> None: """ diff --git a/hobj/learning_models/update_rules.py b/hobj/learning_models/update_rules.py index ea7b917..3297e26 100644 --- a/hobj/learning_models/update_rules.py +++ b/hobj/learning_models/update_rules.py @@ -53,6 +53,7 @@ def get_update( return delta_w, delta_b +# %% class Prototype(UpdateRule): """ Simulates the decision boundary implemented by a prototype learner. From a1e059baa510c574fbbbf68118bb5b091cff78c6 Mon Sep 17 00:00:00 2001 From: himjl Date: Tue, 24 Mar 2026 19:31:53 -0400 Subject: [PATCH 12/12] download behavior without mref --- .../binary_classification/simulation.py | 9 ++--- hobj/benchmarks/generalization/simulator.py | 15 ++++--- hobj/benchmarks/mut_oneshot_benchmark.py | 23 ++++++----- hobj/data_loaders/behavior.py | 39 ++++++++++++++----- hobj/data_loaders/store.py | 12 ------ hobj/utils/file_io.py | 9 ++--- .../tests => tests}/test_simulate_subtask.py | 13 ++----- 7 files changed, 58 insertions(+), 62 deletions(-) rename {hobj/benchmarks/binary_classification/tests => tests}/test_simulate_subtask.py (73%) diff --git a/hobj/benchmarks/binary_classification/simulation.py b/hobj/benchmarks/binary_classification/simulation.py index fe5833b..ad35f89 100644 --- a/hobj/benchmarks/binary_classification/simulation.py +++ b/hobj/benchmarks/binary_classification/simulation.py @@ -2,8 +2,7 @@ import numpy as np import pydantic -from mref import ImageRef - +from hobj.types import ImageId from hobj.learning_models import BinaryLearningModel @@ -27,14 +26,14 @@ class BinaryClassificationSubtask(pydantic.BaseModel): frozen=True ) - classA: List[ImageRef] - classB: List[ImageRef] + classA: List[ImageId] + classB: List[ImageId] ntrials: int = pydantic.Field(description='The number of trials in the subtask.', gt=0) replace: bool = pydantic.Field(description='Whether to show stimulus images with replacement or not.') @pydantic.field_validator('classA', 'classB', mode='after') @classmethod - def sort_image_refs(cls, value: List[ImageRef]) -> List[ImageRef]: + def sort_image_refs(cls, value: List[ImageId]) -> List[ImageId]: return sorted(value) @pydantic.model_validator(mode='after') diff --git a/hobj/benchmarks/generalization/simulator.py b/hobj/benchmarks/generalization/simulator.py index 9234410..9bddd95 100644 --- a/hobj/benchmarks/generalization/simulator.py +++ b/hobj/benchmarks/generalization/simulator.py @@ -3,10 +3,9 @@ import numpy as np import pydantic -from mref import ImageRef from hobj.learning_models import BinaryLearningModel - +from hobj.types import ImageId # %% class GeneralizationSessionResult(pydantic.BaseModel): @@ -37,15 +36,15 @@ class GeneralizationSubtask(pydantic.BaseModel): frozen=True ) - support_imageA: ImageRef - support_imageB: ImageRef - test_imagesA: List[ImageRef] - test_imagesB: List[ImageRef] - image_ref_to_transformation: Dict[ImageRef, str] + support_imageA: ImageId + support_imageB: ImageId + test_imagesA: List[ImageId] + test_imagesB: List[ImageId] + image_ref_to_transformation: Dict[ImageId, str] @pydantic.field_validator('test_imagesA', 'test_imagesB', mode='after') @classmethod - def sort_image_refs(cls, value: List[ImageRef]) -> List[ImageRef]: + def sort_image_refs(cls, value: List[ImageId]) -> List[ImageId]: return sorted(value) @pydantic.model_validator(mode='after') diff --git a/hobj/benchmarks/mut_oneshot_benchmark.py b/hobj/benchmarks/mut_oneshot_benchmark.py index 9004a38..71851d3 100644 --- a/hobj/benchmarks/mut_oneshot_benchmark.py +++ b/hobj/benchmarks/mut_oneshot_benchmark.py @@ -2,14 +2,13 @@ # Coercing human data from typing import Dict, List -from mref import ImageRef - from hobj.benchmarks.generalization.benchmark import GeneralizationBenchmark, GeneralizationBenchmarkConfig, GeneralizationSessionResult from hobj.benchmarks.generalization.estimator import GeneralizationStatistics from hobj.benchmarks.generalization.simulator import GeneralizationSubtask from hobj.data_loaders.behavior import load_oneshot_behavior from hobj.data_loaders.images import MutatorOneShotImageset +from hobj.types import ImageId # %% @@ -97,24 +96,24 @@ def __init__(self): # Map image refs to transformation ids image_ref_to_transformation_id = {} - cat_to_support_image: Dict[str, ImageRef] = {} - cat_to_test_images: Dict[str, List[ImageRef]] = {} + cat_to_support_image: Dict[str, ImageId] = {} + cat_to_test_images: Dict[str, List[ImageId]] = {} - for ref in imageset.image_ids: - annotation = imageset.get_annotation(image_id=ref) + for image_id in imageset.image_ids: + annotation = imageset.get_annotation(image_id=image_id) transformation_id = f"{annotation.transformation} | {annotation.transformation_level}" - image_ref_to_transformation_id[ref] = transformation_id + image_ref_to_transformation_id[image_id] = transformation_id if annotation.transformation == 'original': if annotation.category not in cat_to_support_image: - cat_to_support_image[annotation.category] = ref + cat_to_support_image[annotation.category] = image_id else: raise ValueError(f"Multiple support images for category {annotation.category}") else: if annotation.category not in cat_to_test_images: cat_to_test_images[annotation.category] = [] - cat_to_test_images[annotation.category].append(ref) + cat_to_test_images[annotation.category].append(image_id) # Assemble subtask simulators subtasks = [] @@ -149,8 +148,8 @@ def __init__(self): observed_categories = set() for i_trial, sha in enumerate(session.stimulus_sha256_seq): - ref = ImageRef(sha256=sha) - annotation = imageset.get_annotation(image_id=ref) + image_id = sha + annotation = imageset.get_annotation(image_id=image_id) # Add stimulus category to observed categories observed_categories.add(annotation.category) @@ -167,7 +166,7 @@ def __init__(self): ncatch += 1 else: assert annotation.transformation != 'original' - transformation_id = image_ref_to_transformation_id[ref] + transformation_id = image_ref_to_transformation_id[image_id] # Keep only benchmarked transformations if transformation_id in self.transformation_ids: diff --git a/hobj/data_loaders/behavior.py b/hobj/data_loaders/behavior.py index 8eb9327..3452ce7 100644 --- a/hobj/data_loaders/behavior.py +++ b/hobj/data_loaders/behavior.py @@ -1,9 +1,11 @@ import datetime +import json +from pathlib import Path from typing import List, Literal import pydantic -from hobj.data_loaders.store import default_data_store +from hobj.utils.file_io import download_json __all__ = ['load_highvar_behavior', 'load_oneshot_behavior'] @@ -53,15 +55,22 @@ def validate_lengths(self) -> 'HumanLearningSession': # %% def _load_learning_sessions( dataset_url: str, + cache_filename: str, + cachedir: Path | None = None, + redownload: bool = False, ) -> List[HumanLearningSession]: - data_store = default_data_store - # Download the data: - json_data = data_store.download_json_from_url( - url=dataset_url, - register=True, - ) + repo_root = Path(__file__).resolve().parents[2] + cache_root = (cachedir if cachedir is not None else repo_root / 'data').resolve() + behavior_dir = cache_root / 'behavior' + behavior_dir.mkdir(parents=True, exist_ok=True) + dataset_path = behavior_dir / cache_filename + + if redownload or not dataset_path.exists(): + json_data = download_json(dataset_url) + dataset_path.write_text(json.dumps(json_data, indent=2)) + else: + json_data = json.loads(dataset_path.read_text()) - # class LearningSessionDataset(pydantic.BaseModel): sessions: List[HumanLearningSession] @@ -71,7 +80,9 @@ class LearningSessionDataset(pydantic.BaseModel): # %% Data loaders def load_highvar_behavior( - remove_probe_trials: bool = True + remove_probe_trials: bool = True, + cachedir: Path | None = None, + redownload: bool = False, ) -> List[HumanLearningSession]: """ Load the "raw" human learning data from Experiment 1 of Lee and DiCarlo 2023. @@ -80,6 +91,9 @@ def load_highvar_behavior( sessions = _load_learning_sessions( dataset_url='https://hlbdatasets.s3.us-east-1.amazonaws.com/behavior/mutator-highvar-human-learning-data.json', + cache_filename='mutator-highvar-human-learning-data.json', + cachedir=cachedir, + redownload=redownload, ) if not remove_probe_trials: @@ -106,7 +120,10 @@ def filter(vals: list): return filtered_sessions -def load_oneshot_behavior(redownload: bool = False) -> List[HumanLearningSession]: +def load_oneshot_behavior( + cachedir: Path | None = None, + redownload: bool = False, +) -> List[HumanLearningSession]: """ Load the "raw" human learning data from Experiment 2 of Lee and DiCarlo 2023. :return: @@ -114,6 +131,8 @@ def load_oneshot_behavior(redownload: bool = False) -> List[HumanLearningSession sessions = _load_learning_sessions( dataset_url='https://hlbdatasets.s3.us-east-1.amazonaws.com/behavior/mutator-oneshot-human-learning-data.json', + cache_filename='mutator-oneshot-human-learning-data.json', + cachedir=cachedir, redownload=redownload ) diff --git a/hobj/data_loaders/store.py b/hobj/data_loaders/store.py index fcb2419..8b13789 100644 --- a/hobj/data_loaders/store.py +++ b/hobj/data_loaders/store.py @@ -1,13 +1 @@ -import mref - -import hobj.config - -__all__ = [ - 'default_data_store' -] - - -# %% -# Default data store -default_data_store = mref.FileSystemStorage(cachedir=hobj.config.cachedir) diff --git a/hobj/utils/file_io.py b/hobj/utils/file_io.py index 86be9cb..6778353 100644 --- a/hobj/utils/file_io.py +++ b/hobj/utils/file_io.py @@ -37,16 +37,13 @@ def download_file(url: str, output_path: Path) -> None: if not output_path.parent.exists(): output_path.parent.mkdir(parents=True) - size, unit = get_bytes_size(num_bytes=total_size_in_bytes) - - with tqdm(total=size, unit=unit, unit_scale=True, disable=False, desc='Download progress') as progress_bar: + with tqdm(total=total_size_in_bytes, unit='B', unit_scale=True, disable=False, desc='Download progress') as progress_bar: with open(output_path.as_posix(), 'wb') as file: # Iterate over the response data in chunks and write to file for chunk in response.iter_content(chunk_size=1024): if chunk: file.write(chunk) - chunk_size, _ = get_bytes_size(num_bytes=len(chunk), output_units=unit) - progress_bar.update(chunk_size) + progress_bar.update(len(chunk)) file.flush() @@ -93,4 +90,4 @@ def download_json(url: str) -> Any: data = response.read().decode('utf-8') json_data = json.loads(data) - return json_data \ No newline at end of file + return json_data diff --git a/hobj/benchmarks/binary_classification/tests/test_simulate_subtask.py b/tests/test_simulate_subtask.py similarity index 73% rename from hobj/benchmarks/binary_classification/tests/test_simulate_subtask.py rename to tests/test_simulate_subtask.py index a6c0415..3772a21 100644 --- a/hobj/benchmarks/binary_classification/tests/test_simulate_subtask.py +++ b/tests/test_simulate_subtask.py @@ -2,19 +2,17 @@ import PIL.Image import numpy as np -from mref import ImageRef from hobj.benchmarks.binary_classification.simulation import BinaryClassificationSubtask from hobj.learning_models import RandomGuesser +from hobj.types import ImageId -def create_image_refs(nimages_per_class: int, seed: int) -> List[ImageRef]: +def create_image_refs(nimages_per_class: int, seed: int) -> List[ImageId]: images = [] np.random.seed(seed) - for _ in range(nimages_per_class): - image = PIL.Image.fromarray(np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)) - image_ref = ImageRef.from_image(image=image) - images.append(image_ref) + for i in range(nimages_per_class): + images.append(f'seed{seed}_image{i}') return images @@ -38,6 +36,3 @@ def test_simulate_subtask(): ) assert len(result.perf_seq) == ntrials - - -# Todo: test deterministic