diff --git a/.github/workflows/master-pipeline.yml b/.github/workflows/master-pipeline.yml index a95b88f..2fdd6d3 100644 --- a/.github/workflows/master-pipeline.yml +++ b/.github/workflows/master-pipeline.yml @@ -13,7 +13,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v4 diff --git a/keras_batchflow/base/batch_shapers/batch_shaper.py b/keras_batchflow/base/batch_shapers/batch_shaper.py index 8630bf6..aed3316 100644 --- a/keras_batchflow/base/batch_shapers/batch_shaper.py +++ b/keras_batchflow/base/batch_shapers/batch_shaper.py @@ -71,9 +71,9 @@ def _create_shaper_func(self, data, leaf, **kwargs): def _shape_batch(self, data: pd.DataFrame, func, **kwargs): """ This method forms a batch. Depending on the functions provided it can return different data shaped the same way: - - batch-shaped data for keras and tensorflow fit/predict, e.g. ([ndarray, ndarray], ndarray) - - component shapes structure, e.g. ([(None, 2), (None, 5)], (None, 2)) for the same example - - number of classes for categorical inputs, e.g. ([10, 3], 2) + - batch-shaped data for keras and tensorflow fit/predict, e.g. ((ndarray, ndarray), ndarray) + - component shapes structure, e.g. (((None, 2), (None, 5)), (None, 2)) for the same example + - number of classes for categorical inputs, e.g. ((10, 3), 2) :param data: pandas dataframe with the data :param func: a function defining the output (self._shape_func, self._transform_func, self._n_classes_func) :param kwargs: @@ -98,10 +98,9 @@ def _walk_structure(self, data: pd.DataFrame, struc, func, **kwargs): return ret elif type(struc) in [list, tuple]: ret = [self._walk_structure(data, s, func, **kwargs) for s in struc] - if type(struc) is tuple: - return tuple(ret) - else: - return ret + # we always return lists as tuples as tensorflow wants x and y to be tuples in case of multiple + # components + return tuple(ret) else: raise ValueError('Error: structure definition in {} class only supports lists and tuples, but {}' 'was found'.format(type(self).__name__, type(struc))) diff --git a/tests/batch_generators/test_batch_generator.py b/tests/batch_generators/test_batch_generator.py index 73f294d..554a98e 100644 --- a/tests/batch_generators/test_batch_generator.py +++ b/tests/batch_generators/test_batch_generator.py @@ -173,14 +173,14 @@ def test_transform(self): batch = bg.transform(self.df) assert type(batch) == tuple assert len(batch) == 2 - assert type(batch[0]) == list + assert type(batch[0]) == tuple assert len(batch[0]) == 2 assert type(batch[1]) == np.ndarray assert batch[1].shape == (8, 1) batch = bg.transform(self.df, return_y=False) assert isinstance(batch, tuple) assert len(batch) == 1 - assert type(batch[0]) == list + assert type(batch[0]) == tuple assert len(batch[0]) == 2 def test_inverse_transform(self): @@ -222,7 +222,7 @@ def test_shapes(self): sh = bg.shapes assert type(sh) == tuple assert len(sh) == 2 - assert type(sh[0]) == list + assert type(sh[0]) == tuple assert len(sh[0]) == 2 assert sh[0][0] == (3,) assert sh[0][1] == (1,) diff --git a/tests/batch_shapers/test_batch_shaper.py b/tests/batch_shapers/test_batch_shaper.py index 822b677..6ca191b 100644 --- a/tests/batch_shapers/test_batch_shaper.py +++ b/tests/batch_shapers/test_batch_shaper.py @@ -75,7 +75,7 @@ def test_many_x(self, data, label_binarizer, label_encoder): batch = bs.transform(data) assert type(batch) == tuple assert len(batch) == 2 - assert type(batch[0]) == list + assert type(batch[0]) == tuple assert type(batch[1]) == np.ndarray assert len(batch[0]) == 2 assert type(batch[0][0]) == np.ndarray @@ -93,7 +93,7 @@ def test_many_y(self, data, label_binarizer, label_encoder): assert type(batch) == tuple assert len(batch) == 2 assert type(batch[0]) == np.ndarray - assert type(batch[1]) == list + assert type(batch[1]) == tuple assert len(batch[1]) == 2 assert type(batch[1][0]) == np.ndarray assert type(batch[1][1]) == np.ndarray @@ -115,7 +115,7 @@ def test_predict_batch(self, data, label_binarizer, label_encoder): batch = batch_shaper.transform(data) assert isinstance(batch, tuple) assert len(batch) == 1 - assert isinstance(batch[0], list) + assert isinstance(batch[0], tuple) assert len(batch[0]) == 2 @@ -152,7 +152,7 @@ def test_none_transformer(self, data, label_binarizer, label_encoder): batch = bs.transform(data) assert type(batch) == tuple assert len(batch) == 2 - assert type(batch[0]) == list + assert type(batch[0]) == tuple assert len(batch[0]) == 2 assert np.array_equal(batch[0][1], np.expand_dims(data['var2'].values, axis=-1)) @@ -163,7 +163,7 @@ def test_const_component_int(self, data, label_binarizer, label_encoder): batch = bs.transform(data) assert type(batch) == tuple assert len(batch) == 2 - assert type(batch[0]) == list + assert type(batch[0]) == tuple assert len(batch[0]) == 2 assert np.all(batch[0][1] == 0) assert batch[0][1].dtype == int @@ -175,7 +175,7 @@ def test_const_component_float(self, data, label_binarizer, label_encoder): batch = bs.transform(data) assert type(batch) == tuple assert len(batch) == 2 - assert type(batch[0]) == list + assert type(batch[0]) == tuple assert len(batch[0]) == 2 assert np.all(batch[0][1] == 0) assert batch[0][1].dtype == float @@ -187,7 +187,7 @@ def test_const_component_str(self, data, label_binarizer, label_encoder): batch = bs.transform(data) assert type(batch) == tuple assert len(batch) == 2 - assert type(batch[0]) == list + assert type(batch[0]) == tuple assert len(batch[0]) == 2 assert np.all(batch[0][1] == 'a') assert batch[0][1].dtype == ' 0.01 + assert binomtest(b, 1000, 0.6).pvalue > 0.01 def test_cols_dist(self): sample_size = 1000 @@ -53,15 +53,15 @@ def test_different_drop_values(self): fd = FeatureDropout([0., 1.], ['var1', 'var2', 'label'], drop_values=['v1', 'v2', 'v3']) batch = fd.transform(self.df.sample(1000, replace=True)) b = (batch == 'v1').sum(axis=0) - assert binom_test(b[0], 1000, 0.33) > 0.01 + assert binomtest(b[0], 1000, 0.33).pvalue > 0.01 assert b[1] == 0 assert b[2] == 0 b = (batch == 'v2').sum(axis=0) - assert binom_test(b[1], 1000, 0.33) > 0.001 + assert binomtest(b[1], 1000, 0.33).pvalue > 0.001 assert b[0] == 0 assert b[2] == 0 b = (batch == 'v3').sum(axis=0) - assert binom_test(b[2], 1000, 0.33) > 0.001 + assert binomtest(b[2], 1000, 0.33).pvalue > 0.001 assert b[0] == 0 assert b[1] == 0 diff --git a/tests/batch_transformers/test_shuffle_noise.py b/tests/batch_transformers/test_shuffle_noise.py index b19e4c9..5dbfa22 100644 --- a/tests/batch_transformers/test_shuffle_noise.py +++ b/tests/batch_transformers/test_shuffle_noise.py @@ -1,7 +1,7 @@ import pytest import pandas as pd import numpy as np -from scipy.stats import binom_test, chisquare +from scipy.stats import binomtest, chisquare from keras_batchflow.base.batch_transformers import ShuffleNoise