Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/master-pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11"]
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]

steps:
- uses: actions/checkout@v4
Expand Down
13 changes: 6 additions & 7 deletions keras_batchflow/base/batch_shapers/batch_shaper.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,9 @@ def _create_shaper_func(self, data, leaf, **kwargs):
def _shape_batch(self, data: pd.DataFrame, func, **kwargs):
"""
This method forms a batch. Depending on the function provided, it can return different data shaped the same way:
- batch-shaped data for keras and tensorflow fit/predict, e.g. ([ndarray, ndarray], ndarray)
- component shapes structure, e.g. ([(None, 2), (None, 5)], (None, 2)) for the same example
- number of classes for categorical inputs, e.g. ([10, 3], 2)
- batch-shaped data for keras and tensorflow fit/predict, e.g. ((ndarray, ndarray), ndarray)
- component shapes structure, e.g. (((None, 2), (None, 5)), (None, 2)) for the same example
- number of classes for categorical inputs, e.g. ((10, 3), 2)
:param data: pandas dataframe with the data
:param func: a function defining the output (self._shape_func, self._transform_func, self._n_classes_func)
:param kwargs:
Expand All @@ -98,10 +98,9 @@ def _walk_structure(self, data: pd.DataFrame, struc, func, **kwargs):
return ret
elif type(struc) in [list, tuple]:
ret = [self._walk_structure(data, s, func, **kwargs) for s in struc]
if type(struc) is tuple:
return tuple(ret)
else:
return ret
# Always return a tuple, even when the structure was defined as a list: tensorflow wants x and y
# to be tuples when they have multiple components.
return tuple(ret)
else:
raise ValueError('Error: structure definition in {} class only supports lists and tuples, but {}'
'was found'.format(type(self).__name__, type(struc)))
Expand Down
6 changes: 3 additions & 3 deletions tests/batch_generators/test_batch_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,14 +173,14 @@ def test_transform(self):
batch = bg.transform(self.df)
assert type(batch) == tuple
assert len(batch) == 2
assert type(batch[0]) == list
assert type(batch[0]) == tuple
assert len(batch[0]) == 2
assert type(batch[1]) == np.ndarray
assert batch[1].shape == (8, 1)
batch = bg.transform(self.df, return_y=False)
assert isinstance(batch, tuple)
assert len(batch) == 1
assert type(batch[0]) == list
assert type(batch[0]) == tuple
assert len(batch[0]) == 2

def test_inverse_transform(self):
Expand Down Expand Up @@ -222,7 +222,7 @@ def test_shapes(self):
sh = bg.shapes
assert type(sh) == tuple
assert len(sh) == 2
assert type(sh[0]) == list
assert type(sh[0]) == tuple
assert len(sh[0]) == 2
assert sh[0][0] == (3,)
assert sh[0][1] == (1,)
Expand Down
20 changes: 10 additions & 10 deletions tests/batch_shapers/test_batch_shaper.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def test_many_x(self, data, label_binarizer, label_encoder):
batch = bs.transform(data)
assert type(batch) == tuple
assert len(batch) == 2
assert type(batch[0]) == list
assert type(batch[0]) == tuple
assert type(batch[1]) == np.ndarray
assert len(batch[0]) == 2
assert type(batch[0][0]) == np.ndarray
Expand All @@ -93,7 +93,7 @@ def test_many_y(self, data, label_binarizer, label_encoder):
assert type(batch) == tuple
assert len(batch) == 2
assert type(batch[0]) == np.ndarray
assert type(batch[1]) == list
assert type(batch[1]) == tuple
assert len(batch[1]) == 2
assert type(batch[1][0]) == np.ndarray
assert type(batch[1][1]) == np.ndarray
Expand All @@ -115,7 +115,7 @@ def test_predict_batch(self, data, label_binarizer, label_encoder):
batch = batch_shaper.transform(data)
assert isinstance(batch, tuple)
assert len(batch) == 1
assert isinstance(batch[0], list)
assert isinstance(batch[0], tuple)
assert len(batch[0]) == 2


Expand Down Expand Up @@ -152,7 +152,7 @@ def test_none_transformer(self, data, label_binarizer, label_encoder):
batch = bs.transform(data)
assert type(batch) == tuple
assert len(batch) == 2
assert type(batch[0]) == list
assert type(batch[0]) == tuple
assert len(batch[0]) == 2
assert np.array_equal(batch[0][1], np.expand_dims(data['var2'].values, axis=-1))

Expand All @@ -163,7 +163,7 @@ def test_const_component_int(self, data, label_binarizer, label_encoder):
batch = bs.transform(data)
assert type(batch) == tuple
assert len(batch) == 2
assert type(batch[0]) == list
assert type(batch[0]) == tuple
assert len(batch[0]) == 2
assert np.all(batch[0][1] == 0)
assert batch[0][1].dtype == int
Expand All @@ -175,7 +175,7 @@ def test_const_component_float(self, data, label_binarizer, label_encoder):
batch = bs.transform(data)
assert type(batch) == tuple
assert len(batch) == 2
assert type(batch[0]) == list
assert type(batch[0]) == tuple
assert len(batch[0]) == 2
assert np.all(batch[0][1] == 0)
assert batch[0][1].dtype == float
Expand All @@ -187,7 +187,7 @@ def test_const_component_str(self, data, label_binarizer, label_encoder):
batch = bs.transform(data)
assert type(batch) == tuple
assert len(batch) == 2
assert type(batch[0]) == list
assert type(batch[0]) == tuple
assert len(batch[0]) == 2
assert np.all(batch[0][1] == 'a')
assert batch[0][1].dtype == '<U1' # single unicode character
Expand All @@ -201,7 +201,7 @@ def test_metadata(self, data, label_binarizer, label_encoder):
batch = bs.transform(data)
assert type(md) is tuple
assert len(md) == 2
assert type(md[0]) is list
assert type(md[0]) is tuple
assert len(md[0]) == 2
assert type(md[0][0]) == dict
assert type(md[0][1]) == dict
Expand Down Expand Up @@ -234,7 +234,7 @@ def test_dummy_var_naming(self, data, label_binarizer, label_encoder):
md = bs.metadata
assert type(md) is tuple
assert len(md) == 2
assert type(md[0]) is list
assert type(md[0]) is tuple
assert len(md[0]) == 3
assert all([type(m) == dict for m in md[0]])
assert md[0][1]['name'] == 'dummy_constant_0'
Expand Down Expand Up @@ -263,7 +263,7 @@ def inverse_transform(self, data):
data_sample=data)
shapes = bs.shape
assert type(shapes) == tuple
assert type(shapes[0]) == list
assert type(shapes[0]) == tuple
assert len(shapes[0]) == 2
assert shapes[0][0] == (3,) # measured
assert shapes[0][1] == (11,) # direct from encoders's shape property
Expand Down
3 changes: 2 additions & 1 deletion tests/batch_shapers/test_numpy_encoder_adaptor.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ def test_transform_integer_array(self):
nea = NumpyEncoderAdaptor()
tr = nea.transform(data)
assert isinstance(tr, np.ndarray)
assert np.issubdtype(tr.dtype, object)
# Accept both dtypes for compatibility: older versions of pandas (2.0.* on Python 3.8) return Int64 as object dtype.
assert np.issubdtype(tr.dtype, object) or np.issubdtype(tr.dtype, int)

def test_transform_datetime(self):
"""
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest
import pandas as pd
import numpy as np
from scipy.stats import binom_test, chisquare
from scipy.stats import chisquare
from keras_batchflow.base.batch_transformers import BaseRandomCellTransform, BatchFork


Expand Down
10 changes: 5 additions & 5 deletions tests/batch_transformers/test_feature_dropout.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest
import pandas as pd
from scipy.stats import binom_test, chisquare
from scipy.stats import binomtest, chisquare
from keras_batchflow.base.batch_transformers import FeatureDropout


Expand Down Expand Up @@ -31,7 +31,7 @@ def test_row_dist(self):
fd = FeatureDropout([0.4, .6], 'var1', drop_values='')
batch = fd.transform(self.df.sample(1000, replace=True))
b = (batch['var1'] == '').sum()
assert binom_test(b, 1000, 0.6) > 0.01
assert binomtest(b, 1000, 0.6).pvalue > 0.01

def test_cols_dist(self):
sample_size = 1000
Expand All @@ -53,15 +53,15 @@ def test_different_drop_values(self):
fd = FeatureDropout([0., 1.], ['var1', 'var2', 'label'], drop_values=['v1', 'v2', 'v3'])
batch = fd.transform(self.df.sample(1000, replace=True))
b = (batch == 'v1').sum(axis=0)
assert binom_test(b[0], 1000, 0.33) > 0.01
assert binomtest(b[0], 1000, 0.33).pvalue > 0.01
assert b[1] == 0
assert b[2] == 0
b = (batch == 'v2').sum(axis=0)
assert binom_test(b[1], 1000, 0.33) > 0.001
assert binomtest(b[1], 1000, 0.33).pvalue > 0.001
assert b[0] == 0
assert b[2] == 0
b = (batch == 'v3').sum(axis=0)
assert binom_test(b[2], 1000, 0.33) > 0.001
assert binomtest(b[2], 1000, 0.33).pvalue > 0.001
assert b[0] == 0
assert b[1] == 0

Expand Down
2 changes: 1 addition & 1 deletion tests/batch_transformers/test_shuffle_noise.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pytest
import pandas as pd
import numpy as np
from scipy.stats import binom_test, chisquare
from scipy.stats import binomtest, chisquare
from keras_batchflow.base.batch_transformers import ShuffleNoise


Expand Down