From 9429aaae429e35407aafdb1d7c350fa73fc52cae Mon Sep 17 00:00:00 2001 From: Jeremy Khawaja Date: Mon, 12 Oct 2020 08:46:26 -0500 Subject: [PATCH] copy paginated code into branch --- paginated/README.md | 20 + paginated/array.go | 1554 ++++++++++++++++++++++++++++++++++++ paginated/data.go | 177 ++++ paginated/engine.go | 59 ++ paginated/err.go | 18 + paginated/format.go | 49 ++ paginated/index.go | 151 ++++ paginated/info.go | 53 ++ paginated/iterator.go | 110 +++ paginated/memory.go | 41 + paginated/operations.go | 218 +++++ paginated/options.go | 222 ++++++ paginated/overloading.go | 12 + paginated/pagination.go | 238 ++++++ paginated/serialization.go | 219 +++++ paginated/slice.go | 39 + paginated/stream.go | 115 +++ paginated/tensor.go | 12 +- paginated/traversal.go | 19 + paginated/view.go | 250 ++++++ 20 files changed, 3573 insertions(+), 3 deletions(-) create mode 100644 paginated/README.md create mode 100644 paginated/array.go create mode 100644 paginated/data.go create mode 100644 paginated/engine.go create mode 100644 paginated/err.go create mode 100644 paginated/format.go create mode 100644 paginated/index.go create mode 100644 paginated/info.go create mode 100644 paginated/iterator.go create mode 100644 paginated/memory.go create mode 100644 paginated/operations.go create mode 100644 paginated/options.go create mode 100644 paginated/overloading.go create mode 100644 paginated/pagination.go create mode 100644 paginated/serialization.go create mode 100644 paginated/slice.go create mode 100644 paginated/stream.go create mode 100644 paginated/traversal.go create mode 100644 paginated/view.go diff --git a/paginated/README.md b/paginated/README.md new file mode 100644 index 0000000..6a324ab --- /dev/null +++ b/paginated/README.md @@ -0,0 +1,20 @@ +# `paginated` tensors + +This package is not ready for production usage or release. Use at your own risk. 
The API will try to be maintained, but the only API guarantees at this time are the Gorgonia [`tensor.Tensor`](https://github.com/gorgonia/tensor/blob/master/tensor.go#L27) API. + +No behavior is currently guaranteed until a `1.0.0` architecture has been decided on, tested, benchmarked, profiled, and released. + +## Overview + +Allows for arbitrarily large tensors that live permanently on disk but are buffered in memory when used. The size of the in-memory buffer can be specified by a page size (in values) and the number of pages allowed in memory. Three default buffer size options are available: small (1/200 of system memory), medium (1/20 of system memory), and large (1/2 of system memory). But any buffer size less than system memory size can be specified. + +The tensors are also easily shareable. Composed of only two files: a `main.json` file which specifies the tensors metadata, and an `index.gob.gz` file which specifies an index from which the tensor can be rebuilt. + +## Contribute + +If you would like to contribute then please contribute: tests, benchmarks, documentation, and concurrency-safe code suggestions. + +## TODO + +- tests, benchmarks, profiling +- concurrency-safe code diff --git a/paginated/array.go b/paginated/array.go new file mode 100644 index 0000000..94af59b --- /dev/null +++ b/paginated/array.go @@ -0,0 +1,1554 @@ +package paginated + +import ( + "unsafe" + + "gorgonia.org/tensor" +) + +/* + This implementation choice may be too slow ... + See: https://github.com/gorgonia/tensor/blob/master/ALTERNATIVEDESIGNS.md#one-struct-multiple-backing-interfaces +*/ + +// Array interface can be used to satisfy an object +// which stores the data for a page in-memory. 
+type Array interface { + // Apply will apply the function to all values in the array + Apply(fn interface{}) error + // At will retrieve that value found at the provided index + At(int) (interface{}, error) + // Copy will copy the data from the src array + Copy(src Array) error + // Len will return the length of the array + Len() int + // SetAt will set the provided value at the specified index + SetAt(interface{}, int) error + // SetAll will set all values in the array to the provided value + SetAll(interface{}) error + // Slice will return a slice of the array + Slice(start, end int) (Array, error) + // Zero will set all values in the array to the zero (default) value of their type + Zero() +} + +// GenArray will generate a new array based on the paginated +// tensor's data type and the page size. +func (p *Tensor) GenArray() Array { + switch p.Dtype() { + case tensor.Bool: + return make(BoolArray, p.pageSize, p.pageSize) + case tensor.Int: + return make(IntArray, p.pageSize, p.pageSize) + case tensor.Int8: + return make(Int8Array, p.pageSize, p.pageSize) + case tensor.Int16: + return make(Int16Array, p.pageSize, p.pageSize) + case tensor.Int32: + return make(Int32Array, p.pageSize, p.pageSize) + case tensor.Int64: + return make(Int64Array, p.pageSize, p.pageSize) + case tensor.Uint: + return make(UIntArray, p.pageSize, p.pageSize) + case tensor.Uint8, tensor.Byte: + return make(UInt8Array, p.pageSize, p.pageSize) + case tensor.Uint16: + return make(UInt16Array, p.pageSize, p.pageSize) + case tensor.Uint32: + return make(UInt32Array, p.pageSize, p.pageSize) + case tensor.Uint64: + return make(UInt64Array, p.pageSize, p.pageSize) + case tensor.Float32: + return make(Float32Array, p.pageSize, p.pageSize) + case tensor.Float64: + return make(Float64Array, p.pageSize, p.pageSize) + case tensor.Complex64: + return make(Complex64Array, p.pageSize, p.pageSize) + case tensor.Complex128: + return make(Complex128Array, p.pageSize, p.pageSize) + case tensor.String: + 
return make(StringArray, p.pageSize, p.pageSize) + case tensor.Uintptr: + return make(UintptrArray, p.pageSize, p.pageSize) + case tensor.UnsafePointer: + return make(UnsafeArray, p.pageSize, p.pageSize) + } + + return nil +} + +type BoolArray []bool + +func (a BoolArray) Apply(fn interface{}) error { + f, ok := fn.(func(bool) bool) + if !ok { + return ErrFuncType + } + + for i, v := range a { + a[i] = f(v) + } + + return nil +} + +func (a BoolArray) At(i int) (interface{}, error) { + if i > 0 && i < len(a) { + return a[i], nil + } + return nil, ErrBound +} + +func (a BoolArray) Copy(src Array) error { + if len(a) < src.Len() { + return ErrLength + } + + array, ok := src.(BoolArray) + if !ok { + return ErrType + } + + for i, v := range array { + a[i] = v + } + + return nil +} + +func (a BoolArray) Len() int { + return len(a) +} + +func (a BoolArray) SetAll(v interface{}) error { + val, ok := v.(bool) + if !ok { + return ErrType + } + + for i := range a { + a[i] = val + } + + return nil +} + +func (a BoolArray) SetAt(v interface{}, i int) error { + value, ok := v.(bool) + if ok { + if i > 0 && i < len(a) { + a[i] = value + return nil + } + } + + return ErrType +} + +func (a BoolArray) Slice(start, end int) (Array, error) { + if start < 0 || end > len(a) || start > len(a) || end < start { + return nil, ErrBound + } + + return BoolArray(a[start:end]), nil +} + +func (a BoolArray) Zero() { + for i := range a { + a[i] = false + } +} + +type IntArray []int + +func (a IntArray) Apply(fn interface{}) error { + f, ok := fn.(func(int) int) + if !ok { + return ErrFuncType + } + + for i, v := range a { + a[i] = f(v) + } + + return nil +} + +func (a IntArray) At(i int) (interface{}, error) { + if i > 0 && i < len(a) { + return a[i], nil + } + return nil, ErrBound +} + +func (a IntArray) Copy(src Array) error { + if len(a) < src.Len() { + return ErrLength + } + + array, ok := src.(IntArray) + if !ok { + return ErrType + } + + for i, v := range array { + a[i] = v + } + + return 
nil +} + +func (a IntArray) Len() int { + return len(a) +} + +func (a IntArray) SetAll(v interface{}) error { + val, ok := v.(int) + if !ok { + return ErrType + } + + for i := range a { + a[i] = val + } + + return nil +} + +func (a IntArray) SetAt(v interface{}, i int) error { + value, ok := v.(int) + if ok { + if i > 0 && i < len(a) { + a[i] = value + return nil + } + } + + return ErrType +} + +func (a IntArray) Slice(start, end int) (Array, error) { + if start < 0 || end > len(a) || start > len(a) || end < start { + return nil, ErrBound + } + + return IntArray(a[start:end]), nil +} + +func (a IntArray) Zero() { + for i := range a { + a[i] = 0 + } +} + +type Int8Array []int8 + +func (a Int8Array) Apply(fn interface{}) error { + f, ok := fn.(func(int8) int8) + if !ok { + return ErrFuncType + } + + for i, v := range a { + a[i] = f(v) + } + + return nil +} + +func (a Int8Array) At(i int) (interface{}, error) { + if i > 0 && i < len(a) { + return a[i], nil + } + return nil, ErrBound +} + +func (a Int8Array) Copy(src Array) error { + if len(a) < src.Len() { + return ErrLength + } + + array, ok := src.(Int8Array) + if !ok { + return ErrType + } + + for i, v := range array { + a[i] = v + } + + return nil +} + +func (a Int8Array) Len() int { + return len(a) +} + +func (a Int8Array) SetAll(v interface{}) error { + val, ok := v.(int8) + if !ok { + return ErrType + } + + for i := range a { + a[i] = val + } + + return nil +} + +func (a Int8Array) SetAt(v interface{}, i int) error { + value, ok := v.(int8) + if ok { + if i > 0 && i < len(a) { + a[i] = value + return nil + } + } + + return ErrType +} + +func (a Int8Array) Slice(start, end int) (Array, error) { + if start < 0 || end > len(a) || start > len(a) || end < start { + return nil, ErrBound + } + + return Int8Array(a[start:end]), nil +} + +func (a Int8Array) Zero() { + for i := range a { + a[i] = int8(0) + } +} + +type Int16Array []int16 + +func (a Int16Array) Apply(fn interface{}) error { + f, ok := fn.(func(int16) 
int16) + if !ok { + return ErrFuncType + } + + for i, v := range a { + a[i] = f(v) + } + + return nil +} + +func (a Int16Array) At(i int) (interface{}, error) { + if i > 0 && i < len(a) { + return a[i], nil + } + return nil, ErrBound +} + +func (a Int16Array) Copy(src Array) error { + if len(a) < src.Len() { + return ErrLength + } + + array, ok := src.(Int16Array) + if !ok { + return ErrType + } + + for i, v := range array { + a[i] = v + } + + return nil +} + +func (a Int16Array) Len() int { + return len(a) +} + +func (a Int16Array) SetAll(v interface{}) error { + val, ok := v.(int16) + if !ok { + return ErrType + } + + for i := range a { + a[i] = val + } + + return nil +} + +func (a Int16Array) SetAt(v interface{}, i int) error { + value, ok := v.(int16) + if ok { + if i > 0 && i < len(a) { + a[i] = value + return nil + } + } + + return ErrType +} + +func (a Int16Array) Slice(start, end int) (Array, error) { + if start < 0 || end > len(a) || start > len(a) || end < start { + return nil, ErrBound + } + + return Int16Array(a[start:end]), nil +} + +func (a Int16Array) Zero() { + for i := range a { + a[i] = int16(0) + } +} + +type Int32Array []int32 + +func (a Int32Array) Apply(fn interface{}) error { + f, ok := fn.(func(int32) int32) + if !ok { + return ErrFuncType + } + + for i, v := range a { + a[i] = f(v) + } + + return nil +} + +func (a Int32Array) At(i int) (interface{}, error) { + if i > 0 && i < len(a) { + return a[i], nil + } + return nil, ErrBound +} + +func (a Int32Array) Copy(src Array) error { + if len(a) < src.Len() { + return ErrLength + } + + array, ok := src.(Int32Array) + if !ok { + return ErrType + } + + for i, v := range array { + a[i] = v + } + + return nil +} + +func (a Int32Array) Len() int { + return len(a) +} + +func (a Int32Array) SetAll(v interface{}) error { + val, ok := v.(int32) + if !ok { + return ErrType + } + + for i := range a { + a[i] = val + } + + return nil +} + +func (a Int32Array) SetAt(v interface{}, i int) error { + value, ok 
:= v.(int32) + if ok { + if i > 0 && i < len(a) { + a[i] = value + return nil + } + } + + return ErrType +} + +func (a Int32Array) Slice(start, end int) (Array, error) { + if start < 0 || end > len(a) || start > len(a) || end < start { + return nil, ErrBound + } + + return Int32Array(a[start:end]), nil +} + +func (a Int32Array) Zero() { + for i := range a { + a[i] = int32(0) + } +} + +type Int64Array []int64 + +func (a Int64Array) Apply(fn interface{}) error { + f, ok := fn.(func(int64) int64) + if !ok { + return ErrFuncType + } + + for i, v := range a { + a[i] = f(v) + } + + return nil +} + +func (a Int64Array) At(i int) (interface{}, error) { + if i > 0 && i < len(a) { + return a[i], nil + } + return nil, ErrBound +} + +func (a Int64Array) Copy(src Array) error { + if len(a) < src.Len() { + return ErrLength + } + + array, ok := src.(Int64Array) + if !ok { + return ErrType + } + + for i, v := range array { + a[i] = v + } + + return nil +} + +func (a Int64Array) Len() int { + return len(a) +} + +func (a Int64Array) SetAll(v interface{}) error { + val, ok := v.(int64) + if !ok { + return ErrType + } + + for i := range a { + a[i] = val + } + + return nil +} + +func (a Int64Array) SetAt(v interface{}, i int) error { + value, ok := v.(int64) + if ok { + if i > 0 && i < len(a) { + a[i] = value + return nil + } + } + + return ErrType +} + +func (a Int64Array) Slice(start, end int) (Array, error) { + if start < 0 || end > len(a) || start > len(a) || end < start { + return nil, ErrBound + } + + return Int64Array(a[start:end]), nil +} + +func (a Int64Array) Zero() { + for i := range a { + a[i] = int64(0) + } +} + +type UIntArray []uint + +func (a UIntArray) Apply(fn interface{}) error { + f, ok := fn.(func(uint) uint) + if !ok { + return ErrFuncType + } + + for i, v := range a { + a[i] = f(v) + } + + return nil +} + +func (a UIntArray) At(i int) (interface{}, error) { + if i > 0 && i < len(a) { + return a[i], nil + } + return nil, ErrBound +} + +func (a UIntArray) Copy(src 
Array) error { + if len(a) < src.Len() { + return ErrLength + } + + array, ok := src.(UIntArray) + if !ok { + return ErrType + } + + for i, v := range array { + a[i] = v + } + + return nil +} + +func (a UIntArray) Len() int { + return len(a) +} + +func (a UIntArray) SetAll(v interface{}) error { + val, ok := v.(uint) + if !ok { + return ErrType + } + + for i := range a { + a[i] = val + } + + return nil +} + +func (a UIntArray) SetAt(v interface{}, i int) error { + value, ok := v.(uint) + if ok { + if i > 0 && i < len(a) { + a[i] = value + return nil + } + } + + return ErrType +} + +func (a UIntArray) Slice(start, end int) (Array, error) { + if start < 0 || end > len(a) || start > len(a) || end < start { + return nil, ErrBound + } + + return UIntArray(a[start:end]), nil +} + +func (a UIntArray) Zero() { + for i := range a { + a[i] = uint(0) + } +} + +type UInt8Array []uint8 + +func (a UInt8Array) Apply(fn interface{}) error { + f, ok := fn.(func(uint8) uint8) + if !ok { + return ErrFuncType + } + + for i, v := range a { + a[i] = f(v) + } + + return nil +} + +func (a UInt8Array) At(i int) (interface{}, error) { + if i > 0 && i < len(a) { + return a[i], nil + } + return nil, ErrBound +} + +func (a UInt8Array) Copy(src Array) error { + if len(a) < src.Len() { + return ErrLength + } + + array, ok := src.(UInt8Array) + if !ok { + return ErrType + } + + for i, v := range array { + a[i] = v + } + + return nil +} + +func (a UInt8Array) Len() int { + return len(a) +} + +func (a UInt8Array) SetAll(v interface{}) error { + val, ok := v.(uint8) + if !ok { + return ErrType + } + + for i := range a { + a[i] = val + } + + return nil +} + +func (a UInt8Array) SetAt(v interface{}, i int) error { + value, ok := v.(uint8) + if ok { + if i > 0 && i < len(a) { + a[i] = value + return nil + } + } + + return ErrType +} + +func (a UInt8Array) Slice(start, end int) (Array, error) { + if start < 0 || end > len(a) || start > len(a) || end < start { + return nil, ErrBound + } + + return 
UInt8Array(a[start:end]), nil +} + +func (a UInt8Array) Zero() { + for i := range a { + a[i] = uint8(0) + } +} + +type UInt16Array []uint16 + +func (a UInt16Array) Apply(fn interface{}) error { + f, ok := fn.(func(uint16) uint16) + if !ok { + return ErrFuncType + } + + for i, v := range a { + a[i] = f(v) + } + + return nil +} + +func (a UInt16Array) At(i int) (interface{}, error) { + if i > 0 && i < len(a) { + return a[i], nil + } + return nil, ErrBound +} + +func (a UInt16Array) Copy(src Array) error { + if len(a) < src.Len() { + return ErrLength + } + + array, ok := src.(UInt16Array) + if !ok { + return ErrType + } + + for i, v := range array { + a[i] = v + } + + return nil +} + +func (a UInt16Array) Len() int { + return len(a) +} + +func (a UInt16Array) SetAll(v interface{}) error { + val, ok := v.(uint16) + if !ok { + return ErrType + } + + for i := range a { + a[i] = val + } + + return nil +} + +func (a UInt16Array) SetAt(v interface{}, i int) error { + value, ok := v.(uint16) + if ok { + if i > 0 && i < len(a) { + a[i] = value + return nil + } + } + + return ErrType +} + +func (a UInt16Array) Slice(start, end int) (Array, error) { + if start < 0 || end > len(a) || start > len(a) || end < start { + return nil, ErrBound + } + + return UInt16Array(a[start:end]), nil +} + +func (a UInt16Array) Zero() { + for i := range a { + a[i] = uint16(0) + } +} + +type UInt32Array []uint32 + +func (a UInt32Array) Apply(fn interface{}) error { + f, ok := fn.(func(uint32) uint32) + if !ok { + return ErrFuncType + } + + for i, v := range a { + a[i] = f(v) + } + + return nil +} + +func (a UInt32Array) At(i int) (interface{}, error) { + if i > 0 && i < len(a) { + return a[i], nil + } + return nil, ErrBound +} + +func (a UInt32Array) Copy(src Array) error { + if len(a) < src.Len() { + return ErrLength + } + + array, ok := src.(UInt32Array) + if !ok { + return ErrType + } + + for i, v := range array { + a[i] = v + } + + return nil +} + +func (a UInt32Array) Len() int { + return 
len(a) +} + +func (a UInt32Array) SetAll(v interface{}) error { + val, ok := v.(uint32) + if !ok { + return ErrType + } + + for i := range a { + a[i] = val + } + + return nil +} + +func (a UInt32Array) SetAt(v interface{}, i int) error { + value, ok := v.(uint32) + if ok { + if i > 0 && i < len(a) { + a[i] = value + return nil + } + } + + return ErrType +} + +func (a UInt32Array) Slice(start, end int) (Array, error) { + if start < 0 || end > len(a) || start > len(a) || end < start { + return nil, ErrBound + } + + return UInt32Array(a[start:end]), nil +} + +func (a UInt32Array) Zero() { + for i := range a { + a[i] = uint32(0) + } +} + +type UInt64Array []uint64 + +func (a UInt64Array) Apply(fn interface{}) error { + f, ok := fn.(func(uint64) uint64) + if !ok { + return ErrFuncType + } + + for i, v := range a { + a[i] = f(v) + } + + return nil +} + +func (a UInt64Array) At(i int) (interface{}, error) { + if i > 0 && i < len(a) { + return a[i], nil + } + return nil, ErrBound +} + +func (a UInt64Array) Copy(src Array) error { + if len(a) < src.Len() { + return ErrLength + } + + array, ok := src.(UInt64Array) + if !ok { + return ErrType + } + + for i, v := range array { + a[i] = v + } + + return nil +} + +func (a UInt64Array) Len() int { + return len(a) +} + +func (a UInt64Array) SetAll(v interface{}) error { + val, ok := v.(uint64) + if !ok { + return ErrType + } + + for i := range a { + a[i] = val + } + + return nil +} + +func (a UInt64Array) SetAt(v interface{}, i int) error { + value, ok := v.(uint64) + if ok { + if i > 0 && i < len(a) { + a[i] = value + return nil + } + } + + return ErrType +} + +func (a UInt64Array) Slice(start, end int) (Array, error) { + if start < 0 || end > len(a) || start > len(a) || end < start { + return nil, ErrBound + } + + return UInt64Array(a[start:end]), nil +} + +func (a UInt64Array) Zero() { + for i := range a { + a[i] = uint64(0) + } +} + +type Float32Array []float32 + +func (a Float32Array) Apply(fn interface{}) error { + f, ok := 
fn.(func(float32) float32) + if !ok { + return ErrFuncType + } + + for i, v := range a { + a[i] = f(v) + } + + return nil +} + +func (a Float32Array) At(i int) (interface{}, error) { + if i > 0 && i < len(a) { + return a[i], nil + } + return nil, ErrBound +} + +func (a Float32Array) Copy(src Array) error { + if len(a) < src.Len() { + return ErrLength + } + + array, ok := src.(Float32Array) + if !ok { + return ErrType + } + + for i, v := range array { + a[i] = v + } + + return nil +} + +func (a Float32Array) Len() int { + return len(a) +} + +func (a Float32Array) SetAll(v interface{}) error { + val, ok := v.(float32) + if !ok { + return ErrType + } + + for i := range a { + a[i] = val + } + + return nil +} + +func (a Float32Array) SetAt(v interface{}, i int) error { + value, ok := v.(float32) + if ok { + if i > 0 && i < len(a) { + a[i] = value + return nil + } + } + + return ErrType +} + +func (a Float32Array) Slice(start, end int) (Array, error) { + if start < 0 || end > len(a) || start > len(a) || end < start { + return nil, ErrBound + } + + return Float32Array(a[start:end]), nil +} + +func (a Float32Array) Zero() { + for i := range a { + a[i] = float32(0) + } +} + +type Float64Array []float64 + +func (a Float64Array) Apply(fn interface{}) error { + f, ok := fn.(func(float64) float64) + if !ok { + return ErrFuncType + } + + for i, v := range a { + a[i] = f(v) + } + + return nil +} + +func (a Float64Array) At(i int) (interface{}, error) { + if i > 0 && i < len(a) { + return a[i], nil + } + return nil, ErrBound +} + +func (a Float64Array) Copy(src Array) error { + if len(a) < src.Len() { + return ErrLength + } + + array, ok := src.(Float64Array) + if !ok { + return ErrType + } + + for i, v := range array { + a[i] = v + } + + return nil +} + +func (a Float64Array) Len() int { + return len(a) +} + +func (a Float64Array) SetAll(v interface{}) error { + val, ok := v.(float64) + if !ok { + return ErrType + } + + for i := range a { + a[i] = val + } + + return nil +} + 
+func (a Float64Array) SetAt(v interface{}, i int) error { + value, ok := v.(float64) + if ok { + if i > 0 && i < len(a) { + a[i] = value + return nil + } + } + + return ErrType +} + +func (a Float64Array) Slice(start, end int) (Array, error) { + if start < 0 || end > len(a) || start > len(a) || end < start { + return nil, ErrBound + } + + return Float64Array(a[start:end]), nil +} + +func (a Float64Array) Zero() { + for i := range a { + a[i] = float64(0) + } +} + +type Complex64Array []complex64 + +func (a Complex64Array) Apply(fn interface{}) error { + f, ok := fn.(func(complex64) complex64) + if !ok { + return ErrFuncType + } + + for i, v := range a { + a[i] = f(v) + } + + return nil +} + +func (a Complex64Array) At(i int) (interface{}, error) { + if i > 0 && i < len(a) { + return a[i], nil + } + return nil, ErrBound +} + +func (a Complex64Array) Copy(src Array) error { + if len(a) < src.Len() { + return ErrLength + } + + array, ok := src.(Complex64Array) + if !ok { + return ErrType + } + + for i, v := range array { + a[i] = v + } + + return nil +} + +func (a Complex64Array) Len() int { + return len(a) +} + +func (a Complex64Array) SetAll(v interface{}) error { + val, ok := v.(complex64) + if !ok { + return ErrType + } + + for i := range a { + a[i] = val + } + + return nil +} + +func (a Complex64Array) SetAt(v interface{}, i int) error { + value, ok := v.(complex64) + if ok { + if i > 0 && i < len(a) { + a[i] = value + return nil + } + } + + return ErrType +} + +func (a Complex64Array) Slice(start, end int) (Array, error) { + if start < 0 || end > len(a) || start > len(a) || end < start { + return nil, ErrBound + } + + return Complex64Array(a[start:end]), nil +} + +func (a Complex64Array) Zero() { + for i := range a { + a[i] = complex64(0) + } +} + +type Complex128Array []complex128 + +func (a Complex128Array) Apply(fn interface{}) error { + f, ok := fn.(func(complex128) complex128) + if !ok { + return ErrFuncType + } + + for i, v := range a { + a[i] = f(v) + } + 
+ return nil +} + +func (a Complex128Array) At(i int) (interface{}, error) { + if i > 0 && i < len(a) { + return a[i], nil + } + return nil, ErrBound +} + +func (a Complex128Array) Copy(src Array) error { + if len(a) < src.Len() { + return ErrLength + } + + array, ok := src.(Complex128Array) + if !ok { + return ErrType + } + + for i, v := range array { + a[i] = v + } + + return nil +} + +func (a Complex128Array) Len() int { + return len(a) +} + +func (a Complex128Array) SetAll(v interface{}) error { + val, ok := v.(complex128) + if !ok { + return ErrType + } + + for i := range a { + a[i] = val + } + + return nil +} + +func (a Complex128Array) SetAt(v interface{}, i int) error { + value, ok := v.(complex128) + if ok { + if i > 0 && i < len(a) { + a[i] = value + return nil + } + } + + return ErrType +} + +func (a Complex128Array) Slice(start, end int) (Array, error) { + if start < 0 || end > len(a) || start > len(a) || end < start { + return nil, ErrBound + } + + return Complex128Array(a[start:end]), nil +} + +func (a Complex128Array) Zero() { + for i := range a { + a[i] = complex128(0) + } +} + +type StringArray []string + +func (a StringArray) Apply(fn interface{}) error { + f, ok := fn.(func(string) string) + if !ok { + return ErrFuncType + } + + for i, v := range a { + a[i] = f(v) + } + + return nil +} + +func (a StringArray) At(i int) (interface{}, error) { + if i > 0 && i < len(a) { + return a[i], nil + } + return nil, ErrBound +} + +func (a StringArray) Copy(src Array) error { + if len(a) < src.Len() { + return ErrLength + } + + array, ok := src.(StringArray) + if !ok { + return ErrType + } + + for i, v := range array { + a[i] = v + } + + return nil +} + +func (a StringArray) Len() int { + return len(a) +} + +func (a StringArray) SetAll(v interface{}) error { + val, ok := v.(string) + if !ok { + return ErrType + } + + for i := range a { + a[i] = val + } + + return nil +} + +func (a StringArray) SetAt(v interface{}, i int) error { + value, ok := v.(string) + if 
ok { + if i > 0 && i < len(a) { + a[i] = value + return nil + } + } + + return ErrType +} + +func (a StringArray) Slice(start, end int) (Array, error) { + if start < 0 || end > len(a) || start > len(a) || end < start { + return nil, ErrBound + } + + return StringArray(a[start:end]), nil +} + +func (a StringArray) Zero() { + for i := range a { + a[i] = "" + } +} + +type UintptrArray []uintptr + +func (a UintptrArray) Apply(fn interface{}) error { + f, ok := fn.(func(uintptr) uintptr) + if !ok { + return ErrFuncType + } + + for i, v := range a { + a[i] = f(v) + } + + return nil +} + +func (a UintptrArray) At(i int) (interface{}, error) { + if i > 0 && i < len(a) { + return a[i], nil + } + return nil, ErrBound +} + +func (a UintptrArray) Copy(src Array) error { + if len(a) < src.Len() { + return ErrLength + } + + array, ok := src.(UintptrArray) + if !ok { + return ErrType + } + + for i, v := range array { + a[i] = v + } + + return nil +} + +func (a UintptrArray) Len() int { + return len(a) +} + +func (a UintptrArray) SetAll(v interface{}) error { + val, ok := v.(uintptr) + if !ok { + return ErrType + } + + for i := range a { + a[i] = val + } + + return nil +} + +func (a UintptrArray) SetAt(v interface{}, i int) error { + value, ok := v.(uintptr) + if ok { + if i > 0 && i < len(a) { + a[i] = value + return nil + } + } + + return ErrType +} + +func (a UintptrArray) Slice(start, end int) (Array, error) { + if start < 0 || end > len(a) || start > len(a) || end < start { + return nil, ErrBound + } + + return UintptrArray(a[start:end]), nil +} + +func (a UintptrArray) Zero() { + for i := range a { + a[i] = uintptr(0) + } +} + +type UnsafeArray []unsafe.Pointer + +func (a UnsafeArray) Apply(fn interface{}) error { + f, ok := fn.(func(unsafe.Pointer) unsafe.Pointer) + if !ok { + return ErrFuncType + } + + for i, v := range a { + a[i] = f(v) + } + + return nil +} + +func (a UnsafeArray) At(i int) (interface{}, error) { + if i > 0 && i < len(a) { + return a[i], nil + } + return 
nil, ErrBound +} + +func (a UnsafeArray) Copy(src Array) error { + if len(a) < src.Len() { + return ErrLength + } + + array, ok := src.(UnsafeArray) + if !ok { + return ErrType + } + + for i, v := range array { + a[i] = v + } + + return nil +} + +func (a UnsafeArray) Len() int { + return len(a) +} + +func (a UnsafeArray) SetAll(v interface{}) error { + val, ok := v.(unsafe.Pointer) + if !ok { + return ErrType + } + + for i := range a { + a[i] = val + } + + return nil +} + +func (a UnsafeArray) SetAt(v interface{}, i int) error { + value, ok := v.(unsafe.Pointer) + if ok { + if i > 0 && i < len(a) { + a[i] = value + return nil + } + } + + return ErrType +} + +func (a UnsafeArray) Slice(start, end int) (Array, error) { + if start < 0 || end > len(a) || start > len(a) || end < start { + return nil, ErrBound + } + + return UnsafeArray(a[start:end]), nil +} + +func (a UnsafeArray) Zero() { + for i := range a { + a[i] = nil + } +} diff --git a/paginated/data.go b/paginated/data.go new file mode 100644 index 0000000..13b3606 --- /dev/null +++ b/paginated/data.go @@ -0,0 +1,177 @@ +package paginated + +import ( + "fmt" + "os" + "strconv" + "time" +) + +// Zero will set all values in the tensor to their zero value. +// This should be used very cautiously with paginated tensors +// as all persisted data will be zeroed as well. +// This function will panic if any error occurs. +func (p *Tensor) Zero() { + for i := 0; i < len(p.pages); i++ { + page, err := p.pages.get(i) + if err != nil { + panic(err) + } + + if p.cache.Contains(page.Id) { + d, _ := p.cache.Get(page.Id) + d.(Array).Zero() + } else { + err := p.Swap(page.Id) + if err != nil { + panic(err) + } + + d, _ := p.cache.Get(page.Id) + d.(Array).Zero() + } + + err = p.flush(page) + if err != nil { + panic(err) + } + } +} + +// Memset will set all values in the tensor to the provided +// value. +// This should be used very cautiously with paginated tensors +// as all persisted data will be set to provided value as well. 
+func (p *Tensor) Memset(input interface{}) error { + for i := 0; i < len(p.pages); i++ { + page, err := p.pages.get(i) + if err != nil { + return err + } + + if p.cache.Contains(page.Id) { + d, ok := p.cache.Get(page.Id) + if ok { + d.(Array).SetAll(input) + } + } else { + err := p.Swap(page.Id) + if err != nil { + return err + } + + d, ok := p.cache.Get(page.Id) + if ok { + d.(Array).SetAll(input) + } + } + + err = p.flush(page) + if err != nil { + return err + } + } + + return nil +} + +// Data will return nil for a paginated tensor +func (p *Tensor) Data() interface{} { + return nil +} + +// Eq will return if the paginated tensor is equal to +// or not equal to another object. +func (p *Tensor) Eq(i interface{}) bool { + v, ok := i.(*Tensor) + if !ok { + return false + } + if v.basePath != p.basePath { + return false + } + + if v.dataOrder != p.dataOrder { + return false + } + + if v.dtype != p.dtype { + return false + } + + return v.dims.Eq(p.dims) +} + +// Clone will create a copy of the paginated tensor +// in a subdirectory of the basePath of the original +// paginated tensor. +// It will panic if it encounters an error. +func (p *Tensor) Clone() interface{} { + name := fmt.Sprintf("%s-%s", "clone", time.Now().Format(stdTimeFmt)) + dir := p.clonePath + name + err := os.MkdirAll(dir, os.ModePerm) + if err != nil { + panic(err) + } + + clone := &Tensor{ + pages: make(pages, len(p.pages), len(p.pages)), + pageSize: p.pageSize, + dataOrder: p.dataOrder, + dims: p.dims, + dtype: p.dtype, + transposed: p.transposed, + engine: p.engine, + basePath: dir + "/", + clonePath: dir + "/", + applyPath: dir + "/", + viewPath: dir + "/", + fileFormat: p.fileFormat, + } + + for i := 0; i < len(p.pages); i++ { + oldPage := p.pages[i] + newPage := &page{ + Id: oldPage.Id, + File: clone.basePath + strconv.Itoa(oldPage.Id) + "." 
+ p.fileExtension(), + Bounds: oldPage.Bounds, + } + + if p.cache.Contains(oldPage.Id) { + // in-memory + err := p.flush(oldPage) + if err != nil { + panic(err) + } + + err = p.flush(newPage) + if err != nil { + panic(err) + } + + err = p.load(oldPage) + if err != nil { + panic(err) + } + + } else { + // not in-memory + err := p.Swap(oldPage.Id) + if err != nil { + panic(err) + } + + err = p.flush(newPage) + if err != nil { + panic(err) + } + + err = p.load(oldPage) + if err != nil { + panic(err) + } + } + } + + return clone +} diff --git a/paginated/engine.go b/paginated/engine.go new file mode 100644 index 0000000..8bd0758 --- /dev/null +++ b/paginated/engine.go @@ -0,0 +1,59 @@ +package paginated + +import "gorgonia.org/tensor" + +/* + This code is currently only used to satisfy an interface. + It could possibly be extended with some cool memory management stuff. +*/ + +// Engine is the default engine for a paginated tensor. +// It is currently only implemented to satisfy the `tensor.Engine` interface. +type Engine struct { + *Tensor +} + +// NewEngine will return a new paginated engine +func NewEngine(p *Tensor) *Engine { + return &Engine{p} +} + +// AllocAccessible returns true if the engine return Go-accessible memory pointers? 
+func (p *Engine) AllocAccessible() bool { + return false +} + +// Alloc allocates memory +func (p *Engine) Alloc(size int64) (tensor.Memory, error) { + return p.Tensor, nil +} + +// Free frees memory +func (p *Engine) Free(mem tensor.Memory, size int64) error { + return nil +} + +// Memset - duh +func (p *Engine) Memset(mem tensor.Memory, val interface{}) error { + return nil +} + +// Memclr - duh +func (p *Engine) Memclr(mem tensor.Memory) { + return +} + +// Memcpy - duh +func (p *Engine) Memcpy(dst, src tensor.Memory) error { + return nil +} + +// Accessible returns Go-accesible memory pointers, or errors, if it cannot be done +func (p *Engine) Accessible(mem tensor.Memory) (tensor.Memory, error) { + return p.Tensor, nil +} + +// WorksWith returns true if the data order can be directly worked with +func (p *Engine) WorksWith(order tensor.DataOrder) bool { + return false +} diff --git a/paginated/err.go b/paginated/err.go new file mode 100644 index 0000000..a2836bd --- /dev/null +++ b/paginated/err.go @@ -0,0 +1,18 @@ +package paginated + +import "errors" + +var ( + ErrCache = errors.New("cache error") + ErrFilled = errors.New("tensor capacity is filled") + ErrLength = errors.New("incorrect length") + ErrType = errors.New("invalid type assertion") + ErrFuncType = errors.New("invalid function type assertion") + ErrDNE = errors.New("does not exist") + ErrBound = errors.New("exceeds bound") + ErrDims = errors.New("incorrect dimensions") + ErrSize = errors.New("incorrect size") + ErrIndex = errors.New("index out of bounds") + ErrFormat = errors.New("unsupported format") + ErrNotImplemented = errors.New("not implemented") +) diff --git a/paginated/format.go b/paginated/format.go new file mode 100644 index 0000000..4a05bd3 --- /dev/null +++ b/paginated/format.go @@ -0,0 +1,49 @@ +package paginated + +import "fmt" + +var ( + FormatCSV = "csv" + FormatJSON = "json" + FormatGob = "gob" + FormatProto = "protbuf" + FormatFlat = "flatbuf" + FormatMsgPck = "msgpack" + 
FormatParquet = "parquet" + FormatNumpy = "numpy" +) + +// Format does not actually do anything for now. +func (p *Tensor) Format(f fmt.State, c rune) { + return +} + +// String returns an empty string. +func (p *Tensor) String() string { + return "" +} + +func (p *Tensor) fileExtension() string { + // supported + switch p.fileFormat { + case FormatCSV: + return "csv" + case FormatJSON: + return "json" + case FormatGob: + return "gob" + case FormatProto: + return "proto" + case FormatFlat: + return "flat" + case FormatMsgPck: + return "msgpack" + case FormatParquet: + return "parquet" + case FormatNumpy: + return ".npy" + } + + // unsupported + return "" +} diff --git a/paginated/index.go b/paginated/index.go new file mode 100644 index 0000000..c7c6462 --- /dev/null +++ b/paginated/index.go @@ -0,0 +1,151 @@ +package paginated + +import ( + "bytes" + "compress/gzip" + "encoding/gob" + "os" + "sort" +) + +type Index map[interface{}][]int + +func (i Index) Save(path string) error { + var gobBuff, compBuff bytes.Buffer + zw := gzip.NewWriter(&compBuff) + + filename := path + "index.gob.gz" + f, err := os.OpenFile(filename, os.O_WRONLY|os.O_CREATE, 0666) + if err != nil { + return err + } + + encoder := gob.NewEncoder(&gobBuff) + err = encoder.Encode(i) + if err != nil { + return err + } + + _, err = zw.Write(gobBuff.Bytes()) + if err != nil { + return err + } + + _, err = f.Write(compBuff.Bytes()) + + return err +} + +func (p *Tensor) loadIndex(index Index) error { + for _, page := range p.pages { + d, ok := p.cache.Get(page.Id) + if !ok { + err := p.Swap(page.Id) + if err != nil { + return err + } + + d, ok = p.cache.Get(page.Id) + } + pageData := d.(Array) + + var existing []int + for value, indices := range index { + // potential overlap + if indices[len(indices)-1] >= page.Bounds.S && indices[0] <= page.Bounds.E { + for _, idx := range indices { + // within page + if idx >= page.Bounds.S && idx <= page.Bounds.E { + err := pageData.SetAt(value, idx) + if err != nil 
{ + return err + } + existing = append(existing, idx) + } + } + } + } + sort.Ints(existing) + + // interpolate the values not-indexed + var lastValue interface{} + for i := page.Bounds.S; i <= page.Bounds.E; i++ { + var exists bool + for j, e := range existing { + if e > i { + break + } + + if e == i { + v, err := pageData.At(i - page.Bounds.S) + if err != nil { + return err + } + lastValue = v + + exists = true + existing = append(existing[:j], existing[j+1:]...) + break + } + } + + if !exists { + err := pageData.SetAt(lastValue, i-page.Bounds.S) + if err != nil { + return err + } + } + } + + err := p.flush(page) + if err != nil { + return err + } + } + + return nil +} + +func (p *Tensor) Index() (Index, error) { + index := make(Index) + + for _, page := range p.pages { + d, ok := p.cache.Get(page.Id) + if !ok { + err := p.Swap(page.Id) + if err != nil { + return nil, err + } + + d, ok = p.cache.Get(page.Id) + } + pageData := d.(Array) + + var lastValue interface{} + for i := 0; i < pageData.Len(); i++ { + v, err := pageData.At(i) + if err != nil { + return nil, err + } + + if v == lastValue { + continue + } + + indices, ok := index[v] + if !ok { + index[v] = []int{i} + } else { + indices = append(indices, i) + } + + lastValue = v + } + } + + for _, indices := range index { + sort.Ints(indices) + } + + return index, nil +} diff --git a/paginated/info.go b/paginated/info.go new file mode 100644 index 0000000..219a3b0 --- /dev/null +++ b/paginated/info.go @@ -0,0 +1,53 @@ +package paginated + +import "gorgonia.org/tensor" + +// Shape will return the dimensions of the tensor. 
+func (p *Tensor) Shape() tensor.Shape { + return p.dims +} + +// Strides will return the strides of the paginated tensor +func (p *Tensor) Strides() []int { + strides := make([]int, p.Dims(), p.Dims()) + + for i := 0; i < p.Dims(); i++ { + switch i { + case 0: + strides[i] = p.rowSize() + case 1: + strides[i] = p.columnSize() + default: + strides[i] = p.dimensionSize(i) + } + } + + return strides +} + +// Dtype will return the data type of the tensor. +func (p *Tensor) Dtype() tensor.Dtype { + return p.dtype +} + +// Dims will return the Shape of the tensor. +func (p *Tensor) Dims() int { + return len(p.dims) +} + +// Size will return the number of values +// in the tensor. +func (p *Tensor) Size() int { + return p.dims.TotalSize() +} + +// DataSize is the same as the Size method. +func (p *Tensor) DataSize() int { + var size int + + for _, page := range p.pages { + size += page.Datasize + } + + return size +} diff --git a/paginated/iterator.go b/paginated/iterator.go new file mode 100644 index 0000000..7927d97 --- /dev/null +++ b/paginated/iterator.go @@ -0,0 +1,110 @@ +package paginated + +import "gorgonia.org/tensor" + +// Iterator is an iterator that +// can be used to iterate over a paginated +// tensor. +type Iterator struct { + *Tensor + coord []int + reverse bool +} + +// NewIterator will return an iterator for the provided +// paginated tensor. +func NewIterator(p *Tensor) tensor.Iterator { + return &Iterator{ + Tensor: p, + coord: make([]int, p.Dims(), p.Dims()), + } +} + +// Start returns the first index +func (p *Iterator) Start() (int, error) { + var start int + if p.reverse { + start = p.Tensor.globalIndex(p.largestCoord()...) + } + return start, nil +} + +// Next returns the next index. Next is defined as the next value in the coordinates +// For example: let x be a (5,5) matrix that is row-major. Current index is for the coordinate (3,3). +// Next() returns the index of (3,4). 
+// +// If there is no underlying data store for (3,4) - say for example, the matrix is a sparse matrix, it return an error. +// If however, there is an underlying data store for (3,4), but it's not valid (for example, masked tensors), it will not return an error. +// +// Second example: let x be a (5,5) matrix that is col-major. Current index is for coordinate (3,3). +// Next() returns the index of (4,3). +func (p *Iterator) Next() (int, error) { + if !p.reverse { + p.coord = p.Tensor.nextCoord(p.coord) + } else { + p.coord = p.Tensor.previousCoord(p.coord) + } + + return p.Tensor.globalIndex(p.coord...), nil +} + +// NextValidity is like Next, but returns the validity of the value at the index as well. +// This will always return `true` and the same values as `iterator.Next()` because a paginated +// tensor is treated as a Gorgonia `*Dense` tensor. +func (p *Iterator) NextValidity() (int, bool, error) { + next, _ := p.Next() + return next, true, nil +} + +// NextValid returns the next valid index, as well as a skip count. +// This function is the same as `NextValidity()` for iterators over +// paginated tensors. +func (p *Iterator) NextValid() (int, int, error) { + next, _ := p.Next() + return next, 0, nil +} + +// NextInvalid always return an `ErrNotImplemented`. +func (p *Iterator) NextInvalid() (int, int, error) { + return 0, 0, ErrNotImplemented +} + +// Reset resets the iterator +func (p *Iterator) Reset() { + if p.reverse { + p.coord = p.largestCoord() + return + } + + p.coord = make([]int, p.Dims(), p.Dims()) +} + +// SetReverse tells the iterator to iterate in reverse +func (p *Iterator) SetReverse() { + p.reverse = true +} + +// SetForward tells the iterator to iterate forwards +func (p *Iterator) SetForward() { + p.reverse = false +} + +// Coord returns the coordinates of the current value +func (p *Iterator) Coord() []int { + return p.coord +} + +// Done returns true when the iterator is done iterating. 
+func (p *Iterator) Done() bool { + for i, dim := range p.coord { + if dim < (p.Tensor.Shape()[i] - 1) { + return false + } + } + return true +} + +// Shape returns the shape of the multidimensional tensor it's iterating on. +func (p *Iterator) Shape() tensor.Shape { + return p.dims +} diff --git a/paginated/memory.go b/paginated/memory.go new file mode 100644 index 0000000..d3c94ca --- /dev/null +++ b/paginated/memory.go @@ -0,0 +1,41 @@ +package paginated + +import ( + "unsafe" + + "gorgonia.org/tensor" +) + +// Engine ... +func (p *Tensor) Engine() tensor.Engine { + return p.engine +} + +// MemSize returns the current size of the in-memory buffer. +func (p *Tensor) MemSize() uintptr { + return uintptr(p.cache.Len() * p.pageSize) +} + +// Pointer returns a pointer to the first value +// in the in-memory buffer. +func (p *Tensor) Pointer() unsafe.Pointer { + return nil +} + +// IsNativelyAccessible returns `false` for paginated +// tensors. +func (p *Tensor) IsNativelyAccessible() bool { + return false +} + +// IsManuallyManaged returns false for paginated +// tensors. +func (p *Tensor) IsManuallyManaged() bool { + return false +} + +// Uintptr is the same as `Pointer()` but returns +// a `uintptr` rather than an `unsafe.Pointer` +func (p *Tensor) Uintptr() uintptr { + return uintptr(0) +} diff --git a/paginated/operations.go b/paginated/operations.go new file mode 100644 index 0000000..47edcfa --- /dev/null +++ b/paginated/operations.go @@ -0,0 +1,218 @@ +package paginated + +import ( + "fmt" + "os" + "reflect" + "runtime" + "strconv" + "time" + + "gorgonia.org/tensor" +) + +// Slice will return a `*View` of the tensor. +func (p *Tensor) Slice(s ...tensor.Slice) (tensor.View, error) { + if len(s) != p.Dims() { + return &View{}, ErrDims + } + + return NewView(p, s...), nil +} + +// At will retrieve and return the value in the tensor at the given +// coordinates. Note: coordinates indexing starts with 0. 
+func (p *Tensor) At(coord ...int) (interface{}, error) { + page, err := p.page(coord...) + if err != nil { + return nil, err + } + + if p.cache.Contains(page.Id) { + d, ok := p.cache.Get(page.Id) + if ok { + return d.(Array).At(p.pageIndex(coord...)) + } + } + + err = p.Swap(page.Id) + if err != nil { + return nil, err + } + d, _ := p.cache.Get(page.Id) + + return d.(Array).At(p.pageIndex(coord...)) +} + +// SetAt will set the provided value at the provided coordinates +func (p *Tensor) SetAt(v interface{}, coord ...int) error { + page, err := p.page(coord...) + if err != nil { + return err + } + + if p.cache.Contains(page.Id) { + d, ok := p.cache.Get(page.Id) + if ok { + return d.(Array).SetAt(v, p.pageIndex(coord...)) + } + } + + err = p.Swap(page.Id) + if err != nil { + return err + } + d, _ := p.cache.Get(page.Id) + + return d.(Array).SetAt(v, p.pageIndex(coord...)) +} + +// Reshape will be called when adding data +func (p *Tensor) Reshape(input ...int) error { + newShape := tensor.Shape(input) + if newShape.TotalSize() != p.Size() { + return ErrSize + } + + p.dims = newShape + + return nil +} + +// T will transpose the paginated tensor. +// The axes arguments supplied should specify the new order +// of the dimensions. For example, (1,0,2) will flip the first +// and second dimensions positions. +// If the axes arguments do not equal the number of dimensions in the Tensor +// then an `ErrDims` value will be returned. +// If any of the axes arguments are out of the bounds [0:Dims()-1], then +// an `ErrIndex` value will be returned. +func (p *Tensor) T(axes ...int) error { + if len(axes) != p.Dims() { + return ErrDims + } + + newDims := make(tensor.Shape, p.Dims(), p.Dims()) + for i, v := range axes { + if v >= p.Dims() { + return ErrIndex + } + + newDims[v] = p.dims[i] + } + + p.old = p.dims + p.transposed = true + p.dims = newDims + + return nil +} + +// UT ... 
+func (p *Tensor) UT() { + if !p.transposed { + return + } + + p.dims = p.old + p.old = tensor.Shape{} + p.transposed = false +} + +// Transpose is not implemented. Please use the `T()` (tranpose) +// and `UT()` (un-transpose) methods instead. +// This will only return an `ErrNotImplemented` value. +func (p *Tensor) Transpose() error { + return ErrNotImplemented +} + +// Apply will create a new paginated tensor stored in a subdirectory at the `applyPath` +// It will be the result of applying the provided function on the values of the +// Tensor. +func (p *Tensor) Apply(fn interface{}, opts ...tensor.FuncOpt) (tensor.Tensor, error) { + fncName := runtime.FuncForPC(reflect.ValueOf(fn).Pointer()).Name() + name := fmt.Sprintf("%s-%s", fncName, time.Now().Format(stdTimeFmt)) + dir := p.applyPath + name + err := os.MkdirAll(dir, os.ModePerm) + if err != nil { + return nil, err + } + + applied := &Tensor{ + pages: make(pages, len(p.pages), len(p.pages)), + pageSize: p.pageSize, + dataOrder: p.dataOrder, + dims: p.dims, + dtype: p.dtype, + transposed: p.transposed, + engine: p.engine, + basePath: dir + "/", + fileFormat: p.fileFormat, + } + err = applied.init() + if err != nil { + return applied, err + } + + for i := 0; i < len(p.pages); i++ { + // pages + oldPage := p.pages[i] + newPage := &page{ + Id: oldPage.Id, + File: applied.basePath + strconv.Itoa(oldPage.Id) + "." 
+ p.fileExtension(), + Bounds: oldPage.Bounds, + } + applied.pages = append(applied.pages, newPage) + + // apply and copy + if p.cache.Contains(oldPage.Id) { + // in-memory + err := p.flush(oldPage) + if err != nil { + return applied, err + } + + d, _ := p.cache.Get(oldPage.Id) + err = d.(Array).Apply(fn) + if err != nil { + return applied, err + } + + err = p.flush(newPage) + if err != nil { + return applied, err + } + + err = p.load(oldPage) + if err != nil { + return applied, err + } + } else { + // not in-memory + err := p.Swap(oldPage.Id) + if err != nil { + return applied, err + } + + d, _ := p.cache.Get(oldPage) + err = d.(Array).Apply(fn) + if err != nil { + return applied, err + } + + err = p.flush(newPage) + if err != nil { + return applied, err + } + + err = p.load(oldPage) + if err != nil { + return applied, err + } + } + + applied.pages = append(applied.pages, newPage) + } + + return applied, nil +} diff --git a/paginated/options.go b/paginated/options.go new file mode 100644 index 0000000..5db19de --- /dev/null +++ b/paginated/options.go @@ -0,0 +1,222 @@ +package paginated + +import ( + "bytes" + "compress/gzip" + "encoding/gob" + "encoding/json" + "io" + "io/ioutil" + "os" + "strings" + + "gorgonia.org/tensor" +) + +// Load should be the only option used on the tensor if it is called. +// Otherwise, it will overwrite other selected options OR be overwritten +// by other options. In which case, the metadata on the tensor may not actually describe +// the data of the tensor. 
+func Load(path string) tensor.ConsOpt { + f := func(t tensor.Tensor) { + p, ok := t.(*Tensor) + if ok { + mainFile := path + "main.json" + data, err := ioutil.ReadFile(mainFile) + if err != nil { + panic(err) + } + + err = json.Unmarshal(data, p) + if err != nil { + panic(err) + } + + p.init() + + indexFile := path + "index.gob.gz" + _, err = os.Stat(indexFile) + if err == nil { + index := make(Index) + f, err := os.Open(indexFile) + if err != nil { + panic(err) + } + + // decompress + var buf bytes.Buffer + zr, err := gzip.NewReader(f) + if err != nil { + panic(err) + } + if _, err := io.Copy(&buf, zr); err != nil { + panic(err) + } + + // umarshal + decoder := gob.NewDecoder(&buf) + err = decoder.Decode(&index) + if err != nil { + panic(err) + } + + err = p.loadIndex(index) + if err != nil { + panic(err) + } + } + } + } + + return f +} + +// WithEngine is an option that can be used to set the engine +// of the tensor to something other than the default `PaginatedEngine`. +func WithEngine(engine tensor.Engine) tensor.ConsOpt { + f := func(t tensor.Tensor) { + p, ok := t.(*Tensor) + if ok { + p.engine = engine + } + } + return f +} + +// WithName is an option that can be used to set the name +// of the tensor to something other than the default date and time string. +func WithName(name string) tensor.ConsOpt { + f := func(t tensor.Tensor) { + p, ok := t.(*Tensor) + if ok { + p.name = name + } + } + return f +} + +// DataOrder is an option that can be used to set the data order +// of the tensor to something other than the default RowMajor. +func DataOrder(order tensor.DataOrder) tensor.ConsOpt { + f := func(t tensor.Tensor) { + p, ok := t.(*Tensor) + if ok { + p.dataOrder = order + } + } + return f +} + +// MaxPageSize specifies the maximum size of a page (in bytes) +// and uses that to estimate the maximum number of values per page. 
+func MaxPageSize(size int) tensor.ConsOpt { + f := func(t tensor.Tensor) { + p, ok := t.(*Tensor) + if ok { + p.pageSize = size / int(p.dtype.Size()) + } + } + return f +} + +// MaxPageValues specifies the maximum number of values that +// should be in a page. +func MaxPageValues(count int) tensor.ConsOpt { + f := func(t tensor.Tensor) { + p, ok := t.(*Tensor) + if ok { + p.pageSize = count + } + } + return f +} + +// PageShape is an alternative to specifying the page size +// in values. It simply chunks the tensor using the given shape. +func PageShape(s tensor.Shape) tensor.ConsOpt { + f := func(t tensor.Tensor) { + p, ok := t.(*Tensor) + if ok { + p.pageSize = s.TotalSize() / int(p.dtype.Size()) + } + } + return f +} + +// WithBasePath is a paginated tensor option that +// will set the base path for files for the paginated tensor +func WithBasePath(path string) tensor.ConsOpt { + f := func(t tensor.Tensor) { + p := t.(*Tensor) + if !strings.HasSuffix(path, "/") { + path += "/" + } + p.basePath = path + } + return f +} + +// WithClonePath will set the base path for where cloned +// tensors should be stored as sub-directories of. +// This value will default as the base path of the paginated tensor. +func WithClonePath(path string) tensor.ConsOpt { + f := func(t tensor.Tensor) { + p := t.(*Tensor) + p.clonePath = path + } + return f +} + +// WithApplyPath will set the base path for where `Apply()` result +// tensors should be stored as sub-directories of. +// This value will default as the base path of the paginated tensor. +func WithApplyPath(path string) tensor.ConsOpt { + f := func(t tensor.Tensor) { + p := t.(*Tensor) + p.applyPath = path + } + return f +} + +// WithViewPath will set the base path for where a `view.Materialize()` resulting +// tensor should be stored as a sub-directory of. +// This value will default as the base path of the paginated tensor. 
+func WithViewPath(path string) tensor.ConsOpt { + f := func(t tensor.Tensor) { + p := t.(*Tensor) + p.viewPath = path + } + return f +} + +// WithFileFormat is a PaginatedOption that +// will set the file format for the paginated tensor +func WithFileFormat(format string) tensor.ConsOpt { + f := func(t tensor.Tensor) { + p := t.(*Tensor) + switch format { + case FormatCSV: + p.fileFormat = FormatCSV + case FormatJSON: + p.fileFormat = FormatJSON + case FormatProto: + p.fileFormat = FormatProto + case FormatFlat: + p.fileFormat = FormatFlat + case FormatMsgPck: + p.fileFormat = FormatMsgPck + case FormatParquet: + p.fileFormat = FormatParquet + case FormatNumpy: + p.fileFormat = FormatNumpy + default: + p.fileFormat = FormatGob + } + } + return f +} + +// TODO: WithBackingFile() option: +// -- serialized gorgonia tensor file +// -- serialized gonum matrice file +// -- serialized arrow tensor file diff --git a/paginated/overloading.go b/paginated/overloading.go new file mode 100644 index 0000000..b6efb53 --- /dev/null +++ b/paginated/overloading.go @@ -0,0 +1,12 @@ +package paginated + +// IsScalar will always return false for a +// paginated tensor. +func (p *Tensor) IsScalar() bool { + return false +} + +// ScalarValue does not return anything for a paginated tensor. 
+func (p *Tensor) ScalarValue() interface{} { + return nil +} diff --git a/paginated/pagination.go b/paginated/pagination.go new file mode 100644 index 0000000..af50e3a --- /dev/null +++ b/paginated/pagination.go @@ -0,0 +1,238 @@ +package paginated + +import ( + "encoding/csv" + "encoding/gob" + "encoding/json" + "io/ioutil" + "os" + + "gorgonia.org/tensor" +) + +type page struct { + Id int + File string + Datasize int // number of values written to page; (not total page capacity) + Bounds Slice +} + +type pages []*page + +func (p pages) get(id int) (*page, error) { + for _, page := range p { + if page.Id == id { + return page, nil + } + } + + return &page{}, ErrDNE +} + +// Swap will load the desired page's data into +// the position of the least-used page in memory. +func (p *Tensor) Swap(id int) error { + page, err := p.pages.get(id) + if err != nil { + return err + } + + // if buffer can grow: load page; no swap + if p.cache.Len() < p.memPageCount { + p.cache.Add(id, p.GenArray()) + return p.load(page) + } + + // else: swap with oldest page + k, v, _ := p.cache.GetOldest() + oldPage, err := p.pages.get(k.(int)) + if err != nil { + return err + } + + err = p.flush(oldPage) + if err != nil { + return err + } + + v.(Array).Zero() + p.cache.Add(id, v) + + return p.load(page) +} + +func (p *Tensor) intersectingPages(slices ...tensor.Slice) pages { + var pages pages + + minCoord := make([]int, p.Dims(), p.Dims()) + maxCoord := make([]int, p.Dims(), p.Dims()) + for i, s := range slices { + minCoord[i] = s.Start() + maxCoord[i] = s.End() + } + + b := Slice{ + S: p.globalIndex(minCoord...), + E: p.globalIndex(maxCoord...), + } + + for _, page := range p.pages { + if page.Bounds.intersects(b) { + pages = append(pages, page) + } + } + + return pages +} + +func (p *Tensor) flush(page *page) error { + switch p.fileFormat { + case FormatCSV: + f, err := os.OpenFile(page.File, os.O_WRONLY|os.O_CREATE, 0666) + if err != nil { + return err + } + + w := csv.NewWriter(f) + d, ok 
:= p.cache.Get(page.Id) + if ok { + strings, err := toStrings(p.Dtype(), d.(Array)) + if err != nil { + return err + } + + err = w.Write(strings) + if err != nil { + return err + } + } + + return f.Close() + case FormatJSON: + d, ok := p.cache.Get(page.Id) + if !ok { + return ErrDNE + } + + data, err := json.Marshal(d.(Array)) + if err != nil { + return err + } + + return ioutil.WriteFile(page.File, data, 0644) + case FormatGob: + f, err := os.OpenFile(page.File, os.O_WRONLY|os.O_CREATE, 0666) + if err != nil { + return err + } + + encoder := gob.NewEncoder(f) + d, ok := p.cache.Get(page.Id) + if ok { + err = encoder.Encode(d.(Array)) + if err != nil { + return err + } + } + + return f.Close() + case FormatProto: + // TODO: implement + case FormatFlat: + // TODO: implement + case FormatMsgPck: + // TODO: implement + case FormatParquet: + // TODO: implement + case FormatNumpy: + // TODO: implement + default: + return ErrFormat + } + + return nil +} + +func (p *Tensor) load(page *page) error { + switch p.fileFormat { + case FormatCSV: + f, err := os.Open(page.File) + if err != nil { + return err + } + r := csv.NewReader(f) + + // always one line + data, err := r.Read() + if err != nil { + return err + } + + d, ok := p.cache.Get(page.Id) + if !ok { + return ErrDNE + } + a := d.(Array) + for i := 0; i < len(data); i++ { + err := a.SetAt(parseString(p.Dtype(), data[i]), i) + if err != nil { + return err + } + } + case FormatJSON: + data, err := ioutil.ReadFile(page.File) + if err != nil { + return err + } + + d, ok := p.cache.Get(page.Id) + if !ok { + return ErrDNE + } + + return json.Unmarshal(data, d) + case FormatGob: + f, err := os.Open(page.File) + if err != nil { + return err + } + + decoder := gob.NewDecoder(f) + + d, ok := p.cache.Get(page.Id) + if !ok { + return ErrDNE + } + + err = decoder.Decode(d) + if err != nil { + return err + } + + return f.Close() + case FormatProto: + // TODO: implement + case FormatFlat: + // TODO: implement + case FormatMsgPck: + // 
TODO: implement + case FormatParquet: + // TODO: implement + case FormatNumpy: + // TODO: implement + default: + return ErrFormat + } + + return nil +} + +func (p *Tensor) firstUnfilledPage() (*page, bool) { + for _, page := range p.pages { + if page.Datasize < p.pageSize { + return page, true + } + } + + return nil, false +} diff --git a/paginated/serialization.go b/paginated/serialization.go new file mode 100644 index 0000000..df3867b --- /dev/null +++ b/paginated/serialization.go @@ -0,0 +1,219 @@ +package paginated + +import ( + "encoding/json" + "fmt" + "io" + "io/ioutil" + "math/bits" + "strconv" + + "gorgonia.org/tensor" +) + +// Save will save the tensor metadata in a main.json file in the +// basepath (defaults to pwd) of the tensor. +func (p *Tensor) Save() error { + filename := p.basePath + "main.json" + + data, err := p.MarshalJSON() + if err != nil { + return err + } + + return ioutil.WriteFile(filename, data, 0666) +} + +// Flush will write all the data in-memory +// to disk. +func (p *Tensor) Flush() error { + for _, page := range p.pages { + if p.cache.Contains(page.Id) { + err := p.flush(page) + if err != nil { + return err + } + } + } + + return nil +} + +// WriteNpy is not implemented. +// It will return ErrNotImplemented. +func (p *Tensor) WriteNpy(w io.Writer) error { + return ErrNotImplemented +} + +// ReadNpy will load the data from a numpy object +// into the tensor. 
+func (p *Tensor) ReadNpy(r io.Reader) error { + return nil +} + +// GobDecode is not implemented +// It will return ErrNotImplemented +func (p *Tensor) GobDecode(data []byte) error { + return ErrNotImplemented +} + +// GobEncode is not implemented +// It will return ErrNotImplemented +func (p *Tensor) GobEncode() ([]byte, error) { + return []byte{}, ErrNotImplemented +} + +func (p *Tensor) MarshalJSON() ([]byte, error) { + return json.Marshal(struct { + Name string + FileFormat string + BasePath string + ClonePath string + ApplyPath string + ViewPath string + DataOrder tensor.DataOrder + Dims tensor.Shape + DType tensor.Dtype + Engine tensor.Engine + Transposed bool + Old tensor.Shape + MemPageCount int + Pages pages + PageSize int + }{ + Name: p.name, + FileFormat: p.fileFormat, + BasePath: p.basePath, + ClonePath: p.clonePath, + ApplyPath: p.applyPath, + ViewPath: p.viewPath, + DataOrder: p.dataOrder, + Dims: p.dims, + DType: p.dtype, + Engine: p.engine, + Transposed: p.transposed, + Old: p.old, + MemPageCount: p.memPageCount, + Pages: p.pages, + PageSize: p.pageSize, + }) +} + +func (p *Tensor) UnmarshalJSON(data []byte) error { + type tnsr struct { + Name string + FileFormat string + BasePath string + ClonePath string + ApplyPath string + ViewPath string + DataOrder tensor.DataOrder + Dims tensor.Shape + DType tensor.Dtype + Engine tensor.Engine + Transposed bool + Old tensor.Shape + MemPageCount int + Pages pages + PageSize int + } + + var t tnsr + if err := json.Unmarshal(data, &t); err != nil { + return err + } + + p.name = t.Name + p.fileFormat = t.FileFormat + p.basePath = t.BasePath + p.clonePath = t.ClonePath + p.applyPath = t.ApplyPath + p.viewPath = t.ViewPath + p.dataOrder = t.DataOrder + p.dims = t.Dims + p.dtype = t.DType + p.engine = t.Engine + p.transposed = t.Transposed + p.old = t.Old + p.memPageCount = t.MemPageCount + p.pages = t.Pages + p.pageSize = t.PageSize + + return nil +} + +// toStrings will convert an underlying data slice to a slice 
of strings +// primarily used to write a page's data to a CSV file +func toStrings(dtype tensor.Dtype, a Array) ([]string, error) { + length := a.Len() + strings := make([]string, length, length) + for i := 0; i < length; i++ { + v, err := a.At(i) + if err != nil { + return strings, err + } + strings[i] = fmt.Sprintf("%v", v) + } + + return strings, nil +} + +func parseString(dtype tensor.Dtype, s string) interface{} { + switch dtype { + case tensor.Bool: + b, _ := strconv.ParseBool(s) + return b + case tensor.Int: + var i int64 + if is64Bit { + i, _ = strconv.ParseInt(s, 10, 64) + } else { + i, _ = strconv.ParseInt(s, 10, 32) + } + + return int(i) + case tensor.Int8: + i, _ := strconv.ParseInt(s, 10, 8) + return i + case tensor.Int16: + i, _ := strconv.ParseInt(s, 10, 16) + return i + case tensor.Int32: + i, _ := strconv.ParseInt(s, 10, 32) + return i + case tensor.Int64: + i, _ := strconv.ParseInt(s, 10, 64) + return i + case tensor.Uint: + u, _ := strconv.ParseUint(s, 10, bits.UintSize) + return u + case tensor.Uint8, tensor.Byte: + u, _ := strconv.ParseUint(s, 10, 8) + return u + case tensor.Uint16: + u, _ := strconv.ParseUint(s, 10, 16) + return u + case tensor.Uint32: + u, _ := strconv.ParseUint(s, 10, 32) + return u + case tensor.Uint64: + u, _ := strconv.ParseUint(s, 10, 64) + return u + case tensor.Float32: + f, _ := strconv.ParseFloat(s, 32) + return f + case tensor.Float64: + f, _ := strconv.ParseFloat(s, 64) + return f + case tensor.Complex64: + c, _ := strconv.ParseComplex(s, 64) + return c + case tensor.Complex128: + c, _ := strconv.ParseComplex(s, 128) + return c + case tensor.String: + return s + } + + return nil +} diff --git a/paginated/slice.go b/paginated/slice.go new file mode 100644 index 0000000..10cacd8 --- /dev/null +++ b/paginated/slice.go @@ -0,0 +1,39 @@ +package paginated + +import "gorgonia.org/tensor" + +// Slice is used to represent a slice of a paginated +// tensor. 
+// It represents a starting and ending coordinate along a single dimension of the tensor. +type Slice struct{ S, E int } + +// Start is the starting index of a `Slice` +func (i Slice) Start() int { return i.S } + +// End is the last index of a `Slice` +func (i Slice) End() int { return i.E } + +// Step is set to 1 for a `Slice` +func (i Slice) Step() int { return 1 } + +func (i Slice) IsSingleValue() bool { + return i.S == i.E +} + +func (i Slice) intersects(i2 tensor.Slice) bool { + // i is all larger than i2 + if i.Start() > i2.End() { + return false + } + + // i2 is all larger than i + if i2.Start() > i.End() { + return false + } + + return true +} + +func (i Slice) contains(point int) bool { + return point >= i.S && point <= i.E +} diff --git a/paginated/stream.go b/paginated/stream.go new file mode 100644 index 0000000..521fd9d --- /dev/null +++ b/paginated/stream.go @@ -0,0 +1,115 @@ +package paginated + +// Stream is an interface for streaming data +// to a paginated tensor. +type Stream struct { + *Tensor + data chan Array + close chan bool +} + +// Stream can be used to more easily write a large amount of data +// to an initialized paginated tensor. +func (p *Tensor) Stream(mutable []bool) (*Stream, error) { + if len(mutable) != p.Dims() { + return &Stream{}, ErrSize + } + + dataChan := make(chan Array, 1) + closeChan := make(chan bool, 1) + + go func(p *Tensor) { + var b bool + for { + select { + case data := <-dataChan: + p.addData(data) + case <-closeChan: + close(dataChan) + close(closeChan) + b = true + } + + if b { + break + } + } + }(p) + + return &Stream{ + Tensor: p, + data: dataChan, + close: closeChan, + }, nil +} + +// Send will block until the Stream is ready +// to accept and write new data to the paginated tensor. 
+func (s *Stream) Send(data Array) { + s.data <- data +} + +// Close will close all channels, thus killing the Stream +func (s *Stream) Close() { + s.close <- true +} + +func (p *Tensor) addData(data Array) error { + page, ok := p.firstUnfilledPage() + if !ok { + return ErrFilled + } + + var pageData Array + d, ok := p.cache.Get(page.Id) + if !ok { + err := p.Swap(page.Id) + if err != nil { + return err + } + + d, ok = p.cache.Get(page.Id) + if !ok { + return ErrCache + } + } + pageData = d.(Array) + + // remaining page-capacity + if page.Datasize > 0 { + var err error + pageData, err = pageData.Slice(page.Datasize-1, pageData.Len()-1) + if err != nil { + return err + } + } + + // slice of new data + capacity := pageData.Len() + if data.Len() < capacity { + capacity = data.Len() + } + slice, err := data.Slice(0, capacity) + if err != nil { + return err + } + + // copy data + err = pageData.Copy(slice) + if err != nil { + return err + } + page.Datasize += slice.Len() + + // recurse: for all data in supplied array + if slice.Len() < data.Len() { + remaining, err := data.Slice(slice.Len(), data.Len()-1) + if err != nil { + return err + } + + return p.addData(remaining) + } + + return nil +} diff --git a/paginated/tensor.go b/paginated/tensor.go index 5632ee2..05c6d76 100644 --- a/paginated/tensor.go +++ b/paginated/tensor.go @@ -1,4 +1,3 @@ -// paclage paginated provides paginated tensors package paginated import ( @@ -7,9 +6,16 @@ import ( "strconv" "time" + lru "github.com/hashicorp/golang-lru/simplelru" "gorgonia.org/tensor" ) +const ( + stdTimeFmt = "02-Jan-2006 15-04" + // https://stackoverflow.com/a/60319709 + is64Bit = uint64(^uintptr(0)) == ^uint64(0) +) + // Tensor specifies a paginated Gorgonia dense tensor type Tensor struct { tensor.Tensor @@ -39,9 +45,9 @@ type Tensor struct { pageSize int } -// New will create a new paginated tensor. +// NewTensor will created a new paginated Gorgonia `*Dense` tensor. 
 // It will use a `DefaultSmallTensor()` if no options are specified.
-func New(dtype tensor.Dtype, opts ...tensor.ConsOpt) (*Tensor, error) {
+func NewTensor(dtype tensor.Dtype, opts ...tensor.ConsOpt) (*Tensor, error) {
 	/* tensor */
 	p, err := DefaultSmallTensor(dtype)
 	if err != nil {
diff --git a/paginated/traversal.go b/paginated/traversal.go
new file mode 100644
index 0000000..2ec5628
--- /dev/null
+++ b/paginated/traversal.go
@@ -0,0 +1,19 @@
+package paginated
+
+import "gorgonia.org/tensor"
+
+// RequiresIterator returns false for a paginated tensor
+func (p *Tensor) RequiresIterator() bool {
+	return false
+}
+
+// Iterator will return a `*Iterator` object
+// that can be used to iterate over the tensor.
+func (p *Tensor) Iterator() tensor.Iterator {
+	return NewIterator(p)
+}
+
+// DataOrder returns the ordering of the data of the tensor.
+func (p *Tensor) DataOrder() tensor.DataOrder {
+	return p.dataOrder
+}
diff --git a/paginated/view.go b/paginated/view.go
new file mode 100644
index 0000000..dafcc73
--- /dev/null
+++ b/paginated/view.go
@@ -0,0 +1,250 @@
+package paginated
+
+import (
+	"fmt"
+	"os"
+	"time"
+
+	"gorgonia.org/tensor"
+)
+
+// View specifies a view of a
+// paginated tensor. It can be materialized
+// into a new paginated tensor if desired.
+type View struct {
+	*Tensor
+	name string
+	slices []tensor.Slice
+}
+
+// NewView will create and return a new View object.
+func NewView(p *Tensor, slices ...tensor.Slice) *View {
+	return &View{
+		Tensor: p,
+		name: "view",
+		slices: slices,
+	}
+}
+
+// SetName will set the name of the view that will be used
+func (p *View) SetName(name string) {
+	p.name = name
+}
+
+// IsView will specify if object is only a view
+// of another tensor or if it is its own tensor.
+func (p *View) IsView() bool {
+	return true
+}
+
+// IsMaterializable specifies if the view can be converted
+// into its own tensor. 
This always returns true for a `*View` +func (p *View) IsMaterializable() bool { + return true +} + +// Materialize will convert the `*View` into a new `*Tensor`. +func (p *View) Materialize() tensor.Tensor { + intersectingPages := p.intersectingPages(p.slices...) + + b := p.sliceCoords(p.slices...) + dataSize := p.boundsData(b) + numOfPages := dataSize / p.pageSize + if dataSize%p.pageSize > 0 { + numOfPages++ + } + globalStart := p.pageIndex(b.min...) + globalEnd := p.pageIndex(b.max...) + + // view + name := fmt.Sprintf("%s-%s", p.name, time.Now().Format(stdTimeFmt)) + dir := p.viewPath + name + err := os.MkdirAll(dir, os.ModePerm) + if err != nil { + panic(err) + } + + dims := make(tensor.Shape, len(p.slices), len(p.slices)) + for i := 0; i < len(dims); i++ { + if i < len(dims)-1 { + dims[i] = p.dims[i] + continue + } + + dims[i] = (b.max[i] - b.min[i]) + 1 + } + + view := &Tensor{ + pages: make(pages, numOfPages, numOfPages), + pageSize: p.pageSize, + memPageCount: numOfPages, + dataOrder: p.dataOrder, + dims: dims, + dtype: p.dtype, + transposed: false, + engine: p.engine, + basePath: dir + "/", + fileFormat: p.fileFormat, + } + + err = view.init() + if err != nil { + panic(err) + } + + // variables updated every loop + var ( + to = 0 + from = 0 + + fromProcessed = 0 + toFilled = 0 + + remainingData = dataSize + ) + + // copy loop + for { + toPage := view.pages[to] + toCapacity := view.pageSize - toPage.Datasize + toCapacity -= toFilled + + fromPage := intersectingPages[from] + fromStart := p.pageSize * fromPage.Id + fromEnd := fromStart + p.pageSize + if fromStart < globalStart { + fromStart = globalStart + } + if fromEnd > globalEnd { + fromEnd = globalEnd + } + fromStart += fromProcessed + fromRemaining := fromEnd - fromStart + + // to-data + td, ok := view.cache.Get(toPage.Id) + if !ok { + err := view.Swap(toPage.Id) + if err != nil { + panic(err) + } + + td, ok = view.cache.Get(toPage.Id) + if !ok { + panic(ErrCache) + } + } + toData := td.(Array) + + // 
from-data + fd, ok := p.cache.Get(fromPage.Id) + if !ok { + err := p.Swap(fromPage.Id) + if err != nil { + panic(err) + } + + fd, ok = p.cache.Get(fromPage.Id) + if !ok { + panic(ErrCache) + } + } + fromData := fd.(Array) + + // slice + capacity := min(toCapacity, fromRemaining) + toSlice, err := toData.Slice(toPage.Datasize, capacity) + if err != nil { + panic(err) + } + fromSlice, err := fromData.Slice(fromStart, capacity) + if err != nil { + panic(err) + } + + // copy + err = toSlice.Copy(fromSlice) + if err != nil { + panic(err) + } + toPage.Datasize += capacity + + // update + if toCapacity < fromRemaining { + err := view.flush(toPage) + if err != nil { + panic(err) + } + + fromProcessed += capacity + + toFilled = 0 + to++ + + remainingData -= capacity + } else if fromRemaining < toCapacity { + fromProcessed = 0 + from++ + + toFilled += capacity + + remainingData -= capacity + } else { + err := view.flush(toPage) + if err != nil { + panic(err) + } + + fromProcessed = 0 + from++ + + toFilled = 0 + to++ + + remainingData -= capacity + } + + // exit + if from == len(intersectingPages) && remainingData <= 0 { + break + } else if from == len(intersectingPages) && remainingData >= 0 { + panic("something went wrong materializing the view") + } + } + + return view +} + +func min(arg1, arg2 int) int { + if arg1 < arg2 { + return arg1 + } + + return arg2 +} + +type bounds struct { + min, max []int +} + +// slicecoords can be used on a set of slices that is assumed to be +// a slice along each dimension of the tensor defining a "chunk" of the tensor. +func (p *Tensor) sliceCoords(slices ...tensor.Slice) bounds { + minCoord := make([]int, p.Dims(), p.Dims()) + maxCoord := make([]int, p.Dims(), p.Dims()) + for i, s := range slices { + minCoord[i] = s.Start() + maxCoord[i] = s.End() + } + + return bounds{ + min: minCoord, + max: maxCoord, + } +} + +func (p *Tensor) boundsData(b bounds) int { + start := p.globalIndex(b.min...) + end := p.globalIndex(b.max...) 
+ return end - start +}