From 9e986ce29ca59e8a19449a5f91410a240e530f0a Mon Sep 17 00:00:00 2001 From: Carlo Maria Proietti Date: Tue, 25 Nov 2025 19:07:14 +0100 Subject: [PATCH 01/10] First Idea --- .../dataframe/impl/aggregation/aggregators/Aggregators.kt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt index 9648fed3ad..b380bccc74 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt @@ -124,12 +124,20 @@ public object Aggregators { // T: Comparable -> T? // T : Comparable? -> T? + + // idea: if the following function get the ValueColumnImpl. I know if there is any stored value + // -> I return it. + // else I do the procedure max.invoke... and then cache the value inside public fun ?> max(skipNaN: Boolean): Aggregator = max.invoke(skipNaN).cast2() public val max: AggregatorOptionSwitch1, Comparable?> by withOneOption { skipNaN: Boolean -> twoStepSelectingForAny, Comparable?>( getReturnType = maxTypeConversion, + // idea, need to change the following line + // if(thereIsCachedValueInsideValueColumnImpl) that's what I want + // else ... + // maxOrNull is called on a sequence... -> previous idea can't be applied this way LOOK UP stepOneSelector = { type -> maxOrNull(type, skipNaN) }, indexOfResult = { type -> indexOfMax(type, skipNaN) }, ) From 276c9be7258a725b8a6205a6db6807dd8f99555a Mon Sep 17 00:00:00 2001 From: Carlo Maria Proietti Date: Fri, 28 Nov 2025 15:06:00 +0100 Subject: [PATCH 02/10] still working on solution --- .../impl/aggregation/aggregators/Aggregators.kt | 11 +++-------- .../kotlinx/dataframe/impl/columns/ValueColumnImpl.kt | 11 ++++++++++- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt index b380bccc74..87eeee0f59 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt @@ -124,20 +124,15 @@ public object Aggregators { // T: Comparable -> T? // T : Comparable? -> T? - - // idea: if the following function get the ValueColumnImpl. I know if there is any stored value - // -> I return it. - // else I do the procedure max.invoke... and then cache the value inside public fun ?> max(skipNaN: Boolean): Aggregator = max.invoke(skipNaN).cast2() + // following val is an aggregator-builder, invoke creates the aggregator. + // aggregator is something that gets values and makes a computation public val max: AggregatorOptionSwitch1, Comparable?> by withOneOption { skipNaN: Boolean -> + // the following fun returns an AggregatorProvider twoStepSelectingForAny, Comparable?>( getReturnType = maxTypeConversion, - // idea, need to change the following line - // if(thereIsCachedValueInsideValueColumnImpl) that's what I want - // else ... - // maxOrNull is called on a sequence... -> previous idea can't be applied this way LOOK UP stepOneSelector = { type -> maxOrNull(type, skipNaN) }, indexOfResult = { type -> indexOfMax(type, skipNaN) }, ) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt index f758360d1f..ee26f1651f 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt @@ -8,6 +8,12 @@ import org.jetbrains.kotlinx.dataframe.columns.ValueColumn import kotlin.reflect.KType import kotlin.reflect.full.withNullability +public class CachedStatisticWrapped(public var cachedStatistic: T?) + +internal interface ValueColumnInternal : ValueColumn { + val max: CachedStatisticWrapped +} + internal open class ValueColumnImpl( values: List, name: String, @@ -15,7 +21,8 @@ internal open class ValueColumnImpl( val defaultValue: T? = null, distinct: Lazy>? = null, ) : DataColumnImpl(values, name, type, distinct), - ValueColumn { + ValueColumn, + ValueColumnInternal { override fun distinct() = ValueColumnImpl(toSet().toList(), name, type, defaultValue, distinct) @@ -48,6 +55,8 @@ internal open class ValueColumnImpl( override fun defaultValue() = defaultValue override fun forceResolve() = ResolvingValueColumn(this) + + override val max = CachedStatisticWrapped(null) } internal class ResolvingValueColumn(override val source: ValueColumn) : From bacc395177905e2179a3175893c73be986328f86 Mon Sep 17 00:00:00 2001 From: Carlo Maria Proietti Date: Tue, 9 Dec 2025 15:32:57 +0100 Subject: [PATCH 03/10] work in progress --- .../AggregatorAggregationHandler.kt | 25 ++++++++++++++++++- .../dataframe/impl/columns/ValueColumnImpl.kt | 6 ++--- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt index 7b1b0357eb..763d889d7a 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt @@ -2,7 +2,9 @@ package org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.api.asSequence +import org.jetbrains.kotlinx.dataframe.columns.ValueColumn import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.aggregationHandlers.SelectingAggregationHandler +import org.jetbrains.kotlinx.dataframe.impl.columns.ValueColumnImpl import kotlin.reflect.KType /** @@ -23,7 +25,28 @@ public interface AggregatorAggregationHandler * When the exact [valueType] is unknown, use [calculateValueType] or [aggregateCalculatingValueType]. */ public fun aggregateSequence(values: Sequence, valueType: ValueType): Return - +// +// public fun aggregateSingleColumn(column: DataColumn): Return { +// // It is possible to exploit cached statistic which is proper of ValueColumnImpl +// if (column is ValueColumnImpl) { +// if (column.max.wasComputed) { +// return column.max.cachedStatistic as Return +// } else { +// val max = aggregateSequence( +// values = column.asSequence(), +// valueType = column.type().toValueType(), +// ) +// column.max.cachedStatistic = max +// column.max.wasComputed = true +// aggregateSingleColumn(column) +// } +// } +// // Otherwise +// return aggregateSequence( +// values = column.asSequence(), +// valueType = column.type().toValueType(), +// ) +// } /** * Aggregates the data in the given column and computes a single resulting value. * Calls [aggregateSequence]. diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt index ee26f1651f..c72f915534 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt @@ -8,10 +8,10 @@ import org.jetbrains.kotlinx.dataframe.columns.ValueColumn import kotlin.reflect.KType import kotlin.reflect.full.withNullability -public class CachedStatisticWrapped(public var cachedStatistic: T?) +public class CachedStatisticWrapped(public var cachedStatistic: Any?, public var wasComputed: Boolean = false) internal interface ValueColumnInternal : ValueColumn { - val max: CachedStatisticWrapped + val max: CachedStatisticWrapped } internal open class ValueColumnImpl( @@ -56,7 +56,7 @@ internal open class ValueColumnImpl( override fun forceResolve() = ResolvingValueColumn(this) - override val max = CachedStatisticWrapped(null) + override val max = CachedStatisticWrapped(null) } internal class ResolvingValueColumn(override val source: ValueColumn) : From 4d5d7149abc2bcd5b18e4aaa8da52c85c978bbd2 Mon Sep 17 00:00:00 2001 From: Carlo Maria Proietti Date: Wed, 10 Dec 2025 15:18:19 +0100 Subject: [PATCH 04/10] need to test --- .../jetbrains/kotlinx/dataframe/api/max.kt | 7 ++- .../AggregatorAggregationHandler.kt | 44 +++++++++---------- .../dataframe/impl/columns/ValueColumnImpl.kt | 6 +-- 3 files changed, 29 insertions(+), 28 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt index d1ff0cd1e2..00fd70d62c 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt @@ -19,6 +19,7 @@ import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateByOrNull import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateFor import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateOf import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateOfRow +import org.jetbrains.kotlinx.dataframe.impl.columns.ValueColumnImpl import org.jetbrains.kotlinx.dataframe.impl.columns.toComparableColumns import org.jetbrains.kotlinx.dataframe.impl.suggestIfNull import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API @@ -33,7 +34,11 @@ public fun > DataColumn.max(skipNaN: Boolean = skipNaNDefa maxOrNull(skipNaN).suggestIfNull("max") public fun > DataColumn.maxOrNull(skipNaN: Boolean = skipNaNDefault): T? = - Aggregators.max(skipNaN).aggregateSingleColumn(this) + if(this is ValueColumnImpl<*>) { + Aggregators.max(skipNaN).aggregateSingleColumn(this, this.max) + } else { + Aggregators.max(skipNaN).aggregateSingleColumn(this) + } public inline fun ?> DataColumn.maxBy( skipNaN: Boolean = skipNaNDefault, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt index 763d889d7a..20ec32526b 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt @@ -2,9 +2,8 @@ package org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.api.asSequence -import org.jetbrains.kotlinx.dataframe.columns.ValueColumn import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.aggregationHandlers.SelectingAggregationHandler -import org.jetbrains.kotlinx.dataframe.impl.columns.ValueColumnImpl +import org.jetbrains.kotlinx.dataframe.impl.columns.WrappedStatistic import kotlin.reflect.KType /** @@ -25,28 +24,7 @@ public interface AggregatorAggregationHandler * When the exact [valueType] is unknown, use [calculateValueType] or [aggregateCalculatingValueType]. */ public fun aggregateSequence(values: Sequence, valueType: ValueType): Return -// -// public fun aggregateSingleColumn(column: DataColumn): Return { -// // It is possible to exploit cached statistic which is proper of ValueColumnImpl -// if (column is ValueColumnImpl) { -// if (column.max.wasComputed) { -// return column.max.cachedStatistic as Return -// } else { -// val max = aggregateSequence( -// values = column.asSequence(), -// valueType = column.type().toValueType(), -// ) -// column.max.cachedStatistic = max -// column.max.wasComputed = true -// aggregateSingleColumn(column) -// } -// } -// // Otherwise -// return aggregateSequence( -// values = column.asSequence(), -// valueType = column.type().toValueType(), -// ) -// } + /** * Aggregates the data in the given column and computes a single resulting value. * Calls [aggregateSequence]. @@ -57,6 +35,24 @@ public interface AggregatorAggregationHandler valueType = column.type().toValueType(), ) + /** + * optimized override for [aggregateSingleColumn], + * preferred when column's dynamic type is ValueColumnImpl + */ + public fun aggregateSingleColumn(column: DataColumn, wrappedStatistic: WrappedStatistic): Return { + // It is possible to exploit cached statistic which is proper of ValueColumnImpl + if (wrappedStatistic.wasComputed) { + return wrappedStatistic.cachedStatistic as Return + } + val statistic = aggregateSequence( + values = column.asSequence(), + valueType = column.type().toValueType(), + ) + wrappedStatistic.cachedStatistic = statistic + wrappedStatistic.wasComputed = true + return aggregateSingleColumn(column, wrappedStatistic) + } + /** * Function that can give the return type of [aggregateSequence] as [KType], given the type of the input. * This allows aggregators to avoid runtime type calculations. diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt index c72f915534..22c9db6ae2 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt @@ -8,10 +8,10 @@ import org.jetbrains.kotlinx.dataframe.columns.ValueColumn import kotlin.reflect.KType import kotlin.reflect.full.withNullability -public class CachedStatisticWrapped(public var cachedStatistic: Any?, public var wasComputed: Boolean = false) +public class WrappedStatistic(public var cachedStatistic: Any?, public var wasComputed: Boolean = false) internal interface ValueColumnInternal : ValueColumn { - val max: CachedStatisticWrapped + val max: WrappedStatistic } internal open class ValueColumnImpl( @@ -56,7 +56,7 @@ internal open class ValueColumnImpl( override fun forceResolve() = ResolvingValueColumn(this) - override val max = CachedStatisticWrapped(null) + override val max = WrappedStatistic(null) } internal class ResolvingValueColumn(override val source: ValueColumn) : From bedea0e0f90d972f7c39806a9baa4c0ce6038df1 Mon Sep 17 00:00:00 2001 From: Carlo Maria Proietti Date: Thu, 11 Dec 2025 13:18:57 +0100 Subject: [PATCH 05/10] one red test --- .../kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt | 4 ++-- .../aggregators/AggregatorAggregationHandler.kt | 10 +++++++--- .../impl/aggregation/aggregators/Aggregators.kt | 3 --- .../kotlinx/dataframe/impl/columns/ValueColumnImpl.kt | 3 +++ .../org/jetbrains/kotlinx/dataframe/statistics/max.kt | 1 + 5 files changed, 13 insertions(+), 8 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt index 00fd70d62c..f833ef5fcf 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt @@ -19,7 +19,7 @@ import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateByOrNull import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateFor import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateOf import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateOfRow -import org.jetbrains.kotlinx.dataframe.impl.columns.ValueColumnImpl +import org.jetbrains.kotlinx.dataframe.impl.columns.ValueColumnInternal import org.jetbrains.kotlinx.dataframe.impl.columns.toComparableColumns import org.jetbrains.kotlinx.dataframe.impl.suggestIfNull import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API @@ -34,7 +34,7 @@ public fun > DataColumn.max(skipNaN: Boolean = skipNaNDefa maxOrNull(skipNaN).suggestIfNull("max") public fun > DataColumn.maxOrNull(skipNaN: Boolean = skipNaNDefault): T? = - if(this is ValueColumnImpl<*>) { + if(this is ValueColumnInternal<*>) { Aggregators.max(skipNaN).aggregateSingleColumn(this, this.max) } else { Aggregators.max(skipNaN).aggregateSingleColumn(this) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt index 20ec32526b..ba1efd8106 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt @@ -29,19 +29,22 @@ public interface AggregatorAggregationHandler * Aggregates the data in the given column and computes a single resulting value. * Calls [aggregateSequence]. */ - public fun aggregateSingleColumn(column: DataColumn): Return = - aggregateSequence( + public fun aggregateSingleColumn(column: DataColumn): Return { + println("NOT ValueColumnImpl") + return aggregateSequence( values = column.asSequence(), valueType = column.type().toValueType(), ) + } /** * optimized override for [aggregateSingleColumn], - * preferred when column's dynamic type is ValueColumnImpl + * preferred when column's runtime type is ValueColumnImpl */ public fun aggregateSingleColumn(column: DataColumn, wrappedStatistic: WrappedStatistic): Return { // It is possible to exploit cached statistic which is proper of ValueColumnImpl if (wrappedStatistic.wasComputed) { + println("ValueColumnImpl, stat was computed") return wrappedStatistic.cachedStatistic as Return } val statistic = aggregateSequence( @@ -50,6 +53,7 @@ public interface AggregatorAggregationHandler ) wrappedStatistic.cachedStatistic = statistic wrappedStatistic.wasComputed = true + println("ValueColumnImpl, stat was not computed") return aggregateSingleColumn(column, wrappedStatistic) } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt index 87eeee0f59..9648fed3ad 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt @@ -126,11 +126,8 @@ public object Aggregators { // T : Comparable? -> T? public fun ?> max(skipNaN: Boolean): Aggregator = max.invoke(skipNaN).cast2() - // following val is an aggregator-builder, invoke creates the aggregator. - // aggregator is something that gets values and makes a computation public val max: AggregatorOptionSwitch1, Comparable?> by withOneOption { skipNaN: Boolean -> - // the following fun returns an AggregatorProvider twoStepSelectingForAny, Comparable?>( getReturnType = maxTypeConversion, stepOneSelector = { type -> maxOrNull(type, skipNaN) }, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt index 22c9db6ae2..999902323a 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt @@ -61,6 +61,7 @@ internal open class ValueColumnImpl( internal class ResolvingValueColumn(override val source: ValueColumn) : ValueColumn by source, + ValueColumnInternal, ForceResolvedColumn { override fun resolve(context: ColumnResolutionContext) = super.resolve(context) @@ -79,4 +80,6 @@ internal class ResolvingValueColumn(override val source: ValueColumn) : override fun equals(other: Any?) = source.checkEquals(other) override fun hashCode(): Int = source.hashCode() + + override val max = WrappedStatistic(null) } diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/max.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/max.kt index bdb532329c..c3761e9da4 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/max.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/max.kt @@ -26,6 +26,7 @@ class MaxTests { @Test fun `max with regular values`() { val col = columnOf(5, 2, 8, 1, 9) + println(col::class.simpleName) col.max() shouldBe 9 } From c0adc08dd67b1cc83a233c888543888c9370a432 Mon Sep 17 00:00:00 2001 From: Carlo Maria Proietti Date: Thu, 11 Dec 2025 16:24:59 +0100 Subject: [PATCH 06/10] need to clean --- .../jetbrains/kotlinx/dataframe/api/max.kt | 4 +- .../AggregatorAggregationHandler.kt | 49 +++++++++++++------ .../dataframe/impl/columns/ValueColumnImpl.kt | 11 +++-- .../kotlinx/dataframe/statistics/max.kt | 1 + git | 0 5 files changed, 45 insertions(+), 20 deletions(-) create mode 100644 git diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt index f833ef5fcf..1e87845d30 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt @@ -34,8 +34,8 @@ public fun > DataColumn.max(skipNaN: Boolean = skipNaNDefa maxOrNull(skipNaN).suggestIfNull("max") public fun > DataColumn.maxOrNull(skipNaN: Boolean = skipNaNDefault): T? = - if(this is ValueColumnInternal<*>) { - Aggregators.max(skipNaN).aggregateSingleColumn(this, this.max) + if (this is ValueColumnInternal<*>) { + Aggregators.max(skipNaN).aggregateSingleColumn(this, this.max, skipNaN) } else { Aggregators.max(skipNaN).aggregateSingleColumn(this) } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt index ba1efd8106..37f870fc79 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt @@ -38,23 +38,42 @@ public interface AggregatorAggregationHandler } /** - * optimized override for [aggregateSingleColumn], - * preferred when column's runtime type is ValueColumnImpl + * optimized override of [aggregateSingleColumn], + * preferred when column's runtime type is ValueColumnInternal so that + * it is possible to exploit cached statistics which are proper of ValueColumnInternal */ - public fun aggregateSingleColumn(column: DataColumn, wrappedStatistic: WrappedStatistic): Return { - // It is possible to exploit cached statistic which is proper of ValueColumnImpl - if (wrappedStatistic.wasComputed) { - println("ValueColumnImpl, stat was computed") - return wrappedStatistic.cachedStatistic as Return + public fun aggregateSingleColumn( + column: DataColumn, + wrappedStatistic: WrappedStatistic, + skipNaN: Boolean, + ): Return { + when { + skipNaN && wrappedStatistic.wasComputedSkippingNaN -> { + println("valuecol, NOT COMPUTED") + return wrappedStatistic.statisticComputedSkippingNaN as Return + } + + (!skipNaN) && wrappedStatistic.wasComputedNotSkippingNaN -> { + println("valuecol, NOT COMPUTED") + return wrappedStatistic.statisticComputedNotSkippingNaN as Return + } + + else -> { + val statistic = aggregateSequence( + values = column.asSequence(), + valueType = column.type().toValueType(), + ) + if (skipNaN) { + wrappedStatistic.wasComputedSkippingNaN = true + wrappedStatistic.statisticComputedSkippingNaN = statistic + } else { + wrappedStatistic.wasComputedNotSkippingNaN = true + wrappedStatistic.statisticComputedNotSkippingNaN = statistic + } + println("valuecol, COMPUTED") + return aggregateSingleColumn(column, wrappedStatistic, skipNaN) + } } - val statistic = aggregateSequence( - values = column.asSequence(), - valueType = column.type().toValueType(), - ) - wrappedStatistic.cachedStatistic = statistic - wrappedStatistic.wasComputed = true - println("ValueColumnImpl, stat was not computed") - return aggregateSingleColumn(column, wrappedStatistic) } /** diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt index 999902323a..c9f7880fb2 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt @@ -8,7 +8,12 @@ import org.jetbrains.kotlinx.dataframe.columns.ValueColumn import kotlin.reflect.KType import kotlin.reflect.full.withNullability -public class WrappedStatistic(public var cachedStatistic: Any?, public var wasComputed: Boolean = false) +public class WrappedStatistic( + public var wasComputedSkippingNaN: Boolean = false, + public var wasComputedNotSkippingNaN: Boolean = false, + public var statisticComputedSkippingNaN: Any? = null, + public var statisticComputedNotSkippingNaN: Any? = null, +) internal interface ValueColumnInternal : ValueColumn { val max: WrappedStatistic @@ -56,7 +61,7 @@ internal open class ValueColumnImpl( override fun forceResolve() = ResolvingValueColumn(this) - override val max = WrappedStatistic(null) + override val max = WrappedStatistic() } internal class ResolvingValueColumn(override val source: ValueColumn) : @@ -81,5 +86,5 @@ internal class ResolvingValueColumn(override val source: ValueColumn) : override fun hashCode(): Int = source.hashCode() - override val max = WrappedStatistic(null) + override val max = WrappedStatistic() } diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/max.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/max.kt index c3761e9da4..bd70f55b57 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/max.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/max.kt @@ -28,6 +28,7 @@ class MaxTests { val col = columnOf(5, 2, 8, 1, 9) println(col::class.simpleName) col.max() shouldBe 9 + col.max() shouldBe 9 } @Test diff --git a/git b/git new file mode 100644 index 0000000000..e69de29bb2 From 38b26c3e23bf03599c7121d12d522052c2a82781 Mon Sep 17 00:00:00 2001 From: Carlo Maria Proietti Date: Thu, 11 Dec 2025 19:25:39 +0100 Subject: [PATCH 07/10] cleaning --- .../aggregators/AggregatorAggregationHandler.kt | 9 ++------- .../org/jetbrains/kotlinx/dataframe/statistics/max.kt | 2 -- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt index 37f870fc79..43797443dd 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt @@ -29,13 +29,11 @@ public interface AggregatorAggregationHandler * Aggregates the data in the given column and computes a single resulting value. * Calls [aggregateSequence]. */ - public fun aggregateSingleColumn(column: DataColumn): Return { - println("NOT ValueColumnImpl") - return aggregateSequence( + public fun aggregateSingleColumn(column: DataColumn): Return = + aggregateSequence( values = column.asSequence(), valueType = column.type().toValueType(), ) - } /** * optimized override of [aggregateSingleColumn], @@ -49,12 +47,10 @@ public interface AggregatorAggregationHandler ): Return { when { skipNaN && wrappedStatistic.wasComputedSkippingNaN -> { - println("valuecol, NOT COMPUTED") return wrappedStatistic.statisticComputedSkippingNaN as Return } (!skipNaN) && wrappedStatistic.wasComputedNotSkippingNaN -> { - println("valuecol, NOT COMPUTED") return wrappedStatistic.statisticComputedNotSkippingNaN as Return } @@ -70,7 +66,6 @@ public interface AggregatorAggregationHandler wrappedStatistic.wasComputedNotSkippingNaN = true wrappedStatistic.statisticComputedNotSkippingNaN = statistic } - println("valuecol, COMPUTED") return aggregateSingleColumn(column, wrappedStatistic, skipNaN) } } diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/max.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/max.kt index bd70f55b57..bdb532329c 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/max.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/max.kt @@ -26,8 +26,6 @@ class MaxTests { @Test fun `max with regular values`() { val col = columnOf(5, 2, 8, 1, 9) - println(col::class.simpleName) - col.max() shouldBe 9 col.max() shouldBe 9 } From 1c0fd665f535b6c887512a822c93f7c4925d4d1f Mon Sep 17 00:00:00 2001 From: Carlo Maria Proietti Date: Thu, 18 Dec 2025 09:50:43 +0100 Subject: [PATCH 08/10] need to test for max --- .../jetbrains/kotlinx/dataframe/api/max.kt | 7 +- .../aggregation/aggregators/Aggregator.kt | 20 ++++++ .../AggregatorAggregationHandler.kt | 68 ++++++++----------- .../aggregation/aggregators/Aggregators.kt | 9 ++- .../dataframe/impl/columns/ValueColumnImpl.kt | 44 +++++++++--- .../kotlinx/dataframe/statistics/max.kt | 1 + 6 files changed, 95 insertions(+), 54 deletions(-) diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt index 1e87845d30..2527c352a5 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt @@ -34,11 +34,8 @@ public fun > DataColumn.max(skipNaN: Boolean = skipNaNDefa maxOrNull(skipNaN).suggestIfNull("max") public fun > DataColumn.maxOrNull(skipNaN: Boolean = skipNaNDefault): T? = - if (this is ValueColumnInternal<*>) { - Aggregators.max(skipNaN).aggregateSingleColumn(this, this.max, skipNaN) - } else { - Aggregators.max(skipNaN).aggregateSingleColumn(this) - } + Aggregators.max(skipNaN).aggregateSingleColumn(this) + public inline fun ?> DataColumn.maxBy( skipNaN: Boolean = skipNaNDefault, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator.kt index a7cb2a8b14..40ab3a5888 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator.kt @@ -8,6 +8,7 @@ import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.inputHandler import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.multipleColumnsHandlers.FlatteningMultipleColumnsHandler import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.multipleColumnsHandlers.NoMultipleColumnsHandler import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.multipleColumnsHandlers.TwoStepMultipleColumnsHandler +import org.jetbrains.kotlinx.dataframe.impl.columns.ParameterValue import kotlin.reflect.KType import kotlin.reflect.full.withNullability @@ -42,6 +43,7 @@ public class Aggregator( public val inputHandler: AggregatorInputHandler, public val multipleColumnsHandler: AggregatorMultipleColumnsHandler, public val name: String, + public val statisticsParameters: Map, ) : AggregatorInputHandler by inputHandler, AggregatorMultipleColumnsHandler by multipleColumnsHandler, AggregatorAggregationHandler by aggregationHandler { @@ -75,6 +77,7 @@ public class Aggregator( aggregationHandler: AggregatorAggregationHandler, inputHandler: AggregatorInputHandler, multipleColumnsHandler: AggregatorMultipleColumnsHandler, + statisticsParameters: Map, ): AggregatorProvider = AggregatorProvider { name -> Aggregator( @@ -82,6 +85,23 @@ public class Aggregator( inputHandler = inputHandler, multipleColumnsHandler = multipleColumnsHandler, name = name, + statisticsParameters = statisticsParameters, + ) + } + + // fictitious, I want the program to compile + internal operator fun invoke( + aggregationHandler: AggregatorAggregationHandler, + inputHandler: AggregatorInputHandler, + multipleColumnsHandler: AggregatorMultipleColumnsHandler, + ): AggregatorProvider = + AggregatorProvider { name -> + Aggregator( + aggregationHandler = aggregationHandler, + inputHandler = inputHandler, + multipleColumnsHandler = multipleColumnsHandler, + name = name, + emptyMap(), ) } } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt index 43797443dd..da574b3a46 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt @@ -3,7 +3,9 @@ package org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.api.asSequence import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.aggregationHandlers.SelectingAggregationHandler -import org.jetbrains.kotlinx.dataframe.impl.columns.WrappedStatistic +import org.jetbrains.kotlinx.dataframe.impl.columns.ParameterValue +import org.jetbrains.kotlinx.dataframe.impl.columns.StatisticResult +import org.jetbrains.kotlinx.dataframe.impl.columns.ValueColumnInternal import kotlin.reflect.KType /** @@ -27,48 +29,36 @@ public interface AggregatorAggregationHandler /** * Aggregates the data in the given column and computes a single resulting value. - * Calls [aggregateSequence]. + * Calls [aggregateSequence]. It tries to exploit a cache for statistics which is proper of + * [ValueColumnInternal] */ - public fun aggregateSingleColumn(column: DataColumn): Return = - aggregateSequence( - values = column.asSequence(), - valueType = column.type().toValueType(), - ) - - /** - * optimized override of [aggregateSingleColumn], - * preferred when column's runtime type is ValueColumnInternal so that - * it is possible to exploit cached statistics which are proper of ValueColumnInternal - */ - public fun aggregateSingleColumn( - column: DataColumn, - wrappedStatistic: WrappedStatistic, - skipNaN: Boolean, - ): Return { - when { - skipNaN && wrappedStatistic.wasComputedSkippingNaN -> { - return wrappedStatistic.statisticComputedSkippingNaN as Return + public fun aggregateSingleColumn(column: DataColumn): Return { + if (column is ValueColumnInternal<*>) { + println("ValueColumnInternal") + // cache check, cache is dynamically created + val aggregator = this.aggregator ?: throw IllegalStateException("Aggregator is required") + val desiredStatisticNotConsideringParameters = column.statistics.getOrPut(aggregator.name) { + mutableMapOf, StatisticResult>() } - - (!skipNaN) && wrappedStatistic.wasComputedNotSkippingNaN -> { - return wrappedStatistic.statisticComputedNotSkippingNaN as Return - } - - else -> { - val statistic = aggregateSequence( - values = column.asSequence(), - valueType = column.type().toValueType(), - ) - if (skipNaN) { - wrappedStatistic.wasComputedSkippingNaN = true - wrappedStatistic.statisticComputedSkippingNaN = statistic - } else { - wrappedStatistic.wasComputedNotSkippingNaN = true - wrappedStatistic.statisticComputedNotSkippingNaN = statistic - } - return aggregateSingleColumn(column, wrappedStatistic, skipNaN) + // can't compare maps whose Values are Any? -> ParameterValue instead + val desiredStatistic = desiredStatisticNotConsideringParameters[aggregator.statisticsParameters] + // if desiredStatistic is null, statistic was never calculated + if (desiredStatistic != null) { + println("cache hit") + return desiredStatistic.value as Return } + println("cache miss") + val statistic = aggregateSequence( + values = column.asSequence(), + valueType = column.type().toValueType(), + ) + desiredStatisticNotConsideringParameters.put(aggregator.statisticsParameters, StatisticResult(statistic)) + return aggregateSingleColumn(column) } + return aggregateSequence( + values = column.asSequence(), + valueType = column.type().toValueType(), + ) } /** diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt index 9648fed3ad..d3faa07f24 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt @@ -8,6 +8,7 @@ import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.inputHandler import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.inputHandlers.NumberInputHandler import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.multipleColumnsHandlers.FlatteningMultipleColumnsHandler import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.multipleColumnsHandlers.TwoStepMultipleColumnsHandler +import org.jetbrains.kotlinx.dataframe.impl.columns.ParameterValue import org.jetbrains.kotlinx.dataframe.math.indexOfMax import org.jetbrains.kotlinx.dataframe.math.indexOfMedian import org.jetbrains.kotlinx.dataframe.math.indexOfMin @@ -35,10 +36,12 @@ public object Aggregators { getReturnType: CalculateReturnType, indexOfResult: IndexOfResult, stepOneSelector: Selector, + statisticsParameters: Map, ) = Aggregator( aggregationHandler = SelectingAggregationHandler(stepOneSelector, indexOfResult, getReturnType), inputHandler = AnyInputHandler(), multipleColumnsHandler = TwoStepMultipleColumnsHandler(), + statisticsParameters = statisticsParameters, ) private fun flattenHybridForAny( @@ -117,8 +120,9 @@ public object Aggregators { by withOneOption { skipNaN: Boolean -> twoStepSelectingForAny, Comparable?>( getReturnType = minTypeConversion, - stepOneSelector = { type -> minOrNull(type, skipNaN) }, indexOfResult = { type -> indexOfMin(type, skipNaN) }, + stepOneSelector = { type -> minOrNull(type, skipNaN) }, + statisticsParameters = mapOf(Pair("skipNaN", ParameterValue(skipNaN))), ) } @@ -128,10 +132,13 @@ public object Aggregators { public val max: AggregatorOptionSwitch1, Comparable?> by withOneOption { skipNaN: Boolean -> + // the following function is 'getAggregator' of AggregatorOptionSwitch + // this is the fun that works with the parameter! twoStepSelectingForAny, Comparable?>( getReturnType = maxTypeConversion, stepOneSelector = { type -> maxOrNull(type, skipNaN) }, indexOfResult = { type -> indexOfMax(type, skipNaN) }, + statisticsParameters = mapOf(Pair("skipNaN", ParameterValue(skipNaN))), ) } diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt index c9f7880fb2..27d1c2add9 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt @@ -8,15 +8,41 @@ import org.jetbrains.kotlinx.dataframe.columns.ValueColumn import kotlin.reflect.KType import kotlin.reflect.full.withNullability -public class WrappedStatistic( - public var wasComputedSkippingNaN: Boolean = false, - public var wasComputedNotSkippingNaN: Boolean = false, - public var statisticComputedSkippingNaN: Any? = null, - public var statisticComputedNotSkippingNaN: Any? = null, -) +@JvmInline +public value class StatisticResult(public val value: Any?) + +public class ParameterValue(public val parameter: Any?) { + + override fun equals(other: Any?): Boolean { + if (parameter is Boolean && other is Boolean) { + return this.parameter == other + } + if (parameter is Int && other is Int) { + return this.parameter == other + } + if (parameter is Double && other is Double) { + return this.parameter == other + } + return super.equals(other) + } + + override fun hashCode(): Int { + if (parameter is Boolean) { + return this.parameter.hashCode() + } + if (parameter is Int) { + return this.parameter.hashCode() + } + if (parameter is Double) { + return this.parameter.hashCode() + } + return super.hashCode() + } +} internal interface ValueColumnInternal : ValueColumn { - val max: WrappedStatistic + // val statistics: MutableMap, WrappedStatistic>> + val statistics: MutableMap, StatisticResult>> } internal open class ValueColumnImpl( @@ -61,7 +87,7 @@ internal open class ValueColumnImpl( override fun forceResolve() = ResolvingValueColumn(this) - override val max = WrappedStatistic() + override val statistics = mutableMapOf, StatisticResult>>() } internal class ResolvingValueColumn(override val source: ValueColumn) : @@ -86,5 +112,5 @@ internal class ResolvingValueColumn(override val source: ValueColumn) : override fun hashCode(): Int = source.hashCode() - override val max = WrappedStatistic() + override val statistics = mutableMapOf, StatisticResult>>() } diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/max.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/max.kt index bdb532329c..c6383d3a5c 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/max.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/max.kt @@ -27,6 +27,7 @@ class MaxTests { fun `max with regular values`() { val col = columnOf(5, 2, 8, 1, 9) col.max() shouldBe 9 + col.max() shouldBe 9 } @Test From 39228893dad8f1c7ab20369ab1b1f869073764ce Mon Sep 17 00:00:00 2001 From: Carlo Maria Proietti Date: Thu, 18 Dec 2025 13:31:29 +0100 Subject: [PATCH 09/10] it works --- .../aggregation/aggregators/Aggregator.kt | 20 +++++++++ .../AggregatorAggregationHandler.kt | 33 ++++++++++++-- .../aggregation/aggregators/Aggregators.kt | 9 +++- .../dataframe/impl/columns/ValueColumnImpl.kt | 45 ++++++++++++++++++- .../kotlinx/dataframe/statistics/max.kt | 1 + .../jetbrains/kotlinx/dataframe/api/max.kt | 2 - .../aggregation/aggregators/Aggregator.kt | 1 - .../AggregatorAggregationHandler.kt | 5 +-- .../aggregation/aggregators/Aggregators.kt | 2 - .../dataframe/impl/columns/ValueColumnImpl.kt | 25 ++++------- .../kotlinx/dataframe/statistics/max.kt | 1 - .../org/jetbrains/kotlinx/dataframe/io/csv.kt | 4 ++ .../org/jetbrains/kotlinx/dataframe/io/tsv.kt | 4 ++ 13 files changed, 120 insertions(+), 32 deletions(-) diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator.kt index 87efe88790..ef41bc0c75 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator.kt @@ -8,6 +8,7 @@ import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.inputHandler import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.multipleColumnsHandlers.FlatteningMultipleColumnsHandler import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.multipleColumnsHandlers.NoMultipleColumnsHandler import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.multipleColumnsHandlers.TwoStepMultipleColumnsHandler +import org.jetbrains.kotlinx.dataframe.impl.columns.ParameterValue import kotlin.reflect.KType import kotlin.reflect.full.withNullability @@ -51,6 +52,7 @@ public class Aggregator( public val inputHandler: AggregatorInputHandler, public val multipleColumnsHandler: AggregatorMultipleColumnsHandler, public val name: String, + public val statisticsParameters: Map, ) : AggregatorInputHandler by inputHandler, AggregatorMultipleColumnsHandler by multipleColumnsHandler, AggregatorAggregationHandler by aggregationHandler { @@ -84,6 +86,7 @@ public class Aggregator( aggregationHandler: AggregatorAggregationHandler, inputHandler: AggregatorInputHandler, multipleColumnsHandler: AggregatorMultipleColumnsHandler, + statisticsParameters: Map, ): AggregatorProvider = AggregatorProvider { name -> Aggregator( @@ -91,6 +94,23 @@ public class Aggregator( inputHandler = inputHandler, multipleColumnsHandler = multipleColumnsHandler, name = name, + statisticsParameters = statisticsParameters, + ) + } + + // fictitious, I want the program to compile + internal operator fun invoke( + aggregationHandler: AggregatorAggregationHandler, + inputHandler: AggregatorInputHandler, + multipleColumnsHandler: AggregatorMultipleColumnsHandler, + ): AggregatorProvider = + AggregatorProvider { name -> + Aggregator( + aggregationHandler = aggregationHandler, + inputHandler = inputHandler, + multipleColumnsHandler = multipleColumnsHandler, + name = name, + emptyMap(), ) } } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt index 7b1b0357eb..da574b3a46 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt @@ -3,6 +3,9 @@ package org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators import org.jetbrains.kotlinx.dataframe.DataColumn import org.jetbrains.kotlinx.dataframe.api.asSequence import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.aggregationHandlers.SelectingAggregationHandler +import org.jetbrains.kotlinx.dataframe.impl.columns.ParameterValue +import org.jetbrains.kotlinx.dataframe.impl.columns.StatisticResult +import org.jetbrains.kotlinx.dataframe.impl.columns.ValueColumnInternal import kotlin.reflect.KType /** @@ -26,13 +29,37 @@ public interface AggregatorAggregationHandler /** * Aggregates the data in the given column and computes a single resulting value. - * Calls [aggregateSequence]. + * Calls [aggregateSequence]. It tries to exploit a cache for statistics which is proper of + * [ValueColumnInternal] */ - public fun aggregateSingleColumn(column: DataColumn): Return = - aggregateSequence( + public fun aggregateSingleColumn(column: DataColumn): Return { + if (column is ValueColumnInternal<*>) { + println("ValueColumnInternal") + // cache check, cache is dynamically created + val aggregator = this.aggregator ?: throw IllegalStateException("Aggregator is required") + val desiredStatisticNotConsideringParameters = column.statistics.getOrPut(aggregator.name) { + mutableMapOf, StatisticResult>() + } + // can't compare maps whose Values are Any? -> ParameterValue instead + val desiredStatistic = desiredStatisticNotConsideringParameters[aggregator.statisticsParameters] + // if desiredStatistic is null, statistic was never calculated + if (desiredStatistic != null) { + println("cache hit") + return desiredStatistic.value as Return + } + println("cache miss") + val statistic = aggregateSequence( + values = column.asSequence(), + valueType = column.type().toValueType(), + ) + desiredStatisticNotConsideringParameters.put(aggregator.statisticsParameters, StatisticResult(statistic)) + return aggregateSingleColumn(column) + } + return aggregateSequence( values = column.asSequence(), valueType = column.type().toValueType(), ) + } /** * Function that can give the return type of [aggregateSequence] as [KType], given the type of the input. diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt index 6ab45bbe73..faceba349a 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt @@ -8,6 +8,7 @@ import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.inputHandler import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.inputHandlers.NumberInputHandler import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.multipleColumnsHandlers.FlatteningMultipleColumnsHandler import org.jetbrains.kotlinx.dataframe.impl.aggregation.aggregators.multipleColumnsHandlers.TwoStepMultipleColumnsHandler +import org.jetbrains.kotlinx.dataframe.impl.columns.ParameterValue import org.jetbrains.kotlinx.dataframe.math.indexOfMax import org.jetbrains.kotlinx.dataframe.math.indexOfMedian import org.jetbrains.kotlinx.dataframe.math.indexOfMin @@ -35,10 +36,12 @@ public object Aggregators { getReturnType: CalculateReturnType, indexOfResult: IndexOfResult, stepOneSelector: Selector, + statisticsParameters: Map, ) = Aggregator( aggregationHandler = SelectingAggregationHandler(stepOneSelector, indexOfResult, getReturnType), inputHandler = AnyInputHandler(), multipleColumnsHandler = TwoStepMultipleColumnsHandler(), + statisticsParameters = statisticsParameters, ) private fun flattenHybridForAny( @@ -123,8 +126,9 @@ public object Aggregators { by withOneOption { skipNaN: Boolean -> twoStepSelectingForAny, Comparable?>( getReturnType = minTypeConversion, - stepOneSelector = { type -> minOrNull(type, skipNaN) }, indexOfResult = { type -> indexOfMin(type, skipNaN) }, + stepOneSelector = { type -> minOrNull(type, skipNaN) }, + statisticsParameters = mapOf(Pair("skipNaN", ParameterValue(skipNaN))), ) } @@ -134,10 +138,13 @@ public object Aggregators { public val max: AggregatorOptionSwitch1, Comparable?> by withOneOption { skipNaN: Boolean -> + // the following function is 'getAggregator' of AggregatorOptionSwitch + // this is the fun that works with the parameter! twoStepSelectingForAny, Comparable?>( getReturnType = maxTypeConversion, stepOneSelector = { type -> maxOrNull(type, skipNaN) }, indexOfResult = { type -> indexOfMax(type, skipNaN) }, + statisticsParameters = mapOf(Pair("skipNaN", ParameterValue(skipNaN))), ) } diff --git a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt index f758360d1f..27d1c2add9 100644 --- a/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt +++ b/core/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt @@ -8,6 +8,43 @@ import org.jetbrains.kotlinx.dataframe.columns.ValueColumn import kotlin.reflect.KType import kotlin.reflect.full.withNullability +@JvmInline +public value class StatisticResult(public val value: Any?) + +public class ParameterValue(public val parameter: Any?) { + + override fun equals(other: Any?): Boolean { + if (parameter is Boolean && other is Boolean) { + return this.parameter == other + } + if (parameter is Int && other is Int) { + return this.parameter == other + } + if (parameter is Double && other is Double) { + return this.parameter == other + } + return super.equals(other) + } + + override fun hashCode(): Int { + if (parameter is Boolean) { + return this.parameter.hashCode() + } + if (parameter is Int) { + return this.parameter.hashCode() + } + if (parameter is Double) { + return this.parameter.hashCode() + } + return super.hashCode() + } +} + +internal interface ValueColumnInternal : ValueColumn { + // val statistics: MutableMap, WrappedStatistic>> + val statistics: MutableMap, StatisticResult>> +} + internal open class ValueColumnImpl( values: List, name: String, @@ -15,7 +52,8 @@ internal open class ValueColumnImpl( val defaultValue: T? = null, distinct: Lazy>? = null, ) : DataColumnImpl(values, name, type, distinct), - ValueColumn { + ValueColumn, + ValueColumnInternal { override fun distinct() = ValueColumnImpl(toSet().toList(), name, type, defaultValue, distinct) @@ -48,10 +86,13 @@ internal open class ValueColumnImpl( override fun defaultValue() = defaultValue override fun forceResolve() = ResolvingValueColumn(this) + + override val statistics = mutableMapOf, StatisticResult>>() } internal class ResolvingValueColumn(override val source: ValueColumn) : ValueColumn by source, + ValueColumnInternal, ForceResolvedColumn { override fun resolve(context: ColumnResolutionContext) = super.resolve(context) @@ -70,4 +111,6 @@ internal class ResolvingValueColumn(override val source: ValueColumn) : override fun equals(other: Any?) = source.checkEquals(other) override fun hashCode(): Int = source.hashCode() + + override val statistics = mutableMapOf, StatisticResult>>() } diff --git a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/max.kt b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/max.kt index bdb532329c..c6383d3a5c 100644 --- a/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/max.kt +++ b/core/generated-sources/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/max.kt @@ -27,6 +27,7 @@ class MaxTests { fun `max with regular values`() { val col = columnOf(5, 2, 8, 1, 9) col.max() shouldBe 9 + col.max() shouldBe 9 } @Test diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt index 2527c352a5..d1ff0cd1e2 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/api/max.kt @@ -19,7 +19,6 @@ import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateByOrNull import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateFor import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateOf import org.jetbrains.kotlinx.dataframe.impl.aggregation.modes.aggregateOfRow -import org.jetbrains.kotlinx.dataframe.impl.columns.ValueColumnInternal import org.jetbrains.kotlinx.dataframe.impl.columns.toComparableColumns import org.jetbrains.kotlinx.dataframe.impl.suggestIfNull import org.jetbrains.kotlinx.dataframe.util.DEPRECATED_ACCESS_API @@ -36,7 +35,6 @@ public fun > DataColumn.max(skipNaN: Boolean = skipNaNDefa public fun > DataColumn.maxOrNull(skipNaN: Boolean = skipNaNDefault): T? = Aggregators.max(skipNaN).aggregateSingleColumn(this) - public inline fun ?> DataColumn.maxBy( skipNaN: Boolean = skipNaNDefault, crossinline selector: (T) -> R, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator.kt index 40ab3a5888..a8cf1922ef 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregator.kt @@ -89,7 +89,6 @@ public class Aggregator( ) } - // fictitious, I want the program to compile internal operator fun invoke( aggregationHandler: AggregatorAggregationHandler, inputHandler: AggregatorInputHandler, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt index da574b3a46..a7f65152c3 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/AggregatorAggregationHandler.kt @@ -34,7 +34,6 @@ public interface AggregatorAggregationHandler */ public fun aggregateSingleColumn(column: DataColumn): Return { if (column is ValueColumnInternal<*>) { - println("ValueColumnInternal") // cache check, cache is dynamically created val aggregator = this.aggregator ?: throw IllegalStateException("Aggregator is required") val desiredStatisticNotConsideringParameters = column.statistics.getOrPut(aggregator.name) { @@ -44,15 +43,13 @@ public interface AggregatorAggregationHandler val desiredStatistic = desiredStatisticNotConsideringParameters[aggregator.statisticsParameters] // if desiredStatistic is null, statistic was never calculated if (desiredStatistic != null) { - println("cache hit") return desiredStatistic.value as Return } - println("cache miss") val statistic = aggregateSequence( values = column.asSequence(), valueType = column.type().toValueType(), ) - desiredStatisticNotConsideringParameters.put(aggregator.statisticsParameters, StatisticResult(statistic)) + desiredStatisticNotConsideringParameters[aggregator.statisticsParameters] = StatisticResult(statistic) return aggregateSingleColumn(column) } return aggregateSequence( diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt index d3faa07f24..25660dd3d1 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/aggregation/aggregators/Aggregators.kt @@ -132,8 +132,6 @@ public object Aggregators { public val max: AggregatorOptionSwitch1, Comparable?> by withOneOption { skipNaN: Boolean -> - // the following function is 'getAggregator' of AggregatorOptionSwitch - // this is the fun that works with the parameter! twoStepSelectingForAny, Comparable?>( getReturnType = maxTypeConversion, stepOneSelector = { type -> maxOrNull(type, skipNaN) }, diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt index 27d1c2add9..5a1c096a97 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt @@ -9,31 +9,23 @@ import kotlin.reflect.KType import kotlin.reflect.full.withNullability @JvmInline -public value class StatisticResult(public val value: Any?) +internal value class StatisticResult(val value: Any?) public class ParameterValue(public val parameter: Any?) { override fun equals(other: Any?): Boolean { - if (parameter is Boolean && other is Boolean) { - return this.parameter == other - } - if (parameter is Int && other is Int) { - return this.parameter == other - } - if (parameter is Double && other is Double) { - return this.parameter == other + val otherAsParameterValue = other as ParameterValue? + val that = otherAsParameterValue?.parameter + if (parameter is Boolean && that is Boolean) { + println("my_equals") + return this.parameter == that } + println("default_equals") return super.equals(other) } override fun hashCode(): Int { - if (parameter is Boolean) { - return this.parameter.hashCode() - } - if (parameter is Int) { - return this.parameter.hashCode() - } - if (parameter is Double) { + if (parameter is Boolean?) { return this.parameter.hashCode() } return super.hashCode() @@ -41,7 +33,6 @@ public class ParameterValue(public val parameter: Any?) { } internal interface ValueColumnInternal : ValueColumn { - // val statistics: MutableMap, WrappedStatistic>> val statistics: MutableMap, StatisticResult>> } diff --git a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/max.kt b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/max.kt index c6383d3a5c..bdb532329c 100644 --- a/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/max.kt +++ b/core/src/test/kotlin/org/jetbrains/kotlinx/dataframe/statistics/max.kt @@ -27,7 +27,6 @@ class MaxTests { fun `max with regular values`() { val col = columnOf(5, 2, 8, 1, 9) col.max() shouldBe 9 - col.max() shouldBe 9 } @Test diff --git a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt index 3b4d408145..2a6444488e 100644 --- a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt +++ b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/csv.kt @@ -8,6 +8,7 @@ import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams import java.io.File import java.io.InputStream +import java.nio.file.Path import kotlin.reflect.typeOf public class CsvDeephaven(private val delimiter: Char = DelimParams.CSV_DELIMITER) : SupportedDataFrameFormat { @@ -17,6 +18,9 @@ public class CsvDeephaven(private val delimiter: Char = DelimParams.CSV_DELIMITE override fun readDataFrame(file: File, header: List): DataFrame<*> = DataFrame.readCsv(file = file, header = header, delimiter = delimiter) + override fun readDataFrame(path: Path, header: List): DataFrame<*> = + DataFrame.readCsv(path = path, header = header, delimiter = delimiter) + override fun acceptsExtension(ext: String): Boolean = ext == "csv" override fun acceptsSample(sample: SupportedFormatSample): Boolean = true // Extension is enough diff --git a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt index 9bc0cacd90..ecb123b93c 100644 --- a/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt +++ b/dataframe-csv/generated-sources/src/main/kotlin/org/jetbrains/kotlinx/dataframe/io/tsv.kt @@ -8,6 +8,7 @@ import org.jetbrains.kotlinx.dataframe.codeGen.DefaultReadDfMethod import org.jetbrains.kotlinx.dataframe.documentationCsv.DelimParams import java.io.File import java.io.InputStream +import java.nio.file.Path import kotlin.reflect.typeOf public class TsvDeephaven(private val delimiter: Char = DelimParams.TSV_DELIMITER) : SupportedDataFrameFormat { @@ -17,6 +18,9 @@ public class TsvDeephaven(private val delimiter: Char = DelimParams.TSV_DELIMITE override fun readDataFrame(file: File, header: List): DataFrame<*> = DataFrame.readTsv(file = file, header = header, delimiter = delimiter) + override fun readDataFrame(path: Path, header: List): DataFrame<*> = + DataFrame.readTsv(path = path, header = header, delimiter = delimiter) + override fun acceptsExtension(ext: String): Boolean = ext == "tsv" override fun acceptsSample(sample: SupportedFormatSample): Boolean = true // Extension is enough From 0f93be9532f339d901b43026d532a4232a29d4af Mon Sep 17 00:00:00 2001 From: Carlo Maria Proietti Date: Thu, 18 Dec 2025 13:48:42 +0100 Subject: [PATCH 10/10] remove useless file --- .../jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt | 2 -- git | 0 2 files changed, 2 deletions(-) delete mode 100644 git diff --git a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt index 5a1c096a97..071801a5d8 100644 --- a/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt +++ b/core/src/main/kotlin/org/jetbrains/kotlinx/dataframe/impl/columns/ValueColumnImpl.kt @@ -17,10 +17,8 @@ public class ParameterValue(public val parameter: Any?) { val otherAsParameterValue = other as ParameterValue? val that = otherAsParameterValue?.parameter if (parameter is Boolean && that is Boolean) { - println("my_equals") return this.parameter == that } - println("default_equals") return super.equals(other) } diff --git a/git b/git deleted file mode 100644 index e69de29bb2..0000000000