deephaven · stanbrub · Mar 13, 2026 · Mar 13, 2026 · Mar 19, 2026 · Mar 20, 2026
diff --git a/.github/resources/adhoc-benchmark-docker-compose.yml b/.github/resources/adhoc-benchmark-docker-compose.yml
@@ -1,13 +1,15 @@
 services:
   deephaven:
-    image: ${DOCKER_IMG}
+    image: ghcr.io/stanbrub/server:jvm25
     ports:
       - "${DEEPHAVEN_PORT:-10000}:10000"
     volumes:
       - ./data:/data
       - ./minio:/minio
     environment:
+      - "JAVA_OPTS=-XX:+UseG1GC"
       - "START_OPTS=-DAuthHandlers=io.deephaven.auth.AnonymousAuthenticationHandler ${CONFIG_OPTS}"
+      - "DEEPHAVEN_HOST_OS_DIR=${ENV_DEEPHAVEN_HOST_OS_DIR}"
 
   redpanda:
     command:

diff --git a/.github/resources/adhoc-scale-benchmark.properties b/.github/resources/adhoc-scale-benchmark.properties
@@ -15,7 +15,7 @@ schema.registry.addr=redpanda:8081
 kafka.consumer.addr=redpanda:29092
 
 # Default timeout to complete processes (Executing queries, generating records)
-default.completion.timeout=10 minutes
+default.completion.timeout=20 minutes
 
 # Default data distribution for column data (random, ascending, descending, runlength)
 default.data.distribution=${baseDistrib}

diff --git a/.github/scripts/manage-deephaven-remote.sh b/.github/scripts/manage-deephaven-remote.sh
@@ -35,6 +35,7 @@ if [[ ${CONFIG_OPTS} == "<default>" ]]; then
   CONFIG_OPTS="-Xmx24g"
 fi
 echo "CONFIG_OPTS=${CONFIG_OPTS}" > .env
+echo "ENV_DEEPHAVEN_HOST_OS_DIR=${DEEPHAVEN_DIR}" >> .env
 
 IS_BRANCH="false"
 if [[ ${DOCKER_IMG} == *"@sha"*":"* ]]; then

diff --git a/.github/scripts/run-benchmarks-remote.sh b/.github/scripts/run-benchmarks-remote.sh
@@ -43,7 +43,7 @@ title "-- Running Benchmarks --"
 set +f
 cd ${RUN_DIR}
 cat ${RUN_TYPE}-scale-benchmark.properties | sed 's|${baseRowCount}|'"${ROW_COUNT}|g" | sed 's|${baseDistrib}|'"${DISTRIB}|g" | sed 's|${userHome}|'"${HOME}|g" > scale-benchmark.properties
-JAVA_OPTS=$(echo -Dbenchmark.profile=scale-benchmark.properties -jar deephaven-benchmark-*-standalone.jar -cp standard-tests.jar)
+JAVA_OPTS=$(echo -Xmx4g -Dbenchmark.profile=scale-benchmark.properties -jar deephaven-benchmark-*-standalone.jar -cp standard-tests.jar)
 set -f
 
 if [ "${TAG_NAME}" = "Any" ]; then

diff --git a/.github/scripts/setup-test-server-remote.sh b/.github/scripts/setup-test-server-remote.sh
@@ -135,7 +135,8 @@ sudo docker system prune --volumes --force
 sudo rm -rf ${DEEPHAVEN_DIR}
 
 title "-- Staging Docker Resources --"
-mkdir -p ${DEEPHAVEN_DIR}
+mkdir -p ${DEEPHAVEN_DIR}/data
+chmod 777 ${DEEPHAVEN_DIR}/data
 cd ${DEEPHAVEN_DIR}
 cp ${GIT_DIR}/benchmark/.github/resources/${RUN_TYPE}-benchmark-docker-compose.yml docker-compose.yml
 

diff --git a/pom.xml b/pom.xml
@@ -178,7 +178,7 @@
 							<file>${project.basedir}/eclipse-java-google-style.xml</file>
 						</eclipse>
 						<licenseHeader>
-							<content>/* Copyright (c) 2022-$YEAR Deephaven Data Labs and Patent Pending */</content>
+							<content>/* Copyright (c) $YEAR Deephaven Data Labs and Patent Pending */</content>
 						</licenseHeader>
 					</java>
 				</configuration>
@@ -271,6 +271,11 @@
 			<artifactId>kafka-protobuf-serializer</artifactId>
 			<version>8.1.1</version>
 		</dependency>
+        <dependency>
+            <groupId>blue.strategic.parquet</groupId>
+            <artifactId>parquet-floor</artifactId>
+            <version>1.64</version>
+        </dependency>
 		<dependency>
 			<groupId>io.deephaven</groupId>
 			<artifactId>deephaven-java-client-barrage-dagger</artifactId>

diff --git a/src/it/java/io/deephaven/benchmark/tests/compare/CompareTestRunner.java b/src/it/java/io/deephaven/benchmark/tests/compare/CompareTestRunner.java
@@ -28,6 +28,9 @@
  * practical purposes, though it is not ideal.
  */
 public class CompareTestRunner {
+    static {
+        System.setProperty("root.test.package", "io.deephaven.benchmark.tests");
+    }
     final Object testInst;
     final Set<String> requiredPackages = new LinkedHashSet<>();
     final Map<String, String> downloadFiles = new LinkedHashMap<>();

diff --git a/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java b/src/it/java/io/deephaven/benchmark/tests/standard/StandardTestRunner.java
@@ -1,4 +1,4 @@
-/* Copyright (c) 2022-2024 Deephaven Data Labs and Patent Pending */
+/* Copyright (c) 2022-2026 Deephaven Data Labs and Patent Pending */
 package io.deephaven.benchmark.tests.standard;
 
 import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -10,6 +10,7 @@
 import io.deephaven.benchmark.controller.Controller;
 import io.deephaven.benchmark.controller.DeephavenDockerController;
 import io.deephaven.benchmark.metric.Metrics;
+import io.deephaven.benchmark.util.Log;
 import io.deephaven.benchmark.util.Timer;
 
 /**
@@ -21,17 +22,25 @@
  * conventions are followed (ex. main file is "source")
  */
 final public class StandardTestRunner {
+    static {
+        System.setProperty("root.test.package", "io.deephaven.benchmark.tests");
+    }
     final Object testInst;
     final List<String> supportTables = new ArrayList<>();
     final List<String> setupQueries = new ArrayList<>();
     final List<String> preOpQueries = new ArrayList<>();
+    final List<String> teardownQueries = new ArrayList<>();
     final Set<String> requiredServices = new TreeSet<>(List.of("deephaven"));
     private String mainTable = "source";
     private Bench api;
     private Controller controller;
+    private int rowCountFactor = 1;
     private int staticFactor = 1;
     private int incFactor = 1;
-    private int rowCountFactor = 1;
+    private boolean useCachedSource = true;
+    private boolean useLocalParquet = false;
+    private float incCycleFactor = 1.0f;
+    private long incReleaseRowCount = 1000000;
 
     public StandardTestRunner(Object testInst) {
         this.testInst = testInst;
@@ -96,6 +105,25 @@ public void setServices(String... services) {
         requiredServices.addAll(Arrays.asList(services));
     }
 
+    /**
+     * Set if the generated tables are loaded into memory before running the test queries.
+     * 
+     * @param useMemorySource true to load the generated tables into memory, otherwise false
+     */
+    public void useCachedSource(boolean useMemorySource) {
+        this.useCachedSource = useMemorySource;
+    }
+
+    /**
+     * Set if the generated tables are created through Deephaven (i.e. real client-server) or through the local file
+     * system (i.e. a local copy). The default of "false" is preferred.
+     * 
+     * @param useLocalParquet false to generate tables through Deephaven, otherwise true
+     */
+    public void useLocalParquet(boolean useLocalParquet) {
+        this.useLocalParquet = useLocalParquet;
+    }
+
     /**
      * Add a query to be run directly after the main table is loaded. It is not measured. This query can transform the
      * main table or supporting table, set up aggregations or updateby operations, etc.
@@ -117,6 +145,16 @@ public void addPreOpQuery(String query) {
         preOpQueries.add(query);
     }
 
+    /**
+     * Add a query to be run after everything else is done. This is useful for teardown of any resources after the test
+     * is run like logging, temporary files, perf table retrieval, etc.
+     * 
+     * @param query the query to run after the measured operation
+     */
+    public void addTeardownQuery(String query) {
+        teardownQueries.add(query);
+    }
+
     /**
      * The {@code scale.row.count} property supplies a default for the number of rows generated for benchmark tests.
      * Given that some operations use less memory than others, scaling up the generated rows per operation is more
@@ -143,6 +181,18 @@ public void setScaleFactors(int staticFactor, int incFactor) {
         this.incFactor = incFactor;
     }
 
+    /**
+     * Set the incremental release filter to use for the incremental test. By default, this is an auto-tuning release
+     * filter with a target cycle factor of 1.0f.
+     * 
+     * @param cycleFactor the target cycle factor when the auto-tuning filter is used, otherwise ignored
+     * @param releaseRowCount the rows to release per cycle when auto-tuning is not used, otherwise ignored
+     */
+    public void setIncReleaseFilter(float cycleFactor, long releaseRowCount) {
+        this.incCycleFactor = cycleFactor;
+        this.incReleaseRowCount = releaseRowCount;
+    }
+
     /**
      * Run a single operation test through the Bench API with no upper bound expected on the resulting row count
      * 
@@ -193,40 +243,42 @@ public void test(String name, long maxExpectedRowCount, String operation, String
         }
     }
 
-    long getWarmupRowCount() {
-        return (long) (api.propertyAsIntegral("warmup.row.count", "0") * rowCountFactor);
+    public long getGeneratedRowCount() {
+        return (long) (api.propertyAsIntegral("scale.row.count", "100000") * rowCountFactor);
     }
 
-    long getGeneratedRowCount() {
-        return (long) (api.propertyAsIntegral("scale.row.count", "100000") * rowCountFactor);
+    long getWarmupRowCount() {
+        return (long) (api.propertyAsIntegral("warmup.row.count", "0") * rowCountFactor);
     }
 
     long getMaxExpectedRowCount(long expectedRowCount, long scaleFactor) {
         return (expectedRowCount < 1) ? Long.MAX_VALUE : expectedRowCount;
     }
 
     String getReadOperation(int scaleFactor, long rowCount, String... loadColumns) {
-        var headRows = (rowCount >= getGeneratedRowCount())?"":".head(${rows})";
+        var headRows = (rowCount >= getGeneratedRowCount()) ? "" : ".head(${rows})";
+        var selectStr = useCachedSource ? "select" : "view";
         if (scaleFactor > 1 && mainTable.equals("timed") && Arrays.asList(loadColumns).contains("timestamp")) {
             var read = """
             merge([
-                read('/data/timed.parquet').view(formulas=[${loadColumns}])${headRows}
+                bench_api_read('/data/timed.parquet').view(formulas=[${loadColumns}])${headRows}
             ] * ${scaleFactor}).update_view([
                 'timestamp=timestamp.plusMillis((long)(ii / ${rows}) * ${rows})'
-            ]).select()
+            ]).${selectStr}()
             """;
-            read = read.replace("${headRows}",headRows);
+            read = read.replace("${headRows}", headRows).replace("${selectStr}", selectStr);
             return read.replace("${scaleFactor}", "" + scaleFactor).replace("${rows}", "" + rowCount);
         }
 
-        var read = "read('/data/${mainTable}.parquet')${headRows}.select(formulas=[${loadColumns}])";
+        var read = "bench_api_read('/data/${mainTable}.parquet')${headRows}.${selectStr}(formulas=[${loadColumns}])";
         read = (loadColumns.length == 0) ? ("empty_table(${rows})") : read;
 
         if (scaleFactor > 1) {
             read = "merge([${readTable}] * ${scaleFactor})".replace("${readTable}", read);
             read = read.replace("${scaleFactor}", "" + scaleFactor);
         }
-        return read.replace("${headRows}",headRows).replace("${rows}", "" + rowCount);
+        read = read.replace("${headRows}", headRows).replace("${rows}", "" + rowCount);
+        return read.replace("${selectStr}", selectStr);
     }
 
     String getStaticQuery(String name, String operation, long rowCount, String... loadColumns) {
@@ -241,9 +293,11 @@ String getStaticQuery(String name, String operation, long rowCount, String... lo
         bench_api_metrics_start()
         print('${logOperationBegin}')
 
+        begin_clock = time.time_ns()
         begin_time = time.perf_counter_ns()
         result = ${operation}
         end_time = time.perf_counter_ns()
+        end_clock = time.time_ns()
 
         print('${logOperationEnd}')
         bench_api_metrics_end()
@@ -253,7 +307,10 @@ String getStaticQuery(String name, String operation, long rowCount, String... lo
             double_col("elapsed_nanos", [end_time - begin_time]),
             long_col("processed_row_count", [loaded_tbl_size]),
             long_col("result_row_count", [result.size]),
+            long_col("begin_clock_nanos", [begin_clock]),
+            long_col("end_clock_nanos", [end_clock]),
         ])
+        ${teardownQueries}
         """;
         var read = getReadOperation(staticFactor, rowCount, loadColumns);
         return populateQuery(name, staticQuery, operation, read, loadColumns);
@@ -268,16 +325,21 @@ String getIncQuery(String name, String operation, long rowCount, String... loadC
         loaded_tbl_size = ${mainTable}.size
         ${setupQueries}
 
-        autotune = jpy.get_type('io.deephaven.engine.table.impl.select.AutoTuningIncrementalReleaseFilter')
-        source_filter = autotune(0, 1000000, 1.0, True)
+        isat = System.getProperty('train.autotune', 'true').lower() == 'true' or ${incReleaseRowCount} <= 0
+        filter_name = 'AutoTuningIncrementalReleaseFilter' if isat else 'IncrementalReleaseFilter'
+        autotune = jpy.get_type(f'io.deephaven.engine.table.impl.select.{filter_name}')
+        print("******* ISAT:",isat, "FILTER:", filter_name)
+
+        source_filter = autotune(0,1000000,${incCycleFactor},True) if isat else autotune(0,${incReleaseRowCount})
         ${mainTable} = ${mainTable}.where(source_filter)
         if right: 
-            right_filter = autotune(0, 1010000, 1.0, True)
+            right_filter = autotune(0,1010000,${incCycleFactor},True) if isat else autotune(0,${incReleaseRowCount})
             right = right.where(right_filter)
 
         ${preOpQueries}
         bench_api_metrics_start()
         print('${logOperationBegin}')
+        begin_clock = time.time_ns()
         begin_time = time.perf_counter_ns()
         result = ${operation}
 
@@ -291,30 +353,39 @@ String getIncQuery(String name, String operation, long rowCount, String... loadC
         source_filter.waitForCompletion()
 
         end_time = time.perf_counter_ns()
+        end_clock = time.time_ns()
         print('${logOperationEnd}')
         bench_api_metrics_end()
         standard_metrics = bench_api_metrics_collect()
 
         stats = new_table([
             double_col("elapsed_nanos", [end_time - begin_time]),
             long_col("processed_row_count", [loaded_tbl_size]),
-            long_col("result_row_count", [result.size])
+            long_col("result_row_count", [result.size]),
+            long_col("begin_clock_nanos", [begin_clock]),
+            long_col("end_clock_nanos", [end_clock]),
         ])
+        ${teardownQueries}
+        print("STANDARD EVENTS: ", f'start_ns > {begin_clock}L', f'start_ns < {end_clock}L')
+        standard_events = standard_events.where([f'start_ns > {begin_clock}L', f'start_ns < {end_clock}L'])
         """;
         var read = getReadOperation(incFactor, rowCount, loadColumns);
         return populateQuery(name, incQuery, operation, read, loadColumns);
     }
 
     String populateQuery(String name, String query, String operation, String read, String... loadColumns) {
         query = query.replace("${readTable}", read);
-        query = query.replace("${mainTable}", mainTable);
         query = query.replace("${loadSupportTables}", loadSupportTables());
         query = query.replace("${loadColumns}", listStr(loadColumns));
         query = query.replace("${setupQueries}", String.join("\n", setupQueries));
         query = query.replace("${preOpQueries}", String.join("\n", preOpQueries));
         query = query.replace("${operation}", operation);
+        query = query.replace("${teardownQueries}", String.join("\n", teardownQueries));
         query = query.replace("${logOperationBegin}", getLogSnippet("Begin", name));
         query = query.replace("${logOperationEnd}", getLogSnippet("End", name));
+        query = query.replace("${incCycleFactor}", "" + incCycleFactor);
+        query = query.replace("${incReleaseRowCount}", "" + incReleaseRowCount);
+        query = query.replace("${mainTable}", mainTable);
         return query;
     }
 
@@ -326,6 +397,7 @@ Result runTest(String name, String warmupQuery, String mainQuery) {
         stopUnusedServices(requiredServices);
 
         try {
+            Log.info("Running Test: %s", name);
             if (getWarmupRowCount() > 0)
                 api.query(warmupQuery).execute();
             var result = new AtomicReference<Result>();
@@ -342,6 +414,8 @@ Result runTest(String name, String warmupQuery, String mainQuery) {
                 metrics.set("inc.factor", incFactor);
                 metrics.set("row.factor", rowCountFactor);
                 api.metrics().add(metrics);
+            }).fetchAfter("standard_events", table -> {
+                api.events().add(table);
             }).execute();
             api.result().test("deephaven-engine", result.get().elapsedTime(), result.get().loadedRowCount());
             return result.get();
@@ -356,7 +430,7 @@ String listStr(String... values) {
     }
 
     String loadSupportTables() {
-        return supportTables.stream().map(t -> t + " = read('/data/" + t + ".parquet').select()\n")
+        return supportTables.stream().map(t -> t + " = bench_api_read('/data/" + t + ".parquet').select()\n")
                 .collect(Collectors.joining(""));
     }
 
@@ -435,7 +509,7 @@ boolean generateNamedTable(String name, String distribution, String[] groups) {
     }
 
     boolean generateSourceTable(String distribution, String[] groups) {
-        return api.table("source")
+        var t = api.table("source")
                 .add("num1", "double", "[0-4]", distribution)
                 .add("num2", "double", "[1-10]", distribution)
                 .add("key1", "string", "[1-100]", distribution)
@@ -444,8 +518,8 @@ boolean generateSourceTable(String distribution, String[] groups) {
                 .add("key4", "int", "[0-98]", distribution)
                 .add("key5", "string", "[1-1000000]", distribution)
                 .withRowCount(getGeneratedRowCount())
-                .withColumnGrouping(groups)
-                .generateParquet();
+                .withColumnGrouping(groups);
+        return useLocalParquet ? t.generateLocalParquet() : t.generateParquet();
     }
 
     boolean generateRightTable(String distribution, String[] groups) {
@@ -469,7 +543,7 @@ boolean generateRightTable(String distribution, String[] groups) {
     boolean generateTimedTable(String distribution, String[] groups) {
         long minTime = 1676557157537L;
         long maxTime = minTime + getGeneratedRowCount() - 1;
-        return api.table("timed")
+        var t = api.table("timed")
                 .add("timestamp", "timestamp-millis", "[" + minTime + "-" + maxTime + "]", "ascending")
                 .add("num1", "double", "[0-4]", distribution)
                 .add("num2", "double", "[1-10]", distribution)
@@ -478,8 +552,8 @@ boolean generateTimedTable(String distribution, String[] groups) {
                 .add("key3", "int", "[0-8]", distribution)
                 .add("key4", "int", "[0-98]", distribution)
                 .withFixedRowCount(true)
-                .withColumnGrouping(groups)
-                .generateParquet();
+                .withColumnGrouping(groups);
+        return useLocalParquet ? t.generateLocalParquet() : t.generateParquet();
     }
 
     record Result(long loadedRowCount, Duration elapsedTime, long resultRowCount) {