From d338ff3d747f0c25551277be3ac6ba986635e148 Mon Sep 17 00:00:00 2001 From: andreatp Date: Mon, 2 Mar 2026 18:38:41 +0000 Subject: [PATCH] cma and improvements --- README.md | 5 ++- core/pom.xml | 13 ++++--- .../io/roastedroot/pglite4j/core/PGLite.java | 34 +++++++++++++++++-- .../roastedroot/pglite4j/core/PGLiteTest.java | 34 +++++++++++++++++++ .../pglite4j/jdbc/PgLiteDriverTest.java | 16 +++++++++ wasm-build/Makefile | 2 +- wasm-build/build.sh | 3 +- wasm-build/docker/entrypoint.sh | 2 +- wasm-build/patches/pglite-wasm/build.sh.diff | 14 +++++++- 9 files changed, 111 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 2af0c8e..23c535d 100644 --- a/README.md +++ b/README.md @@ -111,12 +111,15 @@ pglite4j/ - [ ] **Only `memory://` is supported** — no persistent / file-backed databases yet - [ ] **Single connection only** — PGlite is single-threaded; connection pool max size must be 1 -- [ ] **CMA buffer size is fixed** — large messages that exceed the CMA buffer (~12 MB total, ~16 KB per single message) are not yet handled via the file transport fallback - [ ] **Limited extensions** — only `plpgsql` and `dict_snowball` are bundled; adding more requires rebuilding the WASM binary - [ ] **Startup time** — first connection has some overhead it can be optimized more - [ ] **Binary size** — the WASM binary + pgdata resources add several MBs to the classpath - [ ] **Error recovery** — `clear_error()` integration for automatic transaction recovery is not yet wired up +### CMA (Contiguous Memory Allocator) + +CMA is a preallocated contiguous region at the start of WASM linear memory used for zero-copy data transfer between Java and the PostgreSQL backend (similar in concept to [Linux CMA](https://developer.toradex.com/software/linux-resources/linux-features/contiguous-memory-allocator-cma-linux/)). Messages that fit within the CMA buffer (default 12 MB) are transferred directly via shared memory. 
For responses that exceed the CMA buffer, the C code automatically falls back to file-based transport (`/pgdata/.s.PGSQL.5432.out`), which the Java side reads transparently. + ## Building from source ```bash diff --git a/core/pom.xml b/core/pom.xml index 5d73321..31264c3 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -72,12 +72,15 @@ io.roastedroot.pglite4j.core.PGLiteModule ../wasm-build/output/pglite.wasi - - 5708 - 11479 - 17648 - 25134 + 2918 + 4335 + 4336 + 4494 + 4551 + 6394 + 6397 + 11038 diff --git a/core/src/main/java/io/roastedroot/pglite4j/core/PGLite.java b/core/src/main/java/io/roastedroot/pglite4j/core/PGLite.java index 9108551..b813d4f 100644 --- a/core/src/main/java/io/roastedroot/pglite4j/core/PGLite.java +++ b/core/src/main/java/io/roastedroot/pglite4j/core/PGLite.java @@ -5,7 +5,6 @@ import com.dylibso.chicory.runtime.Instance; import com.dylibso.chicory.wasi.WasiOptions; import com.dylibso.chicory.wasi.WasiPreview1; -import com.dylibso.chicory.wasm.types.MemoryLimits; import io.roastedroot.zerofs.Configuration; import io.roastedroot.zerofs.ZeroFs; import java.io.BufferedReader; @@ -87,7 +86,6 @@ private PGLite() { .withImportValues(imports) .withMachineFactory(PGLiteModule::create) .withStart(false) - .withMemoryLimits(new MemoryLimits(2571)) .build(); this.exports = new PGLite_ModuleExports(this.instance); @@ -127,6 +125,11 @@ public byte[] execProtocolRaw(byte[] message) { return concat(replies); } + /** Returns the CMA buffer size in bytes (for diagnostics / testing). 
*/ + public int getBufferSize() { + return exports.getBufferSize(0); + } + public static Builder builder() { return new Builder(); } @@ -170,7 +173,34 @@ private byte[] wireRecvCma() { return resp; } + private byte[] wireRecvFile() { + try { + Path outFile = fs.getPath("/pgdata/.s.PGSQL.5432.out"); + if (!Files.exists(outFile)) { + return null; + } + byte[] resp = Files.readAllBytes(outFile); + Files.delete(outFile); + exports.interactiveWrite(0); + pendingWireLen = 0; + return resp; + } catch (IOException e) { + throw new RuntimeException("Failed to read file transport output", e); + } + } + private boolean collectReply(List replies) { + // Check channel: negative means C code fell back to file transport. + // Must check BEFORE wireRecvCma() since interactiveRead() would + // consume the read signal even when data went to file. + if (exports.getChannel() < 0) { + byte[] resp = wireRecvFile(); + if (resp != null) { + replies.add(resp); + return true; + } + return false; + } byte[] resp = wireRecvCma(); if (resp != null) { replies.add(resp); diff --git a/core/src/test/java/io/roastedroot/pglite4j/core/PGLiteTest.java b/core/src/test/java/io/roastedroot/pglite4j/core/PGLiteTest.java index a0937cd..233d942 100644 --- a/core/src/test/java/io/roastedroot/pglite4j/core/PGLiteTest.java +++ b/core/src/test/java/io/roastedroot/pglite4j/core/PGLiteTest.java @@ -68,6 +68,40 @@ public void createTableAndInsert() { } } + @Test + public void cmaBufferOverflow() { + try (PGLite pg = PGLite.builder().build()) { + doHandshake(pg); + + int bufSize = pg.getBufferSize(); + System.out.println( + "CMA buffer size: " + bufSize + " bytes (" + (bufSize / 1024) + " KB)"); + + // Generate a wire protocol response that exceeds the CMA buffer. + // repeat('x', N) returns an N-byte string in the DataRow message. 
+ int repeatLen = bufSize + 1000; + String sql = "SELECT repeat('x', " + repeatLen + ");"; + System.out.println("Query: SELECT repeat('x', " + repeatLen + ")"); + + byte[] result = pg.execProtocolRaw(PgWireCodec.queryMessage(sql)); + System.out.println("Response length: " + result.length + " bytes"); + assertNotNull(result); + // The response must contain the full string + wire protocol overhead + assertTrue( + result.length > repeatLen, + "Expected response > " + repeatLen + " but got " + result.length); + assertTrue( + PgWireCodec.hasReadyForQuery(result), + "Expected ReadyForQuery in overflow response"); + + // Verify a normal query still works after the overflow + byte[] r2 = pg.execProtocolRaw(PgWireCodec.queryMessage("SELECT 42;")); + String data = PgWireCodec.parseDataRows(r2); + System.out.println("Post-overflow query: SELECT 42 => " + data); + assertTrue(data.contains("42"), "Normal query should work after CMA overflow"); + } + } + static void doHandshake(PGLite pg) { byte[] startup = PgWireCodec.startupMessage("postgres", "template1"); byte[] resp1 = pg.execProtocolRaw(startup); diff --git a/jdbc/src/test/java/io/roastedroot/pglite4j/jdbc/PgLiteDriverTest.java b/jdbc/src/test/java/io/roastedroot/pglite4j/jdbc/PgLiteDriverTest.java index c23fa5e..04a8264 100644 --- a/jdbc/src/test/java/io/roastedroot/pglite4j/jdbc/PgLiteDriverTest.java +++ b/jdbc/src/test/java/io/roastedroot/pglite4j/jdbc/PgLiteDriverTest.java @@ -135,6 +135,22 @@ void selectWithWhere() throws SQLException { @Test @Order(8) + void largeResultSet() throws SQLException { + try (Statement stmt = connection.createStatement(); + ResultSet rs = + stmt.executeQuery( + "SELECT n, repeat('x', 100) AS data" + + " FROM generate_series(1, 500) AS n")) { + int count = 0; + while (rs.next()) { + count++; + } + assertEquals(500, count); + } + } + + @Test + @Order(9) void driverAcceptsUrl() throws SQLException { PgLiteDriver driver = new PgLiteDriver(); 
assertTrue(driver.acceptsURL("jdbc:pglite:memory://")); diff --git a/wasm-build/Makefile b/wasm-build/Makefile index 08b9394..da202ad 100644 --- a/wasm-build/Makefile +++ b/wasm-build/Makefile @@ -6,7 +6,7 @@ WASM_FILE := $(OUTPUT_DIR)/pglite.wasi .PHONY: build clean unpack -WASM_OPT_FLAGS ?= -Oz --strip-debug +WASM_OPT_FLAGS ?= -O3 --strip-debug build: WASM_OPT_FLAGS="$(WASM_OPT_FLAGS)" $(SCRIPT_DIR)build.sh diff --git a/wasm-build/build.sh b/wasm-build/build.sh index 65ce1f1..4edfb54 100755 --- a/wasm-build/build.sh +++ b/wasm-build/build.sh @@ -38,11 +38,12 @@ docker run --rm \ -v "${OUTPUT_DIR}/sdk-dist:/tmp/sdk/dist:rw" \ -v "${OUTPUT_DIR}/pglite:/tmp/pglite:rw" \ -v "${OUTPUT_DIR}/pgdata:/pgdata:rw" \ + -e CMA_MB="${CMA_MB:-12}" \ -e DEBUG="${DEBUG:-true}" \ -e PG_VERSION="${PG_VERSION:-17.5}" \ -e PG_BRANCH="${PG_BRANCH:-REL_17_5_WASM-pglite}" \ -e CI="${CI:-true}" \ - -e WASM_OPT_FLAGS="${WASM_OPT_FLAGS:--Oz --strip-debug}" \ + -e WASM_OPT_FLAGS="${WASM_OPT_FLAGS:--O3 --strip-debug}" \ "${IMAGE_NAME}" echo " diff --git a/wasm-build/docker/entrypoint.sh b/wasm-build/docker/entrypoint.sh index 2c28e99..3de9780 100755 --- a/wasm-build/docker/entrypoint.sh +++ b/wasm-build/docker/entrypoint.sh @@ -13,7 +13,7 @@ export ZIC=${ZIC:-/usr/sbin/zic} export GETZIC=${GETZIC:-false} export WASI=true export CI=${CI:-true} -export CMA_MB=${CMA_MB:-2} +export CMA_MB=${CMA_MB:-12} export PGCRYPTO=${PGCRYPTO:-false} export NATIVE=${NATIVE:-false} export CONTAINER_PATH="" diff --git a/wasm-build/patches/pglite-wasm/build.sh.diff b/wasm-build/patches/pglite-wasm/build.sh.diff index 4a2b0f2..a6d0f59 100644 --- a/wasm-build/patches/pglite-wasm/build.sh.diff +++ b/wasm-build/patches/pglite-wasm/build.sh.diff @@ -18,7 +18,7 @@ else echo "compilation of libpglite ${BUILD} support failed" fi -@@ -114,8 +114,91 @@ +@@ -114,8 +114,103 @@ if [ -f ${PG_DIST}/pglite.wasi ] then @@ -103,6 +103,18 @@ + echo "wasi-vfs pack output:" + ls -lh ${PG_DIST}/pglite-packed.wasi + mv 
${PG_DIST}/pglite-packed.wasi ${PG_DIST}/pglite.wasi ++ ++ echo "=== running wasm-opt ${WASM_OPT_FLAGS} ===" ++ ls -lh ${PG_DIST}/pglite.wasi ++ wasm-opt ${WASM_OPT_FLAGS} ${PG_DIST}/pglite.wasi -o ${PG_DIST}/pglite-opt.wasi ++ OPT_RC=$? ++ if [ $OPT_RC -ne 0 ] || [ ! -s ${PG_DIST}/pglite-opt.wasi ]; then ++ echo "FATAL: wasm-opt failed (exit code $OPT_RC)" ++ exit 1 ++ fi ++ mv ${PG_DIST}/pglite-opt.wasi ${PG_DIST}/pglite.wasi ++ echo "wasm-opt output:" ++ ls -lh ${PG_DIST}/pglite.wasi + cp ${PG_DIST}/pglite.wasi ${PGROOT}/bin/ touch ${PGROOT}/bin/initdb ${PGROOT}/bin/postgres