diff --git a/parquet-cli/src/test/java/org/apache/parquet/cli/CapturingLogger.java b/parquet-cli/src/test/java/org/apache/parquet/cli/CapturingLogger.java
new file mode 100644
index 0000000000..646f51c707
--- /dev/null
+++ b/parquet-cli/src/test/java/org/apache/parquet/cli/CapturingLogger.java
@@ -0,0 +1,202 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.cli;
+
+import org.slf4j.helpers.MarkerIgnoringBase;
+import org.slf4j.helpers.MessageFormatter;
+
+// CapturingLogger is an slf4j Logger implementation that captures CLI output for use in tests.
+final class CapturingLogger extends MarkerIgnoringBase implements org.slf4j.Logger {
+ private final StringBuilder buf = new StringBuilder();
+
+ @Override
+ public String getName() {
+ return "CliTestLogger";
+ }
+
+ private void append(String msg) {
+ if (msg != null && !msg.isEmpty()) {
+ buf.append(msg).append('\n');
+ }
+ }
+
+ private void log(String fmt, Object... args) {
+ String message = MessageFormatter.arrayFormat(fmt, args).getMessage();
+ append(message);
+ }
+
+ String dump() {
+ return buf.toString();
+ }
+
+ // The CLI can call any console method, so every logging overload is delegated explicitly to
+ // make sure all of them are covered and the test harness does not miss any output.
+ // Unfortunately the slf4j API does not make this easy to do in a generic way, so we
+ // have to manually add each method.
+
+ @Override
+ public boolean isTraceEnabled() {
+ return true;
+ }
+
+ @Override
+ public boolean isDebugEnabled() {
+ return true;
+ }
+
+ @Override
+ public boolean isInfoEnabled() {
+ return true;
+ }
+
+ @Override
+ public boolean isWarnEnabled() {
+ return true;
+ }
+
+ @Override
+ public boolean isErrorEnabled() {
+ return true;
+ }
+
+ @Override
+ public void trace(String msg) {
+ append(msg);
+ }
+
+ @Override
+ public void trace(String format, Object arg) {
+ log(format, arg);
+ }
+
+ @Override
+ public void trace(String format, Object arg1, Object arg2) {
+ log(format, arg1, arg2);
+ }
+
+ @Override
+ public void trace(String format, Object... arguments) {
+ log(format, arguments);
+ }
+
+ @Override
+ public void trace(String msg, Throwable t) {
+   append(t == null ? msg : msg + ": " + t); // keep the exception in the captured output
+ }
+
+ @Override
+ public void debug(String msg) {
+ append(msg);
+ }
+
+ @Override
+ public void debug(String format, Object arg) {
+ log(format, arg);
+ }
+
+ @Override
+ public void debug(String format, Object arg1, Object arg2) {
+ log(format, arg1, arg2);
+ }
+
+ @Override
+ public void debug(String format, Object... arguments) {
+ log(format, arguments);
+ }
+
+ @Override
+ public void debug(String msg, Throwable t) {
+   append(t == null ? msg : msg + ": " + t); // keep the exception in the captured output
+ }
+
+ @Override
+ public void info(String msg) {
+ append(msg);
+ }
+
+ @Override
+ public void info(String format, Object arg) {
+ log(format, arg);
+ }
+
+ @Override
+ public void info(String format, Object arg1, Object arg2) {
+ log(format, arg1, arg2);
+ }
+
+ @Override
+ public void info(String format, Object... arguments) {
+ log(format, arguments);
+ }
+
+ @Override
+ public void info(String msg, Throwable t) {
+   append(t == null ? msg : msg + ": " + t); // keep the exception in the captured output
+ }
+
+ @Override
+ public void warn(String msg) {
+ append(msg);
+ }
+
+ @Override
+ public void warn(String format, Object arg) {
+ log(format, arg);
+ }
+
+ @Override
+ public void warn(String format, Object arg1, Object arg2) {
+ log(format, arg1, arg2);
+ }
+
+ @Override
+ public void warn(String format, Object... arguments) {
+ log(format, arguments);
+ }
+
+ @Override
+ public void warn(String msg, Throwable t) {
+   append(t == null ? msg : msg + ": " + t); // keep the exception in the captured output
+ }
+
+ @Override
+ public void error(String msg) {
+ append(msg);
+ }
+
+ @Override
+ public void error(String format, Object arg) {
+ log(format, arg);
+ }
+
+ @Override
+ public void error(String format, Object arg1, Object arg2) {
+ log(format, arg1, arg2);
+ }
+
+ @Override
+ public void error(String format, Object... arguments) {
+ log(format, arguments);
+ }
+
+ @Override
+ public void error(String msg, Throwable t) {
+   append(t == null ? msg : msg + ": " + t); // keep the exception in the captured output
+ }
+}
diff --git a/parquet-cli/src/test/java/org/apache/parquet/cli/CliHarness.java b/parquet-cli/src/test/java/org/apache/parquet/cli/CliHarness.java
new file mode 100644
index 0000000000..435abe7d36
--- /dev/null
+++ b/parquet-cli/src/test/java/org/apache/parquet/cli/CliHarness.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.cli;
+
+import org.apache.hadoop.conf.Configuration;
+import org.slf4j.Logger;
+
+public final class CliHarness {
+  // Runs Main in-process with a fresh CapturingLogger and a new Hadoop Configuration.
+  public CliResult run(String[] args) throws Exception {
+    CapturingLogger console = new CapturingLogger();
+    Main cliMain = new Main((Logger) console);
+    cliMain.setConf(new Configuration());
+    int exitCode = cliMain.run(args);
+    // Everything the command logged becomes the text of the result.
+    return new CliResult(exitCode, console.dump());
+  }
+}
diff --git a/parquet-cli/src/test/java/org/apache/parquet/cli/CliResult.java b/parquet-cli/src/test/java/org/apache/parquet/cli/CliResult.java
new file mode 100644
index 0000000000..c610341fa1
--- /dev/null
+++ b/parquet-cli/src/test/java/org/apache/parquet/cli/CliResult.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.cli;
+
+import static org.junit.Assert.*;
+
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+
+public final class CliResult { // exit code plus captured text, with chainable JUnit assertions
+ public final int exitCode; // exit code passed in by the harness
+ public final String text; // captured output that the assertions below inspect
+
+ CliResult(int exitCode, String text) {
+ this.exitCode = exitCode;
+ this.text = text;
+ }
+
+ public CliResult ok() { // asserts the exit code is 0
+ assertEquals("exit", 0, exitCode);
+ return this; // returned so further checks can be chained
+ }
+
+ public CliResult fails(int code) { // asserts the exit code equals the given code
+ assertEquals("exit", code, exitCode);
+ return this;
+ }
+
+ public CliResult outputContains(String... parts) { // every part must be a substring of text
+ for (String p : parts) assertTrue("missing: " + p, text.contains(p));
+ return this;
+ }
+
+ public CliResult outputNotContains(String... parts) { // no part may be a substring of text
+ for (String p : parts) assertFalse("should not contain: " + p, text.contains(p));
+ return this;
+ }
+
+ public CliResult lineCount(int expected) { // compares expected with the number of non-blank lines
+ long cnt = 0;
+ for (String line : text.split("\n")) {
+ if (!line.trim().isEmpty()) { // lines that are empty after trim() are not counted
+ cnt++;
+ }
+ }
+ assertEquals(expected, cnt);
+ return this;
+ }
+
+ public CliResult matchOutputFromFile(String filePath) throws Exception { // file is read as UTF-8
+ String expected = new String(Files.readAllBytes(Paths.get(filePath)), StandardCharsets.UTF_8);
+ return outputContains(expected); // substring check: the output may hold more than the file
+ }
+}
diff --git a/parquet-cli/src/test/java/org/apache/parquet/cli/CliTestBase.java b/parquet-cli/src/test/java/org/apache/parquet/cli/CliTestBase.java
new file mode 100644
index 0000000000..98c81e9d43
--- /dev/null
+++ b/parquet-cli/src/test/java/org/apache/parquet/cli/CliTestBase.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.cli;
+
+import org.apache.parquet.cli.commands.ParquetFileTest;
+
+/**
+ * Base class for CLI integration tests with an API for testing command output.
+ *
+ * Developer Usage Examples:
+ *
+ * // Basic command execution and assertion
+ * cli("schema file.parquet")
+ * .ok()
+ * .outputContains("int32_field", "int64_field");
+ *
+ * // Test help output
+ * cli("help size-stats")
+ * .ok()
+ * .matchOutputFromFile("expected-help.txt");
+ *
+ * // Test error conditions
+ * cli("invalid-command")
+ * .fails(1)
+ * .outputContains("Unknown command");
+ *
+ * // Test command with multiple arguments
+ * cli("size-stats", parquetFile.getAbsolutePath())
+ * .ok()
+ * .lineCount(8);
+ *
+ */
+public abstract class CliTestBase extends ParquetFileTest {
+ private final CliHarness harness = new CliHarness();
+
+ protected CliResult cli(Object... args) throws Exception {
+ String[] a = new String[args.length];
+ for (int i = 0; i < args.length; i++) {
+ a[i] = String.valueOf(args[i]);
+ }
+ return harness.run(a);
+ }
+
+ protected CliResult cli(String commandLine) throws Exception {
+   String trimmed = commandLine.trim(); // split() would yield a leading "" for leading whitespace
+   return cli((Object[]) (trimmed.isEmpty() ? new String[0] : trimmed.split("\\s+")));
+ }
+}
diff --git a/parquet-cli/src/test/java/org/apache/parquet/cli/SchemaCliTest.java b/parquet-cli/src/test/java/org/apache/parquet/cli/SchemaCliTest.java
new file mode 100644
index 0000000000..1d35f80a75
--- /dev/null
+++ b/parquet-cli/src/test/java/org/apache/parquet/cli/SchemaCliTest.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.cli;
+
+import java.io.File;
+import org.junit.Test;
+
+public class SchemaCliTest extends CliTestBase {
+
+ @Test
+ public void showsSchemaOutput() throws Exception {
+   File file = parquetFile(); // passed as its own argument so whitespace in the path is kept
+   cli("schema", file.getAbsolutePath()).ok().matchOutputFromFile("src/test/resources/cli-outputs/schema.txt");
+ }
+}
diff --git a/parquet-cli/src/test/java/org/apache/parquet/cli/ShowSizeStatisticsCliTest.java b/parquet-cli/src/test/java/org/apache/parquet/cli/ShowSizeStatisticsCliTest.java
new file mode 100644
index 0000000000..78d28d3e91
--- /dev/null
+++ b/parquet-cli/src/test/java/org/apache/parquet/cli/ShowSizeStatisticsCliTest.java
@@ -0,0 +1,148 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.parquet.cli;
+
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BOOLEAN;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.FLOAT;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64;
+import static org.apache.parquet.schema.Type.Repetition.OPTIONAL;
+import static org.apache.parquet.schema.Type.Repetition.REPEATED;
+import static org.apache.parquet.schema.Type.Repetition.REQUIRED;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Random;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.parquet.example.data.simple.SimpleGroup;
+import org.apache.parquet.hadoop.ParquetWriter;
+import org.apache.parquet.hadoop.example.ExampleParquetWriter;
+import org.apache.parquet.hadoop.example.GroupWriteSupport;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.PrimitiveType;
+import org.junit.Test;
+
+public class ShowSizeStatisticsCliTest extends CliTestBase {
+
+ private final int numRecord = 10000;
+
+ @Test
+ public void showSizeStatistics() throws Exception {
+   File file = createParquetFileWithStats();
+   // Separate arguments: a temp path containing whitespace must not be split by cli(String).
+   cli("size-stats", file.getAbsolutePath())
+       .ok()
+       .matchOutputFromFile("src/test/resources/cli-outputs/size-stats.txt");
+ }
+
+ private File createParquetFileWithStats() throws IOException {
+ MessageType schema = new MessageType(
+ "schema",
+ new PrimitiveType(REQUIRED, INT64, "DocId"),
+ new PrimitiveType(REQUIRED, INT32, "CategoryId"),
+ new PrimitiveType(OPTIONAL, BOOLEAN, "IsActive"),
+ new PrimitiveType(REPEATED, FLOAT, "Prices"),
+ new PrimitiveType(REPEATED, BINARY, "Tags"),
+ new PrimitiveType(REQUIRED, BINARY, "ProductName"),
+ new PrimitiveType(OPTIONAL, BINARY, "Description"),
+ new PrimitiveType(REQUIRED, FIXED_LEN_BYTE_ARRAY, 16, "UUID"));
+
+ Configuration conf = new Configuration();
+ conf.set(GroupWriteSupport.PARQUET_EXAMPLE_SCHEMA, schema.toString());
+
+ File file = new File(getTempFolder(), "test.parquet");
+ String filePath = file.getAbsolutePath();
+ ExampleParquetWriter.Builder builder = ExampleParquetWriter.builder(new Path(filePath))
+ .withType(schema)
+ .withSizeStatisticsEnabled(true)
+ .withPageRowCountLimit(50)
+ .withMinRowCountForPageSizeCheck(5)
+ .withDictionaryEncoding(true)
+ .withValidation(false)
+ .withConf(conf);
+
+ Random rnd = new Random(42);
+ try (ParquetWriter writer = builder.build()) {
+ for (int i = 0; i < numRecord; i++) {
+ SimpleGroup g = new SimpleGroup(schema);
+
+ g.add("DocId", rnd.nextLong());
+
+ g.add("CategoryId", rnd.nextInt(100));
+
+ // Operations to generate some non null meaningful test statistics on the parquet file.
+ if (i % 4 != 0) {
+ g.add("IsActive", rnd.nextBoolean());
+ }
+
+ int priceCount = rnd.nextInt(4);
+ for (int p = 0; p < priceCount; p++) {
+ g.add("Prices", rnd.nextFloat() * 1000);
+ }
+
+ String[] possibleTags = {"electronics", "bestseller", "new", "discount", "premium"};
+ int tagCount = rnd.nextInt(5);
+ for (int t = 0; t < tagCount; t++) {
+ g.add("Tags", Binary.fromString(possibleTags[rnd.nextInt(possibleTags.length)]));
+ }
+
+ String[] products = {
+ "Laptop",
+ "Mouse",
+ "Keyboard",
+ "Monitor",
+ "Headphones",
+ "Smartphone",
+ "Tablet",
+ "Camera",
+ "Printer",
+ "Speaker"
+ };
+ g.add("ProductName", Binary.fromString(products[i % products.length] + "_Model_" + (i % 50)));
+
+ if (i % 3 != 0) {
+ StringBuilder desc = new StringBuilder();
+ desc.append("Product description for item ").append(i).append(": ");
+ int descLength = rnd.nextInt(200) + 50;
+ for (int j = 0; j < descLength; j++) {
+ desc.append((char) ('a' + rnd.nextInt(26)));
+ }
+ g.add("Description", Binary.fromString(desc.toString()));
+ }
+
+ byte[] uuid = new byte[16];
+ rnd.nextBytes(uuid);
+ g.add("UUID", Binary.fromConstantByteArray(uuid));
+
+ writer.write(g);
+ }
+ }
+
+ return file;
+ }
+
+ @Test
+ public void showsHelpMessage() throws Exception {
+ cli("help size-stats").ok().matchOutputFromFile("src/test/resources/cli-outputs/size-stats-help.txt");
+ }
+}
diff --git a/parquet-cli/src/test/resources/cli-outputs/schema.txt b/parquet-cli/src/test/resources/cli-outputs/schema.txt
new file mode 100644
index 0000000000..1856e22cf9
--- /dev/null
+++ b/parquet-cli/src/test/resources/cli-outputs/schema.txt
@@ -0,0 +1,33 @@
+{
+ "type" : "record",
+ "name" : "schema",
+ "fields" : [ {
+ "name" : "int32_field",
+ "type" : "int"
+ }, {
+ "name" : "int64_field",
+ "type" : "long"
+ }, {
+ "name" : "float_field",
+ "type" : "float"
+ }, {
+ "name" : "double_field",
+ "type" : "double"
+ }, {
+ "name" : "binary_field",
+ "type" : "bytes"
+ }, {
+ "name" : "flba_field",
+ "type" : {
+ "type" : "fixed",
+ "name" : "flba_field",
+ "size" : 12
+ }
+ }, {
+ "name" : "date_field",
+ "type" : {
+ "type" : "int",
+ "logicalType" : "date"
+ }
+ } ]
+}
diff --git a/parquet-cli/src/test/resources/cli-outputs/size-stats-help.txt b/parquet-cli/src/test/resources/cli-outputs/size-stats-help.txt
new file mode 100644
index 0000000000..39e887614b
--- /dev/null
+++ b/parquet-cli/src/test/resources/cli-outputs/size-stats-help.txt
@@ -0,0 +1,10 @@
+Usage: parquet [general options] size-stats [command options]
+
+ Description:
+
+ Print size statistics for a Parquet file
+
+ Examples:
+
+ # Show size statistics for a Parquet file
+ parquet size-stats sample.parquet
diff --git a/parquet-cli/src/test/resources/cli-outputs/size-stats.txt b/parquet-cli/src/test/resources/cli-outputs/size-stats.txt
new file mode 100644
index 0000000000..06f261276b
--- /dev/null
+++ b/parquet-cli/src/test/resources/cli-outputs/size-stats.txt
@@ -0,0 +1,13 @@
+
+
+Row group 0
+--------------------------------------------------------------------------------
+column unencoded bytes rep level histogram def level histogram
+[DocId] - - -
+[CategoryId] - - -
+[IsActive] - - -
+[Prices] - [10000, 7425] -
+[Tags] 152.405 kB [10000, 11931] -
+[ProductName] 156.250 kB - -
+[Description] 1.170 MB - -
+[UUID] - - -
diff --git a/pom.xml b/pom.xml
index 71d0615727..36979e2294 100644
--- a/pom.xml
+++ b/pom.xml
@@ -507,6 +507,7 @@
thrift-${thrift.version}.tar.gz
**/dependency-reduced-pom.xml
**/*.rej
+ **/cli-outputs/**