From a6b4194230652c1a547564056faf831131e5a08d Mon Sep 17 00:00:00 2001 From: arnavb Date: Tue, 2 Sep 2025 12:19:02 +0000 Subject: [PATCH 1/3] update --- parquet-cli/README.md | 4 ++++ .../java/org/apache/parquet/cli/Main.java | 20 ++++++++++++++++++ .../java/org/apache/parquet/cli/MainTest.java | 21 +++++++++++++++++++ 3 files changed, 45 insertions(+) diff --git a/parquet-cli/README.md b/parquet-cli/README.md index c7b3540a4b..9feb665b91 100644 --- a/parquet-cli/README.md +++ b/parquet-cli/README.md @@ -137,6 +137,7 @@ Usage: parquet [options] [command] [command options] ### Configuration Options - `--conf` or `--property`: Set any configuration property in format `key=value`. Can be specified multiple times. +- `--config-file`: Path to a properties configuration file containing key=value pairs. Examples: ```bash @@ -147,4 +148,7 @@ parquet convert input.avro -o output.parquet --conf parquet.avro.write-old-list- # Multiple options parquet convert-csv input.csv -o output.parquet --schema schema.avsc --conf parquet.avro.write-parquet-uuid=true --conf parquet.avro.write-old-list-structure=false +# Using config file +parquet convert input.avro -o output.parquet --config-file config.properties + ``` diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/Main.java b/parquet-cli/src/main/java/org/apache/parquet/cli/Main.java index e93a21e899..593b235c9d 100644 --- a/parquet-cli/src/main/java/org/apache/parquet/cli/Main.java +++ b/parquet-cli/src/main/java/org/apache/parquet/cli/Main.java @@ -25,6 +25,9 @@ import com.beust.jcommander.Parameters; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableSet; +import java.io.FileInputStream; +import java.io.InputStream; +import java.util.Properties; import java.util.List; import java.util.Set; import org.apache.commons.logging.LogFactory; @@ -73,6 +76,11 @@ public class Main extends Configured implements Tool { description = "Set a configuration property (format: key=value). 
Can be specified multiple times.") private List<String> confProperties; + @Parameter( + names = {"--config-file"}, + description = "Path to a properties configuration file containing key=value pairs.") + private String configFilePath; + @VisibleForTesting @Parameter(names = "--dollar-zero", description = "A way for the runtime path to be passed in", hidden = true) String programName = DEFAULT_PROGRAM_NAME; @@ -172,6 +180,18 @@ public int run(String[] args) throws Exception { // If the command does not support the configs, it would simply be ignored. if (command instanceof Configurable) { Configuration merged = new Configuration(getConf()); + + if (configFilePath != null) { + try (InputStream in = new FileInputStream(configFilePath)) { + Properties props = new Properties(); + props.load(in); + props.forEach((key, value) -> merged.set(key.toString(), value.toString())); + console.debug("Loaded configuration from file: {}", configFilePath); + } catch (Exception e) { + throw new IllegalArgumentException("Failed to load config file '" + configFilePath + "': " + e.getMessage(), e); + } + } + if (confProperties != null) { for (String prop : confProperties) { String[] parts = prop.split("=", 2); diff --git a/parquet-cli/src/test/java/org/apache/parquet/cli/MainTest.java b/parquet-cli/src/test/java/org/apache/parquet/cli/MainTest.java index ec4f8cc1f2..5dcf4f3d44 100644 --- a/parquet-cli/src/test/java/org/apache/parquet/cli/MainTest.java +++ b/parquet-cli/src/test/java/org/apache/parquet/cli/MainTest.java @@ -18,6 +18,10 @@ */ package org.apache.parquet.cli; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.ToolRunner; import org.junit.Assert; @@ -31,4 +35,21 @@ public void mainTest() throws Exception { ToolRunner.run(new Configuration(), new Main(LoggerFactory.getLogger(MainTest.class)), new String[] {}); Assert.assertTrue("we simply 
verify there are no errors here", true); } + + @Test + public void testConfigFileLoading() throws Exception { + File configFile = File.createTempFile("test-config", ".properties"); + configFile.deleteOnExit(); + + try (FileWriter writer = new FileWriter(configFile)) { + writer.write("test.key=test.value\n"); + } + + try { + new Main(LoggerFactory.getLogger(MainTest.class)).run(new String[]{"--config-file", configFile.getAbsolutePath(), "help"}); + Assert.assertTrue("Config file loading should not throw exception", true); + } catch (IllegalArgumentException e) { + Assert.fail("Config file loading failed: " + e.getMessage()); + } + } } From af71b7cc5b8a01ce8c0e6a01f4d71585fcde7c50 Mon Sep 17 00:00:00 2001 From: arnavb Date: Wed, 3 Sep 2025 03:21:09 +0000 Subject: [PATCH 2/3] lint --- parquet-cli/src/main/java/org/apache/parquet/cli/Main.java | 5 +++-- .../src/test/java/org/apache/parquet/cli/MainTest.java | 5 ++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/Main.java b/parquet-cli/src/main/java/org/apache/parquet/cli/Main.java index 593b235c9d..6e8c97098e 100644 --- a/parquet-cli/src/main/java/org/apache/parquet/cli/Main.java +++ b/parquet-cli/src/main/java/org/apache/parquet/cli/Main.java @@ -27,8 +27,8 @@ import com.google.common.collect.ImmutableSet; import java.io.FileInputStream; import java.io.InputStream; -import java.util.Properties; import java.util.List; +import java.util.Properties; import java.util.Set; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configurable; @@ -188,7 +188,8 @@ public int run(String[] args) throws Exception { props.forEach((key, value) -> merged.set(key.toString(), value.toString())); console.debug("Loaded configuration from file: {}", configFilePath); } catch (Exception e) { - throw new IllegalArgumentException("Failed to load config file '" + configFilePath + "': " + e.getMessage(), e); + throw new IllegalArgumentException( + "Failed to 
load config file '" + configFilePath + "': " + e.getMessage(), e); } } diff --git a/parquet-cli/src/test/java/org/apache/parquet/cli/MainTest.java b/parquet-cli/src/test/java/org/apache/parquet/cli/MainTest.java index 5dcf4f3d44..a9ac32e3a0 100644 --- a/parquet-cli/src/test/java/org/apache/parquet/cli/MainTest.java +++ b/parquet-cli/src/test/java/org/apache/parquet/cli/MainTest.java @@ -20,8 +20,6 @@ import java.io.File; import java.io.FileWriter; -import java.io.IOException; -import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.ToolRunner; import org.junit.Assert; @@ -46,7 +44,8 @@ public void testConfigFileLoading() throws Exception { } try { - new Main(LoggerFactory.getLogger(MainTest.class)).run(new String[]{"--config-file", configFile.getAbsolutePath(), "help"}); + new Main(LoggerFactory.getLogger(MainTest.class)) + .run(new String[] {"--config-file", configFile.getAbsolutePath(), "help"}); Assert.assertTrue("Config file loading should not throw exception", true); } catch (IllegalArgumentException e) { Assert.fail("Config file loading failed: " + e.getMessage()); From af7c33da2329b81aa043e23ab4a105de9ee7ce34 Mon Sep 17 00:00:00 2001 From: arnavb Date: Mon, 8 Sep 2025 02:46:31 +0000 Subject: [PATCH 3/3] update --- parquet-cli/README.md | 2 +- .../java/org/apache/parquet/cli/Main.java | 41 ++++++++++++++++--- .../java/org/apache/parquet/cli/MainTest.java | 16 ++++++++ .../src/test/resources/test-config.properties | 21 ++++++++++ .../src/test/resources/test-config.xml | 36 ++++++++++++++++ 5 files changed, 109 insertions(+), 7 deletions(-) create mode 100644 parquet-cli/src/test/resources/test-config.properties create mode 100644 parquet-cli/src/test/resources/test-config.xml diff --git a/parquet-cli/README.md b/parquet-cli/README.md index 9feb665b91..4e9aea10e1 100644 --- a/parquet-cli/README.md +++ b/parquet-cli/README.md @@ -137,7 +137,7 @@ Usage: parquet [options] [command] [command options] 
### Configuration Options - `--conf` or `--property`: Set any configuration property in format `key=value`. Can be specified multiple times. -- `--config-file`: Path to a properties configuration file containing key=value pairs. +- `--config-file`: Path to a configuration file (`.properties` or `.xml` format). Examples: ```bash diff --git a/parquet-cli/src/main/java/org/apache/parquet/cli/Main.java b/parquet-cli/src/main/java/org/apache/parquet/cli/Main.java index 6e8c97098e..0f4932f3d9 100644 --- a/parquet-cli/src/main/java/org/apache/parquet/cli/Main.java +++ b/parquet-cli/src/main/java/org/apache/parquet/cli/Main.java @@ -34,6 +34,7 @@ import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import org.apache.log4j.Level; @@ -78,7 +79,7 @@ public class Main extends Configured implements Tool { @Parameter( names = {"--config-file"}, - description = "Path to a properties configuration file containing key=value pairs.") + description = "Path to a configuration file (properties or Hadoop XML format).") private String configFilePath; @VisibleForTesting @@ -182,11 +183,16 @@ public int run(String[] args) throws Exception { Configuration merged = new Configuration(getConf()); if (configFilePath != null) { - try (InputStream in = new FileInputStream(configFilePath)) { - Properties props = new Properties(); - props.load(in); - props.forEach((key, value) -> merged.set(key.toString(), value.toString())); - console.debug("Loaded configuration from file: {}", configFilePath); + try { + if (isXmlConfigFile(configFilePath)) { + loadXmlConfiguration(merged, configFilePath); + } else if (isPropertiesConfigFile(configFilePath)) { + loadPropertiesConfiguration(merged, configFilePath); + } else { + throw new IllegalArgumentException( + "Unsupported config file format. 
Only .xml and .properties files are supported: " + + configFilePath); + } } catch (Exception e) { throw new IllegalArgumentException( "Failed to load config file '" + configFilePath + "': " + e.getMessage(), e); @@ -239,4 +245,27 @@ public static void main(String[] args) throws Exception { int rc = ToolRunner.run(new Configuration(), new Main(console), args); System.exit(rc); } + + private boolean isXmlConfigFile(String filePath) { + return filePath.toLowerCase(java.util.Locale.ROOT).endsWith(".xml"); // Locale.ROOT: match must not depend on the JVM default locale + } + + private boolean isPropertiesConfigFile(String filePath) { + String lowerPath = filePath.toLowerCase(java.util.Locale.ROOT); // default-locale lowercasing turns 'I' into a dotless i under tr/az, so ".PROPERTIES" would not match + return lowerPath.endsWith(".properties"); + } + + private void loadXmlConfiguration(Configuration config, String filePath) { + config.addResource(new Path(filePath)); + console.debug("Loaded XML configuration from file: {}", filePath); + } + + private void loadPropertiesConfiguration(Configuration config, String filePath) throws Exception { + try (InputStream in = new FileInputStream(filePath)) { + Properties props = new Properties(); + props.load(in); + props.forEach((key, value) -> config.set(key.toString(), value.toString())); + console.debug("Loaded properties configuration from file: {}", filePath); + } + } } diff --git a/parquet-cli/src/test/java/org/apache/parquet/cli/MainTest.java b/parquet-cli/src/test/java/org/apache/parquet/cli/MainTest.java index a9ac32e3a0..6bf54bdf05 100644 --- a/parquet-cli/src/test/java/org/apache/parquet/cli/MainTest.java +++ b/parquet-cli/src/test/java/org/apache/parquet/cli/MainTest.java @@ -51,4 +51,20 @@ public void testConfigFileLoading() throws Exception { Assert.fail("Config file loading failed: " + e.getMessage()); } } + + @Test + public void testLocalPropertiesFile() throws Exception { + String configFile = new File(getClass().getResource("/test-config.properties").toURI()).getAbsolutePath(); // via URI: URL.getPath() is percent-encoded and breaks on paths with spaces + ToolRunner.run(new Configuration(), new Main(LoggerFactory.getLogger(MainTest.class)), new String[] { + "--config-file", configFile, "version" + }); + } + + @Test + 
public void testLocalXmlFile() throws Exception { + String configFile = new File(getClass().getResource("/test-config.xml").toURI()).getAbsolutePath(); // via URI: URL.getPath() is percent-encoded and breaks on paths with spaces + ToolRunner.run(new Configuration(), new Main(LoggerFactory.getLogger(MainTest.class)), new String[] { + "--config-file", configFile, "version" + }); + } } diff --git a/parquet-cli/src/test/resources/test-config.properties b/parquet-cli/src/test/resources/test-config.properties new file mode 100644 index 0000000000..40d792fcab --- /dev/null +++ b/parquet-cli/src/test/resources/test-config.properties @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +test.key=test.value +parquet.avro.write-old-list-structure=false +parquet.compression=SNAPPY +parquet.block.size=134217728 diff --git a/parquet-cli/src/test/resources/test-config.xml b/parquet-cli/src/test/resources/test-config.xml new file mode 100644 index 0000000000..b7d17e917e --- /dev/null +++ b/parquet-cli/src/test/resources/test-config.xml @@ -0,0 +1,36 @@ + + + + + test.key + test.value + + + + parquet.avro.write-old-list-structure + false + + + + parquet.compression + SNAPPY + + +