Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions parquet-cli/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ Usage: parquet [options] [command] [command options]
### Configuration Options

- `--conf` or `--property`: Set any configuration property in format `key=value`. Can be specified multiple times.
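- `--config-file`: Path to a configuration file. The format is selected by the file extension: `.properties` (Java properties) or `.xml` (Hadoop XML); any other extension is rejected.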

Examples:
```bash
Expand All @@ -147,4 +148,7 @@ parquet convert input.avro -o output.parquet --conf parquet.avro.write-old-list-
# Multiple options
parquet convert-csv input.csv -o output.parquet --schema schema.avsc --conf parquet.avro.write-parquet-uuid=true --conf parquet.avro.write-old-list-structure=false

# Using config file
parquet convert input.avro -o output.parquet --config-file config.properties

```
50 changes: 50 additions & 0 deletions parquet-cli/src/main/java/org/apache/parquet/cli/Main.java
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,16 @@
import com.beust.jcommander.Parameters;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableSet;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.List;
import java.util.Locale;
import java.util.Properties;
import java.util.Set;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Level;
Expand Down Expand Up @@ -73,6 +77,11 @@ public class Main extends Configured implements Tool {
description = "Set a configuration property (format: key=value). Can be specified multiple times.")
private List<String> confProperties;

/**
 * Value of the {@code --config-file} option: the path of a configuration file to apply.
 *
 * <p>{@code run} only loads the file when this field is non-null, picks the loader from the
 * file extension ({@code .xml} or {@code .properties}), and does so before it processes the
 * individual {@code --conf} properties.
 */
@Parameter(
names = {"--config-file"},
description = "Path to a configuration file (properties or Hadoop XML format).")
private String configFilePath;

@VisibleForTesting
@Parameter(names = "--dollar-zero", description = "A way for the runtime path to be passed in", hidden = true)
String programName = DEFAULT_PROGRAM_NAME;
Expand Down Expand Up @@ -172,6 +181,24 @@ public int run(String[] args) throws Exception {
// If the command does not support the configs, it would simply be ignored.
if (command instanceof Configurable) {
Configuration merged = new Configuration(getConf());

if (configFilePath != null) {
try {
if (isXmlConfigFile(configFilePath)) {
loadXmlConfiguration(merged, configFilePath);
} else if (isPropertiesConfigFile(configFilePath)) {
loadPropertiesConfiguration(merged, configFilePath);
} else {
throw new IllegalArgumentException(
"Unsupported config file format. Only .xml and .properties files are supported: "
+ configFilePath);
}
} catch (Exception e) {
throw new IllegalArgumentException(
"Failed to load config file '" + configFilePath + "': " + e.getMessage(), e);
}
}

if (confProperties != null) {
for (String prop : confProperties) {
String[] parts = prop.split("=", 2);
Expand Down Expand Up @@ -218,4 +245,27 @@ public static void main(String[] args) throws Exception {
int rc = ToolRunner.run(new Configuration(), new Main(console), args);
System.exit(rc);
}

/**
 * Tells whether {@code filePath} names an XML configuration file.
 *
 * <p>Only the extension is inspected. The comparison ignores case and uses
 * {@link Locale#ROOT}, so the result does not depend on the JVM's default locale.
 *
 * @param filePath path of the configuration file; must not be {@code null}
 * @return {@code true} if the path ends with {@code .xml}, ignoring case
 */
private boolean isXmlConfigFile(String filePath) {
    return filePath.toLowerCase(Locale.ROOT).endsWith(".xml");
}

/**
 * Tells whether {@code filePath} names a Java properties configuration file.
 *
 * <p>Only the extension is inspected. The comparison ignores case and uses
 * {@link Locale#ROOT}: with a locale-sensitive lower-casing, an upper-case {@code I} maps to a
 * dotless {@code ı} under Turkish/Azeri default locales, so {@code CONFIG.PROPERTIES} would
 * not be recognised.
 *
 * @param filePath path of the configuration file; must not be {@code null}
 * @return {@code true} if the path ends with {@code .properties}, ignoring case
 */
private boolean isPropertiesConfigFile(String filePath) {
    return filePath.toLowerCase(Locale.ROOT).endsWith(".properties");
}

/**
 * Registers an XML configuration file as a resource of {@code config}.
 *
 * <p>The file is checked for existence before it is registered, so that a mistyped path is
 * reported to the user in the same way as a missing {@code .properties} file.
 * NOTE(review): this guards against Hadoop's {@code Configuration} skipping a missing
 * {@code Path} resource without an error in its default quiet mode; confirm against the
 * Hadoop version in use.
 *
 * @param config configuration that receives the resource
 * @param filePath path of the XML configuration file on the local file system
 * @throws IllegalArgumentException if {@code filePath} does not denote an existing regular file
 */
private void loadXmlConfiguration(Configuration config, String filePath) {
    Path resource = new Path(filePath);
    // Check the path component of the resource URI so that both plain paths and
    // file: URIs are validated against the local file they refer to.
    File localFile = new File(resource.toUri().getPath()).getAbsoluteFile();
    if (!localFile.isFile()) {
        throw new IllegalArgumentException(
                "XML config file does not exist or is not a regular file: " + localFile);
    }
    config.addResource(resource);
    console.debug("Loaded XML configuration from file: {}", filePath);
}

/**
 * Reads a Java {@code .properties} file and copies every entry into {@code config}.
 *
 * @param config configuration that receives the entries
 * @param filePath path of the properties file to read
 * @throws Exception if the file cannot be opened or read
 */
private void loadPropertiesConfiguration(Configuration config, String filePath) throws Exception {
    Properties fileProps = new Properties();
    try (InputStream stream = new FileInputStream(filePath)) {
        fileProps.load(stream);
        // Entries parsed from a properties file are all strings, so the string view is complete.
        for (String name : fileProps.stringPropertyNames()) {
            config.set(name, fileProps.getProperty(name));
        }
        console.debug("Loaded properties configuration from file: {}", filePath);
    }
}
}
36 changes: 36 additions & 0 deletions parquet-cli/src/test/java/org/apache/parquet/cli/MainTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
*/
package org.apache.parquet.cli;

import java.io.File;
import java.io.FileWriter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.util.ToolRunner;
import org.junit.Assert;
Expand All @@ -31,4 +33,38 @@ public void mainTest() throws Exception {
ToolRunner.run(new Configuration(), new Main(LoggerFactory.getLogger(MainTest.class)), new String[] {});
Assert.assertTrue("we simply verify there are no errors here", true);
}

/**
 * Runs the {@code help} command with {@code --config-file} pointing at a freshly written
 * temporary {@code .properties} file and expects the run to complete without throwing.
 */
@Test
public void testConfigFileLoading() throws Exception {
    File configFile = File.createTempFile("test-config", ".properties");
    configFile.deleteOnExit();

    try (FileWriter writer = new FileWriter(configFile)) {
        writer.write("test.key=test.value\n");
    }

    // Let any exception propagate: JUnit then reports the failure together with the full
    // stack trace, which a catch-and-fail wrapper would discard.
    new Main(LoggerFactory.getLogger(MainTest.class))
            .run(new String[] {"--config-file", configFile.getAbsolutePath(), "help"});
}

/**
 * Runs the {@code version} command with {@code --config-file} set to the bundled
 * {@code test-config.properties} test resource.
 */
@Test
public void testLocalPropertiesFile() throws Exception {
    // URL.getPath() keeps percent-encoding (e.g. "%20" for a space), which is not a valid
    // file-system path; going through the URI decodes it correctly.
    String configFile = new File(getClass().getResource("/test-config.properties").toURI()).getPath();
    ToolRunner.run(new Configuration(), new Main(LoggerFactory.getLogger(MainTest.class)), new String[] {
        "--config-file", configFile, "version"
    });
}

/**
 * Runs the {@code version} command with {@code --config-file} set to the bundled
 * {@code test-config.xml} test resource.
 */
@Test
public void testLocalXmlFile() throws Exception {
    // URL.getPath() keeps percent-encoding (e.g. "%20" for a space), which is not a valid
    // file-system path; going through the URI decodes it correctly.
    String configFile = new File(getClass().getResource("/test-config.xml").toURI()).getPath();
    ToolRunner.run(new Configuration(), new Main(LoggerFactory.getLogger(MainTest.class)), new String[] {
        "--config-file", configFile, "version"
    });
}
}
21 changes: 21 additions & 0 deletions parquet-cli/src/test/resources/test-config.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

test.key=test.value
parquet.avro.write-old-list-structure=false
parquet.compression=SNAPPY
parquet.block.size=134217728
36 changes: 36 additions & 0 deletions parquet-cli/src/test/resources/test-config.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
<configuration>
<property>
<name>test.key</name>
<value>test.value</value>
</property>

<property>
<name>parquet.avro.write-old-list-structure</name>
<value>false</value>
</property>

<property>
<name>parquet.compression</name>
<value>SNAPPY</value>
</property>

</configuration>