diff --git a/CHANGELOG.md b/CHANGELOG.md index d83178ed..4603dd5f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- [#184](https://github.com/green-code-initiative/creedengo-java/issues/184) GCI99 - Add new Java rule, avoid CSV format, prefer Apache Parquet + ### Changed - [#119](https://github.com/green-code-initiative/creedengo-java/issues/119) GCI94 - reduce false positives: rule no longer flags `orElse()` when argument is a constant, literal, static field or null; detection extended to Optional variables (semantic type check) and to computed arguments nested inside concatenation, ternary or object instantiation diff --git a/src/it/java/org/greencodeinitiative/creedengo/java/integration/tests/GCIRulesIT.java b/src/it/java/org/greencodeinitiative/creedengo/java/integration/tests/GCIRulesIT.java index 86c5f5fc..f98f40c2 100644 --- a/src/it/java/org/greencodeinitiative/creedengo/java/integration/tests/GCIRulesIT.java +++ b/src/it/java/org/greencodeinitiative/creedengo/java/integration/tests/GCIRulesIT.java @@ -529,4 +529,25 @@ void testGCI94() { checkIssuesForFile(filePath, ruleId, ruleMsg, startLines, endLines, SEVERITY, TYPE, EFFORT_1MIN); } + @Test + void testGCI99() { + String filePath = "src/main/java/org/greencodeinitiative/creedengo/java/checks/GCI99/AvoidCSVFormat.java"; + String ruleId = "creedengo-java:GCI99"; + String ruleMsg = "Avoid CSV format, prefer Parquet format for better performance and smaller footprint."; + int[] startLines = new int[]{20, 21, 22, 23, 24, 25}; + int[] endLines = new int[]{20, 21, 22, 23, 24, 25}; + + checkIssuesForFile(filePath, ruleId, ruleMsg, startLines, endLines, SEVERITY, TYPE, EFFORT_20MIN); + } + + @Test + void testGCI99_good() { + String filePath = "src/main/java/org/greencodeinitiative/creedengo/java/checks/GCI99/AvoidCSVFormatNoIssue.java"; + String ruleId = "creedengo-java:GCI99"; + String ruleMsg = "Avoid CSV 
format, prefer Parquet format for better performance and smaller footprint."; + int[] startLines = new int[]{}; + int[] endLines = new int[]{}; + + checkIssuesForFile(filePath, ruleId, ruleMsg, startLines, endLines, SEVERITY, TYPE, EFFORT_20MIN); + } } diff --git a/src/it/test-projects/creedengo-java-plugin-test-project/pom.xml b/src/it/test-projects/creedengo-java-plugin-test-project/pom.xml index cfb7ba4f..5572bce7 100644 --- a/src/it/test-projects/creedengo-java-plugin-test-project/pom.xml +++ b/src/it/test-projects/creedengo-java-plugin-test-project/pom.xml @@ -32,6 +32,37 @@ spring-beans 5.3.25 + + + com.opencsv + opencsv + 5.9 + + + org.apache.commons + commons-csv + 1.11.0 + + + com.univocity + univocity-parsers + 2.9.1 + + + com.fasterxml.jackson.dataformat + jackson-dataformat-csv + 2.18.2 + + + net.sf.supercsv + super-csv + 2.4.0 + + + net.sf.flatpack + flatpack + 4.0.15 + \ No newline at end of file diff --git a/src/it/test-projects/creedengo-java-plugin-test-project/src/main/java/org/greencodeinitiative/creedengo/java/checks/GCI99/AvoidCSVFormat.java b/src/it/test-projects/creedengo-java-plugin-test-project/src/main/java/org/greencodeinitiative/creedengo/java/checks/GCI99/AvoidCSVFormat.java new file mode 100644 index 00000000..f61802e4 --- /dev/null +++ b/src/it/test-projects/creedengo-java-plugin-test-project/src/main/java/org/greencodeinitiative/creedengo/java/checks/GCI99/AvoidCSVFormat.java @@ -0,0 +1,28 @@ +/* + * creedengo - Java language - Provides rules to reduce the environmental footprint of your Java programs + * Copyright © 2024 Green Code Initiative (https://green-code-initiative.org/) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package org.greencodeinitiative.creedengo.java.checks; + +import com.opencsv.CSVWriter; // Noncompliant {{Avoid CSV format, prefer Parquet format for better performance and smaller footprint.}} +import org.apache.commons.csv.CSVPrinter; // Noncompliant {{Avoid CSV format, prefer Parquet format for better performance and smaller footprint.}} +import com.univocity.parsers.csv.CsvParser; // Noncompliant {{Avoid CSV format, prefer Parquet format for better performance and smaller footprint.}} +import com.fasterxml.jackson.dataformat.csv.CsvMapper; // Noncompliant {{Avoid CSV format, prefer Parquet format for better performance and smaller footprint.}} +import org.supercsv.io.CsvBeanWriter; // Noncompliant {{Avoid CSV format, prefer Parquet format for better performance and smaller footprint.}} +import net.sf.flatpack.DataSet; // Noncompliant {{Avoid CSV format, prefer Parquet format for better performance and smaller footprint.}} + +public class AvoidCSVFormat { +} diff --git a/src/it/test-projects/creedengo-java-plugin-test-project/src/main/java/org/greencodeinitiative/creedengo/java/checks/GCI99/AvoidCSVFormatNoIssue.java b/src/it/test-projects/creedengo-java-plugin-test-project/src/main/java/org/greencodeinitiative/creedengo/java/checks/GCI99/AvoidCSVFormatNoIssue.java new file mode 100644 index 00000000..b9e9ff53 --- /dev/null +++ b/src/it/test-projects/creedengo-java-plugin-test-project/src/main/java/org/greencodeinitiative/creedengo/java/checks/GCI99/AvoidCSVFormatNoIssue.java @@ -0,0 +1,34 @@ +/* + * creedengo - Java language - Provides rules to reduce the environmental footprint of your Java 
programs + * Copyright © 2024 Green Code Initiative (https://green-code-initiative.org/) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package org.greencodeinitiative.creedengo.java.checks; + +import java.io.FileOutputStream; +import java.io.IOException; + +/** + * Compliant — uses standard I/O only, no CSV library. + * In a real project this would use Apache Parquet or Apache Avro instead. + */ +public class AvoidCSVFormatNoIssue { + + public void writeData(String path) throws IOException { + try (FileOutputStream fos = new FileOutputStream(path)) { + fos.write("data".getBytes()); + } + } +} diff --git a/src/main/java/org/greencodeinitiative/creedengo/java/JavaCheckRegistrar.java b/src/main/java/org/greencodeinitiative/creedengo/java/JavaCheckRegistrar.java index 791f0cef..3d7dc498 100644 --- a/src/main/java/org/greencodeinitiative/creedengo/java/JavaCheckRegistrar.java +++ b/src/main/java/org/greencodeinitiative/creedengo/java/JavaCheckRegistrar.java @@ -50,7 +50,8 @@ public class JavaCheckRegistrar implements CheckRegistrar { FreeResourcesOfAutoCloseableInterface.class, AvoidMultipleIfElseStatement.class, UseOptionalOrElseGetVsOrElse.class, - MakeNonReassignedVariablesConstants.class + MakeNonReassignedVariablesConstants.class, + AvoidCSVFormat.class ); /** diff --git a/src/main/java/org/greencodeinitiative/creedengo/java/checks/AvoidCSVFormat.java 
b/src/main/java/org/greencodeinitiative/creedengo/java/checks/AvoidCSVFormat.java new file mode 100644 index 00000000..9c5e15cf --- /dev/null +++ b/src/main/java/org/greencodeinitiative/creedengo/java/checks/AvoidCSVFormat.java @@ -0,0 +1,74 @@ +/* + * creedengo - Java language - Provides rules to reduce the environmental footprint of your Java programs + * Copyright © 2024 Green Code Initiative (https://green-code-initiative.org/) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package org.greencodeinitiative.creedengo.java.checks; + +import java.util.Collections; +import java.util.List; +import java.util.Set; + +import javax.annotation.Nonnull; +import org.sonar.check.Rule; +import org.sonar.plugins.java.api.IssuableSubscriptionVisitor; +import org.sonar.plugins.java.api.tree.IdentifierTree; +import org.sonar.plugins.java.api.tree.ImportTree; +import org.sonar.plugins.java.api.tree.MemberSelectExpressionTree; +import org.sonar.plugins.java.api.tree.Tree; + +@Rule(key = "GCI99") +public class AvoidCSVFormat extends IssuableSubscriptionVisitor { + + protected static final String MESSAGE_RULE = "Avoid CSV format, prefer Parquet format for better performance and smaller footprint."; + + // Known Java CSV library package prefixes. 
+ private static final Set CSV_PACKAGES = Set.of( + "com.opencsv.", + "org.apache.commons.csv.", + "com.univocity.parsers.csv.", + "com.fasterxml.jackson.dataformat.csv.", + "org.supercsv.", + "net.sf.flatpack." + ); + + @Override + public List nodesToVisit() { + return Collections.singletonList(Tree.Kind.IMPORT); + } + + @Override + public void visitNode(@Nonnull Tree tree) { + ImportTree importTree = (ImportTree) tree; + String importName = buildImportString(importTree.qualifiedIdentifier()); + for (String csvPackage : CSV_PACKAGES) { + if (importName.startsWith(csvPackage)) { + reportIssue(importTree, MESSAGE_RULE); + return; + } + } + } + + private static String buildImportString(Tree tree) { + if (tree instanceof IdentifierTree) { + return ((IdentifierTree) tree).name(); + } + if (tree instanceof MemberSelectExpressionTree) { + MemberSelectExpressionTree mset = (MemberSelectExpressionTree) tree; + return buildImportString(mset.expression()) + "." + mset.identifier().name(); + } + return ""; + } +} diff --git a/src/main/resources/org/green-code-initiative/rules/java/GCI99.html b/src/main/resources/org/green-code-initiative/rules/java/GCI99.html new file mode 100644 index 00000000..cf7d9458 --- /dev/null +++ b/src/main/resources/org/green-code-initiative/rules/java/GCI99.html @@ -0,0 +1,41 @@ +
+

Avoid using CSV format for data exchange. Prefer Apache Parquet instead.

+
+
+

The CSV format has several drawbacks compared to columnar binary formats like Parquet:

+
+
+
    +
  • Size: Parquet compresses data significantly better than plain-text CSV, reducing storage and network transfer costs.

  • +
  • Read performance: Parquet supports column pruning and predicate push-down, so only the required columns and rows are read.

  • +
  • Write performance: Parquet encoding (dictionary, RLE, bit-packing) makes writes faster for large datasets.

  • +
  • Schema: Parquet embeds schema metadata, removing the need for fragile header-row parsing.

  • +
+
+
+

This rule detects imports from the most popular Java CSV libraries (OpenCSV, Apache Commons CSV, Univocity Parsers, Jackson CSV, Super CSV, FlatPack).

+
+
+

Noncompliant Code Example

+
+
+
+
import com.opencsv.CSVWriter;               // Noncompliant
+import org.apache.commons.csv.CSVPrinter;   // Noncompliant
+import com.univocity.parsers.csv.CsvParser; // Noncompliant
+
+
+
+
+
+

Compliant Solution

+
+
+
+
// Use Apache Parquet, Apache Avro, or Apache ORC instead
+import org.apache.parquet.hadoop.ParquetWriter;
+import org.apache.avro.file.DataFileWriter;
+
+
+
+
diff --git a/src/main/resources/org/green-code-initiative/rules/java/GCI99.json b/src/main/resources/org/green-code-initiative/rules/java/GCI99.json new file mode 100644 index 00000000..6523db04 --- /dev/null +++ b/src/main/resources/org/green-code-initiative/rules/java/GCI99.json @@ -0,0 +1,16 @@ +{ + "title": "Avoid CSV format, prefer Parquet.", + "type": "CODE_SMELL", + "status": "ready", + "remediation": { + "func": "Constant\/Issue", + "constantCost": "20min" + }, + "tags": [ + "eco-design", + "performance", + "data", + "creedengo" + ], + "defaultSeverity": "Minor" +} diff --git a/src/main/resources/org/greencodeinitiative/creedengo/java/creedengo_way_profile.json b/src/main/resources/org/greencodeinitiative/creedengo/java/creedengo_way_profile.json index 059bf0f5..4cf572d6 100644 --- a/src/main/resources/org/greencodeinitiative/creedengo/java/creedengo_way_profile.json +++ b/src/main/resources/org/greencodeinitiative/creedengo/java/creedengo_way_profile.json @@ -18,6 +18,7 @@ "GCI78", "GCI79", "GCI82", - "GCI94" + "GCI94", + "GCI99" ] } diff --git a/src/test/java/org/greencodeinitiative/creedengo/java/checks/GCI99/AvoidCSVFormatTest.java b/src/test/java/org/greencodeinitiative/creedengo/java/checks/GCI99/AvoidCSVFormatTest.java new file mode 100644 index 00000000..4e017d47 --- /dev/null +++ b/src/test/java/org/greencodeinitiative/creedengo/java/checks/GCI99/AvoidCSVFormatTest.java @@ -0,0 +1,42 @@ +/* + * creedengo - Java language - Provides rules to reduce the environmental footprint of your Java programs + * Copyright © 2024 Green Code Initiative (https://green-code-initiative.org/) + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package org.greencodeinitiative.creedengo.java.checks.GCI99; + +import org.greencodeinitiative.creedengo.java.checks.AvoidCSVFormat; +import org.junit.jupiter.api.Test; +import org.sonar.java.checks.verifier.CheckVerifier; + +class AvoidCSVFormatTest { + + @Test + void testHasIssues() { + CheckVerifier.newVerifier() + .onFile(System.getProperty("testfiles.path") + "/GCI99/AvoidCSVFormat.java") + .withCheck(new AvoidCSVFormat()) + .verifyIssues(); + } + + @Test + void testNoIssues() { + CheckVerifier.newVerifier() + .onFile(System.getProperty("testfiles.path") + "/GCI99/AvoidCSVFormatNoIssue.java") + .withCheck(new AvoidCSVFormat()) + .verifyNoIssues(); + } + +}