Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ jobs:
with:
fetch-depth: 0

- name: Install JDK 11
- name: Install JDK 21
uses: actions/setup-java@v4
with:
distribution: zulu
java-version: '11'
java-version: '21'
java-package: jdk

- name: Install SBT
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ jobs:
- name: Checkout
uses: actions/checkout@v4

- name: Install JDK 11
- name: Install JDK 21
uses: actions/setup-java@v4
with:
distribution: zulu
java-version: '11'
java-version: '21'
java-package: jdk

- name: Install SBT
Expand Down
2 changes: 2 additions & 0 deletions .jvmopts
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
--enable-preview
--add-modules=jdk.incubator.vector
2 changes: 1 addition & 1 deletion .scalafmt.conf
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ rewrite.rules = [ Imports ]
rewrite.imports.expand = true
rewrite.imports.sort = ascii
rewrite.imports.groups = [
["(?!javax?\\.|scala\\.).+"],
["(?!javax?\\.|jdk\\.|scala\\.).+"],
]
rewrite.trailingCommas.style = always
spaces.neverAroundInfixTypes = [ "##" ]
Expand Down
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
The MIT License (MIT)

Copyright (c) 2022-2022 Sam Guymer
Copyright (c) 2022-2025 Sam Guymer

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
Expand Down
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,8 @@ case class Test(
int: Int,
bool: Boolean,
optInt: Option[Int],
)
) derives CsvRecordDecoder
object Test {
implicit val decoder: CsvRecordDecoder[Test] = CsvRecordDecoder.derive
val header = ::("str", List("int", "bool", "opt_int"))
val csvHeader = CsvHeader.create(header)(decoder)
}
Expand Down
20 changes: 12 additions & 8 deletions benchmark/README.md
Original file line number Diff line number Diff line change
@@ -1,26 +1,30 @@

`benchmark/Jmh/run -i 10 -wi 5 -f 1 -t 2 ceesvee.benchmark.ParserBenchmark`

AMD Ryzen 9 9950X
```
# JMH version: 1.37
# VM version: JDK 25.0.1, OpenJDK 64-Bit Server VM, 25.0.1
Benchmark Mode Cnt Score Error Units
ParserBenchmark.ceesvee avgt 10 261.357 ± 1.787 us/op
ParserBenchmark.scalaCsv avgt 10 741.778 ± 6.433 us/op
ParserBenchmark.univocity avgt 10 200.482 ± 2.715 us/op
Benchmark Mode Cnt Score Error Units
ParserBenchmark.ceesvee avgt 10 263.230 ± 0.679 us/op
ParserBenchmark.ceesveeVector avgt 10 134.205 ± 0.302 us/op
ParserBenchmark.scalaCsv avgt 10 748.232 ± 2.016 us/op
ParserBenchmark.univocity avgt 10 198.765 ± 0.982 us/op
```

```
# JMH version: 1.37
# VM version: JDK 25, OpenJDK 64-Bit Server VM, 25+37-jvmci-b01
Benchmark Mode Cnt Score Error Units
ParserBenchmark.ceesvee avgt 10 197.994 ± 2.344 us/op
ParserBenchmark.scalaCsv avgt 10 776.080 ± 1.457 us/op
ParserBenchmark.univocity avgt 10 208.226 ± 2.501 us/op
Benchmark Mode Cnt Score Error Units
ParserBenchmark.ceesvee avgt 10 187.441 ± 1.345 us/op
ParserBenchmark.ceesveeVector avgt 10 1484.755 ± 14.298 us/op
ParserBenchmark.scalaCsv avgt 10 780.945 ± 2.340 us/op
ParserBenchmark.univocity avgt 10 204.178 ± 1.702 us/op
```

`benchmark/Jmh/run -i 10 -wi 5 -f 1 -t 2 ceesvee.benchmark.DecoderBenchmark`

AMD Ryzen 9 9950X
```
# JMH version: 1.37
# VM version: JDK 25.0.1, OpenJDK 64-Bit Server VM, 25.0.1
Expand Down
20 changes: 16 additions & 4 deletions benchmark/src/main/scala/ceesvee/benchmark/ParserBenchmark.scala
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,23 @@ import java.util.concurrent.TimeUnit
@State(Scope.Thread)
@BenchmarkMode(Array(Mode.AverageTime))
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@Fork(
jvmArgs = Array(
"--enable-preview",
"--add-modules=jdk.incubator.vector",
),
)
class ParserBenchmark {

private def line(i: Int) = List("basic string", " \"quoted \nstring\" ", i.toString, "456.789", "true").mkString(",")

private val charset = StandardCharsets.UTF_8
private val lines = (1 to 1000).map(line(_)).mkString("\n")
private def linesChunked = lines.grouped(8192)
private val linesBytes = lines.getBytes(charset)
private def linesReader = {
val streams = new java.util.ArrayList[ByteArrayInputStream]()
linesChunked.foreach { str =>
streams.add(new ByteArrayInputStream(str.getBytes(StandardCharsets.UTF_8)))
linesBytes.grouped(8192).foreach { bytes =>
streams.add(new ByteArrayInputStream(bytes))
}
val is = new SequenceInputStream(java.util.Collections.enumeration(streams))
new InputStreamReader(is)
Expand All @@ -32,7 +39,12 @@ class ParserBenchmark {

@Benchmark
def ceesvee: List[List[String]] = {
_root_.ceesvee.CsvParser.parse[List](linesChunked, ceesveeOptions).toList
_root_.ceesvee.CsvParser.parse[List](lines.grouped(8192), ceesveeOptions).toList
}

@Benchmark
def ceesveeVector: List[List[String]] = {
_root_.ceesvee.CsvParserVector.parse[List](linesBytes.grouped(8192), charset, ceesveeOptions).toList
}

@Benchmark
Expand Down
8 changes: 3 additions & 5 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ lazy val commonSettings = Seq(
"-deprecation",
"-encoding", "UTF-8",
"-feature",
"-release", "11",
"-release", "21",
"-unchecked",
),
scalacOptions ++= (CrossVersion.partialVersion(scalaVersion.value) match {
Expand All @@ -47,6 +47,7 @@ lazy val commonSettings = Seq(
case Some((2, _)) => Seq(
"-Vimplicits",
"-Vtype-diffs",
"-Wconf:cat=scala3-migration:silent",
"-Wdead-code",
"-Wextra-implicit",
"-Wnonunit-statement",
Expand All @@ -59,15 +60,12 @@ lazy val commonSettings = Seq(
"-Xlint:_,-byname-implicit", // exclude byname-implicit https://github.com/scala/bug/issues/12072
)
case _ => Seq(
"-Wconf:name=PatternMatchExhaustivity:error",
"-Wnonunit-statement",
"-Wunused:all",
"-Wvalue-discard",
)
}),
Test / scalacOptions ++= (CrossVersion.partialVersion(scalaVersion.value) match {
case Some((2, _)) => Seq("-Wconf:cat=scala3-migration:silent")
case _ => Seq.empty
}),

Compile / console / scalacOptions ~= filterScalacConsoleOpts,
Test / console / scalacOptions ~= filterScalacConsoleOpts,
Expand Down
9 changes: 9 additions & 0 deletions docs/architecture.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
## Architecture

### Module Structure

- **core** - Main CSV parsing and encoding/decoding logic. Its only dependency is an optional one on [cats](https://github.com/typelevel/cats)
- **fs2** - Integration with [fs2](https://github.com/typelevel/fs2) streams
- **zio** - Integration with [ZIO](https://github.com/zio/zio) streams
- **benchmark** - JMH performance benchmarks comparing against other CSV libraries
- **tests** - Integration tests with real-world CSV files
2 changes: 1 addition & 1 deletion modules/core/src/main/scala/ceesvee/CsvParser.scala
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ object CsvParser {
fields.result()
}

private def trimString(options: Options, str: String) = {
private[ceesvee] def trimString(options: Options, str: String) = {
// always ignore whitespace around a quoted cell
val trimmed = Options.Trim.True.strip(str)

Expand Down
Loading