diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bd6b846..c54eb3a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,7 +7,7 @@ on: jobs: test: - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 name: Test strategy: matrix: @@ -26,6 +26,9 @@ jobs: java-version: '11' java-package: jdk + - name: Install SBT + uses: sbt/setup-sbt@v1 + - name: Cache SBT uses: coursier/cache-action@v6 with: diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 99892ba..75096a0 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -5,7 +5,7 @@ on: jobs: publish: - runs-on: ubuntu-22.04 + runs-on: ubuntu-24.04 name: Publish steps: - name: Checkout @@ -18,6 +18,9 @@ jobs: java-version: '11' java-package: jdk + - name: Install SBT + uses: sbt/setup-sbt@v1 + - name: Cache SBT uses: coursier/cache-action@v6 with: diff --git a/.scalafmt.conf b/.scalafmt.conf index 7b4abe6..2a837fc 100644 --- a/.scalafmt.conf +++ b/.scalafmt.conf @@ -1,47 +1,41 @@ -version = 3.8.1 +version = 3.8.5 runner.dialect = scala213source3 align.preset = none align.stripMargin = false align.tokens = [] assumeStandardLibraryStripMargin = true -continuationIndent.callSite = 2 -continuationIndent.defnSite = 2 -continuationIndent.extendSite = 2 -danglingParentheses.exclude = [ - trait -] +danglingParentheses.exclude = [ trait ] +docstrings.style = Asterisk +docstrings.wrapMaxColumn = 80 +indent.callSite = 2 +indent.defnSite = 2 +indent.extendSite = 2 indentOperator.exclude = "^(&&|\\|\\||\\||\\|@\\||<\\*>|<&>|::|~)$" indentOperator.topLevelOnly = false maxColumn = 120 newlines.afterCurlyLambdaParams = preserve -newlines.avoidForSimpleOverflow = [ - toolong - punct -] +newlines.avoidForSimpleOverflow = [ toolong, punct ] newlines.avoidInResultType = true newlines.beforeMultiline = keep newlines.beforeMultilineDef = keep +newlines.inInterpolation = avoid newlines.sometimesBeforeColonInMethodReturnType = false newlines.source = keep -rewrite.rules = [Imports] +rewrite.rules = [ Imports ] rewrite.imports.expand = true rewrite.imports.sort = ascii rewrite.imports.groups = [ ["(?!javax?\\.|scala\\.).+"], ] -rewrite.scala3.convertToNewSyntax = false rewrite.trailingCommas.style = always -runner.optimizer.forceConfigStyleMinArgCount = 10 verticalMultiline.arityThreshold = 120 -docstrings.style = Asterisk -docstrings.wrapMaxColumn = 80 -spaces.neverAroundInfixTypes = [ - "##" -] +spaces.neverAroundInfixTypes = [ "##" ] fileOverride { "glob:**/scala-3/**" { runner.dialect = scala3 + runner.dialectOverride.withAllowEndMarker = false + runner.dialectOverride.allowSignificantIndentation = false } } diff --git a/benchmark/README.md b/benchmark/README.md index 9ce7dc0..fb411df 100644 --- a/benchmark/README.md +++ b/benchmark/README.md @@ -3,19 +3,19 @@ ``` # JMH version: 1.37 -# VM version: JDK 21.0.2, OpenJDK 64-Bit Server VM, 21.0.2+13-jvmci-23.1-b30 -Benchmark Mode Cnt Score Error Units -ParserBenchmark.ceesvee avgt 10 322.808 ± 3.382 us/op -ParserBenchmark.scalaCsv avgt 10 1066.774 ± 10.905 us/op -ParserBenchmark.univocity avgt 10 309.066 ± 7.617 us/op +# VM version: JDK 23.0.2, OpenJDK 64-Bit Server VM, 23.0.2+7-jvmci-b01 +Benchmark Mode Cnt Score Error Units +ParserBenchmark.ceesvee avgt 10 267.157 ± 2.295 us/op +ParserBenchmark.scalaCsv avgt 10 776.875 ± 3.156 us/op +ParserBenchmark.univocity avgt 10 190.484 ± 0.927 us/op ``` `benchmark/Jmh/run -i 10 -wi 5 -f 1 -t 2 ceesvee.benchmark.DecoderBenchmark` ``` # JMH version: 1.37 -# VM version: JDK 21.0.2, OpenJDK 64-Bit Server VM, 21.0.2+13-jvmci-23.1-b30 +# VM version: JDK 23.0.2, OpenJDK 64-Bit Server VM, 23.0.2+7-jvmci-b01 Benchmark Mode Cnt Score Error Units -DecoderBenchmark.ceesvee avgt 10 0.136 ± 0.001 us/op -DecoderBenchmark.univocity avgt 10 0.012 ± 0.001 us/op +DecoderBenchmark.ceesvee avgt 10 0.115 ± 0.001 us/op +DecoderBenchmark.univocity avgt 10 0.011 ± 0.001 us/op ``` diff --git a/benchmark/src/main/scala/ceesvee/benchmark/ParserBenchmark.scala b/benchmark/src/main/scala/ceesvee/benchmark/ParserBenchmark.scala index a2de5da..fb4018b 100644 --- a/benchmark/src/main/scala/ceesvee/benchmark/ParserBenchmark.scala +++ b/benchmark/src/main/scala/ceesvee/benchmark/ParserBenchmark.scala @@ -37,7 +37,7 @@ class ParserBenchmark { @Benchmark def scalaCsv: List[List[String]] = { - import com.github.tototoshi.csv.defaultCSVFormat + import com.github.tototoshi.csv.CSVFormat.defaultCSVFormat com.github.tototoshi.csv.CSVReader.open(linesReader).all() } diff --git a/build.sbt b/build.sbt index be5c17f..f516008 100644 --- a/build.sbt +++ b/build.sbt @@ -1,11 +1,11 @@ // format: off -val catsVersion = "2.10.0" -val fs2Version = "3.10.2" -val zioVersion = "2.0.21" +val catsVersion = "2.13.0" +val fs2Version = "3.11.0" +val zioVersion = "2.1.6" -val Scala213 = "2.13.13" -val Scala3 = "3.3.3" +val Scala213 = "2.13.16" +val Scala3 = "3.3.4" inThisBuild(Seq( organization := "io.github.guymers", @@ -128,13 +128,13 @@ lazy val core = module("core") libraryDependencies ++= Seq( "org.typelevel" %% "cats-core" % catsVersion % Optional, "org.typelevel" %% "cats-laws" % catsVersion % Test, - "org.typelevel" %% "discipline-munit" % "1.0.9" % Test, + "org.typelevel" %% "discipline-munit" % "2.0.0" % Test, ), libraryDependencies ++= (CrossVersion.partialVersion(scalaVersion.value) match { case Some((2, _)) => Seq( - "com.softwaremill.magnolia1_2" %% "magnolia" % "1.1.7", + "com.softwaremill.magnolia1_2" %% "magnolia" % "1.1.10", "org.scala-lang" % "scala-reflect" % scalaVersion.value, - "com.chuusai" %% "shapeless" % "2.3.10" % Test, + "com.chuusai" %% "shapeless" % "2.3.12" % Test, ) case _ => Seq.empty }), @@ -144,7 +144,7 @@ lazy val fs2 = module("fs2") .settings( libraryDependencies ++= Seq( "co.fs2" %% "fs2-core" % fs2Version, - "dev.zio" %% "zio-interop-cats" % "23.1.0.1" % Test, + "dev.zio" %% "zio-interop-cats" % "23.1.0.2" % Test, ), libraryDependencies ++= (CrossVersion.partialVersion(scalaVersion.value) match { case Some((2, _)) => Seq(compilerPlugin("org.typelevel" % "kind-projector" % "0.13.3" cross CrossVersion.full)) @@ -167,7 +167,7 @@ lazy val benchmark = proj("benchmark", None) .settings( libraryDependencies ++= Seq( "com.univocity" % "univocity-parsers" % "2.9.1", - "com.github.tototoshi" %% "scala-csv" % "1.3.10", + "com.github.tototoshi" %% "scala-csv" % "2.0.0", ), ) .dependsOn(core) @@ -193,7 +193,7 @@ val TestCsvFiles = Map( // https://www.gov.uk/government/statistical-data-sets/price-paid-data-downloads "uk-property-sales-price-paid-2019.csv" -> ( "http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-2019.csv", - "0a433381ae42d1d59a047678dd3cb5b3e9ca2a02", + "f0c8da0dad28e849b78e9cd8f17927d83e0bb14c", ), ) diff --git a/modules/core/src/main/scala-3/ceesvee/CsvRecordDecoderDeriveScalaVersion.scala b/modules/core/src/main/scala-3/ceesvee/CsvRecordDecoderDeriveScalaVersion.scala index fd186a2..ed58afe 100644 --- a/modules/core/src/main/scala-3/ceesvee/CsvRecordDecoderDeriveScalaVersion.scala +++ b/modules/core/src/main/scala-3/ceesvee/CsvRecordDecoderDeriveScalaVersion.scala @@ -5,6 +5,7 @@ import scala.compiletime.erasedValue import scala.compiletime.summonInline import scala.deriving.Mirror +@SuppressWarnings(Array("org.wartremover.warts.Recursion")) trait CsvRecordDecoderDeriveScalaVersion { self: CsvRecordDecoder.type => inline def summonAll[T <: Tuple]: List[CsvRecordDecoder[?]] = inline erasedValue[T] match { diff --git a/modules/core/src/main/scala-3/ceesvee/CsvRecordEncoderDeriveScalaVersion.scala b/modules/core/src/main/scala-3/ceesvee/CsvRecordEncoderDeriveScalaVersion.scala index 421c9e8..064ea80 100644 --- a/modules/core/src/main/scala-3/ceesvee/CsvRecordEncoderDeriveScalaVersion.scala +++ b/modules/core/src/main/scala-3/ceesvee/CsvRecordEncoderDeriveScalaVersion.scala @@ -4,6 +4,7 @@ import scala.compiletime.erasedValue import scala.compiletime.summonInline import scala.deriving.Mirror +@SuppressWarnings(Array("org.wartremover.warts.Recursion")) trait CsvRecordEncoderDeriveScalaVersion { self: CsvRecordEncoder.type => inline def summonAll[T <: Tuple]: List[CsvRecordEncoder[?]] = inline erasedValue[T] match { diff --git a/modules/core/src/main/scala/ceesvee/CsvHeader.scala b/modules/core/src/main/scala/ceesvee/CsvHeader.scala index 2563b92..eba790a 100644 --- a/modules/core/src/main/scala/ceesvee/CsvHeader.scala +++ b/modules/core/src/main/scala/ceesvee/CsvHeader.scala @@ -32,7 +32,7 @@ object CsvHeader { errors: SortedMap[String, CsvRecordDecoder.Errors.Error], ) extends RuntimeException({ val reasons = errors.toList.map({ case (h, e) => s"column $h ${e.toString}" }) - s"Failed to decode ${raw.mkString(",").take(64)} because: ${reasons.toString}" + s"Failed to decode ${raw.mkString(",").take(64)} because: ${reasons.mkString(";")}" }) with NoStackTrace /** diff --git a/modules/core/src/main/scala/ceesvee/CsvParser.scala b/modules/core/src/main/scala/ceesvee/CsvParser.scala index 0f2b73f..ace5991 100644 --- a/modules/core/src/main/scala/ceesvee/CsvParser.scala +++ b/modules/core/src/main/scala/ceesvee/CsvParser.scala @@ -96,15 +96,17 @@ object CsvParser { * Both '"' and '\' are valid escapes for nested double quotes. */ @throws[Error.LineTooLong]("if a line is longer than `maximumLineLength`") + @SuppressWarnings(Array( + "org.wartremover.warts.MutableDataStructures", + "org.wartremover.warts.Throw", + "org.wartremover.warts.Var", + )) def splitLines(in: Iterator[String], options: Options): Iterator[String] = new Iterator[String] { - @SuppressWarnings(Array("org.wartremover.warts.MutableDataStructures")) private val toOutput = mutable.Queue.empty[String] - @SuppressWarnings(Array("org.wartremover.warts.Var")) private var state = State.initial override def hasNext: Boolean = toOutput.nonEmpty || in.hasNext || state.leftover.nonEmpty - @SuppressWarnings(Array("org.wartremover.warts.Throw")) @tailrec override def next(): String = { if (toOutput.nonEmpty) { toOutput.dequeue() diff --git a/modules/core/src/main/scala/ceesvee/CsvRecordDecoder.scala b/modules/core/src/main/scala/ceesvee/CsvRecordDecoder.scala index bbd7276..21b13a7 100644 --- a/modules/core/src/main/scala/ceesvee/CsvRecordDecoder.scala +++ b/modules/core/src/main/scala/ceesvee/CsvRecordDecoder.scala @@ -11,13 +11,14 @@ trait CsvRecordDecoder[A] { self => final def map[B](f: A => B): CsvRecordDecoder[B] = emap(a => Right(f(a))) - final def emap[B](f: A => Either[String, B]): CsvRecordDecoder[B] = new CsvRecordDecoder[B] { + final def emap[B](f: A => Either[String, B]): CsvRecordDecoder[B] = emapAtIndex(a => f(a).left.map((0, _))) + + final def emapAtIndex[B](f: A => Either[(Int, String), B]): CsvRecordDecoder[B] = new CsvRecordDecoder[B] { override val numFields = self.numFields override def decode(fields: IndexedSeq[String]) = { self.decode(fields).flatMap { a => - f(a).left.map { msg => - // stick the error on the first column - val errors = SortedMap(0 -> CsvRecordDecoder.Errors.Record(msg)) + f(a).left.map { case (i, msg) => + val errors = SortedMap(i -> CsvRecordDecoder.Errors.Record(msg)) CsvRecordDecoder.Errors(fields, errors) } } @@ -49,7 +50,7 @@ object CsvRecordDecoder extends CsvRecordDecoder1 { errors: SortedMap[Int, Errors.Error], ) extends RuntimeException({ val reasons = errors.toList.map({ case (i, e) => s"index ${i.toString} ${e.toString}" }) - s"Failed to decode ${raw.mkString(",").take(64)} because: ${reasons.toString}" + s"Failed to decode ${raw.mkString(",").take(64)} because: ${reasons.mkString(";")}" }) with NoStackTrace object Errors { diff --git a/project/build.properties b/project/build.properties index 04267b1..73df629 100644 --- a/project/build.properties +++ b/project/build.properties @@ -1 +1 @@ -sbt.version=1.9.9 +sbt.version=1.10.7 diff --git a/project/plugins.sbt b/project/plugins.sbt index d29fa0e..f2f623c 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -1,9 +1,9 @@ addSbtPlugin("pl.project13.scala" % "sbt-jmh" % "0.4.7") -addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.5.2") -addSbtPlugin("org.wartremover" % "sbt-wartremover" % "3.1.6") +addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.5.4") +addSbtPlugin("org.wartremover" % "sbt-wartremover" % "3.2.7") -addSbtPlugin("com.typesafe" % "sbt-mima-plugin" % "1.1.3") -addSbtPlugin("com.github.sbt" % "sbt-ci-release" % "1.5.12") +addSbtPlugin("com.typesafe" % "sbt-mima-plugin" % "1.1.4") +addSbtPlugin("com.github.sbt" % "sbt-ci-release" % "1.9.2") //addSbtPlugin("com.timushev.sbt" % "sbt-updates" % "0.6.4") diff --git a/tests/src/test/scala/ceesvee/tests/RealWorldCsvSpec.scala b/tests/src/test/scala/ceesvee/tests/RealWorldCsvSpec.scala index 914d320..e5db808 100644 --- a/tests/src/test/scala/ceesvee/tests/RealWorldCsvSpec.scala +++ b/tests/src/test/scala/ceesvee/tests/RealWorldCsvSpec.scala @@ -84,7 +84,7 @@ object RealWorldCsvSpec extends ZIOSpecDefault { assertHeaderTotal("nz-greenhouse-gas-emissions-2019.csv", NZGreenhouseGasEmissions.csvHeader, total) }*), suite("UK property sales 2019")({ - val total = 1005888L + val total = 1010985L assertTotal("uk-property-sales-price-paid-2019.csv", UkPropertySalesPricePaid.decoder, total) }*), ) @@ TestAspect.timeout(60.seconds) @@ TestAspect.timed