diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala index f40077c53311b..0c40c30d6c810 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala @@ -441,7 +441,34 @@ trait GetArrayItemUtil { */ trait GetMapValueUtil extends BinaryExpression with ImplicitCastInputTypes { - // todo: current search is O(n), improve it. + @transient private var lastMap: MapData = _ + @transient private var lastIndex: java.util.HashMap[Any, Int] = _ + + /** + * The threshold to determine whether to use hash lookup for map lookup expressions. + * If the map size is small, the cost of building hash map exceeds the cost of a linear scan. + * This is configured by `spark.sql.optimizer.mapLookupHashThreshold`. + */ + @transient private lazy val hashLookupThreshold = + SQLConf.get.getConf(SQLConf.MAP_LOOKUP_HASH_THRESHOLD) + + private def getOrBuildIndex(map: MapData, keyType: DataType): java.util.HashMap[Any, Int] = { + if (lastMap ne map) { + val keys = map.keyArray() + val len = keys.numElements() + val hm = new java.util.HashMap[Any, Int]((len * 1.5).toInt) + var i = 0 + while (i < len) { + val k = keys.get(i, keyType) + hm.putIfAbsent(k, i) + i += 1 + } + lastIndex = hm + lastMap = map + } + lastIndex + } + def getValueEval( value: Any, ordinal: Any, @@ -449,6 +476,25 @@ trait GetMapValueUtil extends BinaryExpression with ImplicitCastInputTypes { ordering: Ordering[Any]): Any = { val map = value.asInstanceOf[MapData] val length = map.numElements() + + if (length < hashLookupThreshold || !TypeUtils.typeWithProperEquals(keyType)) { + getValueEvalLinear(map, ordinal, keyType, ordering) + } else { + val idx = getOrBuildIndex(map, keyType).getOrDefault(ordinal, -1) + if (idx == -1 || 
map.valueArray().isNullAt(idx)) { + null + } else { + map.valueArray().get(idx, dataType) + } + } + } + + private def getValueEvalLinear( + map: MapData, + ordinal: Any, + keyType: DataType, + ordering: Ordering[Any]): Any = { + val length = map.numElements() val keys = map.keyArray() val values = map.valueArray() @@ -473,38 +519,178 @@ trait GetMapValueUtil extends BinaryExpression with ImplicitCastInputTypes { ctx: CodegenContext, ev: ExprCode, mapType: MapType): ExprCode = { + val keyType = mapType.keyType + if (supportsHashLookup(keyType)) { + doGetValueGenCodeWithHashOpt(ctx, ev, mapType) + } else { + doGetValueGenCodeLinear(ctx, ev, mapType) + } + } + + private def supportsHashLookup(keyType: DataType): Boolean = keyType match { + case BooleanType | ByteType | ShortType | IntegerType | LongType | + FloatType | DoubleType | DateType | TimestampType | + TimestampNTZType | _: YearMonthIntervalType | + _: DayTimeIntervalType => true + case st: StringType if st.supportsBinaryEquality => true + case _ => false + } + + private def doGetValueGenCodeLinear( + ctx: CodegenContext, + ev: ExprCode, + mapType: MapType): ExprCode = { + val index = ctx.freshName("index") + val length = ctx.freshName("length") + val keys = ctx.freshName("keys") + val values = ctx.freshName("values") + val keyType = mapType.keyType + + val keyJavaType = CodeGenerator.javaType(keyType) + val loopKey = ctx.freshName("loopKey") + val i = ctx.freshName("i") + + val nullValueCheck = if (mapType.valueContainsNull) { + s""" + |else if ($values.isNullAt($index)) { + | ${ev.isNull} = true; + |} + """.stripMargin + } else { + "" + } + + nullSafeCodeGen(ctx, ev, (eval1, eval2) => { + s""" + |final int $length = $eval1.numElements(); + |final ArrayData $keys = $eval1.keyArray(); + |final ArrayData $values = $eval1.valueArray(); + |int $index = -1; + | + |for (int $i = 0; $i < $length; $i++) { + | $keyJavaType $loopKey = ${CodeGenerator.getValue(keys, keyType, i)}; + | if (${ctx.genEqual(keyType, loopKey, 
eval2)}) { + | $index = $i; + | break; + | } + |} + | + |if ($index < 0) { + | ${ev.isNull} = true; + |} $nullValueCheck else { + | ${ev.value} = ${CodeGenerator.getValue(values, dataType, index)}; + |} + """.stripMargin + }) + } + + /** + * Generates code for map lookups. + * If the map size is small (less than `hashLookupThreshold`), it uses a linear scan. + * If the map size is large, it builds a hash index for O(1) lookup. + */ + private def doGetValueGenCodeWithHashOpt( + ctx: CodegenContext, + ev: ExprCode, + mapType: MapType): ExprCode = { val index = ctx.freshName("index") val length = ctx.freshName("length") val keys = ctx.freshName("keys") - val key = ctx.freshName("key") val values = ctx.freshName("values") val keyType = mapType.keyType - val nullCheck = if (mapType.valueContainsNull) { - s" || $values.isNullAt($index)" + + val nullValueCheck = if (mapType.valueContainsNull) { + s""" + |else if ($values.isNullAt($index)) { + | ${ev.isNull} = true; + |} + """.stripMargin } else { "" } val keyJavaType = CodeGenerator.javaType(keyType) + val lastKeyArray = ctx.addMutableState("ArrayData", "lastKeyArray", v => s"$v = null;") + val hashBuckets = ctx.addMutableState("int[]", "hashBuckets", v => s"$v = null;") + val hashMask = ctx.addMutableState("int", "hashMask", v => s"$v = 0;") + + def genHash(v: String): String = keyType match { + case BooleanType => s"($v ? 
1 : 0)" + case ByteType | ShortType | IntegerType | DateType | _: YearMonthIntervalType => s"$v" + case LongType | TimestampType | TimestampNTZType | _: DayTimeIntervalType => + s"(int)($v ^ ($v >>> 32))" + case FloatType => s"Float.floatToIntBits($v)" + case DoubleType => + s"(int)(Double.doubleToLongBits($v) ^ (Double.doubleToLongBits($v) >>> 32))" + case _ => s"$v.hashCode()" + } + nullSafeCodeGen(ctx, ev, (eval1, eval2) => { + val i = ctx.freshName("i") + val h = ctx.freshName("h") + val cap = ctx.freshName("cap") + val idx = ctx.freshName("idx") + val candidate = ctx.freshName("candidate") + val loopKey = ctx.freshName("loopKey") + + val buildIndex = + s""" + |int $cap = Math.max(Integer.highestOneBit(Math.max($length * 2 - 1, 1)) << 1, 4); + |if ($hashBuckets == null || $hashBuckets.length < $cap) { + | $hashBuckets = new int[$cap]; + |} + |java.util.Arrays.fill($hashBuckets, 0, $cap, -1); + |$hashMask = $cap - 1; + |for (int $i = 0; $i < $length; $i++) { + | $keyJavaType $loopKey = ${CodeGenerator.getValue(keys, keyType, i)}; + | int $h = (${genHash(loopKey)}) & $hashMask; + | while ($hashBuckets[$h] != -1) { + | $h = ($h + 1) & $hashMask; + | } + | $hashBuckets[$h] = $i; + |} + |$lastKeyArray = $keys; + """.stripMargin + + val lookup = + s""" + |int $h = (${genHash(eval2)}) & $hashMask; + |$index = -1; + |while ($hashBuckets[$h] != -1) { + | int $idx = $hashBuckets[$h]; + | $keyJavaType $candidate = ${CodeGenerator.getValue(keys, keyType, idx)}; + | if (${ctx.genEqual(keyType, candidate, eval2)}) { + | $index = $idx; + | break; + | } + | $h = ($h + 1) & $hashMask; + |} + """.stripMargin + s""" final int $length = $eval1.numElements(); final ArrayData $keys = $eval1.keyArray(); final ArrayData $values = $eval1.valueArray(); + int $index = -1; - int $index = 0; - while ($index < $length) { - final $keyJavaType $key = ${CodeGenerator.getValue(keys, keyType, index)}; - if (${ctx.genEqual(keyType, key, eval2)}) { - break; - } else { - $index++; + if ($length >= 
$hashLookupThreshold) { + if ($keys != $lastKeyArray) { + $buildIndex + } + $lookup + } else { + for (int $i = 0; $i < $length; $i++) { + $keyJavaType $loopKey = ${CodeGenerator.getValue(keys, keyType, i)}; + if (${ctx.genEqual(keyType, loopKey, eval2)}) { + $index = $i; + break; + } } } - if ($index == $length$nullCheck) { + if ($index < 0) { ${ev.isNull} = true; - } else { + } $nullValueCheck else { ${ev.value} = ${CodeGenerator.getValue(values, dataType, index)}; } """ @@ -547,15 +733,10 @@ case class GetMapValue(child: Expression, key: Expression) /** * `Null` is returned for invalid ordinals. - * - * TODO: We could make nullability more precise in foldable cases (e.g., literal input). - * But, since the key search is O(n), it takes much time to compute nullability. - * If we find efficient key searches, revisit this. */ override def nullable: Boolean = true override def dataType: DataType = child.dataType.asInstanceOf[MapType].valueType - // todo: current search is O(n), improve it. override def nullSafeEval(value: Any, ordinal: Any): Any = { getValueEval(value, ordinal, keyType, ordering) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 8c9796b716896..4a6d67a1ca026 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2501,6 +2501,18 @@ object SQLConf { .intConf .createWithDefault(-1) + val MAP_LOOKUP_HASH_THRESHOLD = + buildConf("spark.sql.optimizer.mapLookupHashThreshold") + .internal() + .doc("The minimum number of map entries to attempt hash-based lookup in `element_at` and " + + "the `[]` operator. Below this threshold, linear scan is used. For key types that do not " + + "support hashing (e.g. 
arrays, structs), linear scan is always used regardless of map size.") + .version("4.2.0") + .withBindingPolicy(ConfigBindingPolicy.SESSION) + .intConf + .checkValue(_ >= 0, "The threshold must be non-negative.") + .createWithDefault(1000) + val FILES_MAX_PARTITION_BYTES = buildConf("spark.sql.files.maxPartitionBytes") .doc("The maximum number of bytes to pack into a single partition when reading files. " + "This configuration is effective only when using file-based sources such as Parquet, JSON " + diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala index 1907ec7c23aa6..108b833694337 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala @@ -29,7 +29,7 @@ import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch -import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, DateTimeUtils} +import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, DateTimeTestUtils, DateTimeUtils, GenericArrayData} import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.{outstandingZoneIds, LA, UTC} import org.apache.spark.sql.catalyst.util.IntervalUtils._ import org.apache.spark.sql.catalyst.util.TypeUtils.ordinalNumber @@ -1887,79 +1887,140 @@ class CollectionExpressionsSuite checkEvaluation(ArrayPosition(aa1, aae), 0L) } - test("elementAt") { - val a0 = Literal.create(Seq(1, 2, 3), ArrayType(IntegerType)) - val a1 = Literal.create(Seq[String](null, ""), ArrayType(StringType)) - val a2 = Literal.create(Seq(null), ArrayType(LongType)) - val a3 = Literal.create(null, 
ArrayType(StringType)) - - intercept[Exception] { - checkEvaluation(ElementAt(a0, Literal(0)), null) - }.getMessage.contains("SQL array indices start at 1") - intercept[Exception] { checkEvaluation(ElementAt(a0, Literal(1.1)), null) } - withSQLConf(SQLConf.ANSI_ENABLED.key -> false.toString) { - checkEvaluation(ElementAt(a0, Literal(4)), null) - checkEvaluation(ElementAt(a0, Literal(-4)), null) - } + Seq((Int.MaxValue, "Linear Lookup"), (0, "Hash Lookup")).foreach { case (threshold, name) => + test(s"elementAt - $name") { + withSQLConf(SQLConf.MAP_LOOKUP_HASH_THRESHOLD.key -> threshold.toString) { + val a0 = Literal.create(Seq(1, 2, 3), ArrayType(IntegerType)) + val a1 = Literal.create(Seq[String](null, ""), ArrayType(StringType)) + val a2 = Literal.create(Seq(null), ArrayType(LongType)) + val a3 = Literal.create(null, ArrayType(StringType)) + + intercept[Exception] { + checkEvaluation(ElementAt(a0, Literal(0)), null) + }.getMessage.contains("SQL array indices start at 1") + intercept[Exception] { checkEvaluation(ElementAt(a0, Literal(1.1)), null) } + withSQLConf(SQLConf.ANSI_ENABLED.key -> false.toString) { + checkEvaluation(ElementAt(a0, Literal(4)), null) + checkEvaluation(ElementAt(a0, Literal(-4)), null) + } - checkEvaluation(ElementAt(a0, Literal(1)), 1) - checkEvaluation(ElementAt(a0, Literal(2)), 2) - checkEvaluation(ElementAt(a0, Literal(3)), 3) - checkEvaluation(ElementAt(a0, Literal(-3)), 1) - checkEvaluation(ElementAt(a0, Literal(-2)), 2) - checkEvaluation(ElementAt(a0, Literal(-1)), 3) + checkEvaluation(ElementAt(a0, Literal(1)), 1) + checkEvaluation(ElementAt(a0, Literal(2)), 2) + checkEvaluation(ElementAt(a0, Literal(3)), 3) + checkEvaluation(ElementAt(a0, Literal(-3)), 1) + checkEvaluation(ElementAt(a0, Literal(-2)), 2) + checkEvaluation(ElementAt(a0, Literal(-1)), 3) + + checkEvaluation(ElementAt(a1, Literal(1)), null) + checkEvaluation(ElementAt(a1, Literal(2)), "") + checkEvaluation(ElementAt(a1, Literal(-2)), null) + 
checkEvaluation(ElementAt(a1, Literal(-1)), "") + + checkEvaluation(ElementAt(a2, Literal(1)), null) + + checkEvaluation(ElementAt(a3, Literal(1)), null) + + + val m0 = + Literal.create(Map("a" -> "1", "b" -> "2", "c" -> null), + MapType(StringType, StringType)) + val m1 = Literal.create(Map[String, String](), MapType(StringType, StringType)) + val m2 = Literal.create(null, MapType(StringType, StringType)) + + assert(ElementAt(m0, Literal(1.0)).checkInputDataTypes() == + DataTypeMismatch( + errorSubClass = "MAP_FUNCTION_DIFF_TYPES", + messageParameters = Map( + "functionName" -> "`element_at`", + "dataType" -> "\"MAP\"", + "leftType" -> "\"MAP\"", + "rightType" -> "\"DOUBLE\"" + ) + ) + ) - checkEvaluation(ElementAt(a1, Literal(1)), null) - checkEvaluation(ElementAt(a1, Literal(2)), "") - checkEvaluation(ElementAt(a1, Literal(-2)), null) - checkEvaluation(ElementAt(a1, Literal(-1)), "") + withSQLConf(SQLConf.ANSI_ENABLED.key -> false.toString) { + checkEvaluation(ElementAt(m0, Literal("d")), null) + checkEvaluation(ElementAt(m1, Literal("a")), null) + } - checkEvaluation(ElementAt(a2, Literal(1)), null) + checkEvaluation(ElementAt(m0, Literal("a")), "1") + checkEvaluation(ElementAt(m0, Literal("b")), "2") + checkEvaluation(ElementAt(m0, Literal("c")), null) - checkEvaluation(ElementAt(a3, Literal(1)), null) + checkEvaluation(ElementAt(m2, Literal("a")), null) + // test binary type as keys + val mb0 = Literal.create( + Map(Array[Byte](1, 2) -> "1", Array[Byte](3, 4) -> null, Array[Byte](2, 1) -> "2"), + MapType(BinaryType, StringType)) + val mb1 = Literal.create(Map[Array[Byte], String](), MapType(BinaryType, StringType)) - val m0 = - Literal.create(Map("a" -> "1", "b" -> "2", "c" -> null), MapType(StringType, StringType)) - val m1 = Literal.create(Map[String, String](), MapType(StringType, StringType)) - val m2 = Literal.create(null, MapType(StringType, StringType)) - - assert(ElementAt(m0, Literal(1.0)).checkInputDataTypes() == - DataTypeMismatch( - errorSubClass 
= "MAP_FUNCTION_DIFF_TYPES", - messageParameters = Map( - "functionName" -> "`element_at`", - "dataType" -> "\"MAP\"", - "leftType" -> "\"MAP\"", - "rightType" -> "\"DOUBLE\"" - ) - ) - ) - - withSQLConf(SQLConf.ANSI_ENABLED.key -> false.toString) { - checkEvaluation(ElementAt(m0, Literal("d")), null) - checkEvaluation(ElementAt(m1, Literal("a")), null) + withSQLConf(SQLConf.ANSI_ENABLED.key -> false.toString) { + checkEvaluation(ElementAt(mb0, Literal(Array[Byte](1, 2, 3))), null) + checkEvaluation(ElementAt(mb1, Literal(Array[Byte](1, 2))), null) + } + checkEvaluation(ElementAt(mb0, Literal(Array[Byte](2, 1), BinaryType)), "2") + checkEvaluation(ElementAt(mb0, Literal(Array[Byte](3, 4))), null) + + // Int keys + val intMap = Literal.create(Map(1 -> 10, 2 -> 20, 3 -> 30), + MapType(IntegerType, IntegerType)) + checkEvaluation(ElementAt(intMap, Literal(1)), 10) + checkEvaluation(ElementAt(intMap, Literal(2)), 20) + checkEvaluation(ElementAt(intMap, Literal(4)), null) + + // Duplicate keys + val keys = new GenericArrayData(Array(1, 2, 1)) + val values = new GenericArrayData(Array(10, 20, 30)) + val dupMapData = new ArrayBasedMapData(keys, values) + val dupMap = Literal.create(dupMapData, MapType(IntegerType, IntegerType)) + checkEvaluation(ElementAt(dupMap, Literal(1)), 10) + checkEvaluation(ElementAt(dupMap, Literal(2)), 20) + + // Null values + val nullValueMap = Literal.create(Map(1 -> null), MapType(IntegerType, StringType)) + checkEvaluation(ElementAt(nullValueMap, Literal(1)), null) + + // NaN keys + val nan = Double.NaN + val doubleMap = Literal.create(Map(1.0 -> 10, nan -> 20), + MapType(DoubleType, IntegerType)) + checkEvaluation(ElementAt(doubleMap, Literal(1.0)), 10) + checkEvaluation(ElementAt(doubleMap, Literal(nan)), 20) + + // Nested Map Value + val mapNested = Literal.create( + Map(1 -> Map(10 -> 100), 2 -> Map(20 -> 200)), + MapType(IntegerType, MapType(IntegerType, IntegerType))) + checkEvaluation(ElementAt(mapNested, Literal(1)), Map(10 -> 100)) + 
checkEvaluation(ElementAt(mapNested, Literal(2)), Map(20 -> 200)) + checkEvaluation(ElementAt(mapNested, Literal(3)), null) + + // Array Keys + val arrayType = ArrayType(IntegerType) + val arrayMap = Literal.create( + Map(Array(1, 2) -> 10, Array(3, 4) -> 20), + MapType(arrayType, IntegerType)) + checkEvaluation(ElementAt(arrayMap, Literal.create(Array(1, 2), arrayType)), 10) + checkEvaluation(ElementAt(arrayMap, Literal.create(Array(3, 4), arrayType)), 20) + checkEvaluation(ElementAt(arrayMap, Literal.create(Array(5, 6), arrayType)), null) + + // Struct Keys + val structType = new StructType().add("a", "int").add("b", "int") + val structMap = Literal.create( + Map(create_row(1, 1) -> 10, create_row(2, 2) -> 20), + MapType(structType, IntegerType)) + checkEvaluation(ElementAt(structMap, Literal.create(create_row(1, 1), structType)), 10) + checkEvaluation(ElementAt(structMap, Literal.create(create_row(2, 2), structType)), 20) + checkEvaluation(ElementAt(structMap, Literal.create(create_row(3, 3), structType)), null) + } } + } - checkEvaluation(ElementAt(m0, Literal("a")), "1") - checkEvaluation(ElementAt(m0, Literal("b")), "2") - checkEvaluation(ElementAt(m0, Literal("c")), null) - - checkEvaluation(ElementAt(m2, Literal("a")), null) - - // test binary type as keys - val mb0 = Literal.create( - Map(Array[Byte](1, 2) -> "1", Array[Byte](3, 4) -> null, Array[Byte](2, 1) -> "2"), - MapType(BinaryType, StringType)) - val mb1 = Literal.create(Map[Array[Byte], String](), MapType(BinaryType, StringType)) - withSQLConf(SQLConf.ANSI_ENABLED.key -> false.toString) { - checkEvaluation(ElementAt(mb0, Literal(Array[Byte](1, 2, 3))), null) - checkEvaluation(ElementAt(mb1, Literal(Array[Byte](1, 2))), null) - } - checkEvaluation(ElementAt(mb0, Literal(Array[Byte](2, 1), BinaryType)), "2") - checkEvaluation(ElementAt(mb0, Literal(Array[Byte](3, 4))), null) + test("defaultValueOutOfBound") { // test defaultValueOutOfBound withSQLConf(SQLConf.ANSI_ENABLED.key -> false.toString) { val 
delimiter = Literal.create(".", StringType) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala index 7baad5ea92a00..e83e2dcd85db7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala @@ -129,22 +129,102 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper { assert(GetArrayItem(stArray4, Literal(1)).nullable) } - test("GetMapValue") { - val typeM = MapType(StringType, StringType) - val map = Literal.create(Map("a" -> "b"), typeM) - val nullMap = Literal.create(null, typeM) - val nullString = Literal.create(null, StringType) - - checkEvaluation(GetMapValue(map, Literal("a")), "b") - checkEvaluation(GetMapValue(map, nullString), null) - checkEvaluation(GetMapValue(nullMap, nullString), null) - checkEvaluation(GetMapValue(map, nullString), null) - - val nonNullMap = Literal.create(Map("a" -> 1), MapType(StringType, IntegerType, false)) - checkEvaluation(GetMapValue(nonNullMap, Literal("a")), 1) - - val nestedMap = Literal.create(Map("a" -> Map("b" -> "c")), MapType(StringType, typeM)) - checkEvaluation(GetMapValue(nestedMap, Literal("a")), Map("b" -> "c")) + Seq((Int.MaxValue, "Linear Lookup"), (0, "Hash Lookup")).foreach { case (threshold, name) => + test(s"GetMapValue - $name") { + withSQLConf(SQLConf.MAP_LOOKUP_HASH_THRESHOLD.key -> threshold.toString) { + val typeM = MapType(StringType, StringType) + val map = Literal.create(Map("a" -> "b"), typeM) + val nullMap = Literal.create(null, typeM) + val nullString = Literal.create(null, StringType) + + // 1. 
Basic lookup (String keys) + checkEvaluation(GetMapValue(map, Literal("a")), "b") + checkEvaluation(GetMapValue(map, nullString), null) + checkEvaluation(GetMapValue(nullMap, nullString), null) + checkEvaluation(GetMapValue(map, nullString), null) + + val nonNullMap = Literal.create(Map("a" -> 1), MapType(StringType, IntegerType, false)) + checkEvaluation(GetMapValue(nonNullMap, Literal("a")), 1) + + // 2. Nested map + val nestedMap = Literal.create(Map("a" -> Map("b" -> "c")), MapType(StringType, typeM)) + checkEvaluation(GetMapValue(nestedMap, Literal("a")), Map("b" -> "c")) + + // 3. Basic lookup (Int keys) + val intMap = Literal.create(Map(1 -> 10, 2 -> 20, 3 -> 30), + MapType(IntegerType, IntegerType)) + checkEvaluation(GetMapValue(intMap, Literal(1)), 10) + checkEvaluation(GetMapValue(intMap, Literal(2)), 20) + checkEvaluation(GetMapValue(intMap, Literal(3)), 30) + checkEvaluation(GetMapValue(intMap, Literal(4)), null) + + val emptyMap = Literal.create(Map.empty[Int, Int], MapType(IntegerType, IntegerType)) + checkEvaluation(GetMapValue(emptyMap, Literal(1)), null) + + // 4. Special data + // Duplicate keys: Spark MapType doesn't enforce uniqueness in the underlying + // data structure (ArrayBasedMapData) + // We construct it manually to simulate duplicates. 
+ val keys = new GenericArrayData(Array(1, 2, 1)) + val values = new GenericArrayData(Array(10, 20, 30)) + val dupMapData = new ArrayBasedMapData(keys, values) + val dupMap = Literal.create(dupMapData, MapType(IntegerType, IntegerType)) + // Should return the first match + checkEvaluation(GetMapValue(dupMap, Literal(1)), 10) + checkEvaluation(GetMapValue(dupMap, Literal(2)), 20) + + // Null values + val nullValueMap = Literal.create(Map(1 -> null), MapType(IntegerType, StringType)) + checkEvaluation(GetMapValue(nullValueMap, Literal(1)), null) + + // NaN keys + val nan = Double.NaN + val floatNan = Float.NaN + val doubleMap = Literal.create(Map(1.0 -> 10, nan -> 20), MapType(DoubleType, IntegerType)) + checkEvaluation(GetMapValue(doubleMap, Literal(1.0)), 10) + checkEvaluation(GetMapValue(doubleMap, Literal(nan)), 20) + + val floatMap = Literal.create(Map(1.0f -> 10, floatNan -> 20), + MapType(FloatType, IntegerType)) + checkEvaluation(GetMapValue(floatMap, Literal(1.0f)), 10) + checkEvaluation(GetMapValue(floatMap, Literal(floatNan)), 20) + + // 5. Key types + // Long + val longMap = Literal.create(Map(1L -> 10, 2L -> 20), MapType(LongType, IntegerType)) + checkEvaluation(GetMapValue(longMap, Literal(1L)), 10) + checkEvaluation(GetMapValue(longMap, Literal(3L)), null) + + // String + val stringMap = Literal.create(Map("a" -> "A", "b" -> "B"), MapType(StringType, StringType)) + checkEvaluation(GetMapValue(stringMap, Literal("a")), "A") + checkEvaluation(GetMapValue(stringMap, Literal("c")), null) + + // 6. Binary Keys + val binaryMap = Literal.create(Map(Array(1.toByte) -> 10, Array(2.toByte) -> 20), + MapType(BinaryType, IntegerType)) + checkEvaluation(GetMapValue(binaryMap, Literal(Array(1.toByte))), 10) + checkEvaluation(GetMapValue(binaryMap, Literal(Array(3.toByte))), null) + + // 7. 
Array Keys + val arrayType = ArrayType(IntegerType) + val arrayMap = Literal.create( + Map(Array(1, 2) -> 10, Array(3, 4) -> 20), + MapType(arrayType, IntegerType)) + checkEvaluation(GetMapValue(arrayMap, Literal.create(Array(1, 2), arrayType)), 10) + checkEvaluation(GetMapValue(arrayMap, Literal.create(Array(3, 4), arrayType)), 20) + checkEvaluation(GetMapValue(arrayMap, Literal.create(Array(5, 6), arrayType)), null) + + // 8. Struct Keys + val structType = new StructType().add("a", "int").add("b", "int") + val structMap = Literal.create( + Map(create_row(1, 1) -> 10, create_row(2, 2) -> 20), + MapType(structType, IntegerType)) + checkEvaluation(GetMapValue(structMap, Literal.create(create_row(1, 1), structType)), 10) + checkEvaluation(GetMapValue(structMap, Literal.create(create_row(2, 2), structType)), 20) + checkEvaluation(GetMapValue(structMap, Literal.create(create_row(3, 3), structType)), null) + } + } } test("GetStructField") { diff --git a/sql/core/benchmarks/MapLookupBenchmark-jdk21-results.txt b/sql/core/benchmarks/MapLookupBenchmark-jdk21-results.txt new file mode 100644 index 0000000000000..aacce18afd07a --- /dev/null +++ b/sql/core/benchmarks/MapLookupBenchmark-jdk21-results.txt @@ -0,0 +1,273 @@ +OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=1000000, hit=1.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +----------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 10026 23010 11278 0.0 1002647.5 1.0X +GetMapValue interpreted - Hash Lookup 1198 1356 141 0.0 119820.5 8.4X +GetMapValue codegen - Linear Lookup 12500 17389 4235 0.0 1249953.2 0.8X +GetMapValue codegen - Hash Lookup 1631 1861 200 0.0 163089.3 6.1X +ElementAt interpreted - Linear Lookup 25953 26404 426 0.0 2595299.3 0.4X +ElementAt interpreted - Hash Lookup 1165 1360 299 
0.0 116544.6 8.6X +ElementAt codegen - Linear Lookup 17482 18188 617 0.0 1748227.1 0.6X +ElementAt codegen - Hash Lookup 1869 2017 172 0.0 186850.1 5.4X + +OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=1000000, hit=0.5, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +----------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 42492 42633 242 0.0 4249196.2 1.0X +GetMapValue interpreted - Hash Lookup 1464 1601 154 0.0 146399.6 29.0X +GetMapValue codegen - Linear Lookup 12001 20809 7629 0.0 1200106.8 3.5X +GetMapValue codegen - Hash Lookup 1800 1970 191 0.0 180018.1 23.6X +ElementAt interpreted - Linear Lookup 43724 44931 1073 0.0 4372412.0 1.0X +ElementAt interpreted - Hash Lookup 1171 1503 317 0.0 117088.2 36.3X +ElementAt codegen - Linear Lookup 24145 24366 360 0.0 2414490.0 1.8X +ElementAt codegen - Hash Lookup 1640 1862 197 0.0 163983.0 25.9X + +OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=1000000, hit=0.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +----------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 57909 58999 1062 0.0 5790941.2 1.0X +GetMapValue interpreted - Hash Lookup 1113 1429 296 0.0 111346.2 52.0X +GetMapValue codegen - Linear Lookup 34713 35982 1174 0.0 3471341.5 1.7X +GetMapValue codegen - Hash Lookup 1663 1823 138 0.0 166340.3 34.8X +ElementAt interpreted - Linear Lookup 58047 58688 581 0.0 5804675.6 1.0X +ElementAt interpreted - Hash Lookup 1416 1545 114 0.0 141555.3 40.9X +ElementAt codegen - Linear Lookup 36685 36917 317 0.0 3668505.8 1.6X +ElementAt codegen - Hash Lookup 1834 1970 128 0.0 
183373.1 31.6X + +OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=100000, hit=1.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +---------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 1315 1318 4 0.0 131520.2 1.0X +GetMapValue interpreted - Hash Lookup 110 120 17 0.1 11046.6 11.9X +GetMapValue codegen - Linear Lookup 895 901 7 0.0 89512.7 1.5X +GetMapValue codegen - Hash Lookup 143 151 10 0.1 14283.0 9.2X +ElementAt interpreted - Linear Lookup 1281 1284 4 0.0 128062.2 1.0X +ElementAt interpreted - Hash Lookup 112 120 12 0.1 11174.7 11.8X +ElementAt codegen - Linear Lookup 894 896 1 0.0 89425.2 1.5X +ElementAt codegen - Hash Lookup 141 147 13 0.1 14054.9 9.4X + +OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=100000, hit=0.5, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +---------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 1972 2017 40 0.0 197218.8 1.0X +GetMapValue interpreted - Hash Lookup 107 115 11 0.1 10704.7 18.4X +GetMapValue codegen - Linear Lookup 1265 1460 169 0.0 126465.9 1.6X +GetMapValue codegen - Hash Lookup 136 142 9 0.1 13647.2 14.5X +ElementAt interpreted - Linear Lookup 1696 1704 9 0.0 169598.3 1.2X +ElementAt interpreted - Hash Lookup 109 119 13 0.1 10856.4 18.2X +ElementAt codegen - Linear Lookup 1266 1272 10 0.0 126615.6 1.6X +ElementAt codegen - Hash Lookup 138 146 16 0.1 13831.2 14.3X + +OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=100000, hit=0.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) 
Relative +---------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 2222 2228 11 0.0 222165.8 1.0X +GetMapValue interpreted - Hash Lookup 105 116 14 0.1 10477.0 21.2X +GetMapValue codegen - Linear Lookup 1645 1660 14 0.0 164490.3 1.4X +GetMapValue codegen - Hash Lookup 131 142 16 0.1 13062.0 17.0X +ElementAt interpreted - Linear Lookup 2506 2570 55 0.0 250633.2 0.9X +ElementAt interpreted - Hash Lookup 103 110 11 0.1 10314.8 21.5X +ElementAt codegen - Linear Lookup 1683 1707 21 0.0 168336.7 1.3X +ElementAt codegen - Hash Lookup 132 143 13 0.1 13191.2 16.8X + +OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=10000, hit=1.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +--------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 136 142 3 0.1 13555.8 1.0X +GetMapValue interpreted - Hash Lookup 25 30 6 0.4 2495.6 5.4X +GetMapValue codegen - Linear Lookup 95 97 1 0.1 9488.8 1.4X +GetMapValue codegen - Hash Lookup 27 31 4 0.4 2680.3 5.1X +ElementAt interpreted - Linear Lookup 120 122 2 0.1 11953.9 1.1X +ElementAt interpreted - Hash Lookup 24 27 5 0.4 2361.7 5.7X +ElementAt codegen - Linear Lookup 95 100 8 0.1 9457.3 1.4X +ElementAt codegen - Hash Lookup 26 28 3 0.4 2577.3 5.3X + +OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=10000, hit=0.5, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +--------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 180 185 5 0.1 17965.3 1.0X +GetMapValue interpreted - Hash Lookup 22 24 2 0.4 
2250.0 8.0X +GetMapValue codegen - Linear Lookup 136 137 1 0.1 13566.0 1.3X +GetMapValue codegen - Hash Lookup 25 28 4 0.4 2521.6 7.1X +ElementAt interpreted - Linear Lookup 174 177 3 0.1 17436.6 1.0X +ElementAt interpreted - Hash Lookup 23 25 4 0.4 2262.5 7.9X +ElementAt codegen - Linear Lookup 134 137 6 0.1 13379.1 1.3X +ElementAt codegen - Hash Lookup 24 28 4 0.4 2449.1 7.3X + +OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=10000, hit=0.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +--------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 233 235 1 0.0 23304.9 1.0X +GetMapValue interpreted - Hash Lookup 22 24 4 0.5 2150.9 10.8X +GetMapValue codegen - Linear Lookup 175 177 1 0.1 17518.0 1.3X +GetMapValue codegen - Hash Lookup 24 26 4 0.4 2410.4 9.7X +ElementAt interpreted - Linear Lookup 229 238 15 0.0 22886.9 1.0X +ElementAt interpreted - Hash Lookup 21 24 4 0.5 2128.1 11.0X +ElementAt codegen - Linear Lookup 175 176 1 0.1 17538.1 1.3X +ElementAt codegen - Hash Lookup 24 27 4 0.4 2398.8 9.7X + +OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=1000, hit=1.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +-------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 21 24 5 0.5 2069.1 1.0X +GetMapValue interpreted - Hash Lookup 14 16 3 0.7 1357.4 1.5X +GetMapValue codegen - Linear Lookup 18 20 3 0.6 1756.8 1.2X +GetMapValue codegen - Hash Lookup 14 16 4 0.7 1389.9 1.5X +ElementAt interpreted - Linear Lookup 21 22 4 0.5 2067.5 1.0X +ElementAt interpreted - Hash Lookup 14 16 3 0.7 1355.4 1.5X +ElementAt codegen - Linear Lookup 17 
20 3 0.6 1709.6 1.2X +ElementAt codegen - Hash Lookup 13 16 4 0.8 1317.6 1.6X + +OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=1000, hit=0.5, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +-------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 22 24 3 0.4 2229.0 1.0X +GetMapValue interpreted - Hash Lookup 13 15 3 0.8 1257.1 1.8X +GetMapValue codegen - Linear Lookup 18 21 4 0.5 1821.8 1.2X +GetMapValue codegen - Hash Lookup 13 16 3 0.7 1336.1 1.7X +ElementAt interpreted - Linear Lookup 23 25 3 0.4 2268.2 1.0X +ElementAt interpreted - Hash Lookup 13 15 3 0.8 1251.0 1.8X +ElementAt codegen - Linear Lookup 19 20 3 0.5 1851.5 1.2X +ElementAt codegen - Hash Lookup 12 14 2 0.8 1208.9 1.8X + +OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=1000, hit=0.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +-------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 24 27 4 0.4 2388.1 1.0X +GetMapValue interpreted - Hash Lookup 12 15 3 0.8 1185.9 2.0X +GetMapValue codegen - Linear Lookup 20 22 3 0.5 1966.4 1.2X +GetMapValue codegen - Hash Lookup 12 14 2 0.8 1209.0 2.0X +ElementAt interpreted - Linear Lookup 24 27 3 0.4 2374.5 1.0X +ElementAt interpreted - Hash Lookup 12 14 2 0.8 1181.1 2.0X +ElementAt codegen - Linear Lookup 20 22 2 0.5 2027.9 1.2X +ElementAt codegen - Hash Lookup 12 14 2 0.8 1186.8 2.0X + +OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=100, hit=1.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
+------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 12 13 2 0.8 1181.4 1.0X +GetMapValue interpreted - Hash Lookup 11 13 3 0.9 1116.8 1.1X +GetMapValue codegen - Linear Lookup 12 13 2 0.8 1181.9 1.0X +GetMapValue codegen - Hash Lookup 11 13 2 0.9 1102.2 1.1X +ElementAt interpreted - Linear Lookup 13 15 2 0.8 1330.9 0.9X +ElementAt interpreted - Hash Lookup 11 13 2 0.9 1129.0 1.0X +ElementAt codegen - Linear Lookup 11 13 2 0.9 1135.0 1.0X +ElementAt codegen - Hash Lookup 11 13 2 0.9 1122.6 1.1X + +OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=100, hit=0.5, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 12 12 2 0.9 1157.6 1.0X +GetMapValue interpreted - Hash Lookup 11 12 2 0.9 1079.0 1.1X +GetMapValue codegen - Linear Lookup 12 13 2 0.8 1192.9 1.0X +GetMapValue codegen - Hash Lookup 11 13 2 0.9 1115.4 1.0X +ElementAt interpreted - Linear Lookup 13 14 2 0.8 1274.4 0.9X +ElementAt interpreted - Hash Lookup 12 13 2 0.9 1151.2 1.0X +ElementAt codegen - Linear Lookup 11 12 2 0.9 1114.9 1.0X +ElementAt codegen - Hash Lookup 11 13 3 0.9 1063.8 1.1X + +OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=100, hit=0.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 12 13 2 0.8 1178.5 1.0X +GetMapValue interpreted - Hash Lookup 11 13 2 0.9 1120.2 1.1X +GetMapValue codegen - Linear Lookup 11 13 2 0.9 1136.8 1.0X 
+GetMapValue codegen - Hash Lookup 11 12 2 0.9 1090.8 1.1X +ElementAt interpreted - Linear Lookup 12 14 2 0.8 1208.6 1.0X +ElementAt interpreted - Hash Lookup 11 12 2 0.9 1070.0 1.1X +ElementAt codegen - Linear Lookup 12 13 2 0.9 1167.2 1.0X +ElementAt codegen - Hash Lookup 11 12 2 0.9 1077.1 1.1X + +OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=10, hit=1.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------ +GetMapValue interpreted - Linear Lookup 11 13 2 0.9 1071.9 1.0X +GetMapValue interpreted - Hash Lookup 11 13 2 0.9 1123.4 1.0X +GetMapValue codegen - Linear Lookup 11 13 2 0.9 1130.3 0.9X +GetMapValue codegen - Hash Lookup 11 12 2 0.9 1102.7 1.0X +ElementAt interpreted - Linear Lookup 11 12 2 0.9 1075.3 1.0X +ElementAt interpreted - Hash Lookup 11 11 2 0.9 1059.3 1.0X +ElementAt codegen - Linear Lookup 10 12 2 1.0 1043.1 1.0X +ElementAt codegen - Hash Lookup 10 11 2 1.0 1037.0 1.0X + +OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=10, hit=0.5, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------ +GetMapValue interpreted - Linear Lookup 11 12 2 0.9 1070.1 1.0X +GetMapValue interpreted - Hash Lookup 11 13 2 0.9 1079.4 1.0X +GetMapValue codegen - Linear Lookup 10 12 2 1.0 1032.1 1.0X +GetMapValue codegen - Hash Lookup 11 13 2 0.9 1111.8 1.0X +ElementAt interpreted - Linear Lookup 11 12 2 0.9 1062.4 1.0X +ElementAt interpreted - Hash Lookup 11 13 2 0.9 1085.6 1.0X +ElementAt codegen - Linear Lookup 11 12 2 0.9 1076.0 1.0X +ElementAt codegen - Hash Lookup 10 12 2 1.0 1043.6 1.0X + +OpenJDK 64-Bit Server VM 
21.0.10+7-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=10, hit=0.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------ +GetMapValue interpreted - Linear Lookup 10 11 2 1.0 1043.5 1.0X +GetMapValue interpreted - Hash Lookup 10 12 2 1.0 1040.9 1.0X +GetMapValue codegen - Linear Lookup 10 12 2 1.0 1021.2 1.0X +GetMapValue codegen - Hash Lookup 10 12 2 1.0 1048.9 1.0X +ElementAt interpreted - Linear Lookup 11 12 2 0.9 1053.8 1.0X +ElementAt interpreted - Hash Lookup 10 12 2 1.0 1018.0 1.0X +ElementAt codegen - Linear Lookup 10 11 2 1.0 1047.0 1.0X +ElementAt codegen - Hash Lookup 10 12 2 1.0 1026.8 1.0X + +OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=1, hit=1.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +----------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 11 11 2 0.9 1060.5 1.0X +GetMapValue interpreted - Hash Lookup 11 12 2 0.9 1061.4 1.0X +GetMapValue codegen - Linear Lookup 10 11 2 1.0 1018.5 1.0X +GetMapValue codegen - Hash Lookup 10 12 2 1.0 1045.5 1.0X +ElementAt interpreted - Linear Lookup 10 11 2 1.0 1047.3 1.0X +ElementAt interpreted - Hash Lookup 11 12 2 1.0 1051.0 1.0X +ElementAt codegen - Linear Lookup 10 12 2 1.0 1029.1 1.0X +ElementAt codegen - Hash Lookup 10 11 2 1.0 1013.0 1.0X + +OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=1, hit=0.5, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +----------------------------------------------------------------------------------------------------------------------------- +GetMapValue 
interpreted - Linear Lookup 10 11 2 1.0 1004.8 1.0X +GetMapValue interpreted - Hash Lookup 10 11 2 1.0 997.9 1.0X +GetMapValue codegen - Linear Lookup 10 11 2 1.0 1002.3 1.0X +GetMapValue codegen - Hash Lookup 10 11 2 1.0 1028.3 1.0X +ElementAt interpreted - Linear Lookup 10 12 2 1.0 1039.1 1.0X +ElementAt interpreted - Hash Lookup 10 11 2 1.0 1008.8 1.0X +ElementAt codegen - Linear Lookup 10 11 2 1.0 984.1 1.0X +ElementAt codegen - Hash Lookup 10 11 2 1.0 982.7 1.0X + +OpenJDK 64-Bit Server VM 21.0.10+7-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=1, hit=0.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +----------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 10 10 1 1.0 984.1 1.0X +GetMapValue interpreted - Hash Lookup 10 11 2 1.0 991.0 1.0X +GetMapValue codegen - Linear Lookup 10 11 2 1.0 984.6 1.0X +GetMapValue codegen - Hash Lookup 10 11 2 1.0 985.1 1.0X +ElementAt interpreted - Linear Lookup 10 11 2 1.0 994.0 1.0X +ElementAt interpreted - Hash Lookup 10 11 2 1.0 988.4 1.0X +ElementAt codegen - Linear Lookup 10 11 2 1.0 969.3 1.0X +ElementAt codegen - Hash Lookup 10 11 2 1.0 979.9 1.0X + diff --git a/sql/core/benchmarks/MapLookupBenchmark-jdk25-results.txt b/sql/core/benchmarks/MapLookupBenchmark-jdk25-results.txt new file mode 100644 index 0000000000000..e663922616206 --- /dev/null +++ b/sql/core/benchmarks/MapLookupBenchmark-jdk25-results.txt @@ -0,0 +1,273 @@ +OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=1000000, hit=1.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +----------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 29613 30212 849 0.0 
2961282.8 1.0X +GetMapValue interpreted - Hash Lookup 1573 1720 127 0.0 157347.0 18.8X +GetMapValue codegen - Linear Lookup 5283 15168 8604 0.0 528254.8 5.6X +GetMapValue codegen - Hash Lookup 1803 2002 188 0.0 180305.1 16.4X +ElementAt interpreted - Linear Lookup 27158 27562 553 0.0 2715764.2 1.1X +ElementAt interpreted - Hash Lookup 1326 1574 307 0.0 132589.2 22.3X +ElementAt codegen - Linear Lookup 4953 15159 8850 0.0 495255.4 6.0X +ElementAt codegen - Hash Lookup 2006 2194 179 0.0 200622.1 14.8X + +OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=1000000, hit=0.5, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +----------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 29550 38070 7378 0.0 2955023.1 1.0X +GetMapValue interpreted - Hash Lookup 1581 1853 408 0.0 158062.3 18.7X +GetMapValue codegen - Linear Lookup 29686 30480 693 0.0 2968596.7 1.0X +GetMapValue codegen - Hash Lookup 1522 1748 255 0.0 152202.0 19.4X +ElementAt interpreted - Linear Lookup 10169 31677 18640 0.0 1016905.7 2.9X +ElementAt interpreted - Hash Lookup 1171 1584 366 0.0 117069.3 25.2X +ElementAt codegen - Linear Lookup 6115 11592 9203 0.0 611468.4 4.8X +ElementAt codegen - Hash Lookup 1569 1680 111 0.0 156930.4 18.8X + +OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=1000000, hit=0.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +----------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 13098 41457 24563 0.0 1309770.8 1.0X +GetMapValue interpreted - Hash Lookup 1455 1841 343 0.0 145480.8 9.0X +GetMapValue codegen - Linear Lookup 41874 43187 1877 0.0 
4187354.3 0.3X +GetMapValue codegen - Hash Lookup 2039 2242 205 0.0 203886.9 6.4X +ElementAt interpreted - Linear Lookup 56593 57729 1561 0.0 5659287.7 0.2X +ElementAt interpreted - Hash Lookup 1355 1518 158 0.0 135468.6 9.7X +ElementAt codegen - Linear Lookup 39751 40310 500 0.0 3975123.7 0.3X +ElementAt codegen - Hash Lookup 1691 1923 315 0.0 169138.7 7.7X + +OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=100000, hit=1.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +---------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 1246 1251 6 0.0 124601.4 1.0X +GetMapValue interpreted - Hash Lookup 113 122 14 0.1 11310.7 11.0X +GetMapValue codegen - Linear Lookup 1098 1105 10 0.0 109840.9 1.1X +GetMapValue codegen - Hash Lookup 144 155 18 0.1 14351.7 8.7X +ElementAt interpreted - Linear Lookup 1280 1292 11 0.0 128005.9 1.0X +ElementAt interpreted - Hash Lookup 113 121 11 0.1 11256.8 11.1X +ElementAt codegen - Linear Lookup 1097 1099 2 0.0 109718.9 1.1X +ElementAt codegen - Hash Lookup 145 157 18 0.1 14546.0 8.6X + +OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=100000, hit=0.5, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +---------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 1688 1708 21 0.0 168766.8 1.0X +GetMapValue interpreted - Hash Lookup 114 121 11 0.1 11359.8 14.9X +GetMapValue codegen - Linear Lookup 1570 1573 3 0.0 156970.3 1.1X +GetMapValue codegen - Hash Lookup 139 148 14 0.1 13905.3 12.1X +ElementAt interpreted - Linear Lookup 1684 1728 41 0.0 168375.7 1.0X +ElementAt interpreted - Hash Lookup 107 119 11 0.1 
10749.4 15.7X +ElementAt codegen - Linear Lookup 1572 1573 1 0.0 157227.0 1.1X +ElementAt codegen - Hash Lookup 139 150 16 0.1 13889.7 12.2X + +OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=100000, hit=0.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +---------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 1952 2099 127 0.0 195172.8 1.0X +GetMapValue interpreted - Hash Lookup 106 114 11 0.1 10608.9 18.4X +GetMapValue codegen - Linear Lookup 2039 2045 10 0.0 203905.7 1.0X +GetMapValue codegen - Hash Lookup 139 147 15 0.1 13855.6 14.1X +ElementAt interpreted - Linear Lookup 2133 2164 27 0.0 213265.3 0.9X +ElementAt interpreted - Hash Lookup 108 123 19 0.1 10843.7 18.0X +ElementAt codegen - Linear Lookup 2040 2051 15 0.0 203988.3 1.0X +ElementAt codegen - Hash Lookup 140 148 15 0.1 13975.4 14.0X + +OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=10000, hit=1.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +--------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 119 122 2 0.1 11896.7 1.0X +GetMapValue interpreted - Hash Lookup 25 28 4 0.4 2476.6 4.8X +GetMapValue codegen - Linear Lookup 124 126 3 0.1 12395.4 1.0X +GetMapValue codegen - Hash Lookup 26 30 5 0.4 2643.5 4.5X +ElementAt interpreted - Linear Lookup 116 121 3 0.1 11642.2 1.0X +ElementAt interpreted - Hash Lookup 23 27 5 0.4 2331.2 5.1X +ElementAt codegen - Linear Lookup 123 124 1 0.1 12277.5 1.0X +ElementAt codegen - Hash Lookup 25 27 2 0.4 2541.3 4.7X + +OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup 
(size=10000, hit=0.5, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +--------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 172 175 1 0.1 17236.9 1.0X +GetMapValue interpreted - Hash Lookup 22 24 2 0.5 2192.1 7.9X +GetMapValue codegen - Linear Lookup 169 170 2 0.1 16851.4 1.0X +GetMapValue codegen - Hash Lookup 24 27 5 0.4 2379.3 7.2X +ElementAt interpreted - Linear Lookup 171 175 2 0.1 17115.7 1.0X +ElementAt interpreted - Hash Lookup 22 24 4 0.5 2209.8 7.8X +ElementAt codegen - Linear Lookup 169 170 1 0.1 16880.9 1.0X +ElementAt codegen - Hash Lookup 25 27 4 0.4 2500.4 6.9X + +OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=10000, hit=0.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +--------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 225 230 3 0.0 22512.3 1.0X +GetMapValue interpreted - Hash Lookup 21 24 4 0.5 2143.0 10.5X +GetMapValue codegen - Linear Lookup 215 216 2 0.0 21453.3 1.0X +GetMapValue codegen - Hash Lookup 23 26 4 0.4 2336.9 9.6X +ElementAt interpreted - Linear Lookup 228 230 2 0.0 22799.4 1.0X +ElementAt interpreted - Hash Lookup 21 24 5 0.5 2100.4 10.7X +ElementAt codegen - Linear Lookup 214 215 2 0.0 21386.2 1.1X +ElementAt codegen - Hash Lookup 23 26 5 0.4 2328.5 9.7X + +OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=1000, hit=1.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +-------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 19 23 4 0.5 
1947.5 1.0X +GetMapValue interpreted - Hash Lookup 13 16 3 0.7 1346.7 1.4X +GetMapValue codegen - Linear Lookup 22 25 3 0.5 2211.2 0.9X +GetMapValue codegen - Hash Lookup 13 15 4 0.8 1257.3 1.5X +ElementAt interpreted - Linear Lookup 20 22 2 0.5 1985.4 1.0X +ElementAt interpreted - Hash Lookup 13 15 4 0.8 1297.9 1.5X +ElementAt codegen - Linear Lookup 23 25 3 0.4 2313.0 0.8X +ElementAt codegen - Hash Lookup 13 15 4 0.8 1285.5 1.5X + +OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=1000, hit=0.5, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +-------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 23 24 2 0.4 2250.7 1.0X +GetMapValue interpreted - Hash Lookup 12 15 3 0.8 1248.2 1.8X +GetMapValue codegen - Linear Lookup 27 29 3 0.4 2725.2 0.8X +GetMapValue codegen - Hash Lookup 13 14 3 0.8 1254.8 1.8X +ElementAt interpreted - Linear Lookup 22 24 4 0.5 2174.6 1.0X +ElementAt interpreted - Hash Lookup 13 15 2 0.8 1308.6 1.7X +ElementAt codegen - Linear Lookup 28 29 2 0.4 2755.9 0.8X +ElementAt codegen - Hash Lookup 13 14 2 0.8 1254.0 1.8X + +OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=1000, hit=0.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +-------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 24 26 2 0.4 2425.7 1.0X +GetMapValue interpreted - Hash Lookup 13 15 3 0.8 1264.2 1.9X +GetMapValue codegen - Linear Lookup 32 34 3 0.3 3150.3 0.8X +GetMapValue codegen - Hash Lookup 12 14 2 0.8 1246.0 1.9X +ElementAt interpreted - Linear Lookup 24 26 2 0.4 2425.3 1.0X +ElementAt interpreted - Hash Lookup 12 14 2 0.8 1224.1 2.0X 
+ElementAt codegen - Linear Lookup 31 33 2 0.3 3145.8 0.8X +ElementAt codegen - Hash Lookup 11 13 2 0.9 1143.9 2.1X + +OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=100, hit=1.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 12 13 2 0.9 1170.0 1.0X +GetMapValue interpreted - Hash Lookup 11 13 2 0.9 1116.3 1.0X +GetMapValue codegen - Linear Lookup 12 14 2 0.8 1217.9 1.0X +GetMapValue codegen - Hash Lookup 10 12 3 1.0 1022.6 1.1X +ElementAt interpreted - Linear Lookup 11 13 2 0.9 1145.3 1.0X +ElementAt interpreted - Hash Lookup 10 12 2 1.0 1041.4 1.1X +ElementAt codegen - Linear Lookup 11 13 2 0.9 1142.6 1.0X +ElementAt codegen - Hash Lookup 11 12 2 1.0 1050.4 1.1X + +OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=100, hit=0.5, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 11 12 2 0.9 1116.4 1.0X +GetMapValue interpreted - Hash Lookup 11 13 2 0.9 1101.3 1.0X +GetMapValue codegen - Linear Lookup 11 12 2 0.9 1118.0 1.0X +GetMapValue codegen - Hash Lookup 10 11 2 1.0 992.0 1.1X +ElementAt interpreted - Linear Lookup 11 13 2 0.9 1143.3 1.0X +ElementAt interpreted - Hash Lookup 10 12 2 1.0 1036.5 1.1X +ElementAt codegen - Linear Lookup 11 12 2 0.9 1110.7 1.0X +ElementAt codegen - Hash Lookup 10 11 2 1.0 981.6 1.1X + +OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=100, hit=0.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) 
Relative +------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 11 12 2 0.9 1133.4 1.0X +GetMapValue interpreted - Hash Lookup 10 11 2 1.0 998.9 1.1X +GetMapValue codegen - Linear Lookup 12 13 2 0.9 1163.8 1.0X +GetMapValue codegen - Hash Lookup 10 11 2 1.0 989.1 1.1X +ElementAt interpreted - Linear Lookup 11 12 2 0.9 1135.1 1.0X +ElementAt interpreted - Hash Lookup 10 11 2 1.0 996.4 1.1X +ElementAt codegen - Linear Lookup 11 13 2 0.9 1140.3 1.0X +ElementAt codegen - Hash Lookup 10 11 2 1.0 970.8 1.2X + +OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=10, hit=1.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------ +GetMapValue interpreted - Linear Lookup 10 11 2 1.0 994.1 1.0X +GetMapValue interpreted - Hash Lookup 10 11 2 1.0 990.1 1.0X +GetMapValue codegen - Linear Lookup 10 11 2 1.0 970.9 1.0X +GetMapValue codegen - Hash Lookup 10 11 2 1.0 958.9 1.0X +ElementAt interpreted - Linear Lookup 10 11 2 1.0 1011.1 1.0X +ElementAt interpreted - Hash Lookup 10 11 2 1.0 996.2 1.0X +ElementAt codegen - Linear Lookup 10 11 2 1.0 959.8 1.0X +ElementAt codegen - Hash Lookup 10 11 2 1.0 958.6 1.0X + +OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=10, hit=0.5, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------ +GetMapValue interpreted - Linear Lookup 10 11 2 1.0 981.1 1.0X +GetMapValue interpreted - Hash Lookup 10 11 2 1.0 977.1 1.0X +GetMapValue codegen - Linear Lookup 9 11 2 1.1 949.2 1.0X +GetMapValue 
codegen - Hash Lookup 10 11 2 1.0 959.6 1.0X +ElementAt interpreted - Linear Lookup 10 11 2 1.0 1010.3 1.0X +ElementAt interpreted - Hash Lookup 10 11 2 1.0 1018.2 1.0X +ElementAt codegen - Linear Lookup 10 11 2 1.0 972.2 1.0X +ElementAt codegen - Hash Lookup 10 11 2 1.0 998.7 1.0X + +OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=10, hit=0.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------ +GetMapValue interpreted - Linear Lookup 10 11 2 1.0 1022.4 1.0X +GetMapValue interpreted - Hash Lookup 10 12 2 1.0 992.3 1.0X +GetMapValue codegen - Linear Lookup 10 11 2 1.0 1013.1 1.0X +GetMapValue codegen - Hash Lookup 10 11 2 1.0 1001.9 1.0X +ElementAt interpreted - Linear Lookup 10 12 2 1.0 1045.1 1.0X +ElementAt interpreted - Hash Lookup 10 12 2 1.0 1041.5 1.0X +ElementAt codegen - Linear Lookup 10 11 2 1.0 994.5 1.0X +ElementAt codegen - Hash Lookup 10 12 2 1.0 996.8 1.0X + +OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=1, hit=1.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +----------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 10 12 2 1.0 1039.9 1.0X +GetMapValue interpreted - Hash Lookup 11 12 2 0.9 1055.8 1.0X +GetMapValue codegen - Linear Lookup 10 11 2 1.0 981.2 1.1X +GetMapValue codegen - Hash Lookup 10 12 2 1.0 1022.1 1.0X +ElementAt interpreted - Linear Lookup 10 11 2 1.0 1040.8 1.0X +ElementAt interpreted - Hash Lookup 10 13 3 1.0 1018.8 1.0X +ElementAt codegen - Linear Lookup 10 11 2 1.0 999.3 1.0X +ElementAt codegen - Hash Lookup 10 11 2 1.0 1022.8 1.0X + +OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 
6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=1, hit=0.5, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +----------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 10 11 2 1.0 964.3 1.0X +GetMapValue interpreted - Hash Lookup 10 11 2 1.0 974.7 1.0X +GetMapValue codegen - Linear Lookup 10 11 2 1.0 984.7 1.0X +GetMapValue codegen - Hash Lookup 10 11 2 1.0 981.5 1.0X +ElementAt interpreted - Linear Lookup 10 11 2 1.0 1015.1 0.9X +ElementAt interpreted - Hash Lookup 11 12 2 0.9 1057.9 0.9X +ElementAt codegen - Linear Lookup 10 11 2 1.0 991.9 1.0X +ElementAt codegen - Hash Lookup 10 11 2 1.0 995.4 1.0X + +OpenJDK 64-Bit Server VM 25.0.2+10-LTS on Linux 6.14.0-1017-azure +AMD EPYC 7763 64-Core Processor +MapLookup (size=1, hit=0.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +----------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 10 12 2 1.0 1018.2 1.0X +GetMapValue interpreted - Hash Lookup 10 11 2 1.0 962.3 1.1X +GetMapValue codegen - Linear Lookup 9 10 2 1.1 919.6 1.1X +GetMapValue codegen - Hash Lookup 9 10 2 1.1 912.6 1.1X +ElementAt interpreted - Linear Lookup 9 11 2 1.1 945.9 1.1X +ElementAt interpreted - Hash Lookup 9 11 2 1.1 944.0 1.1X +ElementAt codegen - Linear Lookup 9 11 2 1.1 923.1 1.1X +ElementAt codegen - Hash Lookup 9 11 2 1.1 931.4 1.1X + diff --git a/sql/core/benchmarks/MapLookupBenchmark-results.txt b/sql/core/benchmarks/MapLookupBenchmark-results.txt new file mode 100644 index 0000000000000..f3c71d5f943e1 --- /dev/null +++ b/sql/core/benchmarks/MapLookupBenchmark-results.txt @@ -0,0 +1,273 @@ +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +MapLookup (size=1000000, 
hit=1.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +----------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 22624 36868 12336 0.0 2262398.9 1.0X +GetMapValue interpreted - Hash Lookup 1693 1789 142 0.0 169332.4 13.4X +GetMapValue codegen - Linear Lookup 8798 29525 17951 0.0 879755.0 2.6X +GetMapValue codegen - Hash Lookup 2342 2482 147 0.0 234180.1 9.7X +ElementAt interpreted - Linear Lookup 7806 20753 20310 0.0 780558.3 2.9X +ElementAt interpreted - Hash Lookup 1807 1837 33 0.0 180659.2 12.5X +ElementAt codegen - Linear Lookup 7000 15056 13043 0.0 699956.1 3.2X +ElementAt codegen - Hash Lookup 2523 2619 114 0.0 252261.9 9.0X + +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +MapLookup (size=1000000, hit=0.5, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +----------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 11143 47997 31917 0.0 1114253.0 1.0X +GetMapValue interpreted - Hash Lookup 1528 1804 268 0.0 152839.8 7.3X +GetMapValue codegen - Linear Lookup 8654 21962 22963 0.0 865436.7 1.3X +GetMapValue codegen - Hash Lookup 2398 2532 117 0.0 239815.2 4.6X +ElementAt interpreted - Linear Lookup 66164 66399 205 0.0 6616373.1 0.2X +ElementAt interpreted - Hash Lookup 1521 1787 231 0.0 152062.6 7.3X +ElementAt codegen - Linear Lookup 8854 35996 23610 0.0 885415.4 1.3X +ElementAt codegen - Hash Lookup 2490 2536 73 0.0 248981.7 4.5X + +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +MapLookup (size=1000000, hit=0.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
+----------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 88708 88898 270 0.0 8870788.8 1.0X +GetMapValue interpreted - Hash Lookup 1415 1557 223 0.0 141535.3 62.7X +GetMapValue codegen - Linear Lookup 11692 38390 27852 0.0 1169181.6 7.6X +GetMapValue codegen - Hash Lookup 2156 2306 170 0.0 215553.5 41.2X +ElementAt interpreted - Linear Lookup 89030 89097 114 0.0 8903010.9 1.0X +ElementAt interpreted - Hash Lookup 1501 1711 250 0.0 150081.7 59.1X +ElementAt codegen - Linear Lookup 67459 68732 1176 0.0 6745912.0 1.3X +ElementAt codegen - Hash Lookup 2479 2525 42 0.0 247904.1 35.8X + +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +MapLookup (size=100000, hit=1.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +---------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 1997 2258 227 0.0 199662.0 1.0X +GetMapValue interpreted - Hash Lookup 129 156 15 0.1 12896.4 15.5X +GetMapValue codegen - Linear Lookup 1783 1787 4 0.0 178315.4 1.1X +GetMapValue codegen - Hash Lookup 166 173 5 0.1 16604.0 12.0X +ElementAt interpreted - Linear Lookup 1973 1976 4 0.0 197333.1 1.0X +ElementAt interpreted - Hash Lookup 131 137 8 0.1 13079.7 15.3X +ElementAt codegen - Linear Lookup 1778 1784 5 0.0 177842.6 1.1X +ElementAt codegen - Hash Lookup 168 174 7 0.1 16823.4 11.9X + +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +MapLookup (size=100000, hit=0.5, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +---------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - 
Linear Lookup 2652 3109 396 0.0 265207.1 1.0X +GetMapValue interpreted - Hash Lookup 127 133 5 0.1 12725.6 20.8X +GetMapValue codegen - Linear Lookup 2202 2207 7 0.0 220180.4 1.2X +GetMapValue codegen - Hash Lookup 166 175 11 0.1 16576.3 16.0X +ElementAt interpreted - Linear Lookup 3331 3340 14 0.0 333090.9 0.8X +ElementAt interpreted - Hash Lookup 130 135 5 0.1 13035.5 20.3X +ElementAt codegen - Linear Lookup 2206 2211 5 0.0 220605.4 1.2X +ElementAt codegen - Hash Lookup 166 171 5 0.1 16619.1 16.0X + +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +MapLookup (size=100000, hit=0.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +---------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 4366 4370 5 0.0 436594.1 1.0X +GetMapValue interpreted - Hash Lookup 119 127 6 0.1 11904.2 36.7X +GetMapValue codegen - Linear Lookup 2880 2886 7 0.0 288000.1 1.5X +GetMapValue codegen - Hash Lookup 153 157 4 0.1 15253.2 28.6X +ElementAt interpreted - Linear Lookup 3417 3419 2 0.0 341703.2 1.3X +ElementAt interpreted - Hash Lookup 120 128 17 0.1 11961.5 36.5X +ElementAt codegen - Linear Lookup 3431 3433 2 0.0 343087.8 1.3X +ElementAt codegen - Hash Lookup 152 159 7 0.1 15183.9 28.8X + +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +MapLookup (size=10000, hit=1.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +--------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 161 162 1 0.1 16111.0 1.0X +GetMapValue interpreted - Hash Lookup 32 35 3 0.3 3233.5 5.0X +GetMapValue codegen - Linear Lookup 81 83 2 0.1 8099.3 2.0X +GetMapValue codegen - Hash Lookup 35 
38 4 0.3 3530.7 4.6X +ElementAt interpreted - Linear Lookup 160 163 3 0.1 16031.2 1.0X +ElementAt interpreted - Hash Lookup 32 35 3 0.3 3233.5 5.0X +ElementAt codegen - Linear Lookup 81 84 6 0.1 8058.3 2.0X +ElementAt codegen - Hash Lookup 35 37 2 0.3 3460.9 4.7X + +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +MapLookup (size=10000, hit=0.5, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +--------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 211 213 2 0.0 21128.4 1.0X +GetMapValue interpreted - Hash Lookup 31 32 2 0.3 3054.4 6.9X +GetMapValue codegen - Linear Lookup 102 103 1 0.1 10195.1 2.1X +GetMapValue codegen - Hash Lookup 33 35 3 0.3 3300.5 6.4X +ElementAt interpreted - Linear Lookup 211 212 1 0.0 21074.5 1.0X +ElementAt interpreted - Hash Lookup 30 32 2 0.3 3040.3 6.9X +ElementAt codegen - Linear Lookup 102 104 3 0.1 10176.9 2.1X +ElementAt codegen - Hash Lookup 33 35 3 0.3 3267.9 6.5X + +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +MapLookup (size=10000, hit=0.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +--------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 262 263 0 0.0 26217.5 1.0X +GetMapValue interpreted - Hash Lookup 30 32 3 0.3 2983.8 8.8X +GetMapValue codegen - Linear Lookup 123 125 3 0.1 12330.7 2.1X +GetMapValue codegen - Hash Lookup 32 34 2 0.3 3193.2 8.2X +ElementAt interpreted - Linear Lookup 262 264 5 0.0 26206.2 1.0X +ElementAt interpreted - Hash Lookup 29 31 2 0.3 2900.4 9.0X +ElementAt codegen - Linear Lookup 123 125 2 0.1 12281.0 2.1X +ElementAt codegen - Hash Lookup 31 33 3 0.3 3146.4 8.3X + 
+OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +MapLookup (size=1000, hit=1.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +-------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 34 35 2 0.3 3356.9 1.0X +GetMapValue interpreted - Hash Lookup 21 23 2 0.5 2093.5 1.6X +GetMapValue codegen - Linear Lookup 26 27 2 0.4 2568.5 1.3X +GetMapValue codegen - Hash Lookup 21 23 2 0.5 2088.6 1.6X +ElementAt interpreted - Linear Lookup 33 35 1 0.3 3309.3 1.0X +ElementAt interpreted - Hash Lookup 21 22 1 0.5 2094.5 1.6X +ElementAt codegen - Linear Lookup 26 27 2 0.4 2588.7 1.3X +ElementAt codegen - Hash Lookup 21 22 1 0.5 2087.0 1.6X + +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +MapLookup (size=1000, hit=0.5, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +-------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 39 41 2 0.3 3894.9 1.0X +GetMapValue interpreted - Hash Lookup 20 22 2 0.5 2016.7 1.9X +GetMapValue codegen - Linear Lookup 28 29 2 0.4 2753.7 1.4X +GetMapValue codegen - Hash Lookup 21 22 1 0.5 2069.5 1.9X +ElementAt interpreted - Linear Lookup 39 40 1 0.3 3890.8 1.0X +ElementAt interpreted - Hash Lookup 20 22 2 0.5 2028.5 1.9X +ElementAt codegen - Linear Lookup 28 29 2 0.4 2777.9 1.4X +ElementAt codegen - Hash Lookup 20 21 1 0.5 2038.8 1.9X + +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +MapLookup (size=1000, hit=0.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
+-------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 44 45 1 0.2 4419.0 1.0X +GetMapValue interpreted - Hash Lookup 20 22 2 0.5 1987.5 2.2X +GetMapValue codegen - Linear Lookup 30 31 1 0.3 2958.1 1.5X +GetMapValue codegen - Hash Lookup 20 22 3 0.5 2019.9 2.2X +ElementAt interpreted - Linear Lookup 44 46 3 0.2 4386.5 1.0X +ElementAt interpreted - Hash Lookup 20 22 2 0.5 1969.9 2.2X +ElementAt codegen - Linear Lookup 30 32 3 0.3 3004.8 1.5X +ElementAt codegen - Hash Lookup 20 22 1 0.5 2032.5 2.2X + +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +MapLookup (size=100, hit=1.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 20 22 2 0.5 2019.1 1.0X +GetMapValue interpreted - Hash Lookup 19 21 2 0.5 1914.3 1.1X +GetMapValue codegen - Linear Lookup 20 22 2 0.5 1971.4 1.0X +GetMapValue codegen - Hash Lookup 19 21 2 0.5 1940.3 1.0X +ElementAt interpreted - Linear Lookup 20 22 2 0.5 2033.9 1.0X +ElementAt interpreted - Hash Lookup 19 20 2 0.5 1906.2 1.1X +ElementAt codegen - Linear Lookup 19 20 1 0.5 1901.1 1.1X +ElementAt codegen - Hash Lookup 19 20 2 0.5 1881.5 1.1X + +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +MapLookup (size=100, hit=0.5, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 20 22 2 0.5 2038.5 1.0X +GetMapValue interpreted - Hash Lookup 18 20 2 0.5 1849.4 1.1X +GetMapValue codegen - Linear Lookup 19 
21 2 0.5 1920.8 1.1X +GetMapValue codegen - Hash Lookup 19 20 1 0.5 1855.1 1.1X +ElementAt interpreted - Linear Lookup 20 22 2 0.5 2040.9 1.0X +ElementAt interpreted - Hash Lookup 19 20 1 0.5 1916.7 1.1X +ElementAt codegen - Linear Lookup 19 21 2 0.5 1917.0 1.1X +ElementAt codegen - Hash Lookup 19 20 1 0.5 1875.5 1.1X + +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +MapLookup (size=100, hit=0.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 21 22 1 0.5 2121.5 1.0X +GetMapValue interpreted - Hash Lookup 18 20 1 0.5 1839.0 1.2X +GetMapValue codegen - Linear Lookup 19 20 1 0.5 1923.0 1.1X +GetMapValue codegen - Hash Lookup 18 20 1 0.5 1840.3 1.2X +ElementAt interpreted - Linear Lookup 21 22 1 0.5 2086.0 1.0X +ElementAt interpreted - Hash Lookup 19 20 1 0.5 1871.2 1.1X +ElementAt codegen - Linear Lookup 19 21 1 0.5 1913.4 1.1X +ElementAt codegen - Hash Lookup 18 19 1 0.5 1840.8 1.2X + +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +MapLookup (size=10, hit=1.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------ +GetMapValue interpreted - Linear Lookup 19 20 1 0.5 1875.0 1.0X +GetMapValue interpreted - Hash Lookup 19 20 2 0.5 1865.2 1.0X +GetMapValue codegen - Linear Lookup 18 19 1 0.5 1843.4 1.0X +GetMapValue codegen - Hash Lookup 19 21 2 0.5 1891.7 1.0X +ElementAt interpreted - Linear Lookup 19 20 1 0.5 1866.3 1.0X +ElementAt interpreted - Hash Lookup 19 20 1 0.5 1864.8 1.0X +ElementAt codegen - Linear Lookup 18 19 1 0.6 1812.8 1.0X +ElementAt codegen - Hash Lookup 18 
19 1 0.5 1847.9 1.0X + +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +MapLookup (size=10, hit=0.5, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------ +GetMapValue interpreted - Linear Lookup 18 19 1 0.6 1789.4 1.0X +GetMapValue interpreted - Hash Lookup 18 19 1 0.5 1818.3 1.0X +GetMapValue codegen - Linear Lookup 18 19 1 0.5 1820.4 1.0X +GetMapValue codegen - Hash Lookup 18 19 2 0.6 1812.6 1.0X +ElementAt interpreted - Linear Lookup 18 19 1 0.5 1831.7 1.0X +ElementAt interpreted - Hash Lookup 19 19 1 0.5 1858.2 1.0X +ElementAt codegen - Linear Lookup 18 19 1 0.5 1820.1 1.0X +ElementAt codegen - Hash Lookup 18 19 1 0.5 1822.6 1.0X + +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +MapLookup (size=10, hit=0.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------ +GetMapValue interpreted - Linear Lookup 18 19 1 0.5 1834.2 1.0X +GetMapValue interpreted - Hash Lookup 18 19 1 0.5 1821.2 1.0X +GetMapValue codegen - Linear Lookup 18 19 1 0.5 1838.8 1.0X +GetMapValue codegen - Hash Lookup 18 19 1 0.6 1791.8 1.0X +ElementAt interpreted - Linear Lookup 18 19 1 0.6 1797.1 1.0X +ElementAt interpreted - Hash Lookup 18 19 1 0.6 1793.7 1.0X +ElementAt codegen - Linear Lookup 18 19 1 0.6 1803.3 1.0X +ElementAt codegen - Hash Lookup 18 19 1 0.6 1796.1 1.0X + +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +MapLookup (size=1, hit=1.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
+----------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 18 19 1 0.6 1794.5 1.0X +GetMapValue interpreted - Hash Lookup 18 19 1 0.6 1815.4 1.0X +GetMapValue codegen - Linear Lookup 18 19 1 0.6 1791.0 1.0X +GetMapValue codegen - Hash Lookup 18 19 1 0.6 1774.3 1.0X +ElementAt interpreted - Linear Lookup 18 19 1 0.6 1810.9 1.0X +ElementAt interpreted - Hash Lookup 18 19 1 0.5 1822.9 1.0X +ElementAt codegen - Linear Lookup 18 19 1 0.6 1775.3 1.0X +ElementAt codegen - Hash Lookup 18 19 1 0.6 1769.4 1.0X + +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +MapLookup (size=1, hit=0.5, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +----------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 18 19 1 0.6 1793.9 1.0X +GetMapValue interpreted - Hash Lookup 18 19 1 0.6 1793.4 1.0X +GetMapValue codegen - Linear Lookup 18 19 1 0.6 1765.9 1.0X +GetMapValue codegen - Hash Lookup 18 19 1 0.6 1788.2 1.0X +ElementAt interpreted - Linear Lookup 18 19 1 0.6 1772.9 1.0X +ElementAt interpreted - Hash Lookup 18 19 1 0.6 1790.7 1.0X +ElementAt codegen - Linear Lookup 18 19 1 0.6 1778.2 1.0X +ElementAt codegen - Hash Lookup 17 19 1 0.6 1743.9 1.0X + +OpenJDK 64-Bit Server VM 17.0.18+8-LTS on Linux 6.14.0-1017-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +MapLookup (size=1, hit=0.0, type=IntegerType): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +----------------------------------------------------------------------------------------------------------------------------- +GetMapValue interpreted - Linear Lookup 18 19 2 0.6 1798.7 1.0X +GetMapValue interpreted - Hash Lookup 18 19 1 0.6 1752.1 1.0X +GetMapValue codegen - Linear Lookup 18 18 1 0.6 
1755.0 1.0X
+GetMapValue codegen - Hash Lookup 17 19 2 0.6 1748.2 1.0X
+ElementAt interpreted - Linear Lookup 18 19 1 0.6 1764.6 1.0X
+ElementAt interpreted - Hash Lookup 18 19 1 0.6 1756.2 1.0X
+ElementAt codegen - Linear Lookup 18 19 1 0.6 1755.2 1.0X
+ElementAt codegen - Hash Lookup 17 19 1 0.6 1741.8 1.0X
+
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MapLookupBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MapLookupBenchmark.scala
new file mode 100644
index 0000000000000..e9ec876e1f603
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/MapLookupBenchmark.scala
@@ -0,0 +1,135 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.benchmark
+
+import org.apache.spark.benchmark.Benchmark
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation
+import org.apache.spark.sql.functions._
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types._
+
+/**
+ * Benchmark to measure performance of map lookup operations.
+ * To run this benchmark:
+ * {{{
+ *   1. without sbt:
+ *      bin/spark-submit --class <this class>
+ *        --jars <spark core test jar>,<spark catalyst test jar> <spark sql test jar>
+ *   2. build/sbt "sql/Test/runMain org.apache.spark.sql.execution.benchmark.MapLookupBenchmark"
+ *   3. generate result:
+ *      SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt \
+ *        "sql/Test/runMain org.apache.spark.sql.execution.benchmark.MapLookupBenchmark"
+ *      Results will be written to "benchmarks/MapLookupBenchmark-results.txt".
+ * }}}
+ */
+object MapLookupBenchmark extends SqlBasedBenchmark {
+  /** Iteration count handed to every [[Benchmark]] created by this suite. */
+  private val NUMBER_OF_ITER = 3
+
+  /** Builds a `local[1]` session with 6g driver and executor memory settings. */
+  override def getSparkSession: SparkSession = {
+    SparkSession.builder()
+      .master("local[1]")
+      .appName("MapLookupBenchmark")
+      .config("spark.driver.memory", "6g")
+      .config("spark.executor.memory", "6g")
+      .getOrCreate()
+  }
+
+  /**
+   * Registers and runs the eight lookup cases for one map size / hit rate combination:
+   * {GetMapValue, ElementAt} x {interpreted, codegen} x {linear, hash}.
+   *
+   * @param mapSize number of map entries; keys are the ints `0 until mapSize` and each
+   *                value is the decimal string of its key
+   * @param hitRate fraction of the lookup rows whose key exists in the map; all other
+   *                rows look up `-1`, which is never a key
+   * @param keyType only printed in the benchmark title; the generated keys are always ints
+   */
+  private def run(
+      mapSize: Int,
+      hitRate: Double,
+      keyType: DataType): Unit = {
+    val numRows = 10000
+
+    val benchmark = new Benchmark(
+      s"MapLookup (size=$mapSize, hit=$hitRate, type=$keyType)",
+      numRows,
+      NUMBER_OF_ITER,
+      output = output)
+
+    import spark.implicits._
+
+    // Every input row carries the same map literal in column 'm' (built with `typedLit`)
+    // and the key to look up in column 'key'.
+    val keys = (0 until mapSize).toArray
+    val map = keys.zip(keys.map(_.toString)).toMap
+    val mapCol = typedLit(map)
+
+    // The first `numRows * hitRate` rows cycle through existing keys; the rest miss.
+    val lookupKeys = (0 until numRows).map { i =>
+      if (i < numRows * hitRate) keys(i % mapSize) else -1
+    }
+
+    val lookupDf = lookupKeys.toDF("key").select(mapCol.as("m"), $"key")
+
+    // Cases are registered in the nested order of the three axes below and are named
+    // "<lookup> <mode> - <strategy>".
+    val lookups = Seq(
+      "GetMapValue" -> col("m").getItem(col("key")),
+      "ElementAt" -> element_at(col("m"), col("key")))
+    // (label, WHOLESTAGE_CODEGEN_ENABLED, CODEGEN_FACTORY_MODE)
+    val executionModes = Seq(
+      ("interpreted", "false", "NO_CODEGEN"),
+      ("codegen", "true", "CODEGEN_ONLY"))
+    // (label, MAP_LOOKUP_HASH_THRESHOLD): Int.MaxValue forces the linear scan,
+    // 0 forces the hash lookup.
+    val strategies = Seq(
+      "Linear Lookup" -> Int.MaxValue.toString,
+      "Hash Lookup" -> "0")
+
+    for {
+      (lookupName, lookupExpr) <- lookups
+      (modeName, wholeStage, factoryMode) <- executionModes
+      (strategyName, hashThreshold) <- strategies
+    } {
+      benchmark.addCase(s"$lookupName $modeName - $strategyName") { _ =>
+        withSQLConf(
+          SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> wholeStage,
+          SQLConf.CODEGEN_FACTORY_MODE.key -> factoryMode,
+          SQLConf.MAP_LOOKUP_HASH_THRESHOLD.key -> hashThreshold,
+          SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> ConvertToLocalRelation.ruleName) {
+          lookupDf.select(lookupExpr).noop()
+        }
+      }
+    }
+
+    benchmark.run()
+    System.gc()
+  }
+
+  /** Runs all three hit rates (1.0, 0.5, 0.0) for each map size, largest size first. */
+  override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
+    val sizes = Seq(1000000, 100000, 10000, 1000, 100, 10, 1)
+    val hitRates = Seq(1.0, 0.5, 0.0)
+    for (size <- sizes; hitRate <- hitRates) {
+      run(size, hitRate, IntegerType)
+    }
+  }
+}