diff --git a/payloadbuilder-api/src/main/java/se/kuseman/payloadbuilder/api/execution/UTF8String.java b/payloadbuilder-api/src/main/java/se/kuseman/payloadbuilder/api/execution/UTF8String.java index fe7adc221..51308e846 100644 --- a/payloadbuilder-api/src/main/java/se/kuseman/payloadbuilder/api/execution/UTF8String.java +++ b/payloadbuilder-api/src/main/java/se/kuseman/payloadbuilder/api/execution/UTF8String.java @@ -2,8 +2,6 @@ import static java.util.Objects.requireNonNull; -import java.nio.ByteBuffer; -import java.nio.CharBuffer; import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.List; @@ -11,10 +9,10 @@ import se.kuseman.payloadbuilder.api.catalog.ResolvedType; /** - * A bytes reference used for data types that supports operations directly on under laying byte structures like Strings etc. NOTE! {@link ValueVector} is implemented here to let a single string become - * a literal value vector of it self to avoid creating a literal + * A string implementation that works directly on an underlying byte-array without realizing a java.lang.String. Equals/hash/comparison/concats etc. can be performed on byte level. Instances can be + * created from both UTF8 bytes as well as Latin1 (ISO_8859_1) encoded byte arrays. A java.lang.String can also be used to create an instance, in which case all operations are performed via java.lang.String.
*/ -public class UTF8String implements Comparable, ValueVector +public class UTF8String implements Comparable, ValueVector, CharSequence { private static final ThreadLocal BUILDER = new ThreadLocal<>(); @@ -30,38 +28,44 @@ public class UTF8String implements Comparable, ValueVector private static final int START = 17; private static final int CONSTANT = 37; + private final boolean latin1; private String string; - + private int charLength = -1; private boolean hashIsZero; private int hash; private byte[] bytes; private int offset; - private int length; + private int byteLength; private UTF8String(String string) { + requireNonNull(string); this.string = string; + this.charLength = string.length(); + this.latin1 = false; } - private UTF8String(byte[] bytes, int offset, int length) + private UTF8String(byte[] bytes, int offset, int length, boolean latin1) { + requireNonNull(bytes); this.bytes = requireNonNull(bytes, "bytes"); this.offset = offset; - this.length = length; + this.byteLength = length; + this.latin1 = latin1; } /** Return a copy of the underlying utf8 bytes for this string */ public byte[] getBytes() { getBytesInternal(); - return Arrays.copyOfRange(bytes, offset, offset + length); + return Arrays.copyOfRange(bytes, offset, offset + byteLength); } /** Return the bytes of this instance into destination byte array. 
Caller is responsible for correct length */ public void getBytes(byte[] destination) { getBytesInternal(); - System.arraycopy(this.bytes, offset, destination, 0, length); + System.arraycopy(this.bytes, offset, destination, 0, byteLength); } // ValueVector @@ -113,28 +117,192 @@ public int compareTo(UTF8String that) return string.compareTo(that.string); } - // UTF8 can be compared lexicographically by unsigned byte comparison - byte[] thisBytes = getBytesInternal(); - int thisOffset = this.offset; - byte[] thatBytes = that.getBytesInternal(); - int thatOffset = that.offset; + // Same encoding => compare bytes + if (latin1 == that.latin1) + { + // UTF8 can be compared lexicographically by unsigned byte comparison + byte[] thisBytes = getBytesInternal(); + int thisOffset = this.offset; + byte[] thatBytes = that.getBytesInternal(); + int thatOffset = that.offset; + + int size = thisOffset + Math.min(this.byteLength, that.byteLength); - int size = thisOffset + Math.min(this.length, that.length); + while (thisOffset < size) + { + int a = thisBytes[thisOffset++] & 0xff; + int b = thatBytes[thatOffset++] & 0xff; + int diff = a - b; + if (diff != 0) + { + return diff; + } + } - while (thisOffset < size) + return this.byteLength - that.byteLength; + } + + int length = length(); + int thatLength = that.length(); + // Else compare using charAt + int size = Math.min(length, thatLength); + for (int i = 0; i < size; i++) { - int a = thisBytes[thisOffset++] & 0xff; - int b = thatBytes[thatOffset++] & 0xff; + char a = charAt(i); + char b = that.charAt(i); int diff = a - b; if (diff != 0) { return diff; } } + return length - thatLength; + } + + // CharSequence + + @Override + public int length() + { + if (charLength >= 0) + { + return charLength; + } + + if (latin1) + { + charLength = byteLength; + return charLength; + } + + // Count UTF8 chars + int count = 0; + int i = offset; + int end = offset + byteLength; + + while (i < end) + { + int b = bytes[i] & 0xFF; + + if (b < 0x80) + { 
+ i += 1; + count += 1; + } + else if ((b >> 5) == 0b110) + { + i += 2; + count += 1; + } + else if ((b >> 4) == 0b1110) + { + i += 3; + count += 1; + } + else if ((b >> 3) == 0b11110) + { + i += 4; + count += 2; + } + else + { + i += 1; + count += 1; + } + } - return this.length - that.length; + charLength = count; + return count; } + @Override + public char charAt(int index) + { + if (index < 0) + throw new IndexOutOfBoundsException(); + + if (string != null) + { + return string.charAt(index); + } + + if (latin1) + { + if (index >= byteLength) + throw new IndexOutOfBoundsException(); + return (char) (bytes[offset + index] & 0xFF); + } + + // Find UTF8 char + int i = offset; + int end = offset + byteLength; + int charPos = 0; + + while (i < end) + { + int b = bytes[i] & 0xFF; + int codePoint; + int byteCount; + + if (b < 0x80) + { + codePoint = b; + byteCount = 1; + } + else if ((b >> 5) == 0b110) + { + codePoint = ((b & 0x1F) << 6) | (bytes[i + 1] & 0x3F); + byteCount = 2; + } + else if ((b >> 4) == 0b1110) + { + codePoint = ((b & 0x0F) << 12) | ((bytes[i + 1] & 0x3F) << 6) | (bytes[i + 2] & 0x3F); + byteCount = 3; + } + else if ((b >> 3) == 0b11110) + { + codePoint = ((b & 0x07) << 18) | ((bytes[i + 1] & 0x3F) << 12) | ((bytes[i + 2] & 0x3F) << 6) | (bytes[i + 3] & 0x3F); + byteCount = 4; + } + else + { + codePoint = b; + byteCount = 1; + } + + if (codePoint <= 0xFFFF) + { + if (charPos == index) + return (char) codePoint; + charPos++; + } + else + { + int cpPrime = codePoint - 0x10000; + char high = (char) ((cpPrime >> 10) + 0xD800); + char low = (char) ((cpPrime & 0x3FF) + 0xDC00); + + if (charPos == index) + return high; + if (charPos + 1 == index) + return low; + charPos += 2; + } + + i += byteCount; + } + + throw new IndexOutOfBoundsException(); + } + + @Override + public CharSequence subSequence(int start, int end) + { + return toString().subSequence(start, end); + } + + // End Of CharSequence + @Override public int hashCode() { @@ -147,7 +315,7 @@ public 
int hashCode() && !hashIsZero) { result = START; - int end = offset + length; + int end = offset + byteLength; for (int i = offset; i < end; i++) { result = result * CONSTANT + bytes[i]; @@ -185,20 +353,41 @@ else if (obj instanceof UTF8String that) return string.equals(that.string); } - byte[] bytes1 = getBytesInternal(); - byte[] bytes2 = that.getBytesInternal(); - - if (length != that.length) + // Same encoding => compare bytes + if (latin1 == that.latin1) { - return false; - } + byte[] bytes1 = getBytesInternal(); + byte[] bytes2 = that.getBytesInternal(); + + if (byteLength != that.byteLength) + { + return false; + } - for (int i = 0; i < length; i++) + for (int i = 0; i < byteLength; i++) + { + if (bytes1[offset + i] != bytes2[that.offset + i]) + { + return false; + } + } + } + // Else compare using charAt + else { - if (bytes1[offset + i] != bytes2[that.offset + i]) + int length = this.length(); + if (length != that.length()) { return false; } + + for (int i = 0; i < length; i++) + { + if (charAt(i) != that.charAt(i)) + { + return false; + } + } } return true; } @@ -211,11 +400,19 @@ public boolean hasString() return string != null; } + /** + * Returns true if this string is latin1 encoded. NOTE! Only applicable if {@link #hasString()} is false, then the string is encoded in whatever the java.lang.String is encoded with. + */ + public boolean isLatin1() + { + return latin1; + } + /** Return the byte length of this instance. 
*/ public int getByteLength() { getBytesInternal(); - return length; + return byteLength; } private byte[] getBytesInternal() @@ -227,7 +424,7 @@ private byte[] getBytesInternal() else if (string != null) { bytes = string.getBytes(StandardCharsets.UTF_8); - length = bytes.length; + byteLength = bytes.length; offset = 0; } return bytes; @@ -251,7 +448,14 @@ public String toString() return string; } - string = new String(bytes, offset, length, StandardCharsets.UTF_8); + if (latin1) + { + string = new String(bytes, offset, byteLength, StandardCharsets.ISO_8859_1); + } + else + { + string = new String(bytes, offset, byteLength, StandardCharsets.UTF_8); + } return string; } @@ -306,14 +510,14 @@ else if (strings.size() == 1) for (int i = 0; i < count; i++) { UTF8String str = strings.get(i); - System.arraycopy(str.getBytesInternal(), str.offset, bytes, offset, str.length); - offset += str.length; + System.arraycopy(str.getBytesInternal(), str.offset, bytes, offset, str.byteLength); + offset += str.byteLength; // Don't add a last delimiter if (i < count - 1 - && delimeter.length > 0) + && delimeter.byteLength > 0) { - System.arraycopy(delimeter.getBytesInternal(), delimeter.offset, bytes, offset, delimeter.length); - offset += delimeter.length; + System.arraycopy(delimeter.getBytesInternal(), delimeter.offset, bytes, offset, delimeter.byteLength); + offset += delimeter.byteLength; } } @@ -352,6 +556,7 @@ private static StringBuilder getBuilder() */ public static UTF8String from(Object object) { + requireNonNull(object); if (object instanceof Boolean) { return ((Boolean) object).booleanValue() ? 
TRUE @@ -363,8 +568,8 @@ else if (object instanceof UTF8String utf8s) } else if (object instanceof byte[] bytes) { - // Assume utf8 bytes - return new UTF8String(bytes, 0, bytes.length); + boolean latin1 = detectLatin1(bytes, 0, bytes.length); + return new UTF8String(bytes, 0, bytes.length, latin1); } return from(String.valueOf(object)); } @@ -385,9 +590,10 @@ public static UTF8String utf8(byte[] bytes) return utf8(bytes, 0, bytes.length); } + /** Constructs a string from utf8 bytes. */ public static UTF8String utf8(byte[] bytes, int offset, int length) { - return new UTF8String(bytes, offset, length); + return new UTF8String(bytes, offset, length, false); } public static UTF8String latin(byte[] bytes) @@ -396,13 +602,53 @@ public static UTF8String latin(byte[] bytes) } /** - * Create a utf8 string from latin encoded bytes. NOTE! Recommended usage is utf8 since this method allocates some when converting bytes. + * Create a string backed directly by latin1 (ISO_8859_1) encoded bytes, without converting them to utf8. NOTE! This assumes all bytes are latin1 encoded and does not check if that holds true. */ public static UTF8String latin(byte[] bytes, int offset, int length) { - ByteBuffer buffer = ByteBuffer.wrap(bytes, offset, length); - CharBuffer charBuffer = StandardCharsets.ISO_8859_1.decode(buffer); - ByteBuffer encoded = StandardCharsets.UTF_8.encode(charBuffer); - return utf8(encoded.array(), 0, encoded.limit()); + return new UTF8String(bytes, offset, length, true); + } + + /** Detects if provided bytes are all latin1 encoded. 
*/ + public static boolean detectLatin1(byte[] bytes, int offset, int length) + { + int end = offset + length; + int i = offset; + + while (i < end) + { + int b = bytes[i] & 0xFF; + + if (b < 0x80) + { + i++; + } + else if ((b >> 5) == 0b110 + && i + 1 < end + && (bytes[i + 1] & 0xC0) == 0x80) + { + return false; + } + else if ((b >> 4) == 0b1110 + && i + 2 < end + && (bytes[i + 1] & 0xC0) == 0x80 + && (bytes[i + 2] & 0xC0) == 0x80) + { + return false; + } + else if ((b >> 3) == 0b11110 + && i + 3 < end + && (bytes[i + 1] & 0xC0) == 0x80 + && (bytes[i + 2] & 0xC0) == 0x80 + && (bytes[i + 3] & 0xC0) == 0x80) + { + return false; + } + else + { + i++; + } + } + return true; } } diff --git a/payloadbuilder-api/src/main/java/se/kuseman/payloadbuilder/api/execution/ValueVector.java b/payloadbuilder-api/src/main/java/se/kuseman/payloadbuilder/api/execution/ValueVector.java index c640e9163..9da760f02 100644 --- a/payloadbuilder-api/src/main/java/se/kuseman/payloadbuilder/api/execution/ValueVector.java +++ b/payloadbuilder-api/src/main/java/se/kuseman/payloadbuilder/api/execution/ValueVector.java @@ -69,6 +69,7 @@ else if (type == Type.Long) { return UTF8String.from(getInt(row)); } + return UTF8String.from(getAny(row)); } @@ -459,7 +460,7 @@ default Object valueAsObject(int row) } } - /** Return value as Java string for provided row */ + /** Return value as Java string for provided row. Null is returned when value is null. 
*/ default String valueAsString(int row) { Object value = valueAsObject(row); diff --git a/payloadbuilder-api/src/test/java/se/kuseman/payloadbuilder/api/execution/UTF8StringTest.java b/payloadbuilder-api/src/test/java/se/kuseman/payloadbuilder/api/execution/UTF8StringTest.java new file mode 100644 index 000000000..36ed67e85 --- /dev/null +++ b/payloadbuilder-api/src/test/java/se/kuseman/payloadbuilder/api/execution/UTF8StringTest.java @@ -0,0 +1,147 @@ +package se.kuseman.payloadbuilder.api.execution; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.nio.charset.StandardCharsets; + +import org.junit.jupiter.api.Test; + +class UTF8StringTest +{ + @Test + void test_hash_code_equals_between_encodings() + { + UTF8String str1 = UTF8String.utf8("three".getBytes(StandardCharsets.UTF_8)); + UTF8String str2 = UTF8String.latin("three".getBytes(StandardCharsets.ISO_8859_1)); + assertEquals(str1.hashCode(), str2.hashCode()); + } + + @Test + void test_charSequence_String() + { + // String + UTF8String str = UTF8String.from("three"); + assertFalse(str.isLatin1()); + assertEquals(5, str.length()); + assertEquals(5, str.length()); + assertThrows(IndexOutOfBoundsException.class, () -> str.charAt(-1)); + assertThrows(IndexOutOfBoundsException.class, () -> str.charAt(5)); + assertThrows(IndexOutOfBoundsException.class, () -> str.subSequence(3, 2)); + assertThrows(IndexOutOfBoundsException.class, () -> str.subSequence(-1, 2)); + assertThrows(IndexOutOfBoundsException.class, () -> str.subSequence(0, 6)); + assertEquals('t', str.charAt(0)); + assertEquals('h', str.charAt(1)); + assertEquals('r', str.charAt(2)); + assertEquals('e', str.charAt(3)); + assertEquals('e', str.charAt(4)); + assertEquals("ee", str.subSequence(3, 5)); + assertEquals("three", str.toString()); + } + + 
@Test + void test_charSequence_UTF8() + { + // CSOFF + UTF8String str = UTF8String.from("\u2705\u5F3A".getBytes(StandardCharsets.UTF_8)); + assertFalse(str.isLatin1()); + assertFalse(str.hasString()); + assertEquals(2, str.length()); + assertEquals(2, str.length()); + assertThrows(IndexOutOfBoundsException.class, () -> str.charAt(-1)); + assertThrows(IndexOutOfBoundsException.class, () -> str.charAt(5)); + assertFalse(str.hasString()); + assertEquals('\u2705', str.charAt(0)); + assertEquals('\u5F3A', str.charAt(1)); + assertEquals("\u2705\u5F3A", str.toString()); + assertThrows(IndexOutOfBoundsException.class, () -> str.subSequence(3, 2)); + assertThrows(IndexOutOfBoundsException.class, () -> str.subSequence(-1, 2)); + assertThrows(IndexOutOfBoundsException.class, () -> str.subSequence(0, 6)); + // CSON + } + + @Test + void test_complex_UTF8() + { + UTF8String str = UTF8String.from("Зарегистрируйтесь сейчас на Десятую Международную Конференцию по".getBytes(StandardCharsets.UTF_8)); + assertFalse(str.isLatin1()); + assertEquals(64, str.length()); + assertEquals(122, str.getByteLength()); + assertEquals('р', str.charAt(10)); + + assertEquals(64, str.toString() + .length()); + assertEquals('р', str.toString() + .charAt(10)); + + str = UTF8String.from("สิบสองกษัตริย์ก่อนหน้าแลถัดไป".getBytes(StandardCharsets.UTF_8)); + assertFalse(str.isLatin1()); + assertEquals(29, str.length()); + assertEquals(87, str.getByteLength()); + assertEquals('ร', str.charAt(10)); + + assertEquals(29, str.toString() + .length()); + assertEquals('ร', str.toString() + .charAt(10)); + + str = UTF8String.from("ድር ቢያብር አንበሳ ያስር።".getBytes(StandardCharsets.UTF_8)); + assertFalse(str.isLatin1()); + assertEquals(17, str.length()); + assertEquals(45, str.getByteLength()); + assertEquals('በ', str.charAt(10)); + + assertEquals(17, str.toString() + .length()); + assertEquals('በ', str.toString() + .charAt(10)); + + str = UTF8String.from("⠗⠑⠛⠜⠙ ⠁ ⠊⠕⠋⠋⠔⠤⠝".getBytes(StandardCharsets.UTF_8)); + 
assertFalse(str.isLatin1()); + assertEquals(15, str.length()); + assertEquals(41, str.getByteLength()); + assertEquals('⠋', str.charAt(10)); + + assertEquals(15, str.toString() + .length()); + assertEquals('⠋', str.toString() + .charAt(10)); + + // 4 bytes chars + str = UTF8String.from("𓄉𓄫𓅒𓅤".getBytes(StandardCharsets.UTF_8)); + assertEquals(8, str.length()); + assertEquals(16, str.getByteLength()); + assertEquals(56619, str.charAt(3)); + + assertEquals(8, str.toString() + .length()); + assertEquals(56619, str.toString() + .charAt(3)); + + } + + @Test + void test_charSequence_Latin1() + { + // Latin1 bytes + UTF8String str = UTF8String.from("three".getBytes(StandardCharsets.ISO_8859_1)); + assertTrue(str.isLatin1()); + assertFalse(str.hasString()); + assertEquals(5, str.length()); + assertEquals(5, str.length()); + assertThrows(IndexOutOfBoundsException.class, () -> str.charAt(-1)); + assertThrows(IndexOutOfBoundsException.class, () -> str.charAt(5)); + assertFalse(str.hasString()); + assertEquals('t', str.charAt(0)); + assertEquals('h', str.charAt(1)); + assertEquals('r', str.charAt(2)); + assertEquals('e', str.charAt(3)); + assertEquals('e', str.charAt(4)); + assertEquals("three", str.toString()); + assertThrows(IndexOutOfBoundsException.class, () -> str.subSequence(3, 2)); + assertThrows(IndexOutOfBoundsException.class, () -> str.subSequence(-1, 2)); + assertThrows(IndexOutOfBoundsException.class, () -> str.subSequence(0, 6)); + } +} diff --git a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/AReferenceVectorWriter.java b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/AReferenceVectorWriter.java index f6fd562df..37995f462 100644 --- a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/AReferenceVectorWriter.java +++ b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/AReferenceVectorWriter.java @@ -1,6 +1,7 @@ package se.kuseman.payloadbuilder.bytes; import 
se.kuseman.payloadbuilder.api.execution.ValueVector; +import se.kuseman.payloadbuilder.bytes.PayloadWriter.WriterSettings; /** A writer that writes vectors who's items are references to other places in the buffer */ abstract class AReferenceVectorWriter implements VectorWriter @@ -8,28 +9,26 @@ abstract class AReferenceVectorWriter implements VectorWriter @Override public void write(BytesWriter writer, WriteCache cache, ValueVector vector, int from, int to, int nullCount) { + Encoding encoding = getEncoding(vector, from, to, cache.getSettings()); + // Find out if we have a literal vector - if (nullCount == 0) + if (nullCount == 0 + && encoding.isLiteral) { - boolean literal = isLiteral(vector, from, to); - - if (literal) - { - writer.putByte(PayloadReader.LITERAL_ENCODING); + writer.putByte(encoding.encoding); - int valueOffset = writer.position(); + int valueOffset = writer.position(); - // Set writer to position after literl data - writer.position(valueOffset + AVector.REFERENCE_HEADER_SIZE); + // Set writer to position after literl data + writer.position(valueOffset + AVector.REFERENCE_HEADER_SIZE); - // Get cached position - int position = getAndCachePosition(writer, cache, vector, from); - writer.putInt(valueOffset, position); - return; - } + // Get cached position + int position = getAndCachePosition(writer, cache, vector, from); + writer.putInt(valueOffset, position); + return; } - writer.putByte(PayloadReader.REGULAR_ENCODING); + writer.putByte(encoding.encoding); writeMeta(writer, vector); @@ -63,9 +62,35 @@ protected void writeMeta(BytesWriter writer, ValueVector vector) { } - /** Returns true if vector is literal */ - protected abstract boolean isLiteral(ValueVector vector, int from, int to); + /** + * Return encoding byte for this vector. 
Reserved encodings: 0 - REGULAR_LITERAL_ENCODING 1 - REGULAR_ENCODING + */ + protected Encoding getEncoding(ValueVector vector, int from, int to, WriterSettings settings) + { + return Encoding.REGULAR; + } /** Get and cache position of provided row */ protected abstract int getAndCachePosition(BytesWriter writer, WriteCache cache, ValueVector vector, int row); + + record Encoding(byte encoding, boolean isLiteral) + { + + static final Encoding REGULAR = new Encoding(PayloadReader.REGULAR_ENCODING, false); + static final Encoding REGULAR_LITERAL = new Encoding(PayloadReader.REGULAR_LITERAL_ENCODING, true); + + Encoding + { + if (isLiteral + && encoding == PayloadReader.REGULAR_ENCODING) + { + throw new IllegalArgumentException("Illegal encoding byte. " + PayloadReader.REGULAR_ENCODING + " is reserved for regular encoding"); + } + else if (!isLiteral + && encoding == PayloadReader.REGULAR_LITERAL_ENCODING) + { + throw new IllegalArgumentException("Illegal literal flag. Encoding byte: " + PayloadReader.REGULAR_LITERAL_ENCODING + " is reserved for regular literal encoding"); + } + } + } } diff --git a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/ArrayVectorWriter.java b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/ArrayVectorWriter.java index b00878fe2..9dd3ae99f 100644 --- a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/ArrayVectorWriter.java +++ b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/ArrayVectorWriter.java @@ -14,12 +14,6 @@ public byte getVersion() return ArrayVector.VERSION; } - @Override - protected boolean isLiteral(ValueVector vector, int from, int to) - { - return false; - } - @Override protected int getAndCachePosition(BytesWriter writer, WriteCache cache, ValueVector vector, int row) { diff --git a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/BooleanVector.java 
b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/BooleanVector.java index 81c289135..5b0fe9389 100644 --- a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/BooleanVector.java +++ b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/BooleanVector.java @@ -32,7 +32,7 @@ static ValueVector getVector(ByteBuffer buffer, int position, NullBuffer nullBuf } int encoding = buffer.get(position++); - if (encoding == PayloadReader.LITERAL_ENCODING) + if (encoding == PayloadReader.REGULAR_LITERAL_ENCODING) { byte value = buffer.get(position); return ValueVector.literalBoolean(value == 1, size); diff --git a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/BooleanVectorWriter.java b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/BooleanVectorWriter.java index ba6e6e5ee..b72ffc41c 100644 --- a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/BooleanVectorWriter.java +++ b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/BooleanVectorWriter.java @@ -37,7 +37,7 @@ public void write(BytesWriter writer, WriteCache cache, ValueVector vector, int if (literal) { - writer.putByte(PayloadReader.LITERAL_ENCODING); + writer.putByte(PayloadReader.REGULAR_LITERAL_ENCODING); // Literal boolean then we have the literal value in the data position writer.putByte((byte) (value ? 
1 : 0)); diff --git a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/DateTimeVector.java b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/DateTimeVector.java index 953322082..926363a9b 100644 --- a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/DateTimeVector.java +++ b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/DateTimeVector.java @@ -34,7 +34,7 @@ static ValueVector getVector(ByteBuffer buffer, int position, NullBuffer nullBuf } int encoding = buffer.get(position++); - if (encoding == PayloadReader.LITERAL_ENCODING) + if (encoding == PayloadReader.REGULAR_LITERAL_ENCODING) { int valueOffset = buffer.getInt(position); return ValueVector.literalDateTime(EpochDateTime.from(buffer.getLong(valueOffset)), size); diff --git a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/DateTimeVectorWriter.java b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/DateTimeVectorWriter.java index b00e71dc1..8a369e1c0 100644 --- a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/DateTimeVectorWriter.java +++ b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/DateTimeVectorWriter.java @@ -2,6 +2,7 @@ import se.kuseman.payloadbuilder.api.catalog.Column; import se.kuseman.payloadbuilder.api.execution.ValueVector; +import se.kuseman.payloadbuilder.bytes.PayloadWriter.WriterSettings; /** Writer of {@link Column.Type#DateTime} */ class DateTimeVectorWriter extends AReferenceVectorWriter @@ -15,19 +16,29 @@ public byte getVersion() } @Override - protected boolean isLiteral(ValueVector vector, int from, int to) + protected Encoding getEncoding(ValueVector vector, int from, int to, WriterSettings settings) { - long value = vector.getDateTime(from) - .getEpoch(); - for (int i = from + 1; i < to; i++) + boolean firstSet = false; + long value = -1; + for (int i = from + 0; i < to; i++) { - if (value != vector.getDateTime(i) + if 
(vector.isNull(i)) + { + return Encoding.REGULAR; + } + else if (!firstSet) + { + value = vector.getDateTime(i) + .getEpoch(); + firstSet = true; + } + else if (value != vector.getDateTime(i) .getEpoch()) { - return false; + return Encoding.REGULAR; } } - return true; + return Encoding.REGULAR_LITERAL; } @Override diff --git a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/DecimalVector.java b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/DecimalVector.java index 5f0863eb3..09559ae46 100644 --- a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/DecimalVector.java +++ b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/DecimalVector.java @@ -38,7 +38,7 @@ static ValueVector getVector(ByteBuffer buffer, int position, ReadContext contex } int encoding = buffer.get(position++); - if (encoding == PayloadReader.LITERAL_ENCODING) + if (encoding == PayloadReader.REGULAR_LITERAL_ENCODING) { int valueOffset = buffer.getInt(position); return ValueVector.literalDecimal(getDecimal(buffer, context, valueOffset), size); diff --git a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/DecimalVectorWriter.java b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/DecimalVectorWriter.java index 96c2eeee6..bc6e6e862 100644 --- a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/DecimalVectorWriter.java +++ b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/DecimalVectorWriter.java @@ -6,6 +6,7 @@ import se.kuseman.payloadbuilder.api.catalog.Column; import se.kuseman.payloadbuilder.api.execution.Decimal; import se.kuseman.payloadbuilder.api.execution.ValueVector; +import se.kuseman.payloadbuilder.bytes.PayloadWriter.WriterSettings; /** Writer of {@link Column.Type#Decimal} */ class DecimalVectorWriter extends AReferenceVectorWriter @@ -19,17 +20,26 @@ public byte getVersion() } @Override - protected boolean isLiteral(ValueVector vector, int from, 
int to) + protected Encoding getEncoding(ValueVector vector, int from, int to, WriterSettings settings) { - Decimal value = vector.getDecimal(from); - for (int i = from + 1; i < to; i++) + Decimal value = null; + for (int i = from + 0; i < to; i++) { - if (!value.equals(vector.getDecimal(i))) + if (vector.isNull(i)) { - return false; + return Encoding.REGULAR; + } + else if (value == null) + { + value = vector.getDecimal(i); + continue; + } + else if (!value.equals(vector.getDecimal(i))) + { + return Encoding.REGULAR; } } - return true; + return Encoding.REGULAR_LITERAL; } @Override diff --git a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/DoubleVector.java b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/DoubleVector.java index c5d3846fb..eadec1597 100644 --- a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/DoubleVector.java +++ b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/DoubleVector.java @@ -33,7 +33,7 @@ static ValueVector getVector(ByteBuffer buffer, int position, NullBuffer nullBuf } int encoding = buffer.get(position++); - if (encoding == PayloadReader.LITERAL_ENCODING) + if (encoding == PayloadReader.REGULAR_LITERAL_ENCODING) { int valueOffset = buffer.getInt(position); return ValueVector.literalDouble(buffer.getDouble(valueOffset), size); diff --git a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/DoubleVectorWriter.java b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/DoubleVectorWriter.java index a329c836b..fd2c17a21 100644 --- a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/DoubleVectorWriter.java +++ b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/DoubleVectorWriter.java @@ -2,6 +2,7 @@ import se.kuseman.payloadbuilder.api.catalog.Column; import se.kuseman.payloadbuilder.api.execution.ValueVector; +import se.kuseman.payloadbuilder.bytes.PayloadWriter.WriterSettings; /** Writer of {@link 
Column.Type#Double} */ class DoubleVectorWriter extends AReferenceVectorWriter @@ -15,17 +16,27 @@ public byte getVersion() } @Override - protected boolean isLiteral(ValueVector vector, int from, int to) + protected Encoding getEncoding(ValueVector vector, int from, int to, WriterSettings settings) { - double value = vector.getDouble(from); - for (int i = from + 1; i < to; i++) + boolean firstValue = false; + double value = -1; + for (int i = from + 0; i < to; i++) { - if (value != vector.getDouble(i)) + if (vector.isNull(i)) { - return false; + return Encoding.REGULAR; + } + else if (!firstValue) + { + value = vector.getDouble(i); + firstValue = true; + } + else if (value != vector.getDouble(i)) + { + return Encoding.REGULAR; } } - return true; + return Encoding.REGULAR_LITERAL; } @Override diff --git a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/FloatVector.java b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/FloatVector.java index 0930c0819..76e03dd1d 100644 --- a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/FloatVector.java +++ b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/FloatVector.java @@ -32,7 +32,7 @@ static ValueVector getVector(ByteBuffer buffer, int position, NullBuffer nullBuf } int encoding = buffer.get(position++); - if (encoding == PayloadReader.LITERAL_ENCODING) + if (encoding == PayloadReader.REGULAR_LITERAL_ENCODING) { float value = buffer.getFloat(position); return ValueVector.literalFloat(value, size); diff --git a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/FloatVectorWriter.java b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/FloatVectorWriter.java index e678fb591..cef0ad422 100644 --- a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/FloatVectorWriter.java +++ b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/FloatVectorWriter.java @@ -37,7 +37,7 @@ public void 
write(BytesWriter writer, WriteCache cache, ValueVector vector, int if (literal) { - writer.putByte(PayloadReader.LITERAL_ENCODING); + writer.putByte(PayloadReader.REGULAR_LITERAL_ENCODING); // Literal float then we have the literal value in the data position writer.putFloat(value); return; diff --git a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/IntVector.java b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/IntVector.java index eb357ec55..8f4e1f941 100644 --- a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/IntVector.java +++ b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/IntVector.java @@ -32,7 +32,7 @@ static ValueVector getVector(ByteBuffer buffer, int position, NullBuffer nullBuf } int encoding = buffer.get(position++); - if (encoding == PayloadReader.LITERAL_ENCODING) + if (encoding == PayloadReader.REGULAR_LITERAL_ENCODING) { int value = buffer.getInt(position); return ValueVector.literalInt(value, size); diff --git a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/IntVectorWriter.java b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/IntVectorWriter.java index 47a8ce65a..e5559628b 100644 --- a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/IntVectorWriter.java +++ b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/IntVectorWriter.java @@ -37,7 +37,7 @@ public void write(BytesWriter writer, WriteCache cache, ValueVector vector, int if (literal) { - writer.putByte(PayloadReader.LITERAL_ENCODING); + writer.putByte(PayloadReader.REGULAR_LITERAL_ENCODING); // Literal int then we have the literal value in the data position writer.putInt(value); return; diff --git a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/LongVector.java b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/LongVector.java index d61c722c8..47103c997 100644 --- 
a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/LongVector.java +++ b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/LongVector.java @@ -33,7 +33,7 @@ static ValueVector getVector(ByteBuffer buffer, int position, NullBuffer nullBuf } int encoding = buffer.get(position++); - if (encoding == PayloadReader.LITERAL_ENCODING) + if (encoding == PayloadReader.REGULAR_LITERAL_ENCODING) { int valueOffset = buffer.getInt(position); return ValueVector.literalLong(buffer.getLong(valueOffset), size); diff --git a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/LongVectorWriter.java b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/LongVectorWriter.java index 679d26dc4..d6e03d789 100644 --- a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/LongVectorWriter.java +++ b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/LongVectorWriter.java @@ -2,6 +2,7 @@ import se.kuseman.payloadbuilder.api.catalog.Column; import se.kuseman.payloadbuilder.api.execution.ValueVector; +import se.kuseman.payloadbuilder.bytes.PayloadWriter.WriterSettings; /** Writer of {@link Column.Type#Long} */ class LongVectorWriter extends AReferenceVectorWriter @@ -15,17 +16,27 @@ public byte getVersion() } @Override - protected boolean isLiteral(ValueVector vector, int from, int to) + protected Encoding getEncoding(ValueVector vector, int from, int to, WriterSettings settings) { - long value = vector.getLong(from); - for (int i = from + 1; i < to; i++) + boolean firstValue = false; + long value = -1; + for (int i = from + 0; i < to; i++) { - if (value != vector.getLong(i)) + if (vector.isNull(i)) { - return false; + return Encoding.REGULAR; + } + else if (!firstValue) + { + value = vector.getLong(i); + firstValue = true; + } + else if (value != vector.getLong(i)) + { + return Encoding.REGULAR; } } - return true; + return Encoding.REGULAR_LITERAL; } @Override diff --git 
a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/NullBuffer.java b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/NullBuffer.java index 8a2e5411b..53067e76c 100644 --- a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/NullBuffer.java +++ b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/NullBuffer.java @@ -5,8 +5,8 @@ /** Null buffer that reads null bits from bytes */ class NullBuffer { - private static final NullBuffer NO_NULL = new NoNullBuffer(); - private static final NullBuffer ALL_NULL = new AllNullBuffer(); + static final NullBuffer NO_NULL = new NoNullBuffer(); + static final NullBuffer ALL_NULL = new AllNullBuffer(); private final ByteBuffer buffer; private final int startPosition; diff --git a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/PayloadReader.java b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/PayloadReader.java index 89d86ecba..590632817 100644 --- a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/PayloadReader.java +++ b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/PayloadReader.java @@ -23,7 +23,7 @@ private PayloadReader() static final byte P = 'P'; static final byte L = 'L'; static final byte B = 'B'; - static final byte LITERAL_ENCODING = 0; + static final byte REGULAR_LITERAL_ENCODING = 0; static final byte REGULAR_ENCODING = 1; /** Checks if provided payload is valid payload */ diff --git a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/PayloadWriter.java b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/PayloadWriter.java index f4903ddc7..6d9e6da19 100644 --- a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/PayloadWriter.java +++ b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/PayloadWriter.java @@ -3,6 +3,7 @@ import static java.util.Objects.requireNonNull; import 
se.kuseman.payloadbuilder.api.catalog.Column.Type; +import se.kuseman.payloadbuilder.api.execution.UTF8String; import se.kuseman.payloadbuilder.api.execution.ValueVector; /** @@ -14,8 +15,14 @@ private PayloadWriter() { } - /** Writes vector to bytes */ + /** Writes vector to bytes with default settings. */ public static byte[] write(ValueVector vector) + { + return write(vector, new WriterSettings()); + } + + /** Writes vector to bytes with specified settings. */ + public static byte[] write(ValueVector vector, WriterSettings settings) { requireNonNull(vector, "vector"); @@ -26,7 +33,7 @@ public static byte[] write(ValueVector vector) writer.putByte(PayloadReader.B); writer.putVarInt(PayloadReader.VERSION); - WriteCache cache = new WriteCache(); + WriteCache cache = new WriteCache(settings); writeVector(writer, cache, vector, 0, vector.size(), true); // Put a last byte used as checksum @@ -87,4 +94,23 @@ private static void writeVector(BytesWriter writer, WriteCache cache, ValueVecto vectorWriter.write(writer, cache, vector, from, to, nullCount); } + + /** Setting used when wrting bytes. */ + public static class WriterSettings + { + private boolean useLatin1EncodedStrings = false; + + public boolean isUseLatin1EncodedStrings() + { + return useLatin1EncodedStrings; + } + + /** + * Should {@link UTF8String}'s be encoded with latin1. This is really useful in performance aspects since latin1 encoded strings used via {@link CharSequence} is much faster than UTF8. 
+ */ + public void setUseLatin1EncodedStrings(boolean useLatin1EncodedStrings) + { + this.useLatin1EncodedStrings = useLatin1EncodedStrings; + } + } } diff --git a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/ReadContext.java b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/ReadContext.java index 95e073460..514517e2c 100644 --- a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/ReadContext.java +++ b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/ReadContext.java @@ -1,8 +1,8 @@ package se.kuseman.payloadbuilder.bytes; import java.math.BigDecimal; -import java.util.HashMap; import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; import java.util.function.Supplier; import se.kuseman.payloadbuilder.api.catalog.Schema; @@ -12,7 +12,7 @@ class ReadContext { private final Schema schema; private final boolean expandSchema; - private Map bigDecimalCache = new HashMap<>(); + private Map bigDecimalCache = new ConcurrentHashMap<>(); ReadContext() { diff --git a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/StringVector.java b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/StringVector.java index e10df3aaf..77a3df02b 100644 --- a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/StringVector.java +++ b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/StringVector.java @@ -1,6 +1,7 @@ package se.kuseman.payloadbuilder.bytes; import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; import se.kuseman.payloadbuilder.api.catalog.Column.Type; import se.kuseman.payloadbuilder.api.catalog.ResolvedType; @@ -11,10 +12,31 @@ class StringVector extends AVector { static final byte VERSION = 1; + static final byte LATIN1_LITERAL_ENCODING = 2; + static final byte LATIN1_ENCODING = 3; + private final boolean latin1; - StringVector(ByteBuffer buffer, int startPosition, NullBuffer nullBuffer, int size) + 
StringVector(ByteBuffer buffer, int startPosition, NullBuffer nullBuffer, int size, boolean latin1) { super(buffer, ResolvedType.of(Type.String), size, nullBuffer, startPosition); + this.latin1 = latin1; + } + + /** Create a java.lang.String directly from bytes without going through UTF8String. */ + @Override + public String valueAsString(int row) + { + if (isNull(row)) + { + return null; + } + + int offset = dataStartPosition + (row * REFERENCE_HEADER_SIZE); + int valueOffset = buffer.getInt(offset); + int length = Utils.readVarInt(buffer, valueOffset); + valueOffset += Utils.sizeOfVarInt(length); + return new String(buffer.array(), valueOffset, length, latin1 ? StandardCharsets.ISO_8859_1 + : StandardCharsets.UTF_8); } @Override @@ -22,7 +44,7 @@ public UTF8String getString(int row) { int offset = dataStartPosition + (row * REFERENCE_HEADER_SIZE); int valueOffset = buffer.getInt(offset); - return getString(buffer, valueOffset); + return getString(buffer, valueOffset, latin1); } /** Create string vector. 
*/ @@ -34,21 +56,27 @@ static ValueVector getVector(ByteBuffer buffer, int position, NullBuffer nullBuf } int encoding = buffer.get(position++); - if (encoding == PayloadReader.LITERAL_ENCODING) + if (encoding == PayloadReader.REGULAR_LITERAL_ENCODING) + { + int valueOffset = buffer.getInt(position); + return ValueVector.literalString(getString(buffer, valueOffset, false), size); + } + else if (encoding == LATIN1_LITERAL_ENCODING) { int valueOffset = buffer.getInt(position); - return ValueVector.literalString(getString(buffer, valueOffset), size); + return ValueVector.literalString(getString(buffer, valueOffset, true), size); } - return new StringVector(buffer, position, nullBuffer, size); + return new StringVector(buffer, position, nullBuffer, size, encoding == LATIN1_ENCODING); } // CSOFF - private static UTF8String getString(ByteBuffer buffer, int position) + private static UTF8String getString(ByteBuffer buffer, int position, boolean latin1) // CSON { int length = Utils.readVarInt(buffer, position); position += Utils.sizeOfVarInt(length); - return UTF8String.utf8(buffer.array(), position, length); + return latin1 ? 
UTF8String.latin(buffer.array(), position, length) + : UTF8String.utf8(buffer.array(), position, length); } } diff --git a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/StringVectorWriter.java b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/StringVectorWriter.java index 0e28bcf3d..291450343 100644 --- a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/StringVectorWriter.java +++ b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/StringVectorWriter.java @@ -3,11 +3,14 @@ import se.kuseman.payloadbuilder.api.catalog.Column; import se.kuseman.payloadbuilder.api.execution.UTF8String; import se.kuseman.payloadbuilder.api.execution.ValueVector; +import se.kuseman.payloadbuilder.bytes.PayloadWriter.WriterSettings; /** Writer of {@link Column.Type#String} */ class StringVectorWriter extends AReferenceVectorWriter { static StringVectorWriter INSTANCE = new StringVectorWriter(); + static final Encoding LITERAL_LATIN1_ENCODING = new Encoding(StringVector.LATIN1_LITERAL_ENCODING, true); + static final Encoding LATIN1_ENCODING = new Encoding(StringVector.LATIN1_ENCODING, false); @Override public byte getVersion() @@ -16,17 +19,46 @@ public byte getVersion() } @Override - protected boolean isLiteral(ValueVector vector, int from, int to) + protected Encoding getEncoding(ValueVector vector, int from, int to, WriterSettings settings) { - UTF8String value = vector.getString(from); - for (int i = from + 1; i < to; i++) + boolean detectLatin1 = settings.isUseLatin1EncodedStrings(); + UTF8String value = null; + boolean isLatin1 = detectLatin1; + boolean isLiteral = true; + for (int i = from + 0; i < to; i++) { - if (!value.equals(vector.getString(i))) + if (vector.isNull(i)) { - return false; + isLiteral = false; + continue; + } + + UTF8String current = vector.getString(i); + if (detectLatin1 + && isLatin1) + { + byte[] bytes = current.getBytes(); + isLatin1 = UTF8String.detectLatin1(bytes, 0, bytes.length); 
+ } + + if (value == null) + { + value = current; + continue; + } + else if (!value.equals(current)) + { + isLiteral = false; } } - return true; + if (isLiteral) + { + return isLatin1 ? LITERAL_LATIN1_ENCODING + : Encoding.REGULAR_LITERAL; + } + + return isLatin1 ? LATIN1_ENCODING + : Encoding.REGULAR; } @Override diff --git a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/TableVectorWriter.java b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/TableVectorWriter.java index 63e51aa39..a22b8c151 100644 --- a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/TableVectorWriter.java +++ b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/TableVectorWriter.java @@ -17,12 +17,6 @@ public byte getVersion() return TableVector.VERSION; } - @Override - protected boolean isLiteral(ValueVector vector, int from, int to) - { - return false; - } - @Override protected void writeMeta(BytesWriter writer, ValueVector vector) { diff --git a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/WriteCache.java b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/WriteCache.java index 0fa84c464..e99277104 100644 --- a/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/WriteCache.java +++ b/payloadbuilder-bytes/src/main/java/se/kuseman/payloadbuilder/bytes/WriteCache.java @@ -5,15 +5,28 @@ import se.kuseman.payloadbuilder.api.execution.Decimal; import se.kuseman.payloadbuilder.api.execution.UTF8String; +import se.kuseman.payloadbuilder.bytes.PayloadWriter.WriterSettings; /** Cache used during writing to reuse bytes from equals string/bigdecimals/longs etc. 
*/ class WriteCache { + private final WriterSettings settings; + private Map longCache = new HashMap<>(); private Map doubleCache = new HashMap<>(); private Map stringCache = new HashMap<>(); private Map decimalCache = new HashMap<>(); + WriteCache(WriterSettings settings) + { + this.settings = settings; + } + + WriterSettings getSettings() + { + return settings; + } + /** Get position to provided long value */ Integer getLongPosition(long value) { diff --git a/payloadbuilder-bytes/src/test/java/se/kuseman/payloadbuilder/bytes/BackwardsCompatibilityPayloadReaderTest.java b/payloadbuilder-bytes/src/test/java/se/kuseman/payloadbuilder/bytes/BackwardsCompatibilityPayloadReaderTest.java new file mode 100644 index 000000000..0383e2d3c --- /dev/null +++ b/payloadbuilder-bytes/src/test/java/se/kuseman/payloadbuilder/bytes/BackwardsCompatibilityPayloadReaderTest.java @@ -0,0 +1,171 @@ +package se.kuseman.payloadbuilder.bytes; + +import static se.kuseman.payloadbuilder.test.VectorTestUtils.assertVectorsEquals; +import static se.kuseman.payloadbuilder.test.VectorTestUtils.vv; + +import java.math.BigDecimal; + +import org.junit.jupiter.api.Test; + +import se.kuseman.payloadbuilder.api.catalog.Column; +import se.kuseman.payloadbuilder.api.execution.EpochDateTime; + +/** + * Test that aims to verify that we dont break backwards compatibility when changing code in write parts. We keep byte sequences from all previous changes and verifies that those still holds. 
+ */ +class BackwardsCompatibilityPayloadReaderTest +{ + @Test + void test_intvector_1() + { + // System.out.println(ArrayUtils.toString(PayloadWriter.write(VectorTestUtils.vv(Type.Int, 1,1,1)))); + // System.out.println(ArrayUtils.toString(PayloadWriter.write(VectorTestUtils.vv(Type.Int, null,null,null)))); + // System.out.println(ArrayUtils.toString(PayloadWriter.write(VectorTestUtils.vv(Type.Int, 1,null,3)))); + // System.out.println(ArrayUtils.toString(PayloadWriter.write(VectorTestUtils.vv(Type.Int, 1,2,3)))); + // {80,76,66,2,1,3,0,1,0,0,0,0,1,1} + // {80,76,66,2,1,3,3,0} + // {80,76,66,2,1,3,1,2,1,1,0,0,0,1,0,0,0,0,0,0,0,3,2} + // {80,76,66,2,1,3,0,1,1,0,0,0,1,0,0,0,2,0,0,0,3,1} + assertVectorsEquals(vv(Column.Type.Int, 1, 1, 1), PayloadReader.read(new byte[] { 80, 76, 66, 2, 1, 3, 0, 1, 0, 0, 0, 0, 1, 1 })); + assertVectorsEquals(vv(Column.Type.Int, null, null, null), PayloadReader.read(new byte[] { 80, 76, 66, 2, 1, 3, 3, 0 })); + assertVectorsEquals(vv(Column.Type.Int, 1, null, 3), PayloadReader.read(new byte[] { 80, 76, 66, 2, 1, 3, 1, 2, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 3, 2 })); + assertVectorsEquals(vv(Column.Type.Int, 1, 2, 3), PayloadReader.read(new byte[] { 80, 76, 66, 2, 1, 3, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 1 })); + } + + @Test + void test_longvector_1() + { + // System.out.println(ArrayUtils.toString(PayloadWriter.write(VectorTestUtils.vv(Type.Long, 1,1,1)))); + // System.out.println(ArrayUtils.toString(PayloadWriter.write(VectorTestUtils.vv(Type.Long, null,null,null)))); + // System.out.println(ArrayUtils.toString(PayloadWriter.write(VectorTestUtils.vv(Type.Long, 1,null,3)))); + // System.out.println(ArrayUtils.toString(PayloadWriter.write(VectorTestUtils.vv(Type.Long, 1,2,3)))); + // {80,76,66,2,2,3,0,1,0,0,0,0,13,0,0,0,0,0,0,0,1,1} + // {80,76,66,2,2,3,3,0} + // {80,76,66,2,2,3,1,2,1,1,0,0,0,22,0,0,0,0,0,0,0,30,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,3,2} + // 
{80,76,66,2,2,3,0,1,1,0,0,0,21,0,0,0,29,0,0,0,37,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,3,1} + assertVectorsEquals(vv(Column.Type.Long, 1, 1, 1), PayloadReader.read(new byte[] { 80, 76, 66, 2, 2, 3, 0, 1, 0, 0, 0, 0, 13, 0, 0, 0, 0, 0, 0, 0, 1, 1 })); + assertVectorsEquals(vv(Column.Type.Long, null, null, null), PayloadReader.read(new byte[] { 80, 76, 66, 2, 2, 3, 3, 0 })); + assertVectorsEquals(vv(Column.Type.Long, 1, null, 3), + PayloadReader.read(new byte[] { 80, 76, 66, 2, 2, 3, 1, 2, 1, 1, 0, 0, 0, 22, 0, 0, 0, 0, 0, 0, 0, 30, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 3, 2 })); + assertVectorsEquals(vv(Column.Type.Long, 1, 2, 3), + PayloadReader.read(new byte[] { 80, 76, 66, 2, 2, 3, 0, 1, 1, 0, 0, 0, 21, 0, 0, 0, 29, 0, 0, 0, 37, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 3, 1 })); + } + + @Test + void test_floatvector_1() + { + // System.out.println(ArrayUtils.toString(PayloadWriter.write(VectorTestUtils.vv(Type.Float, 1.11f,1.11f,1.11f)))); + // System.out.println(ArrayUtils.toString(PayloadWriter.write(VectorTestUtils.vv(Type.Float, null,null,null)))); + // System.out.println(ArrayUtils.toString(PayloadWriter.write(VectorTestUtils.vv(Type.Float, 1.11f,null,3.33f)))); + // System.out.println(ArrayUtils.toString(PayloadWriter.write(VectorTestUtils.vv(Type.Float, 1.11f,2.22f,3.33f)))); + // {80,76,66,2,3,3,0,1,0,63,-114,20,123,1} + // {80,76,66,2,3,3,3,0} + // {80,76,66,2,3,3,1,2,1,1,63,-114,20,123,0,0,0,0,64,85,30,-72,2} + // {80,76,66,2,3,3,0,1,1,63,-114,20,123,64,14,20,123,64,85,30,-72,1} + assertVectorsEquals(vv(Column.Type.Float, 1.11f, 1.11f, 1.11f), PayloadReader.read(new byte[] { 80, 76, 66, 2, 3, 3, 0, 1, 0, 63, -114, 20, 123, 1 })); + assertVectorsEquals(vv(Column.Type.Float, null, null, null), PayloadReader.read(new byte[] { 80, 76, 66, 2, 3, 3, 3, 0 })); + assertVectorsEquals(vv(Column.Type.Float, 1.11f, null, 3.33f), PayloadReader.read(new byte[] { 80, 76, 66, 2, 3, 3, 1, 2, 1, 1, 63, -114, 20, 123, 0, 0, 0, 0, 
64, 85, 30, -72, 2 })); + assertVectorsEquals(vv(Column.Type.Float, 1.11f, 2.22f, 3.33f), PayloadReader.read(new byte[] { 80, 76, 66, 2, 3, 3, 0, 1, 1, 63, -114, 20, 123, 64, 14, 20, 123, 64, 85, 30, -72, 1 })); + } + + @Test + void test_doublevector_1() + { + // System.out.println(ArrayUtils.toString(PayloadWriter.write(VectorTestUtils.vv(Type.Double, 1.11d,1.11d,1.11d)))); + // System.out.println(ArrayUtils.toString(PayloadWriter.write(VectorTestUtils.vv(Type.Double, null,null,null)))); + // System.out.println(ArrayUtils.toString(PayloadWriter.write(VectorTestUtils.vv(Type.Double, 1.11d,null,3.33d)))); + // System.out.println(ArrayUtils.toString(PayloadWriter.write(VectorTestUtils.vv(Type.Double, 1.11d,2.22d,3.33d)))); + // {80,76,66,2,4,3,0,1,0,0,0,0,13,63,-15,-62,-113,92,40,-11,-61,1} + // {80,76,66,2,4,3,3,0} + // {80,76,66,2,4,3,1,2,1,1,0,0,0,22,0,0,0,0,0,0,0,30,63,-15,-62,-113,92,40,-11,-61,64,10,-93,-41,10,61,112,-92,2} + // {80,76,66,2,4,3,0,1,1,0,0,0,21,0,0,0,29,0,0,0,37,63,-15,-62,-113,92,40,-11,-61,64,1,-62,-113,92,40,-11,-61,64,10,-93,-41,10,61,112,-92,1} + assertVectorsEquals(vv(Column.Type.Double, 1.11d, 1.11d, 1.11d), PayloadReader.read(new byte[] { 80, 76, 66, 2, 4, 3, 0, 1, 0, 0, 0, 0, 13, 63, -15, -62, -113, 92, 40, -11, -61, 1 })); + assertVectorsEquals(vv(Column.Type.Double, null, null, null), PayloadReader.read(new byte[] { 80, 76, 66, 2, 4, 3, 3, 0 })); + assertVectorsEquals(vv(Column.Type.Double, 1.11d, null, 3.33d), + PayloadReader.read(new byte[] { 80, 76, 66, 2, 4, 3, 1, 2, 1, 1, 0, 0, 0, 22, 0, 0, 0, 0, 0, 0, 0, 30, 63, -15, -62, -113, 92, 40, -11, -61, 64, 10, -93, -41, 10, 61, 112, -92, 2 })); + assertVectorsEquals(vv(Column.Type.Double, 1.11d, 2.22d, 3.33d), + PayloadReader.read(new byte[] { + 80, 76, 66, 2, 4, 3, 0, 1, 1, 0, 0, 0, 21, 0, 0, 0, 29, 0, 0, 0, 37, 63, -15, -62, -113, 92, 40, -11, -61, 64, 1, -62, -113, 92, 40, -11, -61, 64, 10, -93, -41, 10, 61, 112, + -92, 1 })); + } + + @Test + void test_booleanvector_1() + { + // 
System.out.println(ArrayUtils.toString(PayloadWriter.write(VectorTestUtils.vv(Type.Boolean, true,true,true)))); + // System.out.println(ArrayUtils.toString(PayloadWriter.write(VectorTestUtils.vv(Type.Boolean, null,null,null)))); + // System.out.println(ArrayUtils.toString(PayloadWriter.write(VectorTestUtils.vv(Type.Boolean, true,null,false)))); + // System.out.println(ArrayUtils.toString(PayloadWriter.write(VectorTestUtils.vv(Type.Boolean, true,false,true)))); + // {80,76,66,2,0,3,0,1,0,1,1} + // {80,76,66,2,0,3,3,0} + // {80,76,66,2,0,3,1,2,1,1,1,1,2} + // {80,76,66,2,0,3,0,1,1,1,5,1} + assertVectorsEquals(vv(Column.Type.Boolean, true, true, true), PayloadReader.read(new byte[] { 80, 76, 66, 2, 0, 3, 0, 1, 0, 1, 1 })); + assertVectorsEquals(vv(Column.Type.Boolean, null, null, null), PayloadReader.read(new byte[] { 80, 76, 66, 2, 0, 3, 3, 0 })); + assertVectorsEquals(vv(Column.Type.Boolean, true, null, false), PayloadReader.read(new byte[] { 80, 76, 66, 2, 0, 3, 1, 2, 1, 1, 1, 1, 2 })); + assertVectorsEquals(vv(Column.Type.Boolean, true, false, true), PayloadReader.read(new byte[] { 80, 76, 66, 2, 0, 3, 0, 1, 1, 1, 5, 1 })); + } + + @Test + void test_stringvector_1() + { + // System.out.println(ArrayUtils.toString(PayloadWriter.write(VectorTestUtils.vv(Type.String, "sv", "sv", "sv")))); + // System.out.println(ArrayUtils.toString(PayloadWriter.write(VectorTestUtils.vv(Type.String, null,null,null)))); + // System.out.println(ArrayUtils.toString(PayloadWriter.write(VectorTestUtils.vv(Type.String, "sv",null,"da")))); + // System.out.println(ArrayUtils.toString(PayloadWriter.write(VectorTestUtils.vv(Type.String, "sv","da","fi")))); + // {80,76,66,2,5,3,0,1,0,0,0,0,13,2,115,118,1} + // {80,76,66,2,5,3,3,0} + // {80,76,66,2,5,3,1,2,1,1,0,0,0,22,0,0,0,0,0,0,0,25,2,115,118,2,100,97,2} + // {80,76,66,2,5,3,0,1,1,0,0,0,21,0,0,0,24,0,0,0,27,2,115,118,2,100,97,2,102,105,1} + assertVectorsEquals(vv(Column.Type.String, "sv", "sv", "sv"), PayloadReader.read(new byte[] { 80, 76, 
66, 2, 5, 3, 0, 1, 0, 0, 0, 0, 13, 2, 115, 118, 1 })); + assertVectorsEquals(vv(Column.Type.String, null, null, null), PayloadReader.read(new byte[] { 80, 76, 66, 2, 5, 3, 3, 0 })); + assertVectorsEquals(vv(Column.Type.String, "sv", null, "da"), + PayloadReader.read(new byte[] { 80, 76, 66, 2, 5, 3, 1, 2, 1, 1, 0, 0, 0, 22, 0, 0, 0, 0, 0, 0, 0, 25, 2, 115, 118, 2, 100, 97, 2 })); + assertVectorsEquals(vv(Column.Type.String, "sv", "da", "fi"), + PayloadReader.read(new byte[] { 80, 76, 66, 2, 5, 3, 0, 1, 1, 0, 0, 0, 21, 0, 0, 0, 24, 0, 0, 0, 27, 2, 115, 118, 2, 100, 97, 2, 102, 105, 1 })); + } + + @Test + void test_decimalvector_1() + { + // System.out.println(ArrayUtils.toString(PayloadWriter.write(VectorTestUtils.vv(Type.Decimal, new BigDecimal("10.10"), new BigDecimal("10.10"), new BigDecimal("10.10"))))); + // System.out.println(ArrayUtils.toString(PayloadWriter.write(VectorTestUtils.vv(Type.Decimal, null,null,null)))); + // System.out.println(ArrayUtils.toString(PayloadWriter.write(VectorTestUtils.vv(Type.Decimal, new BigDecimal("10.10"), null, new BigDecimal("30.30"))))); + // System.out.println(ArrayUtils.toString(PayloadWriter.write(VectorTestUtils.vv(Type.Decimal, new BigDecimal("10.10"),new BigDecimal("20.20"),new BigDecimal("30.30"))))); + // {80,76,66,2,7,3,0,1,0,0,0,0,13,2,3,-14,2,1} + // {80,76,66,2,7,3,3,0} + // {80,76,66,2,7,3,1,2,1,1,0,0,0,22,0,0,0,0,0,0,0,26,2,3,-14,2,2,11,-42,2,2} + // {80,76,66,2,7,3,0,1,1,0,0,0,21,0,0,0,25,0,0,0,29,2,3,-14,2,2,7,-28,2,2,11,-42,2,1} + assertVectorsEquals(vv(Column.Type.Decimal, new BigDecimal("10.10"), new BigDecimal("10.10"), new BigDecimal("10.10")), + PayloadReader.read(new byte[] { 80, 76, 66, 2, 7, 3, 0, 1, 0, 0, 0, 0, 13, 2, 3, -14, 2, 1 })); + assertVectorsEquals(vv(Column.Type.Decimal, null, null, null), PayloadReader.read(new byte[] { 80, 76, 66, 2, 7, 3, 3, 0 })); + assertVectorsEquals(vv(Column.Type.Decimal, new BigDecimal("10.10"), null, new BigDecimal("30.30")), + PayloadReader.read(new byte[] { 80, 
76, 66, 2, 7, 3, 1, 2, 1, 1, 0, 0, 0, 22, 0, 0, 0, 0, 0, 0, 0, 26, 2, 3, -14, 2, 2, 11, -42, 2, 2 })); + assertVectorsEquals(vv(Column.Type.Decimal, new BigDecimal("10.10"), new BigDecimal("20.20"), new BigDecimal("30.30")), + PayloadReader.read(new byte[] { 80, 76, 66, 2, 7, 3, 0, 1, 1, 0, 0, 0, 21, 0, 0, 0, 25, 0, 0, 0, 29, 2, 3, -14, 2, 2, 7, -28, 2, 2, 11, -42, 2, 1 })); + } + + @Test + void test_datetimevector_1() + { + // long epoch = 1772178000345L; + // System.out.println(ArrayUtils.toString(PayloadWriter.write(VectorTestUtils.vv(Type.DateTime, EpochDateTime.from(epoch), EpochDateTime.from(epoch), EpochDateTime.from(epoch))))); + // System.out.println(ArrayUtils.toString(PayloadWriter.write(VectorTestUtils.vv(Type.DateTime, null,null,null)))); + // System.out.println(ArrayUtils.toString(PayloadWriter.write(VectorTestUtils.vv(Type.DateTime, EpochDateTime.from(epoch), null, EpochDateTime.from(epoch + 20000))))); + // System.out.println(ArrayUtils.toString(PayloadWriter.write(VectorTestUtils.vv(Type.DateTime, EpochDateTime.from(epoch),EpochDateTime.from(epoch + 10000),EpochDateTime.from(epoch + + // 20000))))); + // {80,76,66,2,6,3,0,1,0,0,0,0,13,0,0,1,-100,-98,10,73,-39,1} + // {80,76,66,2,6,3,3,0} + // {80,76,66,2,6,3,1,2,1,1,0,0,0,22,0,0,0,0,0,0,0,30,0,0,1,-100,-98,10,73,-39,0,0,1,-100,-98,10,-105,-7,2} + // {80,76,66,2,6,3,0,1,1,0,0,0,21,0,0,0,29,0,0,0,37,0,0,1,-100,-98,10,73,-39,0,0,1,-100,-98,10,112,-23,0,0,1,-100,-98,10,-105,-7,1} + + long epoch = 1772178000345L; + assertVectorsEquals(vv(Column.Type.DateTime, EpochDateTime.from(epoch), EpochDateTime.from(epoch), EpochDateTime.from(epoch)), + PayloadReader.read(new byte[] { 80, 76, 66, 2, 6, 3, 0, 1, 0, 0, 0, 0, 13, 0, 0, 1, -100, -98, 10, 73, -39, 1 })); + assertVectorsEquals(vv(Column.Type.DateTime, null, null, null), PayloadReader.read(new byte[] { 80, 76, 66, 2, 6, 3, 3, 0 })); + assertVectorsEquals(vv(Column.Type.DateTime, EpochDateTime.from(epoch), null, EpochDateTime.from(epoch + 20000)), + 
PayloadReader.read(new byte[] { 80, 76, 66, 2, 6, 3, 1, 2, 1, 1, 0, 0, 0, 22, 0, 0, 0, 0, 0, 0, 0, 30, 0, 0, 1, -100, -98, 10, 73, -39, 0, 0, 1, -100, -98, 10, -105, -7, 2 })); + assertVectorsEquals(vv(Column.Type.DateTime, EpochDateTime.from(epoch), EpochDateTime.from(epoch + 10000), EpochDateTime.from(epoch + 20000)), PayloadReader.read(new byte[] { + 80, 76, 66, 2, 6, 3, 0, 1, 1, 0, 0, 0, 21, 0, 0, 0, 29, 0, 0, 0, 37, 0, 0, 1, -100, -98, 10, 73, -39, 0, 0, 1, -100, -98, 10, 112, -23, 0, 0, 1, -100, -98, 10, -105, -7, 1 })); + } +} diff --git a/payloadbuilder-bytes/src/test/java/se/kuseman/payloadbuilder/bytes/PayloadWriterTest.java b/payloadbuilder-bytes/src/test/java/se/kuseman/payloadbuilder/bytes/PayloadWriterTest.java index 8e4a8740a..71de459fe 100644 --- a/payloadbuilder-bytes/src/test/java/se/kuseman/payloadbuilder/bytes/PayloadWriterTest.java +++ b/payloadbuilder-bytes/src/test/java/se/kuseman/payloadbuilder/bytes/PayloadWriterTest.java @@ -2,12 +2,15 @@ import static java.util.Arrays.asList; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; import static se.kuseman.payloadbuilder.test.VectorTestUtils.vv; +import java.nio.ByteBuffer; import java.util.List; import org.junit.jupiter.api.Test; @@ -19,11 +22,19 @@ import se.kuseman.payloadbuilder.api.execution.ObjectVector; import se.kuseman.payloadbuilder.api.execution.TupleVector; import se.kuseman.payloadbuilder.api.execution.ValueVector; +import se.kuseman.payloadbuilder.bytes.PayloadWriter.WriterSettings; import se.kuseman.payloadbuilder.test.VectorTestUtils; /** Test of {@link PayloadWriter} */ class PayloadWriterTest { + @Test + void 
test_Encoding() + { + assertThrows(IllegalArgumentException.class, () -> new AReferenceVectorWriter.Encoding(PayloadReader.REGULAR_ENCODING, true)); + assertThrows(IllegalArgumentException.class, () -> new AReferenceVectorWriter.Encoding(PayloadReader.REGULAR_LITERAL_ENCODING, false)); + } + @Test void test_fail_any() { @@ -55,6 +66,33 @@ void test_invalid_payload_3() assertThrows(IllegalArgumentException.class, () -> PayloadReader.read(new byte[] { PayloadReader.P, PayloadReader.L, 1, 2, PayloadReader.B })); } + /** Tests special overrides for string vectors. valueAsString */ + @Test + void test_StringVector_specialization() + { + // CSOFF + byte[] bytes = PayloadWriter.write(vv(Type.String, "one", "two", "three", "\u2705\u5F3A", null)); + ValueVector vector = PayloadReader.read(bytes); + assertEquals("one", vector.valueAsString(0)); + assertEquals("two", vector.valueAsString(1)); + assertEquals("three", vector.valueAsString(2)); + assertEquals("\u2705\u5F3A", vector.valueAsString(3)); + assertNull(vector.valueAsString(4)); + // CSON + + WriterSettings settings = new WriterSettings(); + settings.setUseLatin1EncodedStrings(true); + // CSOFF + bytes = PayloadWriter.write(vv(Type.String, "one", "two", "three", "four", null), settings); + vector = PayloadReader.read(bytes); + assertEquals("one", vector.valueAsString(0)); + assertEquals("two", vector.valueAsString(1)); + assertEquals("three", vector.valueAsString(2)); + assertEquals("four", vector.valueAsString(3)); + assertNull(vector.valueAsString(4)); + // CSON + } + @Test void test_schema_recreation_with_more_expected_columns() { @@ -646,6 +684,8 @@ void test_array() ValueVector actual; byte[] bytes; + assertThrows(IllegalArgumentException.class, () -> ArrayVector.getVector(ByteBuffer.allocate(10), 0, new ReadContext(), NullBuffer.ALL_NULL, ResolvedType.array(Type.String), (byte) 2, 1)); + // Empty v = VectorTestUtils.vv(ResolvedType.array(Type.Int)); @@ -703,6 +743,36 @@ void test_array() 
VectorTestUtils.assertVectorsEquals(v, actual); } + @Test + void test_decimal_literal() + { + ValueVector v; + ValueVector actual; + byte[] bytes; + + v = VectorTestUtils.vv(Type.Decimal, 0, 0, 0, 0); + bytes = PayloadWriter.write(v); + actual = PayloadReader.read(bytes); + assertFalse(actual instanceof DecimalVector); // Literal + VectorTestUtils.assertVectorsEquals(vv(Type.Decimal, 0, 0, 0, 0), actual); + + // Test that literal encoding with nulls fails + v = VectorTestUtils.vv(Type.Decimal, 0, 0, null, 0); + + bytes = PayloadWriter.write(v); + actual = PayloadReader.read(bytes); + assertTrue(actual instanceof DecimalVector); // Not literal + VectorTestUtils.assertVectorsEquals(vv(Type.Decimal, 0, 0, null, 0), actual); + + // Test null as first item + v = VectorTestUtils.vv(Type.Decimal, null, 0, 0, 0); + + bytes = PayloadWriter.write(v); + actual = PayloadReader.read(bytes); + assertTrue(actual instanceof DecimalVector); // Not literal + VectorTestUtils.assertVectorsEquals(vv(Type.Decimal, null, 0, 0, 0), actual); + } + @Test void test_decimal() { @@ -710,6 +780,8 @@ void test_decimal() ValueVector actual; byte[] bytes; + assertThrows(IllegalArgumentException.class, () -> DecimalVector.getVector(ByteBuffer.allocate(10), 0, new ReadContext(), NullBuffer.ALL_NULL, (byte) 2, 1)); + // Empty v = VectorTestUtils.vv(Type.Decimal); @@ -760,6 +832,36 @@ void test_decimal() VectorTestUtils.assertVectorsEquals(v, actual); } + @Test + void test_datetime_literal() + { + ValueVector v; + ValueVector actual; + byte[] bytes; + + v = VectorTestUtils.vv(Type.DateTime, 0L, 0L, 0L, 0L); + bytes = PayloadWriter.write(v); + actual = PayloadReader.read(bytes); + assertFalse(actual instanceof DateTimeVector); // Literal + VectorTestUtils.assertVectorsEquals(vv(Type.DateTime, 0L, 0L, 0L, 0L), actual); + + // Test that literal encoding with nulls fails + v = VectorTestUtils.vv(Type.DateTime, 0L, 0L, null, 0L); + + bytes = PayloadWriter.write(v); + actual = PayloadReader.read(bytes); + 
assertTrue(actual instanceof DateTimeVector); // Not literal + VectorTestUtils.assertVectorsEquals(vv(Type.DateTime, 0L, 0L, null, 0L), actual); + + // Test null first items + v = VectorTestUtils.vv(Type.DateTime, null, 0L, 0L, 0L); + + bytes = PayloadWriter.write(v); + actual = PayloadReader.read(bytes); + assertTrue(actual instanceof DateTimeVector); // Not literal + VectorTestUtils.assertVectorsEquals(vv(Type.DateTime, null, 0L, 0L, 0L), actual); + } + @Test void test_datetime() { @@ -767,6 +869,8 @@ void test_datetime() ValueVector actual; byte[] bytes; + assertThrows(IllegalArgumentException.class, () -> DateTimeVector.getVector(ByteBuffer.allocate(10), 0, NullBuffer.ALL_NULL, (byte) 2, 1)); + // Empty v = VectorTestUtils.vv(Type.DateTime); @@ -824,6 +928,8 @@ void test_int() ValueVector actual; byte[] bytes; + assertThrows(IllegalArgumentException.class, () -> IntVector.getVector(ByteBuffer.allocate(10), 0, NullBuffer.ALL_NULL, (byte) 2, 1)); + // Empty v = VectorTestUtils.vv(Type.Int); @@ -864,6 +970,36 @@ void test_int() VectorTestUtils.assertVectorsEquals(v, actual); } + @Test + void test_long_literal() + { + ValueVector v; + ValueVector actual; + byte[] bytes; + + v = VectorTestUtils.vv(Type.Long, 0, 0, 0, 0); + bytes = PayloadWriter.write(v); + actual = PayloadReader.read(bytes); + assertFalse(actual instanceof LongVector); // A literal + VectorTestUtils.assertVectorsEquals(vv(Type.Long, 0, 0, 0, 0), actual); + + // Test that literal encoding with nulls fails + v = VectorTestUtils.vv(Type.Long, 0, 0, null, 0); + + bytes = PayloadWriter.write(v); + actual = PayloadReader.read(bytes); + assertTrue(actual instanceof LongVector); // Not a literal + VectorTestUtils.assertVectorsEquals(vv(Type.Long, 0, 0, null, 0), actual); + + // Test null first + v = VectorTestUtils.vv(Type.Long, null, 0, 0, 0); + + bytes = PayloadWriter.write(v); + actual = PayloadReader.read(bytes); + assertTrue(actual instanceof LongVector); // Not a literal + 
VectorTestUtils.assertVectorsEquals(vv(Type.Long, null, 0, 0, 0), actual); + } + @Test void test_long_cache() { @@ -1023,6 +1159,8 @@ void test_long() ValueVector actual; byte[] bytes; + assertThrows(IllegalArgumentException.class, () -> LongVector.getVector(ByteBuffer.allocate(10), 0, NullBuffer.ALL_NULL, (byte) 2, 1)); + // Empty v = VectorTestUtils.vv(Type.Long); @@ -1072,6 +1210,93 @@ void test_long() VectorTestUtils.assertVectorsEquals(v, actual); } + @Test + void test_string_literal_without_latin1() + { + ValueVector v; + ValueVector actual; + byte[] bytes; + + v = VectorTestUtils.vv(Type.String, "se", "se", "se"); + bytes = PayloadWriter.write(v); + actual = PayloadReader.read(bytes); + assertFalse(actual instanceof StringVector); // A literal + VectorTestUtils.assertVectorsEquals(vv(Type.String, "se", "se", "se"), actual); + + for (int i = 0; i < actual.size(); i++) + { + assertFalse(actual.getString(i) + .isLatin1()); + } + + // Test that literal encoding with nulls fails + v = VectorTestUtils.vv(Type.String, "se", "se", null, "se"); + + bytes = PayloadWriter.write(v); + actual = PayloadReader.read(bytes); + assertTrue(actual instanceof StringVector); // Not a literal + VectorTestUtils.assertVectorsEquals(vv(Type.String, "se", "se", null, "se"), actual); + + for (int i = 0; i < actual.size(); i++) + { + assertFalse(actual.getString(i) + .isLatin1()); + } + } + + @Test + void test_string_literal_with_latin1() + { + ValueVector v; + ValueVector actual; + byte[] bytes; + + WriterSettings settings = new WriterSettings(); + settings.setUseLatin1EncodedStrings(true); + + v = VectorTestUtils.vv(Type.String, "se", "se", "se"); + bytes = PayloadWriter.write(v, settings); + actual = PayloadReader.read(bytes); + assertFalse(actual instanceof StringVector); // A literal + VectorTestUtils.assertVectorsEquals(vv(Type.String, "se", "se", "se"), actual); + + // Verify that all string are latin1 + for (int i = 0; i < actual.size(); i++) + { + assertTrue(actual.getString(i) 
+ .isLatin1()); + } + + // Test that literal encoding with nulls fails + v = VectorTestUtils.vv(Type.String, "se", "se", null, "se"); + + bytes = PayloadWriter.write(v, settings); + actual = PayloadReader.read(bytes); + assertTrue(actual instanceof StringVector); // Not a literal + VectorTestUtils.assertVectorsEquals(vv(Type.String, "se", "se", null, "se"), actual); + + for (int i = 0; i < actual.size(); i++) + { + assertEquals(i == 2, actual.isNull(i)); + assertTrue(actual.getString(i) + .isLatin1()); + } + + // Test that utf 8 strings don't get encoded to latin1 + v = VectorTestUtils.vv(Type.String, "se", "Между", null, "se"); + bytes = PayloadWriter.write(v, settings); + actual = PayloadReader.read(bytes); + assertTrue(actual instanceof StringVector); // Not a literal + VectorTestUtils.assertVectorsEquals(vv(Type.String, "se", "Между", null, "se"), actual); + + for (int i = 0; i < actual.size(); i++) + { + assertEquals(i == 2, actual.isNull(i)); + assertFalse(actual.getString(i) + .isLatin1()); + } + } + @Test void test_string() { @@ -1079,6 +1304,8 @@ void test_string() ValueVector actual; byte[] bytes; + assertThrows(IllegalArgumentException.class, () -> StringVector.getVector(ByteBuffer.allocate(10), 0, NullBuffer.ALL_NULL, (byte) 2, 1)); + // Empty v = VectorTestUtils.vv(Type.Long); @@ -1095,6 +1322,9 @@ void test_string() assertEquals(70, bytes.length); actual = PayloadReader.read(bytes); + assertFalse(actual.getString(0) + .isLatin1()); + assertEquals("hello", actual.valueAsString(1)); VectorTestUtils.assertVectorsEquals(v, actual); @@ -1114,6 +1344,9 @@ void test_string() assertEquals(17, bytes.length); actual = PayloadReader.read(bytes); + assertFalse(actual.getString(0) + .isLatin1()); + assertEquals("sv", actual.valueAsString(2)); VectorTestUtils.assertVectorsEquals(v, actual); @@ -1126,6 +1359,62 @@ void test_string() actual = PayloadReader.read(bytes); VectorTestUtils.assertVectorsEquals(v, actual); + + // UTF8 + v = VectorTestUtils.vv(Type.String, 
"sv", "sv", "Зареги", "sv"); + + bytes = PayloadWriter.write(v); + + assertEquals(42, bytes.length); + actual = PayloadReader.read(bytes); + assertFalse(actual.getString(0) + .isLatin1()); + VectorTestUtils.assertVectorsEquals(v, actual); + + assertEquals("Зареги", actual.valueAsString(2)); + + // Null first + v = VectorTestUtils.vv(Type.String, null, 1, "hello", 4, "hello", 6, 7, 8, "world"); + + bytes = PayloadWriter.write(v); + + assertEquals(70, bytes.length); + actual = PayloadReader.read(bytes); + assertFalse(actual.getString(0) + .isLatin1()); + assertEquals("hello", actual.valueAsString(2)); + + VectorTestUtils.assertVectorsEquals(v, actual); + } + + @Test + void test_double_literal() + { + ValueVector v; + ValueVector actual; + byte[] bytes; + + v = VectorTestUtils.vv(Type.Double, 0, 0, 0, 0); + bytes = PayloadWriter.write(v); + actual = PayloadReader.read(bytes); + assertFalse(actual instanceof DoubleVector); // Literal + VectorTestUtils.assertVectorsEquals(vv(Type.Double, 0, 0, 0, 0), actual); + + // Test that literal encoding with nulls fails + v = VectorTestUtils.vv(Type.Double, 0, 0, null, 0); + + bytes = PayloadWriter.write(v); + actual = PayloadReader.read(bytes); + assertTrue(actual instanceof DoubleVector); // Not literal + VectorTestUtils.assertVectorsEquals(vv(Type.Double, 0, 0, null, 0), actual); + + // Null first item + v = VectorTestUtils.vv(Type.Double, null, 0, 0, 0); + + bytes = PayloadWriter.write(v); + actual = PayloadReader.read(bytes); + assertTrue(actual instanceof DoubleVector); // Not literal + VectorTestUtils.assertVectorsEquals(vv(Type.Double, null, 0, 0, 0), actual); } @Test @@ -1135,6 +1424,8 @@ void test_double() ValueVector actual; byte[] bytes; + assertThrows(IllegalArgumentException.class, () -> DoubleVector.getVector(ByteBuffer.allocate(10), 0, NullBuffer.ALL_NULL, (byte) 2, 1)); + // Empty v = VectorTestUtils.vv(Type.Double); @@ -1191,6 +1482,8 @@ void test_boolean() ValueVector actual; byte[] bytes; + 
assertThrows(IllegalArgumentException.class, () -> BooleanVector.getVector(ByteBuffer.allocate(10), 0, NullBuffer.ALL_NULL, (byte) 2, 1)); + // Empty v = VectorTestUtils.vv(Type.Boolean); @@ -1257,6 +1550,8 @@ void test_float() ValueVector actual; byte[] bytes; + assertThrows(IllegalArgumentException.class, () -> FloatVector.getVector(ByteBuffer.allocate(10), 0, NullBuffer.ALL_NULL, (byte) 2, 1)); + // Empty v = VectorTestUtils.vv(Type.Float); @@ -1932,6 +2227,8 @@ void test_table() Schema expectedSchema; TupleVector expected; + assertThrows(IllegalArgumentException.class, () -> TableVector.getVector(ByteBuffer.allocate(10), 0, new ReadContext(), NullBuffer.ALL_NULL, ResolvedType.ANY, ResolvedType.ANY, (byte) 2, 1)); + // @formatter:off Schema schema = Schema.of( Column.of("integer", Column.Type.Int), @@ -1995,6 +2292,9 @@ void test_object() Schema expectedSchema; TupleVector expected; + assertThrows(IllegalArgumentException.class, + () -> se.kuseman.payloadbuilder.bytes.ObjectVector.getVector(ByteBuffer.allocate(10), 0, new ReadContext(), NullBuffer.ALL_NULL, ResolvedType.ANY, ResolvedType.ANY, (byte) 2, 1)); + // @formatter:off Schema schema = Schema.of( Column.of("integer", Column.Type.Int), diff --git a/payloadbuilder-catalog/src/main/java/se/kuseman/payloadbuilder/catalog/jdbc/dialect/SqlDialect.java b/payloadbuilder-catalog/src/main/java/se/kuseman/payloadbuilder/catalog/jdbc/dialect/SqlDialect.java index 4607a805a..4776cba09 100644 --- a/payloadbuilder-catalog/src/main/java/se/kuseman/payloadbuilder/catalog/jdbc/dialect/SqlDialect.java +++ b/payloadbuilder-catalog/src/main/java/se/kuseman/payloadbuilder/catalog/jdbc/dialect/SqlDialect.java @@ -185,7 +185,11 @@ else if (type == Type.String) } else { - vector.setString(row, UTF8String.from(rs.getString(ordinal))); + String string = rs.getString(ordinal); + if (string != null) + { + vector.setString(row, UTF8String.from(string)); + } } } else if (type == Type.DateTime) diff --git 
a/payloadbuilder-core/src/main/java/se/kuseman/payloadbuilder/core/JsonOutputWriter.java b/payloadbuilder-core/src/main/java/se/kuseman/payloadbuilder/core/JsonOutputWriter.java index 0e7c8dd04..ccc2f7267 100644 --- a/payloadbuilder-core/src/main/java/se/kuseman/payloadbuilder/core/JsonOutputWriter.java +++ b/payloadbuilder-core/src/main/java/se/kuseman/payloadbuilder/core/JsonOutputWriter.java @@ -34,7 +34,8 @@ public class JsonOutputWriter implements OutputWriter private final JsonSettings settings; private String currentField; private boolean firstResultSet = true; - private byte[] stringBuffer; + private byte[] byteBuffer; + private char[] charBuffer; private String rowSeparator; private String resultSetSeparator; @@ -201,29 +202,42 @@ public void writeFieldName(String name) @Override public void writeString(UTF8String string) { - if (!outputStream - || string.hasString()) + try { - writeValue(string.toString()); - return; - } + if (!outputStream + || string.hasString() + || string.isLatin1()) + { + int length = string.length(); + // Extend buffer if needed + if (charBuffer == null + || charBuffer.length < length) + { + charBuffer = new char[length]; + } + for (int i = 0; i < length; i++) + { + charBuffer[i] = string.charAt(i); + } - // In outputstream mode we can write the raw uf8 bytes - // this is more performant since we don't have to create a String first - int length = string.getByteLength(); + writeFieldNameInternal(); + generator.writeString(charBuffer, 0, length); + return; + } - // Extend buffer if needed - if (stringBuffer == null - || stringBuffer.length < length) - { - stringBuffer = new byte[length]; - } + // When having an output stream we can write raw utf8 bytes + // which is more performant than traversing through CharSequence#charAt + int length = string.getByteLength(); + // Extend buffer if needed + if (byteBuffer == null + || byteBuffer.length < length) + { + byteBuffer = new byte[length]; + } + string.getBytes(byteBuffer); - 
string.getBytes(stringBuffer); - try - { writeFieldNameInternal(); - generator.writeUTF8String(stringBuffer, 0, length); + generator.writeUTF8String(byteBuffer, 0, length); } catch (IOException e) { diff --git a/payloadbuilder-core/src/test/java/se/kuseman/payloadbuilder/core/JsonOutputWriterTest.java b/payloadbuilder-core/src/test/java/se/kuseman/payloadbuilder/core/JsonOutputWriterTest.java index 979d7044f..b09efcac0 100644 --- a/payloadbuilder-core/src/test/java/se/kuseman/payloadbuilder/core/JsonOutputWriterTest.java +++ b/payloadbuilder-core/src/test/java/se/kuseman/payloadbuilder/core/JsonOutputWriterTest.java @@ -38,7 +38,11 @@ void test_outputstream_escape() p.getValue().startRow(); p.getValue().startObject(); p.getValue().writeFieldName("key"); - p.getValue().writeString(UTF8String.utf8(",\"#\\n".getBytes(StandardCharsets.UTF_8))); + p.getValue().writeString(UTF8String.utf8(",\"#\\nåäö".getBytes(StandardCharsets.UTF_8))); + p.getValue().writeFieldName("key2"); + p.getValue().writeString(UTF8String.utf8(",\"#\\nåäö hello world".getBytes(StandardCharsets.UTF_8))); + p.getValue().writeFieldName("key3"); + p.getValue().writeString(UTF8String.utf8("1".getBytes(StandardCharsets.UTF_8))); p.getValue().endObject(); p.getValue().endResult(); //@formatter:on @@ -46,9 +50,35 @@ void test_outputstream_escape() p.getValue() .close(); - assertEquals("{\"key\":\",\\\"#\\\\n\"}", p.getKey() + assertEquals("{\"key\":\",\\\"#\\\\nåäö\",\"key2\":\",\\\"#\\\\nåäö hello world\",\"key3\":\"1\"}", p.getKey() .toString()); + } + + @Test + void test_outputstream_latin1_string() + { + Pair p = outputstream(new JsonSettings()); + //@formatter:off + // First result set + p.getValue().initResult(new String[0]); + p.getValue().startRow(); + p.getValue().startObject(); + p.getValue().writeFieldName("key"); + p.getValue().writeString(UTF8String.latin(",\"#\\nåäö".getBytes(StandardCharsets.ISO_8859_1))); + p.getValue().writeFieldName("key2"); + 
p.getValue().writeString(UTF8String.latin(",\"#\\nåäö hello world".getBytes(StandardCharsets.ISO_8859_1))); + p.getValue().writeFieldName("key3"); + p.getValue().writeString(UTF8String.latin("1".getBytes(StandardCharsets.ISO_8859_1))); + p.getValue().endObject(); + p.getValue().endResult(); + //@formatter:on + + p.getValue() + .close(); + + assertEquals("{\"key\":\",\\\"#\\\\nåäö\",\"key2\":\",\\\"#\\\\nåäö hello world\",\"key3\":\"1\"}", p.getKey() + .toString()); } @Test diff --git a/payloadbuilder-core/src/test/java/se/kuseman/payloadbuilder/core/physicalplan/UTF8StringTest.java b/payloadbuilder-core/src/test/java/se/kuseman/payloadbuilder/core/physicalplan/UTF8StringTest.java index 022ef091e..210a4fe44 100644 --- a/payloadbuilder-core/src/test/java/se/kuseman/payloadbuilder/core/physicalplan/UTF8StringTest.java +++ b/payloadbuilder-core/src/test/java/se/kuseman/payloadbuilder/core/physicalplan/UTF8StringTest.java @@ -2,6 +2,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -13,11 +14,23 @@ import org.junit.jupiter.api.Test; +import se.kuseman.payloadbuilder.api.catalog.ResolvedType; import se.kuseman.payloadbuilder.api.execution.UTF8String; /** Test of {@link UTF8String} */ class UTF8StringTest { + + @Test + void test_valuevector() + { + UTF8String str = UTF8String.from("hello"); + assertEquals(1, str.size()); + assertFalse(str.isNull(0)); + assertEquals(ResolvedType.STRING, str.type()); + assertEquals(str.getString(0), str); + } + @Test void test_get_bytes() { @@ -129,6 +142,7 @@ void test_compare() @Test void test_compareTo() { + // String UTF8String utf1 = UTF8String.from("hello"); UTF8String utf2 = UTF8String.from("world"); UTF8String utf3 = UTF8String.from("hello world"); @@ -139,6 +153,72 @@ void 
test_compareTo() assertTrue(utf1.compareTo(utf2) < 0); assertTrue(utf2.compareTo(utf1) > 0); assertTrue(utf1.compareTo(utf3) < 0); + assertTrue(utf3.compareTo(utf1) > 0); + + // Same encoding + utf1 = UTF8String.from("hello".getBytes(StandardCharsets.UTF_8)); + utf2 = UTF8String.from("world".getBytes(StandardCharsets.UTF_8)); + utf3 = UTF8String.from("hello world".getBytes(StandardCharsets.UTF_8)); + assertEquals(0, utf1.compareTo(utf1)); + assertFalse(utf1.equals(utf2)); + assertFalse(utf1.equals(utf3)); + + assertTrue(utf1.compareTo(utf2) < 0); + assertTrue(utf2.compareTo(utf1) > 0); + assertTrue(utf1.compareTo(utf3) < 0); + assertTrue(utf3.compareTo(utf1) > 0); + + // Mixed encoding + UTF8String s1 = UTF8String.utf8("hello".getBytes(StandardCharsets.UTF_8)); + UTF8String s2 = UTF8String.latin("world".getBytes(StandardCharsets.ISO_8859_1)); + + assertEquals(0, s1.compareTo(s1)); + assertTrue(s1.compareTo(s2) < 0); + assertTrue(s2.compareTo(s1) > 0); + + // Same prefix + s1 = UTF8String.utf8("hello".getBytes(StandardCharsets.UTF_8)); + s2 = UTF8String.latin("hello 12".getBytes(StandardCharsets.ISO_8859_1)); + + assertTrue(s1.compareTo(s2) < 0); + assertTrue(s2.compareTo(s1) > 0); + } + + @Test + void test_equals() + { + // Same encoding + UTF8String s1 = UTF8String.utf8("hello".getBytes(StandardCharsets.UTF_8)); + UTF8String s2 = UTF8String.utf8("hello".getBytes(StandardCharsets.UTF_8)); + + assertNotEquals(s1, null); + assertNotEquals(s1, 123); + assertEquals(s1, s1); + assertEquals(s1, s2); + assertEquals(s2, s1); + + s2 = UTF8String.utf8("world".getBytes(StandardCharsets.UTF_8)); + assertNotEquals(s1, s2); + assertNotEquals(s2, s1); + + s2 = UTF8String.utf8("world 123".getBytes(StandardCharsets.UTF_8)); + assertNotEquals(s1, s2); + assertNotEquals(s2, s1); + + // Mized + s1 = UTF8String.utf8("hello".getBytes(StandardCharsets.UTF_8)); + s2 = UTF8String.latin("hello".getBytes(StandardCharsets.ISO_8859_1)); + + assertEquals(s1, s2); + assertEquals(s2, s1); + + s2 
= UTF8String.latin("world".getBytes(StandardCharsets.ISO_8859_1)); + assertNotEquals(s1, s2); + assertNotEquals(s2, s1); + + s2 = UTF8String.latin("world 123".getBytes(StandardCharsets.ISO_8859_1)); + assertNotEquals(s1, s2); + assertNotEquals(s2, s1); } @Test