Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions cmd/sling/sling_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -859,9 +859,6 @@ func runOneTask(t *testing.T, ctx context.Context, file g.FileItem, connType dbi
if srcType == dbio.TypeDbMariaDB && strings.EqualFold(colName, "json_data") {
correctType = iop.TextType // mariadb's `json` type is `longtext`
}
if srcType == dbio.TypeDbStarRocks && strings.EqualFold(colName, "json_data") {
correctType = iop.TextType // starrocks's `json` type is `varchar(65500)`
}
case tgtType.IsMySQLLike():
if g.In(correctType, iop.TimestampType, iop.TimestampzType) {
correctType = iop.DatetimeType // mysql/mariadb uses datetime
Expand Down
139 changes: 139 additions & 0 deletions cmd/sling/tests/pipelines/p.23.ternary_length_mixed_types.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
# Reproduce: Wildcard transform with type_of + length fails on non-string columns
# Issue: goval ternary operator does NOT short-circuit, so both branches are always
# evaluated. When using a wildcard transform like:
# type_of(value) == "string" ? (length(value) > 509 ? substring(value,0,505) + "..." : value) : value
# length(value) is called on ALL columns regardless of type, causing:
# "function error: 'length' - cannot get length of type int64"
#
# User report: Oracle -> Postgres, wants to truncate long strings via wildcard transform
# without having to define per-column transforms.

steps:
# 0. Cleanup
- connection: oracle
query: |
BEGIN EXECUTE IMMEDIATE 'DROP TABLE SYSTEM.TEST_TERNARY_LENGTH'; EXCEPTION WHEN OTHERS THEN NULL; END;

- connection: POSTGRES
query: DROP TABLE IF EXISTS public.test_ternary_length

# 1. Create Oracle source table with mixed types (string, integer, date, number)
- connection: oracle
query: |
CREATE TABLE SYSTEM.TEST_TERNARY_LENGTH (
id NUMBER(10),
short_name VARCHAR2(50),
long_description VARCHAR2(4000),
amount NUMBER(12,2),
created_at DATE
)

- connection: oracle
query: |
INSERT INTO SYSTEM.TEST_TERNARY_LENGTH VALUES (
1,
'Alice',
'This is a short description',
123.45,
TO_DATE('2025-01-15', 'YYYY-MM-DD')
)

- connection: oracle
query: |
INSERT INTO SYSTEM.TEST_TERNARY_LENGTH VALUES (
2,
'Bob',
RPAD('Very long text that exceeds 509 characters. ', 600, 'ABCDEFGHIJ'),
99999.99,
TO_DATE('2025-06-20', 'YYYY-MM-DD')
)

- connection: oracle
query: |
INSERT INTO SYSTEM.TEST_TERNARY_LENGTH VALUES (
3,
'Charlie',
NULL,
0.00,
NULL
)

- connection: oracle
query: COMMIT

- log: "Created Oracle source table with mixed types (integer, string, number, date)"

# 2. Replicate Oracle -> Postgres with wildcard transform using type_of + length
# This should fail because goval evaluates length(value) even on non-string columns
- replication:
source: oracle
target: POSTGRES
defaults:
mode: full-refresh
streams:
SYSTEM.TEST_TERNARY_LENGTH:
object: public.test_ternary_length
transforms:
- '*': 'type_of(value) == "string" ? (length(value) > 509 ? substring(value,0,505) + "..." : value) : value'
on_failure: warn

# 3. Verify data landed in Postgres
- connection: POSTGRES
query: SELECT count(*) as cnt FROM public.test_ternary_length
into: row_count

- log: "Row count => {store.row_count[0].cnt}"

- check: int_parse(store.row_count[0].cnt) == 3
failure_message: "Expected 3 rows, got {store.row_count[0].cnt}"

# 4. Verify long string was truncated (505 chars + "..." = 508 chars)
# Note: wildcard transform converts all columns to string type,
# so we use string comparisons for the id column.
- connection: POSTGRES
query: |
SELECT id, length(long_description) as desc_len
FROM public.test_ternary_length
WHERE id = '2'
into: long_row

- log: "Long string length for id=2 => {store.long_row[0].desc_len}"

- check: int_parse(store.long_row[0].desc_len) == 508
failure_message: "Expected truncated length 508, got {store.long_row[0].desc_len}"

# 5. Verify short string was NOT truncated
- connection: POSTGRES
query: |
SELECT long_description
FROM public.test_ternary_length
WHERE id = '1'
into: short_row

- check: store.short_row[0].long_description == "This is a short description"
failure_message: "Short string was incorrectly modified"

# 6. Verify integer and numeric columns survived the transform
- connection: POSTGRES
query: |
SELECT id, amount
FROM public.test_ternary_length
WHERE id = '1'
into: numeric_row

- log: "Numeric values => id={store.numeric_row[0].id}, amount={store.numeric_row[0].amount}"

- check: store.numeric_row[0].id == "1"
failure_message: "Integer column 'id' value incorrect, got {store.numeric_row[0].id}"

- log: "SUCCESS: Wildcard transform with type_of + length works on mixed-type columns"

# 7. Cleanup
- connection: oracle
query: |
BEGIN EXECUTE IMMEDIATE 'DROP TABLE SYSTEM.TEST_TERNARY_LENGTH'; EXCEPTION WHEN OTHERS THEN NULL; END;

- connection: POSTGRES
query: DROP TABLE IF EXISTS public.test_ternary_length

- log: "Ternary length mixed types test complete"
75 changes: 75 additions & 0 deletions cmd/sling/tests/pipelines/p.25.oracle_sqlldr_char_sizing.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Reproduce: sqlldr ctl file uses char(400000) even when column_typing.string.max_length is set
# Issue: When replicating CSV to Oracle with use_bulk=true and column_typing.string.max_length=255,
# the Oracle table columns get correct VARCHAR2 sizing, but the sqlldr control file still
# hardcodes char(400000) for all string columns. This causes sqlldr to pre-allocate massive
# memory (10s of GB for moderately sized tables).
#
# The bug is in getColumnsString() in database_oracle.go:
# char(400000) is always used unless col.DbPrecision > 400000
# It should respect col.DbPrecision when it's set (e.g. from column_typing).
#
# This test:
# 1. Replicates a CSV file to Oracle with use_bulk=true and column_typing max_length=255
# 2. Verifies the table was created with correct column sizing (<= 255 per column_typing, not the 4000 default)
# 3. Checks debug output does NOT contain char(400000) in the sqlldr ctl

steps:
# 0. Cleanup
- connection: oracle
query: |
BEGIN EXECUTE IMMEDIATE 'DROP TABLE SYSTEM.TEST_SQLLDR_CHAR'; EXCEPTION WHEN OTHERS THEN NULL; END;

# 1. Replicate CSV to Oracle with column_typing + use_bulk=true
- replication:
source: local
target: oracle
defaults:
mode: full-refresh
streams:
file://cmd/sling/tests/files/test_wide_columns.csv:
object: SYSTEM.TEST_SQLLDR_CHAR
target_options:
use_bulk: true
column_typing:
string:
max_length: 255
min_length: 50
length_factor: 1

# 2. Verify table columns are correctly sized (not VARCHAR2(4000))
- connection: oracle
query: |
SELECT column_name, data_type, data_length
FROM all_tab_columns
WHERE table_name = 'TEST_SQLLDR_CHAR' AND owner = 'SYSTEM'
AND column_name LIKE 'COL_%'
ORDER BY column_name
into: col_info

- log: |
Oracle column metadata:
{pretty_table(store.col_info)}

# 3. Verify row count
- connection: oracle
query: SELECT COUNT(*) as cnt FROM SYSTEM.TEST_SQLLDR_CHAR
into: row_count

- check: int_parse(store.row_count[0].cnt) == 5
failure_message: "Expected 5 rows, got {store.row_count[0].cnt}"

# 4. Verify table columns respect column_typing (should be <= 255)
- check: int_parse(store.col_info[0].data_length) <= 255
failure_message: "COL_001 data_length={store.col_info[0].data_length} (expected <= 255 from column_typing)"

- log: "SUCCESS: Oracle table columns correctly sized from column_typing"

# 5. The real bug check happens via output_does_not_contain in suite.cli.yaml:
# The debug output prints "sqlldr ctl file content" which should NOT contain char(400000)
# when column_typing.string.max_length=255 is set.
- log: "Oracle sqlldr char sizing test complete"

# 6. Cleanup
- connection: oracle
query: |
BEGIN EXECUTE IMMEDIATE 'DROP TABLE SYSTEM.TEST_SQLLDR_CHAR'; EXCEPTION WHEN OTHERS THEN NULL; END;
26 changes: 26 additions & 0 deletions cmd/sling/tests/pipelines/p.26.duckdb_arrow_ipc_output.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
steps:
# Read local parquet via Arrow IPC mode and write to temp CSV
- replication:
source: LOCAL
target: LOCAL
defaults:
mode: full-refresh
streams:
cmd/sling/tests/files/test1.parquet:
object: file:///tmp/sling/arrow_test_output.csv

# Verify the output file has data rows (header + data rows)
# test1.parquet has 1000 data rows, CSV output has 1 header + 1000 rows
- type: command
command: |
data_lines=$(tail -n +2 /tmp/sling/arrow_test_output.csv | grep -c .)
echo "Arrow IPC output data row count: $data_lines"
if [ "$data_lines" -ge 999 ]; then
echo "SUCCESS: DuckDB Arrow IPC output produced correct row count"
else
echo "FAIL: Expected at least 999 data rows, got $data_lines"
exit 1
fi

- type: log
message: "DuckDB Arrow IPC output test complete"
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# Test for MySQL TINYINT vs TINYINT(1) type mapping to Snowflake
# Issue: MySQL TINYINT columns with values beyond 0/1 (e.g. 2, 4) were mapped
# to BOOLEAN in Snowflake, causing all non-zero values to become TRUE.
# Fix: TINYINT maps to SMALLINT (integer), TINYINT(1) maps to BOOLEAN.
source: mysql
target: snowflake

defaults:
mode: full-refresh

hooks:
start:
# Create source table in MySQL with both TINYINT and TINYINT(1) columns
- type: query
connection: '{source.name}'
query: |
DROP TABLE IF EXISTS mysql.tinyint_bool_test;
CREATE TABLE mysql.tinyint_bool_test (
id INT PRIMARY KEY,
status TINYINT,
flag TINYINT(1)
);
INSERT INTO mysql.tinyint_bool_test (id, status, flag) VALUES
(1, 1, 1),
(2, 2, 0),
(3, 4, 1),
(4, 0, 0),
(5, NULL, NULL);

- type: query
connection: '{source.name}'
query: SELECT id, status, flag FROM mysql.tinyint_bool_test ORDER BY id
into: source_data

- type: log
message: |
Source data (MySQL):
{pretty_table(store.source_data)}

end:
# Check execution succeeded
- type: check
check: execution.status.error == 0
on_failure: break

# ===== Verify values in Snowflake =====

- type: query
connection: '{target.name}'
query: SELECT id, status, flag FROM public.tinyint_bool_test ORDER BY id
into: result

- type: log
message: |
Snowflake result data:
{pretty_table(store.result)}

# Check column types in Snowflake
- type: query
connection: '{target.name}'
query: |
SELECT column_name, data_type
FROM information_schema.columns
WHERE table_schema = 'PUBLIC'
AND table_name = 'TINYINT_BOOL_TEST'
AND column_name IN ('STATUS', 'FLAG')
ORDER BY column_name
into: col_types

- type: log
message: |
Snowflake column types:
{pretty_table(store.col_types)}

# Verify row count
- type: check
check: length(store.result) == 5
failure_message: "Expected 5 rows but found {length(store.result)}"

# ===== Verify STATUS column (TINYINT -> SMALLINT, integer values preserved) =====

- type: check
check: int_parse(store.result[0].status) == 1
failure_message: "Row 1 status should be 1, got {store.result[0].status}"

# Key test: value=2 must NOT become TRUE/1
- type: check
check: int_parse(store.result[1].status) == 2
failure_message: "Row 2 status should be 2, got {store.result[1].status} (TINYINT value lost due to BOOLEAN mapping)"

# Key test: value=4 must NOT become TRUE/1
- type: check
check: int_parse(store.result[2].status) == 4
failure_message: "Row 3 status should be 4, got {store.result[2].status} (TINYINT value lost due to BOOLEAN mapping)"

- type: check
check: int_parse(store.result[3].status) == 0
failure_message: "Row 4 status should be 0, got {store.result[3].status}"

- type: check
check: store.result[4].status == nil
failure_message: "Row 5 status should be NULL, got {store.result[4].status}"

# ===== Verify FLAG column (TINYINT(1) -> BOOLEAN) =====

- type: check
check: store.result[0].flag == true || store.result[0].flag == "true" || store.result[0].flag == "1"
failure_message: "Row 1 flag should be true, got {store.result[0].flag}"

- type: check
check: store.result[1].flag == false || store.result[1].flag == "false" || store.result[1].flag == "0"
failure_message: "Row 2 flag should be false, got {store.result[1].flag}"

- type: log
message: "SUCCESS: MySQL TINYINT values correctly preserved in Snowflake (not collapsed to BOOLEAN)"
- type: log
message: "SUCCESS: MySQL TINYINT(1) correctly mapped to BOOLEAN in Snowflake"

# Cleanup
- type: query
connection: '{source.name}'
query: DROP TABLE IF EXISTS mysql.tinyint_bool_test

- type: query
connection: '{target.name}'
query: DROP TABLE IF EXISTS public.tinyint_bool_test

streams:
mysql.tinyint_bool_test:
object: public.tinyint_bool_test
mode: full-refresh
Loading