Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions cmd/sling/sling_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -859,9 +859,6 @@ func runOneTask(t *testing.T, ctx context.Context, file g.FileItem, connType dbi
if srcType == dbio.TypeDbMariaDB && strings.EqualFold(colName, "json_data") {
correctType = iop.TextType // mariadb's `json` type is `longtext`
}
if srcType == dbio.TypeDbStarRocks && strings.EqualFold(colName, "json_data") {
correctType = iop.TextType // starrocks's `json` type is `varchar(65500)`
}
case tgtType.IsMySQLLike():
if g.In(correctType, iop.TimestampType, iop.TimestampzType) {
correctType = iop.DatetimeType // mysql/mariadb uses datetime
Expand Down
139 changes: 139 additions & 0 deletions cmd/sling/tests/pipelines/p.23.ternary_length_mixed_types.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
# Reproduce: Wildcard transform with type_of + length fails on non-string columns
# Issue: goval ternary operator does NOT short-circuit, so both branches are always
# evaluated. When using a wildcard transform like:
# type_of(value) == "string" ? (length(value) > 509 ? substring(value,0,505) + "..." : value) : value
# length(value) is called on ALL columns regardless of type, causing:
# "function error: 'length' - cannot get length of type int64"
#
# User report: Oracle -> Postgres, wants to truncate long strings via wildcard transform
# without having to define per-column transforms.

steps:
# 0. Cleanup
- connection: oracle
query: |
BEGIN EXECUTE IMMEDIATE 'DROP TABLE SYSTEM.TEST_TERNARY_LENGTH'; EXCEPTION WHEN OTHERS THEN NULL; END;

- connection: POSTGRES
query: DROP TABLE IF EXISTS public.test_ternary_length

# 1. Create Oracle source table with mixed types (string, integer, date, number)
- connection: oracle
query: |
CREATE TABLE SYSTEM.TEST_TERNARY_LENGTH (
id NUMBER(10),
short_name VARCHAR2(50),
long_description VARCHAR2(4000),
amount NUMBER(12,2),
created_at DATE
)

- connection: oracle
query: |
INSERT INTO SYSTEM.TEST_TERNARY_LENGTH VALUES (
1,
'Alice',
'This is a short description',
123.45,
TO_DATE('2025-01-15', 'YYYY-MM-DD')
)

- connection: oracle
query: |
INSERT INTO SYSTEM.TEST_TERNARY_LENGTH VALUES (
2,
'Bob',
RPAD('Very long text that exceeds 509 characters. ', 600, 'ABCDEFGHIJ'),
99999.99,
TO_DATE('2025-06-20', 'YYYY-MM-DD')
)

- connection: oracle
query: |
INSERT INTO SYSTEM.TEST_TERNARY_LENGTH VALUES (
3,
'Charlie',
NULL,
0.00,
NULL
)

- connection: oracle
query: COMMIT

- log: "Created Oracle source table with mixed types (integer, string, number, date)"

# 2. Replicate Oracle -> Postgres with wildcard transform using type_of + length
# This should fail because goval evaluates length(value) even on non-string columns
- replication:
source: oracle
target: POSTGRES
defaults:
mode: full-refresh
streams:
SYSTEM.TEST_TERNARY_LENGTH:
object: public.test_ternary_length
transforms:
- '*': 'type_of(value) == "string" ? (length(value) > 509 ? substring(value,0,505) + "..." : value) : value'
on_failure: warn

# 3. Verify data landed in Postgres
- connection: POSTGRES
query: SELECT count(*) as cnt FROM public.test_ternary_length
into: row_count

- log: "Row count => {store.row_count[0].cnt}"

- check: int_parse(store.row_count[0].cnt) == 3
failure_message: "Expected 3 rows, got {store.row_count[0].cnt}"

# 4. Verify long string was truncated (505 chars + "..." = 508 chars)
# Note: wildcard transform converts all columns to string type,
# so we use string comparisons for the id column.
- connection: POSTGRES
query: |
SELECT id, length(long_description) as desc_len
FROM public.test_ternary_length
WHERE id = '2'
into: long_row

- log: "Long string length for id=2 => {store.long_row[0].desc_len}"

- check: int_parse(store.long_row[0].desc_len) == 508
failure_message: "Expected truncated length 508, got {store.long_row[0].desc_len}"

# 5. Verify short string was NOT truncated
- connection: POSTGRES
query: |
SELECT long_description
FROM public.test_ternary_length
WHERE id = '1'
into: short_row

- check: store.short_row[0].long_description == "This is a short description"
failure_message: "Short string was incorrectly modified"

# 6. Verify integer and numeric columns survived the transform
- connection: POSTGRES
query: |
SELECT id, amount
FROM public.test_ternary_length
WHERE id = '1'
into: numeric_row

- log: "Numeric values => id={store.numeric_row[0].id}, amount={store.numeric_row[0].amount}"

- check: store.numeric_row[0].id == "1"
failure_message: "Integer column 'id' value incorrect, got {store.numeric_row[0].id}"

- log: "SUCCESS: Wildcard transform with type_of + length works on mixed-type columns"

# 7. Cleanup
- connection: oracle
query: |
BEGIN EXECUTE IMMEDIATE 'DROP TABLE SYSTEM.TEST_TERNARY_LENGTH'; EXCEPTION WHEN OTHERS THEN NULL; END;

- connection: POSTGRES
query: DROP TABLE IF EXISTS public.test_ternary_length

- log: "Ternary length mixed types test complete"
75 changes: 75 additions & 0 deletions cmd/sling/tests/pipelines/p.25.oracle_sqlldr_char_sizing.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Reproduce: sqlldr ctl file uses char(400000) even when column_typing.string.max_length is set
# Issue: When replicating CSV to Oracle with use_bulk=true and column_typing.string.max_length=255,
# the Oracle table columns get correct VARCHAR2 sizing, but the sqlldr control file still
# hardcodes char(400000) for all string columns. This causes sqlldr to pre-allocate massive
# memory (10s of GB for moderately sized tables).
#
# The bug is in getColumnsString() in database_oracle.go:
# char(400000) is always used unless col.DbPrecision > 400000
# It should respect col.DbPrecision when it's set (e.g. from column_typing).
#
# This test:
# 1. Replicates a CSV file to Oracle with use_bulk=true and column_typing max_length=255
# 2. Verifies the table was created with correct column sizing (<= 255 per column_typing, not the 4000 default)
# 3. Checks debug output does NOT contain char(400000) in the sqlldr ctl

steps:
# 0. Cleanup
- connection: oracle
query: |
BEGIN EXECUTE IMMEDIATE 'DROP TABLE SYSTEM.TEST_SQLLDR_CHAR'; EXCEPTION WHEN OTHERS THEN NULL; END;

# 1. Replicate CSV to Oracle with column_typing + use_bulk=true
- replication:
source: local
target: oracle
defaults:
mode: full-refresh
streams:
file://cmd/sling/tests/files/test_wide_columns.csv:
object: SYSTEM.TEST_SQLLDR_CHAR
target_options:
use_bulk: true
column_typing:
string:
max_length: 255
min_length: 50
length_factor: 1

# 2. Verify table columns are correctly sized (not VARCHAR2(4000))
- connection: oracle
query: |
SELECT column_name, data_type, data_length
FROM all_tab_columns
WHERE table_name = 'TEST_SQLLDR_CHAR' AND owner = 'SYSTEM'
AND column_name LIKE 'COL_%'
ORDER BY column_name
into: col_info

- log: |
Oracle column metadata:
{pretty_table(store.col_info)}

# 3. Verify row count
- connection: oracle
query: SELECT COUNT(*) as cnt FROM SYSTEM.TEST_SQLLDR_CHAR
into: row_count

- check: int_parse(store.row_count[0].cnt) == 5
failure_message: "Expected 5 rows, got {store.row_count[0].cnt}"

# 4. Verify table columns respect column_typing (should be <= 255)
- check: int_parse(store.col_info[0].data_length) <= 255
failure_message: "COL_001 data_length={store.col_info[0].data_length} (expected <= 255 from column_typing)"

- log: "SUCCESS: Oracle table columns correctly sized from column_typing"

# 5. The real bug check happens via output_does_not_contain in suite.cli.yaml:
# The debug output prints "sqlldr ctl file content" which should NOT contain char(400000)
# when column_typing.string.max_length=255 is set.
- log: "Oracle sqlldr char sizing test complete"

# 6. Cleanup
- connection: oracle
query: |
BEGIN EXECUTE IMMEDIATE 'DROP TABLE SYSTEM.TEST_SQLLDR_CHAR'; EXCEPTION WHEN OTHERS THEN NULL; END;
26 changes: 26 additions & 0 deletions cmd/sling/tests/pipelines/p.26.duckdb_arrow_ipc_output.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
steps:
# Read local parquet via Arrow IPC mode and write to temp CSV
- replication:
source: LOCAL
target: LOCAL
defaults:
mode: full-refresh
streams:
cmd/sling/tests/files/test1.parquet:
object: file:///tmp/sling/arrow_test_output.csv

# Verify the output file has data rows (header + data rows)
# test1.parquet has 1000 data rows, CSV output has 1 header + 1000 rows
- type: command
command: |
data_lines=$(tail -n +2 /tmp/sling/arrow_test_output.csv | grep -c .)
echo "Arrow IPC output data row count: $data_lines"
if [ "$data_lines" -ge 999 ]; then
echo "SUCCESS: DuckDB Arrow IPC output produced correct row count"
else
echo "FAIL: Expected at least 999 data rows, got $data_lines"
exit 1
fi

- type: log
message: "DuckDB Arrow IPC output test complete"
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# Test for MySQL TINYINT vs TINYINT(1) type mapping to Snowflake
# Issue: MySQL TINYINT columns with values beyond 0/1 (e.g. 2, 4) were mapped
# to BOOLEAN in Snowflake, causing all non-zero values to become TRUE.
# Fix: TINYINT maps to SMALLINT (integer), TINYINT(1) maps to BOOLEAN.
source: mysql
target: snowflake

defaults:
mode: full-refresh

hooks:
start:
# Create source table in MySQL with both TINYINT and TINYINT(1) columns
- type: query
connection: '{source.name}'
query: |
DROP TABLE IF EXISTS mysql.tinyint_bool_test;
CREATE TABLE mysql.tinyint_bool_test (
id INT PRIMARY KEY,
status TINYINT,
flag TINYINT(1)
);
INSERT INTO mysql.tinyint_bool_test (id, status, flag) VALUES
(1, 1, 1),
(2, 2, 0),
(3, 4, 1),
(4, 0, 0),
(5, NULL, NULL);

- type: query
connection: '{source.name}'
query: SELECT id, status, flag FROM mysql.tinyint_bool_test ORDER BY id
into: source_data

- type: log
message: |
Source data (MySQL):
{pretty_table(store.source_data)}

end:
# Check execution succeeded
- type: check
check: execution.status.error == 0
on_failure: break

# ===== Verify values in Snowflake =====

- type: query
connection: '{target.name}'
query: SELECT id, status, flag FROM public.tinyint_bool_test ORDER BY id
into: result

- type: log
message: |
Snowflake result data:
{pretty_table(store.result)}

# Check column types in Snowflake
- type: query
connection: '{target.name}'
query: |
SELECT column_name, data_type
FROM information_schema.columns
WHERE table_schema = 'PUBLIC'
AND table_name = 'TINYINT_BOOL_TEST'
AND column_name IN ('STATUS', 'FLAG')
ORDER BY column_name
into: col_types

- type: log
message: |
Snowflake column types:
{pretty_table(store.col_types)}

# Verify row count
- type: check
check: length(store.result) == 5
failure_message: "Expected 5 rows but found {length(store.result)}"

# ===== Verify STATUS column (TINYINT -> SMALLINT, integer values preserved) =====

- type: check
check: int_parse(store.result[0].status) == 1
failure_message: "Row 1 status should be 1, got {store.result[0].status}"

# Key test: value=2 must NOT become TRUE/1
- type: check
check: int_parse(store.result[1].status) == 2
failure_message: "Row 2 status should be 2, got {store.result[1].status} (TINYINT value lost due to BOOLEAN mapping)"

# Key test: value=4 must NOT become TRUE/1
- type: check
check: int_parse(store.result[2].status) == 4
failure_message: "Row 3 status should be 4, got {store.result[2].status} (TINYINT value lost due to BOOLEAN mapping)"

- type: check
check: int_parse(store.result[3].status) == 0
failure_message: "Row 4 status should be 0, got {store.result[3].status}"

- type: check
check: store.result[4].status == nil
failure_message: "Row 5 status should be NULL, got {store.result[4].status}"

# ===== Verify FLAG column (TINYINT(1) -> BOOLEAN) =====

- type: check
check: store.result[0].flag == true || store.result[0].flag == "true" || store.result[0].flag == "1"
failure_message: "Row 1 flag should be true, got {store.result[0].flag}"

- type: check
check: store.result[1].flag == false || store.result[1].flag == "false" || store.result[1].flag == "0"
failure_message: "Row 2 flag should be false, got {store.result[1].flag}"

- type: log
message: "SUCCESS: MySQL TINYINT values correctly preserved in Snowflake (not collapsed to BOOLEAN)"
- type: log
message: "SUCCESS: MySQL TINYINT(1) correctly mapped to BOOLEAN in Snowflake"

# Cleanup
- type: query
connection: '{source.name}'
query: DROP TABLE IF EXISTS mysql.tinyint_bool_test

- type: query
connection: '{target.name}'
query: DROP TABLE IF EXISTS public.tinyint_bool_test

streams:
mysql.tinyint_bool_test:
object: public.tinyint_bool_test
mode: full-refresh
Loading