From 3ff5793c9dcf739507474a57daa236f0a3f42af4 Mon Sep 17 00:00:00 2001 From: jferrl Date: Mon, 1 Dec 2025 13:49:20 +0100 Subject: [PATCH] feat(spanner): add DML statement support in migration files --- database/spanner/README.md | 111 +++++++- .../1621360368_seed_users_data.down.sql | 1 + .../1621360368_seed_users_data.up.sql | 2 + .../1621360369_update_user_emails.down.sql | 1 + .../1621360369_update_user_emails.up.sql | 1 + ...370_add_user_status_with_comments.down.sql | 2 + ...60370_add_user_status_with_comments.up.sql | 9 + database/spanner/spanner.go | 244 +++++++++++++---- database/spanner/spanner_test.go | 249 ++++++++++++------ 9 files changed, 472 insertions(+), 148 deletions(-) create mode 100644 database/spanner/examples/migrations/1621360368_seed_users_data.down.sql create mode 100644 database/spanner/examples/migrations/1621360368_seed_users_data.up.sql create mode 100644 database/spanner/examples/migrations/1621360369_update_user_emails.down.sql create mode 100644 database/spanner/examples/migrations/1621360369_update_user_emails.up.sql create mode 100644 database/spanner/examples/migrations/1621360370_add_user_status_with_comments.down.sql create mode 100644 database/spanner/examples/migrations/1621360370_add_user_status_with_comments.up.sql diff --git a/database/spanner/README.md b/database/spanner/README.md index dda7fe9b0..725d2a908 100644 --- a/database/spanner/README.md +++ b/database/spanner/README.md @@ -14,14 +14,14 @@ as described in [README.md#database-urls](../../README.md#database-urls) | Param | WithInstance Config | Description | | ----- | ------------------- | ----------- | | `x-migrations-table` | `MigrationsTable` | Name of the migrations table | -| `x-clean-statements` | `CleanStatements` | Whether to parse and clean DDL statements before running migration towards Spanner (Required for comments and multiple statements) | +| `x-clean-statements` | `CleanStatements` | **Deprecated.** This parameter is ignored. Statements are now always automatically parsed and comments are stripped. | | `url` | `DatabaseName` | The full path to the Spanner database resource. If provided as part of `Config` it must not contain a scheme or query string to match the format `projects/{projectId}/instances/{instanceId}/databases/{databaseName}`| -| `projectId` || The Google Cloud Platform project id -| `instanceId` || The id of the instance running Spanner -| `databaseName` || The name of the Spanner database +| `projectId` || The Google Cloud Platform project id | +| `instanceId` || The id of the instance running Spanner | +| `databaseName` || The name of the Spanner database | -> **Note:** Google Cloud Spanner migrations can take a considerable amount of -> time. The migrations provided as part of the example take about 6 minutes to +> **Note:** Google Cloud Spanner DDL migrations can take a considerable amount of +> time. The migrations provided as part of the example take about 6 minutes to > run on a small instance. > > ```log @@ -29,18 +29,101 @@ as described in [README.md#database-urls](../../README.md#database-urls) > 1496539702/u add_city_to_users (41.647359754s) > 1496601752/u add_index_on_user_emails (2m12.155787369s) > 1496602638/u create_books_table (2m30.77299181s) +> ``` -## DDL with comments +## Supported Statement Types -At the moment the GCP Spanner backed does not seem to allow for comments (See https://issuetracker.google.com/issues/159730604) -so in order to be able to use migration with DDL containing comments `x-clean-statements` is required +The Spanner driver supports three types of statements in migration files: -## Multiple statements +### DDL (Data Definition Language) -In order to be able to use more than 1 DDL statement in the same migration file, the file has to be parsed and therefore the `x-clean-statements` flag is required +Schema modification statements like `CREATE TABLE`, `ALTER TABLE`, `DROP TABLE`, `CREATE INDEX`, etc. +These are executed using Spanner's `UpdateDatabaseDdl` API. + +```sql +CREATE TABLE Users ( + UserId INT64 NOT NULL, + Name STRING(100) +) PRIMARY KEY(UserId); +``` + +### DML (Data Manipulation Language) - INSERT + +`INSERT` statements are executed within a read-write transaction, allowing multiple inserts to be atomic. + +```sql +INSERT INTO Users (UserId, Name) VALUES (1, 'Alice'); +INSERT INTO Users (UserId, Name) VALUES (2, 'Bob'); +``` + +### Partitioned DML - UPDATE and DELETE + +`UPDATE` and `DELETE` statements are executed using Spanner's `PartitionedUpdate` API, +which is optimized for large-scale data modifications. + +```sql +UPDATE Users SET Name = 'Updated' WHERE UserId = 1; +``` + +```sql +DELETE FROM Users WHERE UserId = 1; +``` + +### Statement Type Restrictions + +**Important:** Each migration file must contain only one type of statement. You cannot mix: + +- DDL with DML +- INSERT with UPDATE/DELETE + +For example, the following migration file will fail: + +```sql +-- This will fail: mixing INSERT and UPDATE +INSERT INTO Users (UserId, Name) VALUES (1, 'Alice'); +UPDATE Users SET Name = 'Bob' WHERE UserId = 1; +``` + +## Comments in Migrations + +Migration files can contain SQL comments. The driver automatically parses and strips comments +before execution since Spanner's `UpdateDatabaseDdl` API does not support comments. + +Supported comment styles: + +- Single-line comments: `-- comment` +- Multi-line comments: `/* comment */` + +```sql +-- This migration creates the users table +/* + * Author: migrate + * Description: Initial schema setup + */ +CREATE TABLE Users ( + UserId INT64 NOT NULL, -- primary key + Name STRING(100) +) PRIMARY KEY(UserId); +``` + +## Multiple Statements + +Multiple statements of the same type can be included in a single migration file, +separated by semicolons: + +```sql +CREATE TABLE Users ( + UserId INT64 NOT NULL +) PRIMARY KEY(UserId); + +CREATE INDEX UsersByName ON Users(Name); +``` ## Testing -To unit test the `spanner` driver, `SPANNER_DATABASE` needs to be set. You'll -need to sign-up to Google Cloud Platform (GCP) and have a running Spanner -instance since it is not possible to run Google Spanner outside GCP. +The Spanner driver can be tested using the Spanner emulator provided by the +`cloud.google.com/go/spanner/spannertest` package. The unit tests use this +emulator and do not require a real Spanner instance. + +For integration testing against a real Spanner instance, set the `SPANNER_DATABASE` +environment variable to your database's full resource path. diff --git a/database/spanner/examples/migrations/1621360368_seed_users_data.down.sql b/database/spanner/examples/migrations/1621360368_seed_users_data.down.sql new file mode 100644 index 000000000..d0062ec61 --- /dev/null +++ b/database/spanner/examples/migrations/1621360368_seed_users_data.down.sql @@ -0,0 +1 @@ +DELETE FROM Users WHERE UserId IN (1, 2); diff --git a/database/spanner/examples/migrations/1621360368_seed_users_data.up.sql b/database/spanner/examples/migrations/1621360368_seed_users_data.up.sql new file mode 100644 index 000000000..f9d5ade53 --- /dev/null +++ b/database/spanner/examples/migrations/1621360368_seed_users_data.up.sql @@ -0,0 +1,2 @@ +INSERT INTO Users (UserId, Name, Email) VALUES (1, 'Alice', 'alice@example.com'); +INSERT INTO Users (UserId, Name, Email) VALUES (2, 'Bob', 'bob@example.com'); diff --git a/database/spanner/examples/migrations/1621360369_update_user_emails.down.sql b/database/spanner/examples/migrations/1621360369_update_user_emails.down.sql new file mode 100644 index 000000000..fe3d869d4 --- /dev/null +++ b/database/spanner/examples/migrations/1621360369_update_user_emails.down.sql @@ -0,0 +1 @@ +UPDATE Users SET Email = 'alice@example.com' WHERE UserId = 1; diff --git a/database/spanner/examples/migrations/1621360369_update_user_emails.up.sql b/database/spanner/examples/migrations/1621360369_update_user_emails.up.sql new file mode 100644 index 000000000..45832b1ff --- /dev/null +++ b/database/spanner/examples/migrations/1621360369_update_user_emails.up.sql @@ -0,0 +1 @@ +UPDATE Users SET Email = 'alice.updated@example.com' WHERE UserId = 1; diff --git a/database/spanner/examples/migrations/1621360370_add_user_status_with_comments.down.sql b/database/spanner/examples/migrations/1621360370_add_user_status_with_comments.down.sql new file mode 100644 index 000000000..b51fa7bae --- /dev/null +++ b/database/spanner/examples/migrations/1621360370_add_user_status_with_comments.down.sql @@ -0,0 +1,2 @@ +-- Rollback: Remove status column from Users table +ALTER TABLE Users DROP COLUMN Status; diff --git a/database/spanner/examples/migrations/1621360370_add_user_status_with_comments.up.sql b/database/spanner/examples/migrations/1621360370_add_user_status_with_comments.up.sql new file mode 100644 index 000000000..f1752b389 --- /dev/null +++ b/database/spanner/examples/migrations/1621360370_add_user_status_with_comments.up.sql @@ -0,0 +1,9 @@ +-- Migration: Add status column to Users table +-- Author: migrate +-- Description: This migration adds a status column to track user account state + +/* + * The Status column will store the current state of the user account. + * Valid values: 'active', 'inactive', 'suspended' + */ +ALTER TABLE Users ADD COLUMN Status STRING(20); -- default will be NULL diff --git a/database/spanner/spanner.go b/database/spanner/spanner.go index 914c79532..cc86322a1 100644 --- a/database/spanner/spanner.go +++ b/database/spanner/spanner.go @@ -31,28 +31,61 @@ func init() { // DefaultMigrationsTable is used if no custom table is specified const DefaultMigrationsTable = "SchemaMigrations" -// Driver errors -var ( - ErrNilConfig = errors.New("no config") - ErrNoDatabaseName = errors.New("no database name") - ErrNoSchema = errors.New("no schema") - ErrDatabaseDirty = errors.New("database is dirty") - ErrLockHeld = errors.New("unable to obtain lock") - ErrLockNotHeld = errors.New("unable to release already released lock") +type statementKind int + +const ( + statementKindDDL statementKind = iota + 1 + statementKindDML + statementKindPartitionedDML ) -// Config used for a Spanner instance +// ErrNilConfig is returned when no configuration is provided. +var ErrNilConfig = errors.New("no config") + +// ErrNoDatabaseName is returned when the database name is not specified in the configuration. +var ErrNoDatabaseName = errors.New("no database name") + +// ErrNoSchema is returned when no schema is available. +var ErrNoSchema = errors.New("no schema") + +// ErrDatabaseDirty is returned when the database has a dirty migration state. +var ErrDatabaseDirty = errors.New("database is dirty") + +// ErrLockHeld is returned when attempting to acquire a lock that is already held. +var ErrLockHeld = errors.New("unable to obtain lock") + +// ErrLockNotHeld is returned when attempting to release a lock that is not held. +var ErrLockNotHeld = errors.New("unable to release already released lock") + +// ErrMixedStatements is returned when a migration file contains a mix of DDL, +// DML (INSERT), and partitioned DML (UPDATE or DELETE) statements. +var ErrMixedStatements = errors.New("DDL, DML (INSERT), and partitioned DML (UPDATE or DELETE) must not be combined in the same migration file") + +// ErrEmptyMigration is returned when a migration file is empty or contains only whitespace. +var ErrEmptyMigration = errors.New("empty migration") + +// ErrInvalidDMLStatementKind is returned when an unrecognized DML statement type is encountered. +var ErrInvalidDMLStatementKind = errors.New("invalid DML statement kind") + +// Config holds the configuration for a Spanner database driver instance. type Config struct { + // MigrationsTable is the name of the table used to track migration versions. + // If empty, DefaultMigrationsTable is used. MigrationsTable string - DatabaseName string - // Whether to parse the migration DDL with spansql before - // running them towards Spanner. - // Parsing outputs clean DDL statements such as reformatted - // and void of comments. + + // DatabaseName is the fully qualified Spanner database name + // (e.g., "projects/{project}/instances/{instance}/databases/{database}"). + DatabaseName string + + // Deprecated: CleanStatements is no longer needed. Migration statements are + // now automatically parsed to detect their type (DDL, DML, PartitionedDML) + // and comments are stripped during parsing. This field is ignored. CleanStatements bool } -// Spanner implements database.Driver for Google Cloud Spanner +// Spanner implements database.Driver for Google Cloud Spanner. +// It supports DDL statements (CREATE, ALTER, DROP), DML statements (INSERT), +// and partitioned DML statements (UPDATE, DELETE) in migration files. type Spanner struct { db *DB @@ -61,11 +94,16 @@ type Spanner struct { lock atomic.Bool } +// DB holds the Spanner client connections for both administrative operations +// (schema changes) and data operations (queries and mutations). type DB struct { admin *sdb.DatabaseAdminClient data *spanner.Client } +// NewDB creates a new DB instance with the provided admin and data clients. +// The admin client is used for DDL operations, while the data client is used +// for DML operations and queries. func NewDB(admin sdb.DatabaseAdminClient, data spanner.Client) *DB { return &DB{ admin: &admin, @@ -73,7 +111,8 @@ func NewDB(admin sdb.DatabaseAdminClient, data spanner.Client) *DB { } } -// WithInstance implements database.Driver +// WithInstance creates a new Spanner driver using an existing DB instance. +// It validates the configuration and ensures the migrations version table exists. func WithInstance(instance *DB, config *Config) (database.Driver, error) { if config == nil { return nil, ErrNilConfig @@ -99,7 +138,12 @@ func WithInstance(instance *DB, config *Config) (database.Driver, error) { return sx, nil } -// Open implements database.Driver +// Open parses the connection URL and creates a new Spanner driver instance. +// The URL format is: spanner://projects/{project}/instances/{instance}/databases/{database} +// +// Supported query parameters: +// - x-migrations-table: Custom name for the migrations tracking table +// - x-clean-statements: Deprecated, this parameter is ignored. Statements are now always parsed. func (s *Spanner) Open(url string) (database.Driver, error) { purl, err := nurl.Parse(url) if err != nil { @@ -137,14 +181,16 @@ func (s *Spanner) Open(url string) (database.Driver, error) { }) } -// Close implements database.Driver +// Close releases all resources held by the Spanner driver, including +// both the admin and data client connections. func (s *Spanner) Close() error { s.db.data.Close() return s.db.admin.Close() } -// Lock implements database.Driver but doesn't do anything because Spanner only -// enqueues the UpdateDatabaseDdlRequest. +// Lock acquires an in-memory lock to prevent concurrent migrations. +// Note: This is a local lock only; Spanner DDL operations are inherently +// serialized by the service through UpdateDatabaseDdl request queuing. func (s *Spanner) Lock() error { if swapped := s.lock.CompareAndSwap(false, true); swapped { return nil @@ -152,7 +198,7 @@ func (s *Spanner) Lock() error { return ErrLockHeld } -// Unlock implements database.Driver but no action required, see Lock. +// Unlock releases the in-memory lock acquired by Lock. func (s *Spanner) Unlock() error { if swapped := s.lock.CompareAndSwap(true, false); swapped { return nil @@ -160,27 +206,43 @@ func (s *Spanner) Unlock() error { return ErrLockNotHeld } -// Run implements database.Driver +// Run executes the migration statements read from the provided reader. +// It automatically detects the statement type and routes execution accordingly: +// - DDL statements (CREATE, ALTER, DROP): Executed via UpdateDatabaseDdl +// - DML statements (INSERT): Executed within a read-write transaction +// - Partitioned DML (UPDATE, DELETE): Executed via PartitionedUpdate +// +// Migration files must not mix different statement types. func (s *Spanner) Run(migration io.Reader) error { migr, err := io.ReadAll(migration) if err != nil { return err } - stmts := []string{string(migr)} - if s.config.CleanStatements { - stmts, err = cleanStatements(migr) - if err != nil { - return err - } + ctx := context.Background() + + stmts, kind, err := parseStatements(migr) + if err != nil { + return &database.Error{OrigErr: err, Err: "failed to parse migration", Query: migr} } - ctx := context.Background() + switch kind { + case statementKindDDL: + return s.runDDL(ctx, stmts, migr) + case statementKindDML: + return s.runDML(ctx, stmts, migr) + case statementKindPartitionedDML: + return s.runPartitionedDML(ctx, stmts, migr) + default: + return &database.Error{OrigErr: ErrInvalidDMLStatementKind, Err: "unknown statement kind", Query: migr} + } +} + +func (s *Spanner) runDDL(ctx context.Context, stmts []string, migr []byte) error { op, err := s.db.admin.UpdateDatabaseDdl(ctx, &adminpb.UpdateDatabaseDdlRequest{ Database: s.config.DatabaseName, Statements: stmts, }) - if err != nil { return &database.Error{OrigErr: err, Err: "migration failed", Query: migr} } @@ -192,17 +254,44 @@ func (s *Spanner) Run(migration io.Reader) error { return nil } -// SetVersion implements database.Driver +func (s *Spanner) runDML(ctx context.Context, stmts []string, migr []byte) error { + _, err := s.db.data.ReadWriteTransaction(ctx, + func(ctx context.Context, txn *spanner.ReadWriteTransaction) error { + for _, stmt := range stmts { + if _, err := txn.Update(ctx, spanner.Statement{SQL: stmt}); err != nil { + return err + } + } + return nil + }) + if err != nil { + return &database.Error{OrigErr: err, Err: "migration failed", Query: migr} + } + return nil +} + +func (s *Spanner) runPartitionedDML(ctx context.Context, stmts []string, migr []byte) error { + for _, stmt := range stmts { + _, err := s.db.data.PartitionedUpdate(ctx, spanner.Statement{SQL: stmt}) + if err != nil { + return &database.Error{OrigErr: err, Err: "migration failed", Query: migr} + } + } + return nil +} + +// SetVersion updates the migration version in the migrations tracking table. +// It atomically deletes all existing records and inserts the new version. func (s *Spanner) SetVersion(version int, dirty bool) error { ctx := context.Background() _, err := s.db.data.ReadWriteTransaction(ctx, - func(ctx context.Context, txn *spanner.ReadWriteTransaction) error { + func(_ context.Context, txn *spanner.ReadWriteTransaction) error { m := []*spanner.Mutation{ spanner.Delete(s.config.MigrationsTable, spanner.AllKeys()), spanner.Insert(s.config.MigrationsTable, []string{"Version", "Dirty"}, - []interface{}{version, dirty}, + []any{version, dirty}, )} return txn.BufferWrite(m) }) @@ -213,7 +302,8 @@ func (s *Spanner) SetVersion(version int, dirty bool) error { return nil } -// Version implements database.Driver +// Version returns the current migration version and dirty state. +// If no version has been set, it returns database.NilVersion. func (s *Spanner) Version() (version int, dirty bool, err error) { ctx := context.Background() @@ -242,12 +332,10 @@ func (s *Spanner) Version() (version int, dirty bool, err error) { var nameMatcher = regexp.MustCompile(`(CREATE TABLE\s(\S+)\s)|(CREATE.+INDEX\s(\S+)\s)`) -// Drop implements database.Driver. Retrieves the database schema first and -// creates statements to drop the indexes and tables accordingly. -// Note: The drop statements are created in reverse order to how they're -// provided in the schema. Assuming the schema describes how the database can -// be "build up", it seems logical to "unbuild" the database simply by going the -// opposite direction. More testing +// Drop removes all tables and indexes from the database by retrieving the +// current schema and generating DROP statements in reverse order. +// This reverse order ensures that dependent objects (like interleaved tables +// and indexes) are dropped before their parent tables. func (s *Spanner) Drop() error { ctx := context.Background() res, err := s.db.admin.GetDatabaseDdl(ctx, &adminpb.GetDatabaseDdlRequest{ @@ -305,7 +393,7 @@ func (s *Spanner) ensureVersionTable() (err error) { ctx := context.Background() tbl := s.config.MigrationsTable iter := s.db.data.Single().Read(ctx, tbl, spanner.AllKeys(), []string{"Version"}) - if err := iter.Do(func(r *spanner.Row) error { return nil }); err == nil { + if err := iter.Do(func(_ *spanner.Row) error { return nil }); err == nil { return nil } @@ -329,17 +417,69 @@ func (s *Spanner) ensureVersionTable() (err error) { return nil } -func cleanStatements(migration []byte) ([]string, error) { - // The Spanner GCP backend does not yet support comments for the UpdateDatabaseDdl RPC - // (see https://issuetracker.google.com/issues/159730604) we use - // spansql to parse the DDL and output valid stamements without comments - ddl, err := spansql.ParseDDL("", string(migration)) - if err != nil { - return nil, err +// parseStatements attempts to parse migration content as DDL first, then as DML. +// Returns the parsed statements and their kind. +func parseStatements(migration []byte) ([]string, statementKind, error) { + content := string(migration) + if strings.TrimSpace(content) == "" { + return nil, 0, ErrEmptyMigration } - stmts := make([]string, 0, len(ddl.List)) - for _, stmt := range ddl.List { - stmts = append(stmts, stmt.SQL()) + + // Try parsing as DDL first + ddl, ddlErr := spansql.ParseDDL("", content) + if ddlErr == nil && len(ddl.List) > 0 { + stmts := make([]string, 0, len(ddl.List)) + for _, stmt := range ddl.List { + stmts = append(stmts, stmt.SQL()) + } + return stmts, statementKindDDL, nil + } + + // Try parsing as DML + dml, dmlErr := spansql.ParseDML("", content) + if dmlErr == nil && len(dml.List) > 0 { + stmts := make([]string, 0, len(dml.List)) + for _, stmt := range dml.List { + stmts = append(stmts, stmt.SQL()) + } + kind, err := inspectDMLKind(dml.List) + if err != nil { + return nil, 0, err + } + return stmts, kind, nil + } + + if ddlErr != nil { + return nil, 0, ddlErr + } + return nil, 0, dmlErr +} + +// inspectDMLKind determines if DML statements are regular DML (INSERT) or +// partitioned DML (UPDATE, DELETE). Mixed statement types are not allowed. +func inspectDMLKind(stmts []spansql.DMLStmt) (statementKind, error) { + if len(stmts) == 0 { + return statementKindDDL, nil + } + + var hasDML, hasPartitionedDML bool + for _, stmt := range stmts { + switch stmt.(type) { + case *spansql.Insert: + hasDML = true + case *spansql.Update, *spansql.Delete: + hasPartitionedDML = true + default: + return 0, fmt.Errorf("%w: unknown DML statement type %T", ErrInvalidDMLStatementKind, stmt) + } + } + + switch { + case hasDML && !hasPartitionedDML: + return statementKindDML, nil + case !hasDML && hasPartitionedDML: + return statementKindPartitionedDML, nil + default: + return 0, ErrMixedStatements } - return stmts, nil } diff --git a/database/spanner/spanner_test.go b/database/spanner/spanner_test.go index d6ab4db32..53817648b 100644 --- a/database/spanner/spanner_test.go +++ b/database/spanner/spanner_test.go @@ -5,14 +5,14 @@ import ( "os" "testing" - "github.com/golang-migrate/migrate/v4" - - dt "github.com/golang-migrate/migrate/v4/database/testing" - _ "github.com/golang-migrate/migrate/v4/source/file" - "cloud.google.com/go/spanner/spannertest" + "cloud.google.com/go/spanner/spansql" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + + "github.com/golang-migrate/migrate/v4" + dt "github.com/golang-migrate/migrate/v4/database/testing" + _ "github.com/golang-migrate/migrate/v4/source/file" ) // withSpannerEmulator is not thread-safe and cannot be used with parallel tests since it sets the emulator @@ -61,111 +61,196 @@ func TestMigrate(t *testing.T) { }) } -func TestCleanStatements(t *testing.T) { +func TestParseStatements(t *testing.T) { testCases := []struct { - name string - multiStatement string - expected []string + name string + migration string + expectedKind statementKind + expectError error }{ { - name: "no statement", - multiStatement: "", - expected: []string{}, + name: "empty migration", + migration: "", + expectError: ErrEmptyMigration, }, { - name: "single statement, single line, no semicolon, no comment", - multiStatement: "CREATE TABLE table_name (id STRING(255) NOT NULL) PRIMARY KEY (id)", - expected: []string{"CREATE TABLE table_name (\n id STRING(255) NOT NULL,\n) PRIMARY KEY(id)"}, + name: "whitespace only migration", + migration: " \n\t ", + expectError: ErrEmptyMigration, }, { - name: "single statement, multi line, no semicolon, no comment", - multiStatement: `CREATE TABLE table_name ( - id STRING(255) NOT NULL, - ) PRIMARY KEY (id)`, - expected: []string{"CREATE TABLE table_name (\n id STRING(255) NOT NULL,\n) PRIMARY KEY(id)"}, + name: "single DDL statement - CREATE TABLE", + migration: "CREATE TABLE users (id STRING(36) NOT NULL) PRIMARY KEY (id)", + expectedKind: statementKindDDL, }, { - name: "single statement, single line, with semicolon, no comment", - multiStatement: "CREATE TABLE table_name (id STRING(255) NOT NULL) PRIMARY KEY (id);", - expected: []string{"CREATE TABLE table_name (\n id STRING(255) NOT NULL,\n) PRIMARY KEY(id)"}, + name: "multiple DDL statements", + migration: `CREATE TABLE users ( + id STRING(36) NOT NULL + ) PRIMARY KEY (id); + CREATE INDEX users_idx ON users (id);`, + expectedKind: statementKindDDL, }, { - name: "single statement, multi line, with semicolon, no comment", - multiStatement: `CREATE TABLE table_name ( - id STRING(255) NOT NULL, - ) PRIMARY KEY (id);`, - expected: []string{"CREATE TABLE table_name (\n id STRING(255) NOT NULL,\n) PRIMARY KEY(id)"}, + name: "single DML statement - INSERT", + migration: "INSERT INTO users (id, name) VALUES ('1', 'test')", + expectedKind: statementKindDML, }, { - name: "multi statement, with trailing semicolon. no comment", - // From https://github.com/mattes/migrate/pull/281 - multiStatement: `CREATE TABLE table_name ( - id STRING(255) NOT NULL, - ) PRIMARY KEY(id); - - CREATE INDEX table_name_id_idx ON table_name (id);`, - expected: []string{`CREATE TABLE table_name ( - id STRING(255) NOT NULL, -) PRIMARY KEY(id)`, "CREATE INDEX table_name_id_idx ON table_name(id)"}, + name: "multiple INSERT statements", + migration: `INSERT INTO users (id, name) VALUES ('1', 'test1'); + INSERT INTO users (id, name) VALUES ('2', 'test2');`, + expectedKind: statementKindDML, + }, + { + name: "single partitioned DML - UPDATE", + migration: "UPDATE users SET name = 'updated' WHERE id = '1'", + expectedKind: statementKindPartitionedDML, + }, + { + name: "single partitioned DML - DELETE", + migration: "DELETE FROM users WHERE id = '1'", + expectedKind: statementKindPartitionedDML, + }, + { + name: "multiple UPDATE statements", + migration: `UPDATE users SET name = 'updated1' WHERE id = '1'; + UPDATE users SET name = 'updated2' WHERE id = '2';`, + expectedKind: statementKindPartitionedDML, + }, + { + name: "mixed UPDATE and DELETE", + migration: `UPDATE users SET name = 'updated' WHERE id = '1'; + DELETE FROM users WHERE id = '2';`, + expectedKind: statementKindPartitionedDML, + }, + { + name: "DDL with inline comment", + migration: `CREATE TABLE users ( + id STRING(36) NOT NULL, -- primary identifier + name STRING(100) + ) PRIMARY KEY (id)`, + expectedKind: statementKindDDL, + }, + { + name: "DDL with standalone comment", + migration: `-- This migration creates the users table + CREATE TABLE users ( + id STRING(36) NOT NULL + ) PRIMARY KEY (id)`, + expectedKind: statementKindDDL, + }, + { + name: "DDL with multi-line comment", + migration: `/* + * This is a multi-line comment + * describing the migration + */ + CREATE TABLE users ( + id STRING(36) NOT NULL + ) PRIMARY KEY (id)`, + expectedKind: statementKindDDL, + }, + { + name: "DML INSERT with comment", + migration: `-- Seed initial user data + INSERT INTO users (id, name) VALUES ('1', 'test')`, + expectedKind: statementKindDML, + }, + { + name: "DML UPDATE with comment", + migration: `-- Update user emails + UPDATE users SET name = 'updated' WHERE id = '1'`, + expectedKind: statementKindPartitionedDML, + }, + { + name: "DML DELETE with comment", + migration: `-- Clean up test data + DELETE FROM users WHERE id = '1'`, + expectedKind: statementKindPartitionedDML, }, { - name: "multi statement, no trailing semicolon, no comment", - // From https://github.com/mattes/migrate/pull/281 - multiStatement: `CREATE TABLE table_name ( - id STRING(255) NOT NULL, - ) PRIMARY KEY(id); + name: "mixed INSERT and UPDATE - should error", + migration: `INSERT INTO users (id, name) VALUES ('1', 'test'); + UPDATE users SET name = 'updated' WHERE id = '1';`, + expectError: ErrMixedStatements, + }, + { + name: "mixed INSERT and DELETE - should error", + migration: `INSERT INTO users (id, name) VALUES ('1', 'test'); + DELETE FROM users WHERE id = '1';`, + expectError: ErrMixedStatements, + }, + } - CREATE INDEX table_name_id_idx ON table_name (id)`, - expected: []string{`CREATE TABLE table_name ( - id STRING(255) NOT NULL, -) PRIMARY KEY(id)`, "CREATE INDEX table_name_id_idx ON table_name(id)"}, - }, - { - name: "multi statement, no trailing semicolon, standalone comment", - // From https://github.com/mattes/migrate/pull/281 - multiStatement: `CREATE TABLE table_name ( - -- standalone comment - id STRING(255) NOT NULL, - ) PRIMARY KEY(id); + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + stmts, kind, err := parseStatements([]byte(tc.migration)) + if tc.expectError != nil { + require.ErrorIs(t, err, tc.expectError) + return + } + require.NoError(t, err, "Error parsing statements") + assert.Equal(t, tc.expectedKind, kind) + assert.NotEmpty(t, stmts) + }) + } +} - CREATE INDEX table_name_id_idx ON table_name (id)`, - expected: []string{`CREATE TABLE table_name ( - id STRING(255) NOT NULL, -) PRIMARY KEY(id)`, "CREATE INDEX table_name_id_idx ON table_name(id)"}, +func TestInspectDMLKind(t *testing.T) { + testCases := []struct { + name string + migration string + expectedKind statementKind + expectError error + }{ + { + name: "INSERT only", + migration: "INSERT INTO users (id) VALUES ('1')", + expectedKind: statementKindDML, }, { - name: "multi statement, no trailing semicolon, inline comment", - // From https://github.com/mattes/migrate/pull/281 - multiStatement: `CREATE TABLE table_name ( - id STRING(255) NOT NULL, -- inline comment - ) PRIMARY KEY(id); - - CREATE INDEX table_name_id_idx ON table_name (id)`, - expected: []string{`CREATE TABLE table_name ( - id STRING(255) NOT NULL, -) PRIMARY KEY(id)`, "CREATE INDEX table_name_id_idx ON table_name(id)"}, + name: "UPDATE only", + migration: "UPDATE users SET name = 'test' WHERE id = '1'", + expectedKind: statementKindPartitionedDML, }, { - name: "alter table with SET OPTIONS", - multiStatement: `ALTER TABLE users ALTER COLUMN created - SET OPTIONS (allow_commit_timestamp=true);`, - expected: []string{"ALTER TABLE users ALTER COLUMN created SET OPTIONS (allow_commit_timestamp = true)"}, + name: "DELETE only", + migration: "DELETE FROM users WHERE id = '1'", + expectedKind: statementKindPartitionedDML, }, { - name: "column with NUMERIC type", - multiStatement: `CREATE TABLE table_name ( - id STRING(255) NOT NULL, - sum NUMERIC, - ) PRIMARY KEY (id)`, - expected: []string{"CREATE TABLE table_name (\n id STRING(255) NOT NULL,\n sum NUMERIC,\n) PRIMARY KEY(id)"}, + name: "multiple INSERTs", + migration: `INSERT INTO users (id) VALUES ('1'); + INSERT INTO users (id) VALUES ('2');`, + expectedKind: statementKindDML, + }, + { + name: "UPDATE and DELETE combined", + migration: `UPDATE users SET name = 'test' WHERE id = '1'; + DELETE FROM users WHERE id = '2';`, + expectedKind: statementKindPartitionedDML, + }, + { + name: "INSERT and UPDATE mixed - error", + migration: `INSERT INTO users (id) VALUES ('1'); + UPDATE users SET name = 'test' WHERE id = '1';`, + expectError: ErrMixedStatements, }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - stmts, err := cleanStatements([]byte(tc.multiStatement)) - require.NoError(t, err, "Error cleaning statements") - assert.Equal(t, tc.expected, stmts) + dml, err := spansql.ParseDML("", tc.migration) + require.NoError(t, err, "Failed to parse DML") + + kind, err := inspectDMLKind(dml.List) + if tc.expectError != nil { + require.ErrorIs(t, err, tc.expectError) + return + } + require.NoError(t, err) + assert.Equal(t, tc.expectedKind, kind) }) } }