Patch layout restored to one diff line per physical line; every hunk body
matches its header counts (13->29, 6->8, 6->23, 6->42). No code token was changed.
NOTE(review): leading indentation was lost in the collapsed original and is
reconstructed here as 2-space nesting -- confirm context lines against the
target files (or apply with `git apply --ignore-whitespace`).

diff --git a/src/parser.ts b/src/parser.ts
index 96a6635..ef9c7c5 100644
--- a/src/parser.ts
+++ b/src/parser.ts
@@ -119,13 +119,29 @@ export const EXECUTION_TYPES: Record = {
   ANON_BLOCK: 'ANON_BLOCK',
 };
 
-const statementsWithEnds = [
-  'CREATE_TRIGGER',
-  'CREATE_FUNCTION',
-  'CREATE_PROCEDURE',
-  'ANON_BLOCK',
-  'UNKNOWN',
-];
+// Statement types whose bodies may legitimately contain token-level semicolons
+// (e.g. a BEGIN...END block), so a semicolon does not necessarily terminate them.
+// They only terminate once their body has completed (statement.canEnd) or, for
+// string/dollar-quoted bodies, once that body has been consumed (see the
+// string-body handling in stateMachineStatementParser).
+function getStatementsWithEnds(dialect: Dialect): StatementType[] {
+  const statementsWithEnds: StatementType[] = [
+    'CREATE_FUNCTION',
+    'CREATE_PROCEDURE',
+    'ANON_BLOCK',
+    'UNKNOWN',
+  ];
+
+  // In PostgreSQL and Snowflake a trigger only references a function and never
+  // carries an inline body, so its first semicolon always terminates it. Other
+  // dialects (mssql, mysql, sqlite, ...) support inline BEGIN...END trigger
+  // bodies whose token-level semicolons must not terminate the statement early.
+  if (!['psql', 'snowflake'].includes(dialect)) {
+    statementsWithEnds.push('CREATE_TRIGGER');
+  }
+
+  return statementsWithEnds;
+}
 
 const blockOpeners: Record = {
   generic: ['BEGIN', 'CASE'],
@@ -1007,6 +1023,8 @@ function stateMachineStatementParser(
   const columnParser = new ColumnParser(dialect);
   const tableParser = new TableParser(dialect);
 
+  const statementsWithEnds = getStatementsWithEnds(dialect);
+
   /* eslint arrow-body-style: 0, no-extra-parens: 0 */
   const isValidToken = (step: Step, token: Token) => {
     if (!step.validation) {
@@ -1080,6 +1098,23 @@ function stateMachineStatementParser(
         return;
       }
 
+      // A CREATE FUNCTION / PROCEDURE body can be supplied as a quoted or
+      // dollar-quoted string following the AS keyword (e.g. PostgreSQL
+      // `AS 'select 1'` / `AS $$ ... $$`, Snowflake `AS $$ ... $$`). Such a body
+      // is a single token, so it never opens a BEGIN...END block and canEnd would
+      // otherwise never be set, causing the statement to swallow whatever follows.
+      // Once the string body has been consumed at the top level, allow the next
+      // semicolon to terminate the statement.
+      if (
+        token.type === 'string' &&
+        openBlocks === 0 &&
+        prevNonWhitespaceToken?.type === 'keyword' &&
+        prevNonWhitespaceToken.value.toUpperCase() === 'AS' &&
+        (statement.type === 'CREATE_FUNCTION' || statement.type === 'CREATE_PROCEDURE')
+      ) {
+        statement.canEnd = true;
+      }
+
       if (
         token.type === 'keyword' &&
         blockOpeners[dialect].includes(token.value.toUpperCase()) &&
diff --git a/test/identifier/multiple-statement.spec.ts b/test/identifier/multiple-statement.spec.ts
index 76fa52f..6bb77d1 100644
--- a/test/identifier/multiple-statement.spec.ts
+++ b/test/identifier/multiple-statement.spec.ts
@@ -249,6 +249,42 @@ describe('identifier', () => {
       });
     });
 
+    describe('identifying functions with non-block bodies followed by another statement', () => {
+      it('should identify a psql function with a string-literal body then a SELECT', () => {
+        const sql = `create function foo() returns void as 'select 1' language sql;\nSELECT 1;`;
+        const actual = identify(sql, { dialect: 'psql' });
+        expect(actual.map((statement) => statement.type)).to.eql(['CREATE_FUNCTION', 'SELECT']);
+        expect(actual.map((statement) => statement.text)).to.eql([
+          "create function foo() returns void as 'select 1' language sql;",
+          'SELECT 1;',
+        ]);
+      });
+
+      it('should identify a psql function with a dollar-quoted body then a SELECT', () => {
+        const sql = `CREATE FUNCTION myfunc() RETURNS INTEGER AS $$ SELECT 1 $$ LANGUAGE sql;\nSELECT 1;`;
+        const actual = identify(sql, { dialect: 'psql' });
+        expect(actual.map((statement) => statement.type)).to.eql(['CREATE_FUNCTION', 'SELECT']);
+      });
+
+      it('should identify a psql function with a dollar-quoted plpgsql body then a SELECT', () => {
+        const sql = `CREATE FUNCTION f() RETURNS int AS $$ BEGIN RETURN 1; END; $$ LANGUAGE plpgsql;\nSELECT 1;`;
+        const actual = identify(sql, { dialect: 'psql' });
+        expect(actual.map((statement) => statement.type)).to.eql(['CREATE_FUNCTION', 'SELECT']);
+      });
+
+      it('should identify a psql trigger without a block body then a SELECT', () => {
+        const sql = `CREATE TRIGGER t AFTER INSERT ON tbl EXECUTE FUNCTION f();\nSELECT 1;`;
+        const actual = identify(sql, { dialect: 'psql' });
+        expect(actual.map((statement) => statement.type)).to.eql(['CREATE_TRIGGER', 'SELECT']);
+      });
+
+      it('should still keep an mssql BEGIN...END function body intact then a SELECT', () => {
+        const sql = `CREATE FUNCTION dbo.f (@x int) RETURNS int AS BEGIN RETURN @x; END;\nSELECT 1;`;
+        const actual = identify(sql, { dialect: 'mssql' });
+        expect(actual.map((statement) => statement.type)).to.eql(['CREATE_FUNCTION', 'SELECT']);
+      });
+    });
+
     describe('identifying multiple statements with CTEs', () => {
       it('should able to detect queries with a CTE in middle query', () => {
         const actual = identify(