Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 42 additions & 7 deletions src/parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -119,13 +119,29 @@ export const EXECUTION_TYPES: Record<StatementType, ExecutionType> = {
ANON_BLOCK: 'ANON_BLOCK',
};

// Flat, dialect-independent list of statement-type names gathered under the
// name `statementsWithEnds`.
// NOTE(review): this list contains 'CREATE_TRIGGER' unconditionally, whereas
// getStatementsWithEnds(dialect) below only adds it for dialects other than
// 'psql' and 'snowflake' — presumably this constant is the superseded side of
// the change; confirm before relying on it.
const statementsWithEnds = [
'CREATE_TRIGGER',
'CREATE_FUNCTION',
'CREATE_PROCEDURE',
'ANON_BLOCK',
'UNKNOWN',
];
/**
 * Builds the list of statement types for which a token-level semicolon is not
 * automatically the end of the statement, because the body may itself contain
 * semicolons (for example inside a BEGIN...END block).
 *
 * Such statements are only closed once their body is complete
 * (`statement.canEnd`) or, when the body is a string / dollar-quoted literal,
 * once that literal has been consumed (see the string-body handling in
 * stateMachineStatementParser).
 *
 * @param dialect SQL dialect the parser is running for.
 * @returns A freshly allocated array of statement types; 'CREATE_TRIGGER' is
 *   appended last, and only for dialects that allow inline trigger bodies.
 */
function getStatementsWithEnds(dialect: Dialect): StatementType[] {
  // PostgreSQL and Snowflake triggers merely reference a function and never
  // carry an inline body, so their first semicolon always ends the statement.
  const bodylessTriggerDialects: readonly string[] = ['psql', 'snowflake'];

  // Every other dialect (mssql, mysql, sqlite, ...) accepts inline BEGIN...END
  // trigger bodies whose inner semicolons must not end the statement early.
  const triggerTypes: StatementType[] = bodylessTriggerDialects.includes(dialect)
    ? []
    : ['CREATE_TRIGGER'];

  const alwaysDeferred: StatementType[] = [
    'CREATE_FUNCTION',
    'CREATE_PROCEDURE',
    'ANON_BLOCK',
    'UNKNOWN',
  ];

  return [...alwaysDeferred, ...triggerTypes];
}

const blockOpeners: Record<Dialect, string[]> = {
generic: ['BEGIN', 'CASE'],
Expand Down Expand Up @@ -1007,6 +1023,8 @@ function stateMachineStatementParser(
const columnParser = new ColumnParser(dialect);
const tableParser = new TableParser(dialect);

const statementsWithEnds = getStatementsWithEnds(dialect);

/* eslint arrow-body-style: 0, no-extra-parens: 0 */
const isValidToken = (step: Step, token: Token) => {
if (!step.validation) {
Expand Down Expand Up @@ -1080,6 +1098,23 @@ function stateMachineStatementParser(
return;
}

// A CREATE FUNCTION / PROCEDURE body can be supplied as a quoted or
// dollar-quoted string following the AS keyword (e.g. PostgreSQL
// `AS 'select 1'` / `AS $$ ... $$`, Snowflake `AS $$ ... $$`). Such a body
// is a single token, so it never opens a BEGIN...END block and canEnd would
// otherwise never be set, causing the statement to swallow whatever follows.
// Once the string body has been consumed at the top level, allow the next
// semicolon to terminate the statement.
if (
token.type === 'string' &&
openBlocks === 0 &&
prevNonWhitespaceToken?.type === 'keyword' &&
prevNonWhitespaceToken.value.toUpperCase() === 'AS' &&
(statement.type === 'CREATE_FUNCTION' || statement.type === 'CREATE_PROCEDURE')
) {
statement.canEnd = true;
}

if (
token.type === 'keyword' &&
blockOpeners[dialect].includes(token.value.toUpperCase()) &&
Expand Down
36 changes: 36 additions & 0 deletions test/identifier/multiple-statement.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,42 @@ describe('identifier', () => {
});
});

describe('identifying functions with non-block bodies followed by another statement', () => {
  // Each case feeds two statements separated by a newline and checks that the
  // first one does not swallow the trailing SELECT.

  it('should identify a psql function with a string-literal body then a SELECT', () => {
    // Function body is a plain quoted string following AS.
    const statements = identify(
      `create function foo() returns void as 'select 1' language sql;\nSELECT 1;`,
      { dialect: 'psql' },
    );
    const types = statements.map(({ type }) => type);
    const texts = statements.map(({ text }) => text);

    expect(types).to.eql(['CREATE_FUNCTION', 'SELECT']);
    expect(texts).to.eql([
      "create function foo() returns void as 'select 1' language sql;",
      'SELECT 1;',
    ]);
  });

  it('should identify a psql function with a dollar-quoted body then a SELECT', () => {
    // Function body is a dollar-quoted SQL expression.
    const statements = identify(
      `CREATE FUNCTION myfunc() RETURNS INTEGER AS $$ SELECT 1 $$ LANGUAGE sql;\nSELECT 1;`,
      { dialect: 'psql' },
    );
    const types = statements.map(({ type }) => type);

    expect(types).to.eql(['CREATE_FUNCTION', 'SELECT']);
  });

  it('should identify a psql function with a dollar-quoted plpgsql body then a SELECT', () => {
    // The dollar-quoted body itself contains BEGIN...END and inner semicolons.
    const statements = identify(
      `CREATE FUNCTION f() RETURNS int AS $$ BEGIN RETURN 1; END; $$ LANGUAGE plpgsql;\nSELECT 1;`,
      { dialect: 'psql' },
    );
    const types = statements.map(({ type }) => type);

    expect(types).to.eql(['CREATE_FUNCTION', 'SELECT']);
  });

  it('should identify a psql trigger without a block body then a SELECT', () => {
    // The trigger only references a function; it has no inline body.
    const statements = identify(
      `CREATE TRIGGER t AFTER INSERT ON tbl EXECUTE FUNCTION f();\nSELECT 1;`,
      { dialect: 'psql' },
    );
    const types = statements.map(({ type }) => type);

    expect(types).to.eql(['CREATE_TRIGGER', 'SELECT']);
  });

  it('should still keep an mssql BEGIN...END function body intact then a SELECT', () => {
    // Guard case: a real BEGIN...END body with an inner semicolon stays whole.
    const statements = identify(
      `CREATE FUNCTION dbo.f (@x int) RETURNS int AS BEGIN RETURN @x; END;\nSELECT 1;`,
      { dialect: 'mssql' },
    );
    const types = statements.map(({ type }) => type);

    expect(types).to.eql(['CREATE_FUNCTION', 'SELECT']);
  });
});

describe('identifying multiple statements with CTEs', () => {
it('should able to detect queries with a CTE in middle query', () => {
const actual = identify(
Expand Down
Loading