From 7f8b8012d0810218d6fe628020f22725f618a623 Mon Sep 17 00:00:00 2001 From: Christiaan Herrewijn Date: Wed, 8 Apr 2026 10:42:43 +0200 Subject: [PATCH 01/11] add to false positives list at the moment, internal errors are distinguished from regular errors by checking if the word 'internal' is part of the error message. However, this also leads to false positives, as some regular error messages also contain the word 'internal'. --- scripts/fuzzer_helper.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/fuzzer_helper.py b/scripts/fuzzer_helper.py index d21ae81..186e83e 100644 --- a/scripts/fuzzer_helper.py +++ b/scripts/fuzzer_helper.py @@ -22,6 +22,8 @@ ] INTERNAL_ERROR_FALSE_POSITIVES = [ + ".internal", + "internal schema", "internal use", "internal_compress", "internal_decompress", From 6190c48372ff15c3f1172ec480b3cca4a8f5cad1 Mon Sep 17 00:00:00 2001 From: Christiaan Herrewijn Date: Thu, 30 Apr 2026 15:34:30 +0200 Subject: [PATCH 02/11] don't raise if function reduce_sql_statement() fails, as it kills the fuzz run --- scripts/reduce_sql.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/scripts/reduce_sql.py b/scripts/reduce_sql.py index 1a0089c..b2fe4c1 100644 --- a/scripts/reduce_sql.py +++ b/scripts/reduce_sql.py @@ -53,14 +53,15 @@ def run_shell_command(shell, cmd): # reduce a single statement -def get_reduced_sql(shell, sql_query): +def get_reduce_candidates(shell, sql_query): + reduce_candidates = [] reduce_query = get_reduced_query.replace('${QUERY}', sql_query.replace("'", "''")) (stdout, stderr, returncode) = run_shell_command(shell, reduce_query) if returncode != 0: + print(f"Failed to reduce query: {sql_query}") print(stdout) print(stderr) - raise Exception("Failed to reduce query") - reduce_candidates = [] + return(reduce_candidates) for line in stdout.split('\n'): if len(line) <= 2: continue @@ -77,7 +78,7 @@ def reduce(sql_query, data_load, shell, error_msg, max_time_seconds=300): start = time.time() while
True: found_new_candidate = False - reduce_candidates = get_reduced_sql(shell, sql_query) + reduce_candidates = get_reduce_candidates(shell, sql_query) for reduce_candidate in reduce_candidates: if reduce_candidate == sql_query: continue @@ -190,7 +191,7 @@ def reduce_query_log_query(start, shell, queries, query_index, max_time_seconds) sql_query = queries[query_index] while True: found_new_candidate = False - reduce_candidates = get_reduced_sql(shell, sql_query) + reduce_candidates = get_reduce_candidates(shell, sql_query) for reduce_candidate in reduce_candidates: if reduce_candidate == sql_query: continue From bebe29ceb5499212abe9ed5811bb8e00c5b917a3 Mon Sep 17 00:00:00 2001 From: Christiaan Herrewijn Date: Thu, 30 Apr 2026 15:45:44 +0200 Subject: [PATCH 03/11] bump submodules --- duckdb | 2 +- extension-ci-tools | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/duckdb b/duckdb index e350934..5776358 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit e3509341f681c4cb6f2c22d1f0f4b653ed20644d +Subproject commit 5776358ab2acc5c5e947d9bde80334db38edd92d diff --git a/extension-ci-tools b/extension-ci-tools index 16d89a5..a2da793 160000 --- a/extension-ci-tools +++ b/extension-ci-tools @@ -1 +1 @@ -Subproject commit 16d89a59ee14904a62383e83c300432b19d29abc +Subproject commit a2da79398cbc9633a0878a123dd977c54f35d92f From ea7c0511a12a26f90c423b90661da42759e1c534 Mon Sep 17 00:00:00 2001 From: Christiaan Herrewijn Date: Mon, 4 May 2026 15:24:30 +0200 Subject: [PATCH 04/11] apply 0001-vector-fix.patch --- src/statement_generator.cpp | 26 ++++++++++++-------------- src/statement_simplifier.cpp | 35 +++++++++++++++++++---------------- 2 files changed, 31 insertions(+), 30 deletions(-) diff --git a/src/statement_generator.cpp b/src/statement_generator.cpp index 72e8f5b..afd174d 100644 --- a/src/statement_generator.cpp +++ b/src/statement_generator.cpp @@ -18,6 +18,7 @@ #include "duckdb/parser/statement/attach_statement.hpp" #include 
"duckdb/parser/statement/create_statement.hpp" #include "duckdb/parser/statement/delete_statement.hpp" +#include "duckdb/parser/query_node/delete_query_node.hpp" #include "duckdb/parser/statement/detach_statement.hpp" #include "duckdb/parser/statement/insert_statement.hpp" #include "duckdb/parser/statement/multi_statement.hpp" @@ -189,12 +190,12 @@ unique_ptr StatementGenerator::GenerateDelete() { if (entry.type == CatalogType::TABLE_ENTRY) { auto result = make_uniq(); result->table_name = entry.name; - delete_statement->table = std::move(result); + delete_statement->node->table = std::move(result); } else { - delete_statement->table = GenerateTableRef(); + delete_statement->node->table = GenerateTableRef(); } } else { - delete_statement->table = GenerateTableRef(); + delete_statement->node->table = GenerateTableRef(); } return delete_statement; @@ -302,7 +303,7 @@ void StatementGenerator::GenerateCTEs(QueryNode &node) { } while (RandomPercentage(20)) { auto cte = make_uniq(); - cte->query = unique_ptr_cast(GenerateSelect()); + cte->query_node = std::move(unique_ptr_cast(GenerateSelect())->node); for (idx_t i = 0; i < 1 + RandomValue(10); i++) { cte->aliases.push_back(GenerateIdentifier()); } @@ -806,6 +807,9 @@ unique_ptr StatementGenerator::GenerateFunction() { max_parameters = min_parameters; break; } + case CatalogType::WINDOW_FUNCTION_ENTRY: + // FIXME: Use the function, not a random builtin + return GenerateWindowFunction(); default: throw InternalException("StatementGenerator::GenerateFunction"); } @@ -957,9 +961,12 @@ unique_ptr StatementGenerator::GenerateWindowFunction(optional case ExpressionType::WINDOW_NTILE: case ExpressionType::WINDOW_FIRST_VALUE: case ExpressionType::WINDOW_LAST_VALUE: + min_parameters = 1; + break; case ExpressionType::WINDOW_LEAD: case ExpressionType::WINDOW_LAG: min_parameters = 1; + min_parameters = 3; break; case ExpressionType::WINDOW_NTH_VALUE: min_parameters = 2; @@ -970,7 +977,7 @@ unique_ptr 
StatementGenerator::GenerateWindowFunction(optional max_parameters = min_parameters; } WindowChecker checker(*this); - auto result = make_uniq(type, INVALID_CATALOG, INVALID_SCHEMA, std::move(name)); + auto result = make_uniq(SYSTEM_CATALOG, DEFAULT_SCHEMA, std::move(name)); result->children = GenerateChildren(min_parameters, max_parameters); while (RandomPercentage(50)) { result->partitions.push_back(GenerateExpression()); @@ -1014,15 +1021,6 @@ unique_ptr StatementGenerator::GenerateWindowFunction(optional default: break; } - switch (type) { - case ExpressionType::WINDOW_LEAD: - case ExpressionType::WINDOW_LAG: - result->offset_expr = RandomExpression(30); - result->default_expr = RandomExpression(30); - break; - default: - break; - } return std::move(result); } diff --git a/src/statement_simplifier.cpp b/src/statement_simplifier.cpp index 4602928..3b46e19 100644 --- a/src/statement_simplifier.cpp +++ b/src/statement_simplifier.cpp @@ -8,8 +8,11 @@ #include "duckdb/parser/expression/list.hpp" #include "duckdb/parser/statement/delete_statement.hpp" #include "duckdb/parser/statement/insert_statement.hpp" +#include "duckdb/parser/query_node/insert_query_node.hpp" #include "duckdb/parser/statement/prepare_statement.hpp" #include "duckdb/parser/statement/update_statement.hpp" +#include "duckdb/parser/query_node/update_query_node.hpp" +#include "duckdb/parser/query_node/delete_query_node.hpp" #include "duckdb/parser/statement/select_statement.hpp" #endif @@ -206,7 +209,7 @@ void StatementSimplifier::Simplify(CommonTableExpressionMap &cte) { SimplifyMap(cte.map); for (auto &cte_child : cte.map) { // simplify individual ctes - Simplify(cte_child.second->query->node); + Simplify(cte_child.second->query_node); } } @@ -357,8 +360,6 @@ void StatementSimplifier::SimplifyExpression(duckdb::unique_ptrcte_map); + if (stmt.node->select_statement) { + Simplify(*stmt.node->select_statement); + } + SimplifyList(stmt.node->returning_list); } void 
StatementSimplifier::Simplify(DeleteStatement &stmt) { - Simplify(stmt.cte_map); - SimplifyOptional(stmt.condition); - SimplifyExpression(stmt.condition); - SimplifyList(stmt.using_clauses); - SimplifyList(stmt.returning_list); + Simplify(stmt.node->cte_map); + SimplifyOptional(stmt.node->condition); + SimplifyExpression(stmt.node->condition); + SimplifyList(stmt.node->using_clauses); + SimplifyList(stmt.node->returning_list); } void StatementSimplifier::Simplify(UpdateSetInfo &info) { @@ -432,11 +435,11 @@ void StatementSimplifier::Simplify(PrepareStatement &stmt) { } void StatementSimplifier::Simplify(UpdateStatement &stmt) { - Simplify(stmt.cte_map); - SimplifyOptional(stmt.from_table); - D_ASSERT(stmt.set_info); - Simplify(*stmt.set_info); - SimplifyList(stmt.returning_list); + Simplify(stmt.node->cte_map); + SimplifyOptional(stmt.node->from_table); + D_ASSERT(stmt.node->set_info); + Simplify(*stmt.node->set_info); + SimplifyList(stmt.node->returning_list); } void StatementSimplifier::Simplify(SQLStatement &stmt) { From 9edf739b8ede3974b51cdb30605084d53080b7f8 Mon Sep 17 00:00:00 2001 From: Christiaan Herrewijn Date: Mon, 4 May 2026 15:25:08 +0200 Subject: [PATCH 05/11] apply 0002-protect-function-args.patch --- src/statement_generator.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/statement_generator.cpp b/src/statement_generator.cpp index afd174d..ae18964 100644 --- a/src/statement_generator.cpp +++ b/src/statement_generator.cpp @@ -565,7 +565,7 @@ unique_ptr StatementGenerator::GenerateTableFunctionRef() { auto result = make_uniq(); vector> children; - for (idx_t i = 0; i < table_function.arguments.size(); i++) { + for (idx_t i = 0; i < table_function.GetArguments().size(); i++) { children.push_back(GenerateConstant()); } vector names; @@ -762,10 +762,10 @@ unique_ptr StatementGenerator::GenerateFunction() { auto offset = RandomValue(scalar_entry.functions.Size()); auto actual_function = 
scalar_entry.functions.GetFunctionByOffset(offset); name = scalar_entry.name; - arguments = actual_function.arguments; - min_parameters = actual_function.arguments.size(); + arguments = actual_function.GetArguments(); + min_parameters = actual_function.GetArguments().size(); max_parameters = min_parameters; - if (actual_function.varargs.id() != LogicalTypeId::INVALID) { + if (actual_function.GetVarArgs().id() != LogicalTypeId::INVALID) { max_parameters += 5; } break; @@ -776,9 +776,9 @@ unique_ptr StatementGenerator::GenerateFunction() { aggregate_entry.functions.GetFunctionByOffset(RandomValue(aggregate_entry.functions.Size())); name = aggregate_entry.name; - min_parameters = actual_function.arguments.size(); + min_parameters = actual_function.GetArguments().size(); max_parameters = min_parameters; - if (actual_function.varargs.id() != LogicalTypeId::INVALID) { + if (actual_function.GetVarArgs().id() != LogicalTypeId::INVALID) { max_parameters += 5; } if (RandomPercentage(10) && !in_window) { @@ -944,7 +944,7 @@ unique_ptr StatementGenerator::GenerateWindowFunction(optional if (function) { type = ExpressionType::WINDOW_AGGREGATE; name = function->name; - min_parameters = function->arguments.size(); + min_parameters = function->GetArguments().size(); max_parameters = min_parameters; } else { name = Choose({"rank", "rank_dense", "percent_rank", "row_number", "first_value", "last_value", @@ -1284,7 +1284,7 @@ string StatementGenerator::GenerateTestAllTypes(BaseScalarFunction &base_functio bool always_null = FunctionArgumentsAlwaysNull(base_function.name); vector> children; - for (auto &arg : base_function.arguments) { + for (auto &arg : base_function.GetArguments()) { // look up the type unique_ptr argument; if (!always_null) { @@ -1319,7 +1319,7 @@ string StatementGenerator::GenerateTestVectorTypes(BaseScalarFunction &base_func vector> children; vector> test_vector_types; vector column_aliases; - for (auto &arg : base_function.arguments) { + for (auto &arg : 
base_function.GetArguments()) { unique_ptr argument; if (!always_null) { string argument_name = "c" + to_string(column_aliases.size() + 1); From 3b786c6bcc5e60c7e0a0ccac449cb7efe8ea31d5 Mon Sep 17 00:00:00 2001 From: Christiaan Herrewijn Date: Mon, 4 May 2026 15:25:54 +0200 Subject: [PATCH 06/11] apply 0003-remove-base-scalar-function.patch --- src/include/statement_generator.hpp | 4 ++-- src/statement_generator.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/include/statement_generator.hpp b/src/include/statement_generator.hpp index 2b60c00..639860e 100644 --- a/src/include/statement_generator.hpp +++ b/src/include/statement_generator.hpp @@ -96,8 +96,8 @@ class StatementGenerator { void GenerateAllScalar(ScalarFunctionCatalogEntry &scalar_function, vector &result); void GenerateAllAggregate(AggregateFunctionCatalogEntry &aggregate_function, vector &result); - string GenerateTestAllTypes(BaseScalarFunction &base_function); - string GenerateTestVectorTypes(BaseScalarFunction &base_function); + string GenerateTestAllTypes(SimpleFunction &base_function); + string GenerateTestVectorTypes(SimpleFunction &base_function); string GenerateCast(const LogicalType &target, const string &source_name, bool add_varchar); bool FunctionArgumentsAlwaysNull(const string &name); diff --git a/src/statement_generator.cpp b/src/statement_generator.cpp index ae18964..a6361c0 100644 --- a/src/statement_generator.cpp +++ b/src/statement_generator.cpp @@ -1277,7 +1277,7 @@ bool StatementGenerator::FunctionArgumentsAlwaysNull(const string &name) { return always_null_functions.find(name) != always_null_functions.end(); } -string StatementGenerator::GenerateTestAllTypes(BaseScalarFunction &base_function) { +string StatementGenerator::GenerateTestAllTypes(SimpleFunction &base_function) { auto select = make_uniq(); auto node = make_uniq(); @@ -1310,7 +1310,7 @@ string StatementGenerator::GenerateTestAllTypes(BaseScalarFunction &base_functio return 
select->ToString(); } -string StatementGenerator::GenerateTestVectorTypes(BaseScalarFunction &base_function) { +string StatementGenerator::GenerateTestVectorTypes(SimpleFunction &base_function) { auto select = make_uniq(); auto node = make_uniq(); From 15f7b5eef909458ae4b0a53adba7c98758f5ed76 Mon Sep 17 00:00:00 2001 From: Christiaan Herrewijn Date: Mon, 4 May 2026 15:26:37 +0200 Subject: [PATCH 07/11] apply 0004-expr.patch --- src/statement_generator.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/statement_generator.cpp b/src/statement_generator.cpp index a6361c0..2828430 100644 --- a/src/statement_generator.cpp +++ b/src/statement_generator.cpp @@ -576,7 +576,7 @@ unique_ptr StatementGenerator::GenerateTableFunctionRef() { auto name = Choose(names); names.erase(std::find(names.begin(), names.end(), name)); auto expr = GenerateConstant(); - expr->alias = name; + expr->SetAlias(name); children.push_back(std::move(expr)); } result->function = make_uniq(entry.name, std::move(children)); From 35360132c1c82747fb858376a6b1bec208cc72b9 Mon Sep 17 00:00:00 2001 From: Christiaan Herrewijn Date: Tue, 5 May 2026 11:24:36 +0200 Subject: [PATCH 08/11] apply 0005-chunkcardinality.patch --- src/sqlsmith_extension.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sqlsmith_extension.cpp b/src/sqlsmith_extension.cpp index c5f2274..1542369 100644 --- a/src/sqlsmith_extension.cpp +++ b/src/sqlsmith_extension.cpp @@ -118,7 +118,7 @@ static void ReduceSQLFunction(ClientContext &context, TableFunctionInput &data_p output.data[0].SetValue(count, Value(entry)); count++; } - output.SetCardinality(count); + output.SetChildCardinality(count); } struct FuzzyDuckFunctionData : public TableFunctionData { From e1f7b13f017c7836637e04fb90b52bf3fc631f0e Mon Sep 17 00:00:00 2001 From: Christiaan Herrewijn Date: Tue, 5 May 2026 11:25:22 +0200 Subject: [PATCH 09/11] apply 0006-function-expr.patch --- src/statement_generator.cpp | 28 
+++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/statement_generator.cpp b/src/statement_generator.cpp index 2828430..4c750ce 100644 --- a/src/statement_generator.cpp +++ b/src/statement_generator.cpp @@ -762,10 +762,12 @@ unique_ptr StatementGenerator::GenerateFunction() { auto offset = RandomValue(scalar_entry.functions.Size()); auto actual_function = scalar_entry.functions.GetFunctionByOffset(offset); name = scalar_entry.name; - arguments = actual_function.GetArguments(); - min_parameters = actual_function.GetArguments().size(); + for (auto &arg : actual_function.GetSignature().GetParameters()) { + arguments.push_back(arg.GetType()); + } + min_parameters = actual_function.GetSignature().GetParameterCount(); max_parameters = min_parameters; - if (actual_function.GetVarArgs().id() != LogicalTypeId::INVALID) { + if (actual_function.GetSignature().GetVarArgs().id() != LogicalTypeId::INVALID) { max_parameters += 5; } break; @@ -776,7 +778,7 @@ unique_ptr StatementGenerator::GenerateFunction() { aggregate_entry.functions.GetFunctionByOffset(RandomValue(aggregate_entry.functions.Size())); name = aggregate_entry.name; - min_parameters = actual_function.GetArguments().size(); + min_parameters = actual_function.GetSignature().GetParameterCount(); max_parameters = min_parameters; if (actual_function.GetVarArgs().id() != LogicalTypeId::INVALID) { max_parameters += 5; @@ -944,7 +946,7 @@ unique_ptr StatementGenerator::GenerateWindowFunction(optional if (function) { type = ExpressionType::WINDOW_AGGREGATE; name = function->name; - min_parameters = function->GetArguments().size(); + min_parameters = function->GetSignature().GetParameterCount(); max_parameters = min_parameters; } else { name = Choose({"rank", "rank_dense", "percent_rank", "row_number", "first_value", "last_value", @@ -1281,21 +1283,21 @@ string StatementGenerator::GenerateTestAllTypes(SimpleFunction &base_function) { auto select = make_uniq(); auto node = 
make_uniq(); - bool always_null = FunctionArgumentsAlwaysNull(base_function.name); + bool always_null = FunctionArgumentsAlwaysNull(base_function.GetName()); vector> children; - for (auto &arg : base_function.GetArguments()) { + for (auto &param : base_function.GetSignature().GetParameters()) { // look up the type unique_ptr argument; if (!always_null) { for (auto &test_type : generator_context->test_types) { - if (test_type.type.id() == arg.id()) { + if (test_type.type.id() == param.GetType().id()) { argument = make_uniq(test_type.name); } } } if (!argument) { - argument = make_uniq(Value(arg)); + argument = make_uniq(Value(param.GetType())); } children.push_back(std::move(argument)); } @@ -1303,7 +1305,7 @@ string StatementGenerator::GenerateTestAllTypes(SimpleFunction &base_function) { from_clause->table_name = "all_types"; node->from_table = std::move(from_clause); - auto function_expr = make_uniq(base_function.name, std::move(children)); + auto function_expr = make_uniq(base_function.GetName(), std::move(children)); node->select_list.push_back(std::move(function_expr)); select->node = std::move(node); @@ -1319,17 +1321,17 @@ string StatementGenerator::GenerateTestVectorTypes(SimpleFunction &base_function vector> children; vector> test_vector_types; vector column_aliases; - for (auto &arg : base_function.GetArguments()) { + for (auto &param : base_function.GetSignature().GetParameters()) { unique_ptr argument; if (!always_null) { string argument_name = "c" + to_string(column_aliases.size() + 1); column_aliases.push_back(argument_name); argument = make_uniq(std::move(argument_name)); auto constant_expr = make_uniq(Value()); - auto cast = make_uniq(arg, std::move(constant_expr)); + auto cast = make_uniq(param.GetType(), std::move(constant_expr)); test_vector_types.push_back(std::move(cast)); } else { - argument = make_uniq(Value(arg)); + argument = make_uniq(Value(param.GetType())); } children.push_back(std::move(argument)); } From
7e43270600dfd6e5aa84ae48bd370fb3767b2e2e Mon Sep 17 00:00:00 2001 From: Christiaan Herrewijn Date: Tue, 5 May 2026 11:29:15 +0200 Subject: [PATCH 10/11] bump duckdb --- duckdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/duckdb b/duckdb index 5776358..59b02b7 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 5776358ab2acc5c5e947d9bde80334db38edd92d +Subproject commit 59b02b756b2c63723be7b53b4b4061f8acae602a From 87a3bc187118e60c29c0bf719da62af724ce3790 Mon Sep 17 00:00:00 2001 From: Christiaan Herrewijn Date: Tue, 5 May 2026 13:03:06 +0200 Subject: [PATCH 11/11] bump MainDistributionPipeline.yml to use duckdb main branch --- .github/workflows/MainDistributionPipeline.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index ba34dfd..d847e66 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -17,7 +17,7 @@ jobs: uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main with: extension_name: sqlsmith - duckdb_version: e3509341f681c4cb6f2c22d1f0f4b653ed20644d + duckdb_version: main ci_tools_version: main exclude_archs: '' @@ -28,7 +28,7 @@ jobs: secrets: inherit with: extension_name: sqlsmith - duckdb_version: e3509341f681c4cb6f2c22d1f0f4b653ed20644d + duckdb_version: main ci_tools_version: main exclude_archs: '' deploy_latest: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }}