diff --git a/.github/workflows/test-fuzzer-ci-still-works.yml b/.github/workflows/test-fuzzer-ci-still-works.yml index 4b195fc..a89d5d8 100644 --- a/.github/workflows/test-fuzzer-ci-still-works.yml +++ b/.github/workflows/test-fuzzer-ci-still-works.yml @@ -13,7 +13,7 @@ jobs: uses: duckdblabs/duckdb-fuzzer-ci/.github/workflows/build_fuzzer.yml@main with: git_url: ${{ github.actor }}/duckdb_sqlsmith - git_tag: ${{ github.head_ref }} + git_tag: main timeout-minutes: 120 fuzzer: @@ -37,4 +37,7 @@ jobs: data: ${{ matrix.data }} timeout-minutes: 20 max_queries: 10 - enable_verification: ${{ matrix.enable_verification }} \ No newline at end of file + enable_verification: ${{ matrix.enable_verification }} + repo: ${{ github.repository }} + secrets: + DUCKDB_HASH: ${{ needs.build-duckdb.outputs.hash }} \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 9c7ff5c..175dd65 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,6 +20,7 @@ add_subdirectory(src/third_party/sqlsmith) set(EXTENSION_SOURCES src/sqlsmith_extension.cpp src/statement_generator.cpp src/statement_simplifier.cpp + src/random_nums_config.cpp src/fuzzyduck.cpp ${EXTENSION_OBJECT_FILES}) build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES}) diff --git a/config.json b/config.json new file mode 100644 index 0000000..75daaaa --- /dev/null +++ b/config.json @@ -0,0 +1,8 @@ +{ + "select_percentage": "70", + "attach_percentage": "20", + "attach_use_percentage": "80", + "detach_percentage": "15", + "set_percentage": "5", + "delete_percentage": "25" +} \ No newline at end of file diff --git a/config_nested.json b/config_nested.json new file mode 100644 index 0000000..fa4c6cd --- /dev/null +++ b/config_nested.json @@ -0,0 +1,103 @@ +{ + "attach": { + "attach_percentage": "20", + "attach_use_percentage": "80", + "attach_read_only": "30" + }, + "detach": { + "detach_percentage": "15", + "detach_random_name": "20" + }, + "set": { + "set_percentage": "5", + "set_attached_db": "90" + }, 
+ "delete_percentage": "25", + "select": { + "select_percentage": "70", + "select_node": { + "select_node_perc": "5", + "select_node_is_distinct_perc": "30", + "select_node_from_table_perc": "95", + "select_node_where_perc": "60", + "select_node_having_perc": "25", + "select_node_groups": { + "select_node_groups_perc": "30", + "select_node_group_by_perc": "70" + }, + "select_node_qualify_perc": "10", + "select_node_aggregate_perc": "10", + "select_node_sample": { + "select_node_sample_perc": "10", + "select_node_sample_is_perc": "50", + "select_node_sample_size": "100" + } + }, + "setop": "40", + "result_modifiers": "5", + "limit_percent_modifier": "50", + "limit_percent_modifier_limit": "30", + "limit_percent_modifier_offset": "30", + "limit_modifier_limit": "30", + "limit_modifier_offset": "30" + }, + "create": { + "create_percentage": "0", + "create_generate_select": "50", + "create_num_cols": "1000" + }, + "table_ref": { + "table_ref_base_table_ref_perc": "60", + "table_ref_expression_list_ref": "20", + "table_ref_join_ref": "40" + }, + "join_ref": { + "join_ref_cross": "10", + "join_ref_asof": "10", + "join_ref_natural": "10", + "join_ref_positional": "10", + "join_ref_general_expression": "70" + }, + "expression": { + "expression_column_ref": "50", + "expression_constant": "30", + "expression_subquery": "3" + }, + "constant_value": { + "constant_value_bigint": "50", + "constant_value_to_string": "30" + }, + "function": { + "function_aggregate": { + "function_aggregate_window_function": "10", + "function_aggregate_filter": "10", + "function_aggregate_order_by": "10", + "function_aggregate_random_expression": "10", + "function_aggregate_distinct": "10" + } + }, + "window_function": { + "window_function_partitions": "50", + "window_function_orders": "30", + "window_function_ignore_nulls": "30", + "window_function_filter_expr": "30", + "window_function_result_offset": "30", + "window_function_result_default": "30" + }, + "star": { + "star_relation_name": "10", + 
"star_column_name_exclude_list": "20", + "star_column_name_replace_list": "20", + "star_column_name": "20", + "star_columns": { + "star_columns_true": "50", + "star_columns_true_lambda": "50" + } + }, + "relational_name": { + "relational_name_choose_current": "80" + }, + "column_names": { + "column_names_choose_current": "80" + } +} \ No newline at end of file diff --git a/scripts/run_fuzzer.py b/scripts/run_fuzzer.py index 357e7e1..122e05d 100644 --- a/scripts/run_fuzzer.py +++ b/scripts/run_fuzzer.py @@ -33,6 +33,8 @@ perform_checks = False elif param.startswith('--enable_verification'): verification = param.replace('--enable_verification=', '').lower() == 'true' + elif param.startswith('--randoms_config_filepath'): + randoms_config_filepath = param.replace('--randoms_config_filepath=', '') elif param.startswith('--shell='): shell = param.replace('--shell=', '') elif param.startswith('--seed='): @@ -77,7 +79,8 @@ def get_fuzzer_call_statement(fuzzer): if fuzzer == 'sqlsmith': return "call sqlsmith(max_queries=${MAX_QUERIES}, max_query_length=${MAX_QUERY_LENGTH}, seed=${SEED}, verbose_output=1, log='${LAST_LOG_FILE}', complete_log='${COMPLETE_LOG_FILE}');" elif fuzzer == 'duckfuzz': - return "call fuzzyduck(max_queries=${MAX_QUERIES}, max_query_length=${MAX_QUERY_LENGTH}, seed=${SEED}, verbose_output=1, log='${LAST_LOG_FILE}', complete_log='${COMPLETE_LOG_FILE}', enable_verification='${ENABLE_VERIFICATION}');" + return "call fuzzyduck(max_queries=${MAX_QUERIES}, seed=${SEED}, verbose_output=1, log='${LAST_LOG_FILE}', complete_log='${COMPLETE_LOG_FILE}', \ + enable_verification='${ENABLE_VERIFICATION}', randoms_config_filepath='${RANDOMS_CONFIG_FILEPATH}');" elif fuzzer == 'duckfuzz_functions': return "call fuzz_all_functions(seed=${SEED}, max_query_length=${MAX_QUERY_LENGTH}, verbose_output=1, log='${LAST_LOG_FILE}', complete_log='${COMPLETE_LOG_FILE}');" else: @@ -149,6 +152,7 @@ def is_known_issue(exception_msg): .replace('${COMPLETE_LOG_FILE}', 
complete_log_file) .replace('${SEED}', str(seed)) .replace('${ENABLE_VERIFICATION}', str(verification)) + .replace('${RANDOMS_CONFIG_FILEPATH}', randoms_config_filepath) ) print(create_db_statement) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 309e2e6..78b2ada 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -7,7 +7,7 @@ include_directories(third_party/sqlsmith/include) add_subdirectory(third_party) set(SQLSMITH_SOURCES - sqlsmith_extension.cpp statement_generator.cpp statement_simplifier.cpp + sqlsmith_extension.cpp statement_generator.cpp statement_simplifier.cpp random_nums_config.cpp fuzzyduck.cpp ${SQLSMITH_OBJECT_FILES}) build_static_extension(sqlsmith ${SQLSMITH_SOURCES}) diff --git a/src/fuzzyduck.cpp b/src/fuzzyduck.cpp index d4a84fc..2e8d5cd 100644 --- a/src/fuzzyduck.cpp +++ b/src/fuzzyduck.cpp @@ -25,8 +25,8 @@ void FuzzyDuck::BeginFuzzing() { if (max_query_length == 0) { throw InvalidInputException("Provide a max_query_length argument greater than 0"); } + auto &fs = FileSystem::GetFileSystem(context); if (!complete_log.empty()) { - auto &fs = FileSystem::GetFileSystem(context); TryRemoveFile(complete_log); complete_log_handle = fs.OpenFile(complete_log, FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE_NEW); @@ -34,6 +34,11 @@ void FuzzyDuck::BeginFuzzing() { if (enable_verification) { RunQuery("PRAGMA enable_verification"); } + if (!randoms_config_filepath.empty()) { + config = RandomNumsConfig().GetConfigFromFile(randoms_config_filepath.c_str()); + } else { + config = RandomNumsConfig().GetDefaultConfig(); + } } void FuzzyDuck::EndFuzzing() { @@ -88,6 +93,7 @@ string FuzzyDuck::GenerateQuery(const idx_t &total_query_length) { // generate statement StatementGenerator generator(context); generator.verification_enabled = enable_verification; + generator.config = config; // accumulate statement(s) auto statement = string(""); if (generator.RandomPercentage(10)) { diff --git a/src/include/fuzzyduck.hpp 
b/src/include/fuzzyduck.hpp index fcc3513..087c3c9 100644 --- a/src/include/fuzzyduck.hpp +++ b/src/include/fuzzyduck.hpp @@ -10,8 +10,10 @@ #include "duckdb.hpp" #include "duckdb/parser/query_node.hpp" +#include "random_nums_config.hpp" namespace duckdb { + struct FileHandle; class FuzzyDuck { @@ -28,6 +30,9 @@ class FuzzyDuck { bool verbose_output = false; bool enable_verification = false; idx_t timeout = 30; + string randoms_config_filepath; + // RandomNumsConfig config; + unordered_map config; public: void Fuzz(); @@ -51,6 +56,7 @@ class FuzzyDuck { private: unique_ptr complete_log_handle; + unique_ptr randoms_config_handle; }; } // namespace duckdb
diff --git a/src/include/random_nums_config.hpp b/src/include/random_nums_config.hpp
new file mode 100644
index 0000000..1e388a6
--- /dev/null
+++ b/src/include/random_nums_config.hpp
@@ -0,0 +1,159 @@
+// ===----------------------------------------------------------------------===//
+// DuckDB
+
+// random_nums_config.hpp
+
+
+// ===----------------------------------------------------------------------===//
+
+#pragma once
+
+#include "duckdb.hpp"
+#include "yyjson.hpp"
+
+namespace duckdb {
+
+//! One entry per tunable number read by the statement generator. Most are percentages
+//! passed to RandomPercentage / RandomExpression; SELECT_NODE_SAMPLE_SIZE is passed to
+//! RandomValue as an upper bound.
+//! The explicit values are not in declaration order (60, 61 and 62 sit between smaller
+//! ones), so COUNT is spelled out: it must stay one past the largest value.
+//! NOTE(review): DELETE is also a macro in the Windows SDK headers - confirm Windows builds.
+enum class RandomPercentagesEnum : idx_t {
+    // ----------------------------------
+    // Generate Attach Percentages
+    // ----------------------------------
+    ATTACH = 0,
+    ATTACH_USE = 1,
+    ATTACH_READ_ONLY = 2,
+    // ----------------------------------
+    // Generate Detach Percentages
+    // ----------------------------------
+    DETACH = 3,
+    DETACH_RANDOM_NAME = 4,
+    // ----------------------------------
+    // Generate Set Percentages
+    // ----------------------------------
+    SET = 5,
+    SET_ATTACHED_DB = 6,
+    // ----------------------------------
+    // Generate Delete Percentages
+    // ----------------------------------
+    DELETE = 7,
+
+    // ----------------------------------
+    // Generate Select Percentages
+    // ----------------------------------
+    SELECT = 8,
+    SELECT_NODE = 9,
+    SELECT_NODE_IS_DISTINCT = 10,
+    SELECT_NODE_FROM_TABLE = 11,
+    SELECT_NODE_WHERE = 12,
+    SELECT_NODE_HAVING = 13,
+    SELECT_NODE_GROUPS = 14,
+    SELECT_NODE_GROUP_BY = 15,
+    SELECT_NODE_QUALIFY = 16,
+    SELECT_NODE_AGGREGATE = 17,
+    SELECT_NODE_SAMPLE = 18,
+    SELECT_NODE_SAMPLE_IS_PERC = 19,
+    SELECT_NODE_SAMPLE_SIZE = 20,
+    SETOP = 21,
+    RESULT_MODIFIERS = 22,
+    LIMIT_PERCENT_MODIFIER = 23,
+    LIMIT_PERCENT_MODIFIER_LIMIT = 24,
+    LIMIT_PERCENT_MODIFIER_OFFSET = 25,
+    LIMIT_MODIFIER_LIMIT = 26,
+    LIMIT_MODIFIER_OFFSET = 27,
+
+    // ----------------------------------
+    // Generate Create Percentages
+    // ----------------------------------
+    CREATE = 28,
+    CREATE_GENERATE_SELECT = 29,
+    CREATE_NUM_COLS = 30,
+    // -----------------------------------
+    // Generate Table Ref Percentages
+    // -----------------------------------
+    TABLE_REF_BASE_TABLE_REF_PERC = 31,
+    TABLE_REF_EXPRESSION_LIST_REF = 32,
+    TABLE_REF_JOIN_REF = 33,
+
+    // -----------------------------------
+    // Generate Join Ref Percentages
+    // -----------------------------------
+    JOIN_REF_CROSS = 34,
+    JOIN_REF_ASOF = 35,
+    JOIN_REF_NATURAL = 36,
+    JOIN_REF_POSITIONAL = 37,
+    JOIN_REF_GENERAL_EXPRESSION = 38,
+
+    // -----------------------------------
+    // Generate Expression Percentages
+    // -----------------------------------
+    EXPRESSION_COLUMN_REF = 39,
+    EXPRESSION_CONSTANT = 40,
+    EXPRESSION_SUBQUERY = 41,
+
+    // -----------------------------------
+    // Generate Constant Value Percentages
+    // -----------------------------------
+    CONSTANT_VALUE_BIGINT = 42,
+    CONSTANT_VALUE_TO_STRING = 43,
+
+    // -----------------------------------
+    // Generate Function Percentages
+    // -----------------------------------
+    FUNCTION_AGGREGATE_WINDOW_FUNCTION = 44,
+    FUNCTION_AGGREGATE_ORDER_BY = 45,
+    FUNCTION_AGGREGATE_FILTER = 60,
+    FUNCTION_AGGREGATE_DISTINCT = 46,
+    FUNCTION_AGGREGATE_RANDOM_EXPRESSION = 47,
+
+    // -----------------------------------
+    // Generate Window Function Percentages
+    // -----------------------------------
+    WINDOW_FUNCTION_PARTITIONS = 48,
+    WINDOW_FUNCTION_ORDERS = 49,
+    WINDOW_FUNCTION_FILTER_EXPRESSION = 61,
+    WINDOW_FUNCTION_IGNORE_NULLS = 50,
+    WINDOW_FUNCTION_RESULT_OFFSET = 51,
+    WINDOW_FUNCTION_RESULT_DEFAULT = 52,
+
+    // -----------------------------------
+    // Generate Star Percentages
+    // -----------------------------------
+    STAR_RELATION_NAME = 53,
+    STAR_COLUMN_NAME = 54,
+    STAR_COLUMN_NAME_EXCLUDE_LIST = 55,
+    STAR_COLUMN_NAME_REPLACE_LIST = 62,
+    STAR_COLUMNS_TRUE = 56,
+    STAR_COLUMNS_TRUE_LAMBDA = 57,
+
+    RELATIONAL_NAME_CHOOSE_CURRENT = 58,
+    COLUMN_NAMES_CHOOSE_CURRENT = 59,
+
+    // one past the largest value above (62)
+    COUNT = 63
+
+};
+
+//! Produces the map from RandomPercentagesEnum to number that the statement generator reads.
+class RandomNumsConfig {
+public:
+    RandomNumsConfig();
+    RandomNumsConfig(const char *config_file_path);
+    ~RandomNumsConfig();
+
+    RandomPercentagesEnum percentages_selector;
+
+    //! Settings used when no config file is supplied.
+    unordered_map<RandomPercentagesEnum, idx_t> GetDefaultConfig();
+    //! Reads settings from a JSON file. Despite its name, json_string is the path of the
+    //! file, not JSON text.
+    unordered_map<RandomPercentagesEnum, idx_t> GetConfigFromFile(const char *json_string);
+    //! NOTE(review): no definition is visible in random_nums_config.cpp - confirm or remove.
+    string RandomPercentagesEnumToString(RandomPercentagesEnum type);
+
+};
+
+}// namespace duckdb
\ No newline at end of file
diff --git a/src/include/statement_generator.hpp b/src/include/statement_generator.hpp index 57e8446..a1cbbe3 100644 --- a/src/include/statement_generator.hpp +++ b/src/include/statement_generator.hpp @@ -11,6 +11,7 @@ #include "duckdb.hpp" #include "duckdb/parser/parsed_data/detach_info.hpp" #include "duckdb/parser/query_node.hpp" +#include "random_nums_config.hpp" #define TESTING_DIRECTORY_NAME "duckdb_unittest_tempdir" @@ -40,6 +41,7 @@ class StatementGenerator { friend class ExpressionDepthChecker; friend class AggregateChecker; friend class WindowChecker; + unordered_map config; public: StatementGenerator(ClientContext &context); @@ -55,6 +57,8 @@ class StatementGenerator { bool RandomPercentage(idx_t percentage); bool verification_enabled = false; idx_t RandomValue(idx_t max); + + string GetRandomAttachedDataBase(); unique_ptr GenerateStatement(StatementType type); // came from private diff --git a/src/random_nums_config.cpp b/src/random_nums_config.cpp new
file mode 100644
index 0000000..8a96e42
--- /dev/null
+++ b/src/random_nums_config.cpp
@@ -0,0 +1,233 @@
+#include "include/random_nums_config.hpp"
+
+#include <exception>
+#include <memory>
+#include <string>
+
+namespace duckdb {
+
+using namespace duckdb_yyjson;
+
+//! Maps each key accepted in the JSON config file to the setting it controls.
+unordered_map<string, RandomPercentagesEnum> StringToRandomPercentagesEnum = {
+    { "attach_percentage", RandomPercentagesEnum::ATTACH },
+    { "attach_use_percentage", RandomPercentagesEnum::ATTACH_USE },
+    { "attach_read_only", RandomPercentagesEnum::ATTACH_READ_ONLY },
+    { "detach_percentage", RandomPercentagesEnum::DETACH },
+    { "detach_random_name", RandomPercentagesEnum::DETACH_RANDOM_NAME },
+    { "set_percentage", RandomPercentagesEnum::SET },
+    { "set_attached_db", RandomPercentagesEnum::SET_ATTACHED_DB },
+    { "delete_percentage", RandomPercentagesEnum::DELETE },
+    { "select_percentage", RandomPercentagesEnum::SELECT },
+    { "select_node_perc", RandomPercentagesEnum::SELECT_NODE },
+    { "select_node_is_distinct_perc", RandomPercentagesEnum::SELECT_NODE_IS_DISTINCT },
+    { "select_node_from_table_perc", RandomPercentagesEnum::SELECT_NODE_FROM_TABLE },
+    { "select_node_where_perc", RandomPercentagesEnum::SELECT_NODE_WHERE },
+    { "select_node_having_perc", RandomPercentagesEnum::SELECT_NODE_HAVING },
+    { "select_node_groups_perc", RandomPercentagesEnum::SELECT_NODE_GROUPS },
+    { "select_node_group_by_perc", RandomPercentagesEnum::SELECT_NODE_GROUP_BY },
+    { "select_node_qualify_perc", RandomPercentagesEnum::SELECT_NODE_QUALIFY },
+    { "select_node_aggregate_perc", RandomPercentagesEnum::SELECT_NODE_AGGREGATE },
+    { "select_node_sample_perc", RandomPercentagesEnum::SELECT_NODE_SAMPLE },
+    { "select_node_sample_is_perc", RandomPercentagesEnum::SELECT_NODE_SAMPLE_IS_PERC },
+    { "select_node_sample_size", RandomPercentagesEnum::SELECT_NODE_SAMPLE_SIZE },
+    { "setop", RandomPercentagesEnum::SETOP },
+    { "result_modifiers", RandomPercentagesEnum::RESULT_MODIFIERS },
+    { "limit_percent_modifier", RandomPercentagesEnum::LIMIT_PERCENT_MODIFIER },
+    { "limit_percent_modifier_limit", RandomPercentagesEnum::LIMIT_PERCENT_MODIFIER_LIMIT },
+    { "limit_percent_modifier_offset", RandomPercentagesEnum::LIMIT_PERCENT_MODIFIER_OFFSET },
+    { "limit_modifier_limit", RandomPercentagesEnum::LIMIT_MODIFIER_LIMIT },
+    { "limit_modifier_offset", RandomPercentagesEnum::LIMIT_MODIFIER_OFFSET },
+    { "create_percentage", RandomPercentagesEnum::CREATE },
+    { "create_generate_select", RandomPercentagesEnum::CREATE_GENERATE_SELECT },
+    { "create_num_cols", RandomPercentagesEnum::CREATE_NUM_COLS },
+    { "table_ref_base_table_ref_perc", RandomPercentagesEnum::TABLE_REF_BASE_TABLE_REF_PERC },
+    { "table_ref_expression_list_ref", RandomPercentagesEnum::TABLE_REF_EXPRESSION_LIST_REF },
+    { "table_ref_join_ref", RandomPercentagesEnum::TABLE_REF_JOIN_REF },
+    { "join_ref_cross", RandomPercentagesEnum::JOIN_REF_CROSS },
+    { "join_ref_asof", RandomPercentagesEnum::JOIN_REF_ASOF },
+    { "join_ref_natural", RandomPercentagesEnum::JOIN_REF_NATURAL },
+    { "join_ref_positional", RandomPercentagesEnum::JOIN_REF_POSITIONAL },
+    { "join_ref_general_expression", RandomPercentagesEnum::JOIN_REF_GENERAL_EXPRESSION },
+    { "expression_column_ref", RandomPercentagesEnum::EXPRESSION_COLUMN_REF },
+    { "expression_constant", RandomPercentagesEnum::EXPRESSION_CONSTANT },
+    { "expression_subquery", RandomPercentagesEnum::EXPRESSION_SUBQUERY },
+    { "constant_value_bigint", RandomPercentagesEnum::CONSTANT_VALUE_BIGINT },
+    { "constant_value_to_string", RandomPercentagesEnum::CONSTANT_VALUE_TO_STRING },
+    { "function_aggregate_window_function", RandomPercentagesEnum::FUNCTION_AGGREGATE_WINDOW_FUNCTION },
+    { "function_aggregate_order_by", RandomPercentagesEnum::FUNCTION_AGGREGATE_ORDER_BY },
+    { "function_aggregate_filter", RandomPercentagesEnum::FUNCTION_AGGREGATE_FILTER },
+    { "function_aggregate_random_expression", RandomPercentagesEnum::FUNCTION_AGGREGATE_RANDOM_EXPRESSION },
+    { "function_aggregate_distinct", RandomPercentagesEnum::FUNCTION_AGGREGATE_DISTINCT },
+    { "window_function_partitions", RandomPercentagesEnum::WINDOW_FUNCTION_PARTITIONS },
+    { "window_function_filter_expr", RandomPercentagesEnum::WINDOW_FUNCTION_FILTER_EXPRESSION },
+    { "window_function_orders", RandomPercentagesEnum::WINDOW_FUNCTION_ORDERS },
+    { "window_function_ignore_nulls", RandomPercentagesEnum::WINDOW_FUNCTION_IGNORE_NULLS },
+    { "window_function_result_offset", RandomPercentagesEnum::WINDOW_FUNCTION_RESULT_OFFSET },
+    { "window_function_result_default", RandomPercentagesEnum::WINDOW_FUNCTION_RESULT_DEFAULT },
+    { "star_relation_name", RandomPercentagesEnum::STAR_RELATION_NAME },
+    { "star_column_name_exclude_list", RandomPercentagesEnum::STAR_COLUMN_NAME_EXCLUDE_LIST },
+    { "star_column_name_replace_list", RandomPercentagesEnum::STAR_COLUMN_NAME_REPLACE_LIST },
+    { "star_column_name", RandomPercentagesEnum::STAR_COLUMN_NAME },
+    { "star_columns_true", RandomPercentagesEnum::STAR_COLUMNS_TRUE },
+    { "star_columns_true_lambda", RandomPercentagesEnum::STAR_COLUMNS_TRUE_LAMBDA },
+    { "relational_name_choose_current", RandomPercentagesEnum::RELATIONAL_NAME_CHOOSE_CURRENT },
+    { "column_names_choose_current", RandomPercentagesEnum::COLUMN_NAMES_CHOOSE_CURRENT }
+};
+
+namespace {
+
+//! Frees a yyjson document, so that std::unique_ptr can own a parsed file.
+struct YyjsonDocDeleter {
+    void operator()(yyjson_doc *doc) const {
+        yyjson_doc_free(doc);
+    }
+};
+
+//! Converts the text of one config value into a number.
+//! Throws InvalidInputException if std::stoi rejects the text or the number is negative.
+idx_t ParseConfigValue(const char *key, const char *text) {
+    int parsed = 0;
+    try {
+        parsed = std::stoi(text);
+    } catch (const std::exception &) {
+        throw InvalidInputException("randoms config: value \"%s\" of key \"%s\" is not a valid integer",
+                                    string(text), string(key));
+    }
+    if (parsed < 0) {
+        throw InvalidInputException("randoms config: value of key \"%s\" must not be negative", string(key));
+    }
+    return static_cast<idx_t>(parsed);
+}
+
+} // namespace
+
+//! Builds nothing: GetDefaultConfig returns its map by value and this class stores none,
+//! so calling it here would only throw the result away.
+RandomNumsConfig::RandomNumsConfig() : percentages_selector(RandomPercentagesEnum::ATTACH) {
+}
+
+//! The file is not read here for the same reason; use GetConfigFromFile to load it.
+RandomNumsConfig::RandomNumsConfig(const char *config_file_path)
+    : percentages_selector(RandomPercentagesEnum::ATTACH) {
+    (void)config_file_path;
+}
+
+RandomNumsConfig::~RandomNumsConfig() {
+}
+
+//! Returns the settings used when no config file is given or the file cannot be read.
+//! Except for the first six entries, every value is the literal the statement generator
+//! used before it read this map. Settings missing from this list read as 0 through
+//! operator[]. NOTE(review): give them defaults once generator code looks them up.
+unordered_map<RandomPercentagesEnum, idx_t> RandomNumsConfig::GetDefaultConfig() {
+    unordered_map<RandomPercentagesEnum, idx_t> default_config = {
+        { RandomPercentagesEnum::ATTACH, 40 },
+        { RandomPercentagesEnum::ATTACH_USE, 50 },
+        { RandomPercentagesEnum::DELETE, 40 },
+        { RandomPercentagesEnum::DETACH, 60 },
+        { RandomPercentagesEnum::SELECT, 60 },
+        { RandomPercentagesEnum::SET, 30 },
+        // attach / detach / set details
+        { RandomPercentagesEnum::DETACH_RANDOM_NAME, 20 },
+        { RandomPercentagesEnum::SET_ATTACHED_DB, 90 },
+        // query node
+        { RandomPercentagesEnum::SELECT_NODE, 70 },
+        { RandomPercentagesEnum::SELECT_NODE_IS_DISTINCT, 30 },
+        { RandomPercentagesEnum::SELECT_NODE_FROM_TABLE, 95 },
+        { RandomPercentagesEnum::SELECT_NODE_WHERE, 60 },
+        { RandomPercentagesEnum::SELECT_NODE_HAVING, 25 },
+        { RandomPercentagesEnum::SELECT_NODE_GROUPS, 30 },
+        { RandomPercentagesEnum::SELECT_NODE_GROUP_BY, 70 },
+        { RandomPercentagesEnum::SELECT_NODE_QUALIFY, 10 },
+        { RandomPercentagesEnum::SELECT_NODE_AGGREGATE, 10 },
+        { RandomPercentagesEnum::SELECT_NODE_SAMPLE, 10 },
+        { RandomPercentagesEnum::SELECT_NODE_SAMPLE_IS_PERC, 50 },
+        { RandomPercentagesEnum::SELECT_NODE_SAMPLE_SIZE, 100 },
+        // the set-operation branch used to be an unconditional else
+        { RandomPercentagesEnum::SETOP, 100 },
+        { RandomPercentagesEnum::RESULT_MODIFIERS, 5 },
+        { RandomPercentagesEnum::LIMIT_PERCENT_MODIFIER, 50 },
+        { RandomPercentagesEnum::LIMIT_PERCENT_MODIFIER_LIMIT, 30 },
+        { RandomPercentagesEnum::LIMIT_PERCENT_MODIFIER_OFFSET, 30 },
+        { RandomPercentagesEnum::LIMIT_MODIFIER_LIMIT, 30 },
+        { RandomPercentagesEnum::LIMIT_MODIFIER_OFFSET, 30 },
+        // table refs and joins
+        { RandomPercentagesEnum::TABLE_REF_BASE_TABLE_REF_PERC, 60 },
+        { RandomPercentagesEnum::TABLE_REF_EXPRESSION_LIST_REF, 20 },
+        { RandomPercentagesEnum::TABLE_REF_JOIN_REF, 40 },
+        { RandomPercentagesEnum::JOIN_REF_CROSS, 10 },
+        { RandomPercentagesEnum::JOIN_REF_ASOF, 10 },
+        { RandomPercentagesEnum::JOIN_REF_NATURAL, 10 },
+        { RandomPercentagesEnum::JOIN_REF_POSITIONAL, 10 },
+        { RandomPercentagesEnum::JOIN_REF_GENERAL_EXPRESSION, 70 },
+        // expressions and constants
+        { RandomPercentagesEnum::EXPRESSION_COLUMN_REF, 50 },
+        { RandomPercentagesEnum::EXPRESSION_CONSTANT, 30 },
+        { RandomPercentagesEnum::EXPRESSION_SUBQUERY, 3 },
+        { RandomPercentagesEnum::CONSTANT_VALUE_BIGINT, 50 },
+        { RandomPercentagesEnum::CONSTANT_VALUE_TO_STRING, 30 },
+        // aggregate and window functions
+        { RandomPercentagesEnum::FUNCTION_AGGREGATE_WINDOW_FUNCTION, 10 },
+        { RandomPercentagesEnum::FUNCTION_AGGREGATE_FILTER, 10 },
+        { RandomPercentagesEnum::FUNCTION_AGGREGATE_ORDER_BY, 10 },
+        { RandomPercentagesEnum::FUNCTION_AGGREGATE_DISTINCT, 10 },
+        { RandomPercentagesEnum::WINDOW_FUNCTION_PARTITIONS, 50 },
+        { RandomPercentagesEnum::WINDOW_FUNCTION_ORDERS, 30 },
+        { RandomPercentagesEnum::WINDOW_FUNCTION_FILTER_EXPRESSION, 30 },
+        { RandomPercentagesEnum::WINDOW_FUNCTION_IGNORE_NULLS, 30 },
+        { RandomPercentagesEnum::WINDOW_FUNCTION_RESULT_OFFSET, 30 },
+        { RandomPercentagesEnum::WINDOW_FUNCTION_RESULT_DEFAULT, 30 },
+        // star expressions
+        { RandomPercentagesEnum::STAR_RELATION_NAME, 10 },
+        { RandomPercentagesEnum::STAR_COLUMN_NAME_EXCLUDE_LIST, 20 },
+        { RandomPercentagesEnum::STAR_COLUMN_NAME_REPLACE_LIST, 20 }
+    };
+    return default_config;
+}
+
+//! Copies every recognised key of a JSON object, at any nesting depth, into config_from_file.
+//! A recognised key is stored only when its value is a JSON string; object values are
+//! descended into whatever their key is, and everything else is skipped.
+void ParseJsonObj(yyjson_val *obj, unordered_map<RandomPercentagesEnum, idx_t> &config_from_file) {
+    size_t idx, max;
+    yyjson_val *key, *val;
+    yyjson_obj_foreach(obj, idx, max, key, val) {
+        const char *root_key = yyjson_get_str(key);
+        auto it = StringToRandomPercentagesEnum.find(root_key);
+        if (it != StringToRandomPercentagesEnum.end()) {
+            auto perc_value = yyjson_get_str(val);
+            if (perc_value) {
+                config_from_file[it->second] = ParseConfigValue(root_key, perc_value);
+            }
+        }
+        if (yyjson_is_obj(val)) {
+            ParseJsonObj(val, config_from_file);
+        }
+    }
+}
+
+//! Loads the settings from the JSON file at path json_string (a path, despite the name).
+//! Falls back to GetDefaultConfig() when yyjson cannot read the file; otherwise every
+//! recognised setting that the file omits is set to 0.
+unordered_map<RandomPercentagesEnum, idx_t> RandomNumsConfig::GetConfigFromFile(const char *json_string) {
+    // owned by a smart pointer so the document is released even if a value fails to parse
+    std::unique_ptr<yyjson_doc, YyjsonDocDeleter> doc(
+        yyjson_read_file(json_string, YYJSON_READ_NOFLAG, nullptr, nullptr));
+    if (!doc) {
+        // Couldn't read JSON with percentages config
+        return GetDefaultConfig();
+    }
+    unordered_map<RandomPercentagesEnum, idx_t> config_from_file;
+    yyjson_val *root = yyjson_doc_get_root(doc.get());
+    if (yyjson_is_obj(root)) {
+        ParseJsonObj(root, config_from_file);
+    }
+    // set values of missing statement types to 0 (emplace keeps values read from the file)
+    for (const auto &entry : StringToRandomPercentagesEnum) {
+        config_from_file.emplace(entry.second, 0);
+    }
+    return config_from_file;
+}
+
+} // namespace duckdb
\ No newline at end of file
diff --git a/src/sqlsmith_extension.cpp b/src/sqlsmith_extension.cpp index c5f2274..3ce27c8 100644 --- a/src/sqlsmith_extension.cpp +++ b/src/sqlsmith_extension.cpp @@ -147,6 +147,8 @@ static duckdb::unique_ptr FuzzyDuckBind(ClientContext &context, Ta result->fuzzer.verbose_output = BooleanValue::Get(kv.second); } else if (kv.first == "enable_verification") { result->fuzzer.enable_verification = BooleanValue::Get(kv.second); + } else if (kv.first == "randoms_config_filepath") { + result->fuzzer.randoms_config_filepath = StringValue::Get(kv.second); } } return_types.emplace_back(LogicalType::BOOLEAN); @@ -196,6 +198,7 @@ static void LoadInternal(ExtensionLoader &loader) {
fuzzy_duck_fun.named_parameters["complete_log"] = LogicalType::VARCHAR; fuzzy_duck_fun.named_parameters["verbose_output"] = LogicalType::BOOLEAN; fuzzy_duck_fun.named_parameters["enable_verification"] = LogicalType::BOOLEAN; + fuzzy_duck_fun.named_parameters["randoms_config_filepath"] = LogicalType::VARCHAR; loader.RegisterFunction(fuzzy_duck_fun); TableFunction fuzz_all_functions("fuzz_all_functions", {}, FuzzAllFunctions, FuzzyDuckBind); diff --git a/src/statement_generator.cpp b/src/statement_generator.cpp index 455833b..0bf5402 100644 --- a/src/statement_generator.cpp +++ b/src/statement_generator.cpp @@ -42,8 +42,8 @@ StatementGenerator::StatementGenerator(ClientContext &context) : context(context } StatementGenerator::StatementGenerator(StatementGenerator &parent_p) - : verification_enabled(parent_p.verification_enabled), context(parent_p.context), parent(&parent_p), - generator_context(parent_p.generator_context), depth(parent_p.depth + 1) { + : config(parent_p.config), verification_enabled(parent_p.verification_enabled), context(parent_p.context), parent(&parent_p), + generator_context(parent_p.generator_context), depth(parent_p.depth + 1) { if (depth > MAX_DEPTH) { throw InternalException("depth too high"); } @@ -93,23 +93,23 @@ std::shared_ptr StatementGenerator::GetDatabaseState(ClientCon } unique_ptr StatementGenerator::GenerateStatement() { - if (RandomPercentage(80)) { + if (RandomPercentage(config[RandomPercentagesEnum::SELECT])) { return GenerateStatement(StatementType::SELECT_STATEMENT); } - if (RandomPercentage(40)) { - if (RandomPercentage(50)) { + if (RandomPercentage(config[RandomPercentagesEnum::ATTACH])) { + if (RandomPercentage(config[RandomPercentagesEnum::ATTACH_USE])) { // We call this directly so we have a higher chance to fuzz persistent databases return GenerateAttachUse(); } return GenerateStatement(StatementType::ATTACH_STATEMENT); } - if (RandomPercentage(60)) { + if (RandomPercentage(config[RandomPercentagesEnum::DETACH])) { return 
GenerateStatement(StatementType::DETACH_STATEMENT); } - if (RandomPercentage(30)) { + if (RandomPercentage(config[RandomPercentagesEnum::SET])) { return GenerateStatement(StatementType::SET_STATEMENT); } - if (RandomPercentage(40)) { // 20 + if (RandomPercentage(config[RandomPercentagesEnum::DELETE])) { //20 return GenerateStatement(StatementType::DELETE_STATEMENT); } return GenerateStatement(StatementType::CREATE_STATEMENT);
@@ -165,7 +165,7 @@ unique_ptr StatementGenerator::GenerateDetach() {
 // generate USE statement
 unique_ptr StatementGenerator::GenerateSet() {
     auto name_expr = make_uniq(GenerateDataBaseName());
-    if (RandomPercentage(90)) {
+    if (RandomPercentage(config[RandomPercentagesEnum::SET_ATTACHED_DB])) { // was 90; SET only picks the statement type
         auto name = GetRandomAttachedDataBase();
         name_expr = make_uniq(Value(name));
     }
@@ -206,7 +206,7 @@ unique_ptr StatementGenerator::GenerateDelete() { unique_ptr StatementGenerator::GenerateDetachInfo() { auto info = make_uniq(); - if (RandomPercentage(20)) { + if (RandomPercentage(config[RandomPercentagesEnum::DETACH_RANDOM_NAME])) { //20 info->name = "RANDOM_NAME_" + RandomString(15); } else { info->name = GetRandomAttachedDataBase(); @@ -312,23 +312,23 @@ void StatementGenerator::GenerateCTEs(QueryNode &node) { unique_ptr StatementGenerator::GenerateQueryNode() { unique_ptr result; bool is_distinct = false; - if (RandomPercentage(70)) { + if (RandomPercentage(config[RandomPercentagesEnum::SELECT_NODE])) { // 70 // select node auto select_node = make_uniq(); // generate CTEs GenerateCTEs(*select_node); - is_distinct = RandomPercentage(30); - if (RandomPercentage(95)) { + is_distinct = RandomPercentage(config[RandomPercentagesEnum::SELECT_NODE_IS_DISTINCT]); // 30 + if (RandomPercentage(config[RandomPercentagesEnum::SELECT_NODE_FROM_TABLE])) { // 95 select_node->from_table = GenerateTableRef(); } select_node->select_list = GenerateChildren(1, 10); - select_node->where_clause = RandomExpression(60); - select_node->having = RandomExpression(25); - if
(RandomPercentage(30)) { + select_node->where_clause = RandomExpression(config[RandomPercentagesEnum::SELECT_NODE_WHERE]); // 60 + select_node->having = RandomExpression(config[RandomPercentagesEnum::SELECT_NODE_HAVING]); // 25 + if (RandomPercentage(config[RandomPercentagesEnum::SELECT_NODE_GROUPS])) { // 30 select_node->groups.group_expressions = GenerateChildren(1, 5); auto group_count = select_node->groups.group_expressions.size(); - if (RandomPercentage(70)) { + if (RandomPercentage(config[RandomPercentagesEnum::SELECT_NODE_GROUP_BY])) { // 70 // single GROUP BY GroupingSet set; for (idx_t i = 0; i < group_count; i++) { @@ -352,27 +352,29 @@ unique_ptr StatementGenerator::GenerateQueryNode() { } } } - select_node->qualify = RandomExpression(10); + select_node->qualify = RandomExpression(config[RandomPercentagesEnum::SELECT_NODE_QUALIFY]); // 10 select_node->aggregate_handling = - RandomPercentage(10) ? AggregateHandling::FORCE_AGGREGATES : AggregateHandling::STANDARD_HANDLING; - if (RandomPercentage(10)) { + RandomPercentage(config[RandomPercentagesEnum::SELECT_NODE_AGGREGATE]) ? AggregateHandling::FORCE_AGGREGATES : AggregateHandling::STANDARD_HANDLING; // 10 + if (RandomPercentage(config[RandomPercentagesEnum::SELECT_NODE_SAMPLE])) { auto sample = make_uniq(); - sample->is_percentage = RandomPercentage(50); + sample->is_percentage = RandomPercentage(config[RandomPercentagesEnum::SELECT_NODE_SAMPLE_IS_PERC]); //50 if (sample->is_percentage) { - sample->sample_size = Value::BIGINT(RandomValue(100)); + sample->sample_size = Value::BIGINT(RandomValue(config[RandomPercentagesEnum::SELECT_NODE_SAMPLE_SIZE])); // 100 - maybe we can replace the whole ->is_percentage thing with the sample size defined in the config.json } else { sample->sample_size = Value::BIGINT(RandomValue(99999)); } - sample->method = Choose( + sample->method = Choose( // maybe here is also go like sample->method = (config[RandomPercentagesEnum::SELECT_NODE_SAMPLE_METHOD])? 
{SampleMethod::BERNOULLI_SAMPLE, SampleMethod::RESERVOIR_SAMPLE, SampleMethod::SYSTEM_SAMPLE}); select_node->sample = std::move(sample); } result = std::move(select_node); - } else { + } else if (RandomPercentage(config[RandomPercentagesEnum::SETOP])) { + // limit the count of generated setops auto setop = make_uniq(); GenerateCTEs(*setop); + //should be possible to choose: from the complete enum, from only one enum value defined in config file, from 2-3 enum values setop->setop_type = Choose({SetOperationType::EXCEPT, SetOperationType::INTERSECT, - SetOperationType::UNION, SetOperationType::UNION_BY_NAME}); + SetOperationType::UNION, SetOperationType::UNION_BY_NAME}); setop->left = GenerateQueryNode(); setop->right = GenerateQueryNode(); switch (setop->setop_type) { @@ -388,6 +390,8 @@ unique_ptr StatementGenerator::GenerateQueryNode() { throw InternalException("Unsupported set operation type"); } result = std::move(setop); + } else { + result = std::move(make_uniq()); } if (is_distinct) { @@ -396,14 +400,14 @@ unique_ptr StatementGenerator::GenerateQueryNode() { if (verification_enabled) { result->modifiers.push_back(GenerateOrderByAll()); } else if (!verification_enabled) { - if (RandomPercentage(5)) { + if (RandomPercentage(config[RandomPercentagesEnum::RESULT_MODIFIERS])) { // 5 result->modifiers.push_back(GenerateOrderBy()); } - if (RandomPercentage(50)) { + if (RandomPercentage(config[RandomPercentagesEnum::LIMIT_PERCENT_MODIFIER])) { // 50 auto limit_percent_modifier = make_uniq(); - if (RandomPercentage(30)) { + if (RandomPercentage(config[RandomPercentagesEnum::LIMIT_PERCENT_MODIFIER_LIMIT])) { // 30 limit_percent_modifier->limit = GenerateExpression(); - } else if (RandomPercentage(30)) { + } else if (RandomPercentage(config[RandomPercentagesEnum::LIMIT_PERCENT_MODIFIER_OFFSET])) { // 30 limit_percent_modifier->offset = GenerateExpression(); } else { limit_percent_modifier->limit = GenerateExpression(); @@ -412,9 +416,9 @@ unique_ptr 
StatementGenerator::GenerateQueryNode() { result->modifiers.push_back(std::move(limit_percent_modifier)); } else { auto limit_modifier = make_uniq(); - if (RandomPercentage(30)) { + if (RandomPercentage(config[RandomPercentagesEnum::LIMIT_MODIFIER_LIMIT])) { // 30 limit_modifier->limit = GenerateExpression(); - } else if (RandomPercentage(30)) { + } else if (RandomPercentage(config[RandomPercentagesEnum::LIMIT_MODIFIER_OFFSET])) { // 30 limit_modifier->offset = GenerateExpression(); } else { limit_modifier->limit = GenerateExpression(); @@ -430,13 +434,13 @@ unique_ptr StatementGenerator::GenerateQueryNode() { // Table Ref //===--------------------------------------------------------------------===// unique_ptr StatementGenerator::GenerateTableRef() { - if (RandomPercentage(60)) { + if (RandomPercentage(config[RandomPercentagesEnum::TABLE_REF_BASE_TABLE_REF_PERC])) { return GenerateBaseTableRef(); } - if (RandomPercentage(20)) { + if (RandomPercentage(config[RandomPercentagesEnum::TABLE_REF_EXPRESSION_LIST_REF])) { return GenerateExpressionListRef(); } - if (RandomPercentage(40)) { + if (RandomPercentage(config[RandomPercentagesEnum::TABLE_REF_JOIN_REF])) { return GenerateJoinRef(); } switch (RandomValue(3)) { @@ -496,13 +500,13 @@ unique_ptr StatementGenerator::GenerateExpressionListRef() { unique_ptr StatementGenerator::GenerateJoinRef() { JoinRefType join_ref; - if (RandomPercentage(10)) { + if (RandomPercentage(config[RandomPercentagesEnum::JOIN_REF_CROSS])) { join_ref = JoinRefType::CROSS; - } else if (RandomPercentage(10)) { + } else if (RandomPercentage(config[RandomPercentagesEnum::JOIN_REF_ASOF])) { join_ref = JoinRefType::ASOF; - } else if (RandomPercentage(10)) { + } else if (RandomPercentage(config[RandomPercentagesEnum::JOIN_REF_NATURAL])) { join_ref = JoinRefType::NATURAL; - } else if (RandomPercentage(10)) { + } else if (RandomPercentage(config[RandomPercentagesEnum::JOIN_REF_POSITIONAL])) { join_ref = JoinRefType::POSITIONAL; } else { join_ref = 
JoinRefType::REGULAR; @@ -511,7 +515,7 @@ unique_ptr StatementGenerator::GenerateJoinRef() { join->left = GenerateTableRef(); join->right = GenerateTableRef(); if (join_ref != JoinRefType::CROSS && join_ref != JoinRefType::NATURAL) { - if (RandomPercentage(70)) { + if (RandomPercentage(config[RandomPercentagesEnum::JOIN_REF_GENERAL_EXPRESSION])) { join->condition = GenerateExpression(); } else { while (true) { @@ -654,13 +658,13 @@ class ExpressionDepthChecker { unique_ptr StatementGenerator::GenerateExpression() { ExpressionDepthChecker checker(*this); - if (RandomPercentage(50) || RandomPercentage(expression_depth + depth * 5)) { + if (RandomPercentage(config[RandomPercentagesEnum::EXPRESSION_COLUMN_REF]) || RandomPercentage(expression_depth + depth * 5)) { return GenerateColumnRef(); } - if (RandomPercentage(30)) { + if (RandomPercentage(config[RandomPercentagesEnum::EXPRESSION_CONSTANT])) { return GenerateConstant(); } - if (RandomPercentage(3)) { + if (RandomPercentage(config[RandomPercentagesEnum::EXPRESSION_SUBQUERY])) { return GenerateSubquery(); } switch (RandomValue(9)) { @@ -688,10 +692,10 @@ unique_ptr StatementGenerator::GenerateExpression() { } Value StatementGenerator::GenerateConstantValue() { - if (RandomPercentage(50)) { + if (RandomPercentage(config[RandomPercentagesEnum::CONSTANT_VALUE_BIGINT])) { return Value::BIGINT(RandomValue(9999)); } - if (RandomPercentage(30)) { + if (RandomPercentage(config[RandomPercentagesEnum::CONSTANT_VALUE_TO_STRING])) { return Value(UUID::ToString(UUID::GenerateRandomUUID(RandomEngine::Get(context)))); } auto &val = Choose(generator_context->test_types); @@ -777,7 +781,7 @@ unique_ptr StatementGenerator::GenerateFunction() { if (actual_function.varargs.id() != LogicalTypeId::INVALID) { max_parameters += 5; } - if (RandomPercentage(10) && !in_window) { + if (RandomPercentage(config[RandomPercentagesEnum::FUNCTION_AGGREGATE_WINDOW_FUNCTION]) && !in_window) { return GenerateWindowFunction(&actual_function); } if 
(in_aggregate) { @@ -785,12 +789,12 @@ unique_ptr StatementGenerator::GenerateFunction() { return GenerateColumnRef(); } checker = make_uniq(*this); - filter = RandomExpression(10); - if (RandomPercentage(10)) { + filter = RandomExpression(config[RandomPercentagesEnum::FUNCTION_AGGREGATE_FILTER]); + if (RandomPercentage(config[RandomPercentagesEnum::FUNCTION_AGGREGATE_ORDER_BY])) { // generate order by order_bys = GenerateOrderBy(); } - if (RandomPercentage(10)) { + if (RandomPercentage(config[RandomPercentagesEnum::FUNCTION_AGGREGATE_DISTINCT])) { distinct = true; } break; @@ -969,15 +973,15 @@ unique_ptr StatementGenerator::GenerateWindowFunction(optional WindowChecker checker(*this); auto result = make_uniq(type, INVALID_CATALOG, INVALID_SCHEMA, std::move(name)); result->children = GenerateChildren(min_parameters, max_parameters); - while (RandomPercentage(50)) { + while (RandomPercentage(config[RandomPercentagesEnum::WINDOW_FUNCTION_PARTITIONS])) { result->partitions.push_back(GenerateExpression()); } - if (RandomPercentage(30)) { + if (RandomPercentage(config[RandomPercentagesEnum::WINDOW_FUNCTION_ORDERS])) { result->orders = std::move(GenerateOrderBy()->orders); } if (function) { - result->filter_expr = RandomExpression(30); - if (RandomPercentage(30)) { + result->filter_expr = RandomExpression(config[RandomPercentagesEnum::WINDOW_FUNCTION_FILTER_EXPRESSION]); + if (RandomPercentage(config[RandomPercentagesEnum::WINDOW_FUNCTION_IGNORE_NULLS])) { result->ignore_nulls = true; } } @@ -1014,8 +1018,8 @@ unique_ptr StatementGenerator::GenerateWindowFunction(optional switch (type) { case ExpressionType::WINDOW_LEAD: case ExpressionType::WINDOW_LAG: - result->offset_expr = RandomExpression(30); - result->default_expr = RandomExpression(30); + result->offset_expr = RandomExpression(config[RandomPercentagesEnum::WINDOW_FUNCTION_RESULT_OFFSET]); + result->default_expr = RandomExpression(config[RandomPercentagesEnum::WINDOW_FUNCTION_RESULT_DEFAULT]); break; default: 
break; @@ -1050,19 +1054,19 @@ unique_ptr StatementGenerator::GenerateConjunction() { unique_ptr StatementGenerator::GenerateStar() { auto result = make_uniq(); if (!current_relation_names.empty()) { - if (RandomPercentage(10)) { + if (RandomPercentage(config[RandomPercentagesEnum::STAR_RELATION_NAME])) { result->relation_name = GenerateRelationName(); } } if (!verification_enabled) { - while (RandomPercentage(20)) { + while (RandomPercentage(config[RandomPercentagesEnum::STAR_COLUMN_NAME_EXCLUDE_LIST])) { auto column_name = GenerateColumnName(); if (column_name.empty()) { break; } result->exclude_list.insert(column_name); } - while (RandomPercentage(20)) { + while (RandomPercentage(config[RandomPercentagesEnum::STAR_COLUMN_NAME_REPLACE_LIST])) { auto column_name = GenerateColumnName(); if (column_name.empty()) { break; diff --git a/test/sql/call_fuzzyduck.test b/test/sql/call_fuzzyduck.test index 7e460f7..74a2b72 100644 --- a/test/sql/call_fuzzyduck.test +++ b/test/sql/call_fuzzyduck.test @@ -7,9 +7,9 @@ mode skip require sqlsmith statement ok -call fuzzyduck(max_queries=2, max_query_length=50000, verbose_output=1, log='sqlsmith.log', complete_log='sqlsmith.complete.log', enable_verification=True); +call fuzzyduck(max_queries=2, verbose_output=1, log='sqlsmith.log', complete_log='sqlsmith.complete.log', enable_verification=True, randoms_config_filepath='config_nested.json'); statement ok call fuzzyduck(max_queries=2, max_query_length=50000, verbose_output=1, log='__TEST_DIR__/logs.txt', complete_log='__TEST_DIR__/clog.txt', enable_verification=false); -mode unskip \ No newline at end of file +mode unskip