From 4bca237c340717e6ecd7ccd5f09feb880450704f Mon Sep 17 00:00:00 2001 From: root Date: Mon, 8 Jul 2024 07:53:48 +0000 Subject: [PATCH 1/4] backport to DuckDB v1.0.0 --- CMakeLists.txt | 2 +- src/default_table_functions.cpp | 148 +++++++++++++++++++++++ src/dynamic_sql_clickhouse_extension.cpp | 4 +- src/include/default_functions.hpp | 41 +++++++ src/include/default_table_functions.hpp | 47 +++++++ 5 files changed, 239 insertions(+), 3 deletions(-) create mode 100644 src/default_table_functions.cpp create mode 100644 src/include/default_functions.hpp create mode 100644 src/include/default_table_functions.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index f1b43e7..3b4a41d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,7 +14,7 @@ set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension) project(${TARGET_NAME}) include_directories(src/include) -set(EXTENSION_SOURCES src/dynamic_sql_clickhouse_extension.cpp) +set(EXTENSION_SOURCES src/dynamic_sql_clickhouse_extension.cpp src/default_table_functions.cpp) build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES}) build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES}) diff --git a/src/default_table_functions.cpp b/src/default_table_functions.cpp new file mode 100644 index 0000000..dd0e60c --- /dev/null +++ b/src/default_table_functions.cpp @@ -0,0 +1,148 @@ +#include "default_table_functions.hpp" +#include "duckdb/catalog/catalog_entry/table_macro_catalog_entry.hpp" +#include "duckdb/parser/parser.hpp" +#include "duckdb/parser/parsed_data/create_macro_info.hpp" +#include "duckdb/parser/statement/select_statement.hpp" +#include "duckdb/function/table_macro_function.hpp" + +namespace duckdb { + +// clang-format off +static const DefaultTableMacro internal_table_macros[] = { + {DEFAULT_SCHEMA, "histogram_values", {"source", "col_name", nullptr}, {{"bin_count", "10"}, {"technique", "'auto'"}, {nullptr, nullptr}}, R"( +WITH bins AS ( + SELECT + CASE + WHEN (NOT (can_cast_implicitly(MIN(col_name), NULL::BIGINT) OR + can_cast_implicitly(MIN(col_name), NULL::DOUBLE) OR + can_cast_implicitly(MIN(col_name), NULL::TIMESTAMP)) AND technique='auto') + OR technique='sample' + THEN + approx_top_k(col_name, bin_count) + WHEN technique='equi-height' + THEN + quantile(col_name, [x / bin_count::DOUBLE for x in generate_series(1, bin_count)]) + WHEN technique='equi-width' + THEN + equi_width_bins(MIN(col_name), MAX(col_name), bin_count, false) + WHEN technique='equi-width-nice' OR technique='auto' + THEN + equi_width_bins(MIN(col_name), MAX(col_name), bin_count, true) + ELSE + error(concat('Unrecognized technique ', technique)) + END AS bins + FROM query_table(source::VARCHAR) + ) +SELECT UNNEST(map_keys(histogram)) AS bin, UNNEST(map_values(histogram)) AS count +FROM ( + SELECT CASE + WHEN (NOT (can_cast_implicitly(MIN(col_name), NULL::BIGINT) OR + can_cast_implicitly(MIN(col_name), NULL::DOUBLE) OR + can_cast_implicitly(MIN(col_name), NULL::TIMESTAMP)) AND technique='auto') + OR technique='sample' + THEN + histogram_exact(col_name, bins) + ELSE + histogram(col_name, bins) + END AS histogram + FROM query_table(source::VARCHAR), bins +); +)"}, + {DEFAULT_SCHEMA, "histogram", {"source", "col_name", nullptr}, {{"bin_count", "10"}, {"technique", "'auto'"}, {nullptr, nullptr}}, R"( +SELECT + CASE + WHEN is_histogram_other_bin(bin) + THEN '(other values)' + WHEN (NOT (can_cast_implicitly(bin, NULL::BIGINT) OR + can_cast_implicitly(bin, NULL::DOUBLE) OR + can_cast_implicitly(bin, NULL::TIMESTAMP)) AND technique='auto') + OR technique='sample' + THEN bin::VARCHAR + WHEN row_number() over () = 1 + THEN concat('x <= ', bin::VARCHAR) + ELSE concat(lag(bin::VARCHAR) over (), ' < x <= ', bin::VARCHAR) + END AS bin, + count, + bar(count, 0, max(count) over ()) AS bar +FROM histogram_values(source, col_name, bin_count := bin_count, technique := technique); +)"}, + {nullptr, nullptr, {nullptr}, {{nullptr, nullptr}}, nullptr} + }; +// clang-format on + +DefaultTableFunctionGenerator::DefaultTableFunctionGenerator(Catalog &catalog, SchemaCatalogEntry &schema) + : DefaultGenerator(catalog), schema(schema) { +} + +unique_ptr +DefaultTableFunctionGenerator::CreateInternalTableMacroInfo(const DefaultTableMacro &default_macro, + unique_ptr function) { + for (idx_t param_idx = 0; default_macro.parameters[param_idx] != nullptr; param_idx++) { + function->parameters.push_back(make_uniq(default_macro.parameters[param_idx])); + } + for (idx_t named_idx = 0; default_macro.named_parameters[named_idx].name != nullptr; named_idx++) { + auto expr_list = Parser::ParseExpressionList(default_macro.named_parameters[named_idx].default_value); + if (expr_list.size() != 1) { + throw InternalException("Expected a single expression"); + } + function->default_parameters.insert( + make_pair(default_macro.named_parameters[named_idx].name, std::move(expr_list[0]))); + } + + auto type = CatalogType::TABLE_MACRO_ENTRY; + auto bind_info = make_uniq(type); + bind_info->schema = default_macro.schema; + bind_info->name = default_macro.name; + bind_info->temporary = true; + bind_info->internal = true; + bind_info->function = std::move(function); + return bind_info; +} + +unique_ptr +DefaultTableFunctionGenerator::CreateTableMacroInfo(const DefaultTableMacro &default_macro) { + Parser parser; + parser.ParseQuery(default_macro.macro); + if (parser.statements.size() != 1 || parser.statements[0]->type != StatementType::SELECT_STATEMENT) { + throw InternalException("Expected a single select statement in CreateTableMacroInfo internal"); + } + auto node = std::move(parser.statements[0]->Cast().node); + + auto result = make_uniq(std::move(node)); + return CreateInternalTableMacroInfo(default_macro, std::move(result)); +} + +static unique_ptr GetDefaultTableFunction(const string &input_schema, const string &input_name) { + auto schema = StringUtil::Lower(input_schema); + auto name = StringUtil::Lower(input_name); + for (idx_t index = 0; internal_table_macros[index].name != nullptr; index++) { + if (internal_table_macros[index].schema == schema && internal_table_macros[index].name == name) { + return DefaultTableFunctionGenerator::CreateTableMacroInfo(internal_table_macros[index]); + } + } + return nullptr; +} + +unique_ptr DefaultTableFunctionGenerator::CreateDefaultEntry(ClientContext &context, + const string &entry_name) { + auto info = GetDefaultTableFunction(schema.name, entry_name); + if (info) { + return make_uniq_base(catalog, schema, info->Cast()); + } + return nullptr; +} + +vector DefaultTableFunctionGenerator::GetDefaultEntries() { + vector result; + for (idx_t index = 0; internal_table_macros[index].name != nullptr; index++) { + if (StringUtil::Lower(internal_table_macros[index].name) != internal_table_macros[index].name) { + throw InternalException("Default macro name %s should be lowercase", internal_table_macros[index].name); + } + if (internal_table_macros[index].schema == schema.name) { + result.emplace_back(internal_table_macros[index].name); + } + } + return result; +} + +} // namespace duckdb diff --git a/src/dynamic_sql_clickhouse_extension.cpp b/src/dynamic_sql_clickhouse_extension.cpp index 6fb5482..37707ad 100644 --- a/src/dynamic_sql_clickhouse_extension.cpp +++ b/src/dynamic_sql_clickhouse_extension.cpp @@ -11,8 +11,8 @@ // OpenSSL linked through vcpkg #include -#include "duckdb/catalog/default/default_functions.hpp" -#include "duckdb/catalog/default/default_table_functions.hpp" +#include "default_functions.hpp" +#include "default_table_functions.hpp" namespace duckdb { diff --git a/src/include/default_functions.hpp b/src/include/default_functions.hpp new file mode 100644 index 0000000..3466585 --- /dev/null +++ b/src/include/default_functions.hpp @@ -0,0 +1,41 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/catalog/default/default_functions.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckdb/catalog/default/default_generator.hpp" +#include "duckdb/parser/parsed_data/create_macro_info.hpp" + +namespace duckdb { +class SchemaCatalogEntry; + +struct DefaultMacro { + const char *schema; + const char *name; + const char *parameters[8]; + const char *macro; +}; + +class DefaultFunctionGenerator : public DefaultGenerator { +public: + DefaultFunctionGenerator(Catalog &catalog, SchemaCatalogEntry &schema); + + SchemaCatalogEntry &schema; + + DUCKDB_API static unique_ptr CreateInternalMacroInfo(const DefaultMacro &default_macro); + +public: + unique_ptr CreateDefaultEntry(ClientContext &context, const string &entry_name) override; + vector GetDefaultEntries() override; + +private: + static unique_ptr CreateInternalMacroInfo(const DefaultMacro &default_macro, + unique_ptr function); +}; + +} // namespace duckdb diff --git a/src/include/default_table_functions.hpp b/src/include/default_table_functions.hpp new file mode 100644 index 0000000..c0eee28 --- /dev/null +++ b/src/include/default_table_functions.hpp @@ -0,0 +1,47 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/catalog/default/default_table_functions.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckdb/catalog/default/default_generator.hpp" +#include "duckdb/parser/parsed_data/create_macro_info.hpp" + +namespace duckdb { +class SchemaCatalogEntry; + +struct DefaultNamedParameter { + const char *name; + const char *default_value; +}; + +struct DefaultTableMacro { + const char *schema; + const char *name; + const char *parameters[8]; + DefaultNamedParameter named_parameters[8]; + const char *macro; +}; + +class DefaultTableFunctionGenerator : public DefaultGenerator { +public: + DefaultTableFunctionGenerator(Catalog &catalog, SchemaCatalogEntry &schema); + + SchemaCatalogEntry &schema; + +public: + unique_ptr CreateDefaultEntry(ClientContext &context, const string &entry_name) override; + vector GetDefaultEntries() override; + + static unique_ptr CreateTableMacroInfo(const DefaultTableMacro &default_macro); + +private: + static unique_ptr CreateInternalTableMacroInfo(const DefaultTableMacro &default_macro, + unique_ptr function); +}; + +} // namespace duckdb From fedaf7c1ae56ac322d58e3b77bf55b8f3e15d2d5 Mon Sep 17 00:00:00 2001 From: root Date: Mon, 8 Jul 2024 07:55:20 +0000 Subject: [PATCH 2/4] Point duckdb submodule to v1.0.0 --- duckdb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/duckdb b/duckdb index 0be3e7b..1f98600 160000 --- a/duckdb +++ b/duckdb @@ -1 +1 @@ -Subproject commit 0be3e7b43680f0bfd851f8788581aaaf4bf8cd3f +Subproject commit 1f98600c2cf8722a6d2f2d805bb4af5e701319fc From 638daefa4c2c5e3af22c944c7df83327635b744e Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Mon, 8 Jul 2024 10:53:28 +0200 Subject: [PATCH 3/4] Pin 1.0.0 in MainDistributionPipeline --- .github/workflows/MainDistributionPipeline.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index 74674b0..5be21a0 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -16,7 +16,7 @@ jobs: name: Build extension binaries uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main with: - duckdb_version: main + duckdb_version: 1.0.0 extension_name: dynamic_sql_clickhouse duckdb-stable-deploy: @@ -25,7 +25,7 @@ jobs: uses: duckdb/extension-ci-tools/.github/workflows/_extension_deploy.yml@main secrets: inherit with: - duckdb_version: main + duckdb_version: 1.0.0 extension_name: dynamic_sql_clickhouse deploy_latest: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }} deploy_versioned: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }} From f41debe317911d2bcf29e3b4b2f7bd2863e91669 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Mon, 8 Jul 2024 11:09:05 +0200 Subject: [PATCH 4/4] Fix rookie mistake --- .github/workflows/MainDistributionPipeline.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index 5be21a0..97e5ac8 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -16,7 +16,7 @@ jobs: name: Build extension binaries uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main with: - duckdb_version: 1.0.0 + duckdb_version: v1.0.0 extension_name: dynamic_sql_clickhouse duckdb-stable-deploy: @@ -25,7 +25,7 @@ jobs: uses: duckdb/extension-ci-tools/.github/workflows/_extension_deploy.yml@main secrets: inherit with: - duckdb_version: 1.0.0 + duckdb_version: v1.0.0 extension_name: dynamic_sql_clickhouse deploy_latest: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }} deploy_versioned: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }}