Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ void ApproxTopKFinalize(Vector &state_vector, AggregateFinalizeInputData &, Vect
}
// reserve space in the list vector
ListVector::Reserve(result, old_len + new_entries);
auto list_entries = FlatVector::Writer<list_entry_t>(result, offset + count, offset);
auto list_entries = FlatVector::Writer<list_entry_t>(result, count, offset);
auto &child_data = ListVector::GetChildMutable(result);

idx_t current_offset = old_len;
Expand Down Expand Up @@ -404,7 +404,6 @@ AggregateStateLayout ApproxTopKGetStateType(AggregateLayoutInput &input) {
template <class OP = HistogramGenericFunctor>
void ApproxTopKExportState(Vector &state_vector, AggregateFinalizeInputData &aggr_input_data, Vector &result,
idx_t count, idx_t offset) {
D_ASSERT(offset == 0);
auto states = state_vector.Values<ApproxTopKState *>();

auto &mask = FlatVector::ValidityMutable(result);
Expand All @@ -420,23 +419,24 @@ void ApproxTopKExportState(Vector &state_vector, AggregateFinalizeInputData &agg
idx_t total_values = ListVector::GetListSize(value_lists);
idx_t total_filters = ListVector::GetListSize(filter_lists);
for (idx_t i = 0; i < count; i++) {
const idx_t row = offset + i;
auto state_ptr = states[i].GetValue()->state;
value_entries[i].offset = total_values;
filter_entries[i].offset = total_filters;
value_entries[row].offset = total_values;
filter_entries[row].offset = total_filters;
if (!state_ptr || state_ptr->values.empty()) {
// no values have been added to this state - export NULL (children of a NULL struct must also be NULL)
mask.SetInvalid(i);
k_validity.SetInvalid(i);
value_validity.SetInvalid(i);
filter_validity.SetInvalid(i);
value_entries[i].length = 0;
filter_entries[i].length = 0;
k_data[i] = 0;
mask.SetInvalid(row);
k_validity.SetInvalid(row);
value_validity.SetInvalid(row);
filter_validity.SetInvalid(row);
value_entries[row].length = 0;
filter_entries[row].length = 0;
k_data[row] = 0;
continue;
}
k_data[i] = state_ptr->k;
value_entries[i].length = state_ptr->values.size();
filter_entries[i].length = state_ptr->filter.size();
k_data[row] = state_ptr->k;
value_entries[row].length = state_ptr->values.size();
filter_entries[row].length = state_ptr->filter.size();
total_values += state_ptr->values.size();
total_filters += state_ptr->filter.size();
}
Expand All @@ -449,29 +449,30 @@ void ApproxTopKExportState(Vector &state_vector, AggregateFinalizeInputData &agg
auto count_data = FlatVector::GetDataMutable<uint64_t>(value_fields[1]);
auto filter_data = FlatVector::GetDataMutable<uint64_t>(ListVector::GetChildMutable(filter_lists));
for (idx_t i = 0; i < count; i++) {
const idx_t row = offset + i;
auto state_ptr = states[i].GetValue()->state;
if (!state_ptr || state_ptr->values.empty()) {
continue;
}
auto &state = *state_ptr;
// write the values (in descending count order) - decoding them back to the input type
idx_t value_offset = value_entries[i].offset;
idx_t value_offset = value_entries[row].offset;
for (auto &val_ref : state.values) {
auto &val = val_ref.get();
OP::template HistogramFinalize<string_t>(val.str_val.str, value_child, value_offset);
count_data[value_offset] = val.count;
value_offset++;
}
for (idx_t filter_idx = 0; filter_idx < state.filter.size(); filter_idx++) {
filter_data[filter_entries[i].offset + filter_idx] = state.filter[filter_idx];
filter_data[filter_entries[row].offset + filter_idx] = state.filter[filter_idx];
}
}
ListVector::SetListSize(value_lists, total_values);
ListVector::SetListSize(filter_lists, total_filters);
FlatVector::SetSize(fields[0], count);
FlatVector::SetSize(value_lists, count);
FlatVector::SetSize(filter_lists, count);
FlatVector::SetSize(result, count);
FlatVector::SetSize(fields[0], offset + count);
FlatVector::SetSize(value_lists, offset + count);
FlatVector::SetSize(filter_lists, offset + count);
FlatVector::SetSize(result, offset + count);
}

template <class OP = HistogramGenericFunctor>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -219,9 +219,8 @@ using APPROX_QUANTILE_EXPORT_TYPE =

void ApproxQuantileExportState(Vector &state_vector, AggregateFinalizeInputData &aggr_input_data, Vector &result,
idx_t count, idx_t offset) {
D_ASSERT(offset == 0);
auto states = state_vector.Values<ApproxQuantileState *>();
auto writer = FlatVector::Writer<APPROX_QUANTILE_EXPORT_TYPE>(result, count);
auto writer = FlatVector::Writer<APPROX_QUANTILE_EXPORT_TYPE>(result, count, offset);
for (idx_t i = 0; i < count; i++) {
auto &state = *states[i].GetValue();
if (!state.h || state.pos == 0) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ AggregateFunction ListFun::GetFunction() {
auto func = AggregateFunction({LogicalType::TEMPLATE("T")}, LogicalType::LIST(LogicalType::TEMPLATE("T")),
AggregateFunction::StateSize<ListAggState>,
AggregateFunction::StateInitialize<ListAggState, ListFunction>, ListUpdateFunction<>,
ListCombineFunction<ListFunction>, ListFinalize, nullptr, nullptr, nullptr, nullptr);
ListCombineFunction<ListFunction>, ListFinalize, ListClusterUpdate<>, nullptr,
nullptr, nullptr);
AggregateFunction::WireStructStateType<ListAggState>(func);

return func;
Expand Down
54 changes: 34 additions & 20 deletions src/duckdb/extension/core_functions/scalar/date/date_part.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,24 @@ DatePartSpecifier GetDateTypePartSpecifier(const string &specifier, const Logica
throw NotImplementedException("\"%s\" units \"%s\" not recognized", EnumUtil::ToString(type.id()), specifier);
}

template <int64_t MIN, int64_t MAX>
template <int64_t MIN, int64_t MAX, class T>
unique_ptr<BaseStatistics> PropagateSimpleDatePartStatistics(vector<BaseStatistics> &child_stats) {
// we can always propagate simple date part statistics
// since the min and max can never exceed these bounds
// we can only propagate simple date part statistics if the child has stats
auto &nstats = child_stats[0];
if (!NumericStats::HasMinMax(nstats)) {
return nullptr;
}
auto min = NumericStats::GetMin<T>(nstats);
auto max = NumericStats::GetMax<T>(nstats);
if (min > max) {
return nullptr;
}
// Infinities produce a NULL date part even though the input is not NULL,
// so we cannot propagate the validity (and thus the stats) in that case
if (!Value::IsFinite(min) || !Value::IsFinite(max)) {
return nullptr;
}
// the min and max can never exceed these bounds
auto result = NumericStats::CreateEmpty(LogicalType::BIGINT);
result.CopyValidity(child_stats[0]);
NumericStats::SetMin(result, Value::BIGINT(MIN));
Expand Down Expand Up @@ -185,7 +199,7 @@ struct DatePart {
template <class T>
static unique_ptr<BaseStatistics> PropagateStatistics(ClientContext &context, FunctionStatisticsInput &input) {
// min/max of month operator is [1, 12]
return PropagateSimpleDatePartStatistics<1, 12>(input.child_stats);
return PropagateSimpleDatePartStatistics<1, 12, T>(input.child_stats);
}
};

Expand All @@ -198,7 +212,7 @@ struct DatePart {
template <class T>
static unique_ptr<BaseStatistics> PropagateStatistics(ClientContext &context, FunctionStatisticsInput &input) {
// min/max of day operator is [1, 31]
return PropagateSimpleDatePartStatistics<1, 31>(input.child_stats);
return PropagateSimpleDatePartStatistics<1, 31, T>(input.child_stats);
}
};

Expand Down Expand Up @@ -284,7 +298,7 @@ struct DatePart {
template <class T>
static unique_ptr<BaseStatistics> PropagateStatistics(ClientContext &context, FunctionStatisticsInput &input) {
// min/max of quarter operator is [1, 4]
return PropagateSimpleDatePartStatistics<1, 4>(input.child_stats);
return PropagateSimpleDatePartStatistics<1, 4, T>(input.child_stats);
}
};

Expand All @@ -303,7 +317,7 @@ struct DatePart {

template <class T>
static unique_ptr<BaseStatistics> PropagateStatistics(ClientContext &context, FunctionStatisticsInput &input) {
return PropagateSimpleDatePartStatistics<0, 6>(input.child_stats);
return PropagateSimpleDatePartStatistics<0, 6, T>(input.child_stats);
}
};

Expand All @@ -316,7 +330,7 @@ struct DatePart {

template <class T>
static unique_ptr<BaseStatistics> PropagateStatistics(ClientContext &context, FunctionStatisticsInput &input) {
return PropagateSimpleDatePartStatistics<1, 7>(input.child_stats);
return PropagateSimpleDatePartStatistics<1, 7, T>(input.child_stats);
}
};

Expand All @@ -328,7 +342,7 @@ struct DatePart {

template <class T>
static unique_ptr<BaseStatistics> PropagateStatistics(ClientContext &context, FunctionStatisticsInput &input) {
return PropagateSimpleDatePartStatistics<1, 366>(input.child_stats);
return PropagateSimpleDatePartStatistics<1, 366, T>(input.child_stats);
}
};

Expand All @@ -340,7 +354,7 @@ struct DatePart {

template <class T>
static unique_ptr<BaseStatistics> PropagateStatistics(ClientContext &context, FunctionStatisticsInput &input) {
return PropagateSimpleDatePartStatistics<1, 53>(input.child_stats);
return PropagateSimpleDatePartStatistics<1, 53, T>(input.child_stats);
}
};

Expand Down Expand Up @@ -429,7 +443,7 @@ struct DatePart {

template <class T>
static unique_ptr<BaseStatistics> PropagateStatistics(ClientContext &context, FunctionStatisticsInput &input) {
return PropagateSimpleDatePartStatistics<0, 59999999999>(input.child_stats);
return PropagateSimpleDatePartStatistics<0, 59999999999, T>(input.child_stats);
}
};

Expand All @@ -441,7 +455,7 @@ struct DatePart {

template <class T>
static unique_ptr<BaseStatistics> PropagateStatistics(ClientContext &context, FunctionStatisticsInput &input) {
return PropagateSimpleDatePartStatistics<0, 59999999>(input.child_stats);
return PropagateSimpleDatePartStatistics<0, 59999999, T>(input.child_stats);
}
};

Expand All @@ -453,7 +467,7 @@ struct DatePart {

template <class T>
static unique_ptr<BaseStatistics> PropagateStatistics(ClientContext &context, FunctionStatisticsInput &input) {
return PropagateSimpleDatePartStatistics<0, 59999>(input.child_stats);
return PropagateSimpleDatePartStatistics<0, 59999, T>(input.child_stats);
}
};

Expand All @@ -465,7 +479,7 @@ struct DatePart {

template <class T>
static unique_ptr<BaseStatistics> PropagateStatistics(ClientContext &context, FunctionStatisticsInput &input) {
return PropagateSimpleDatePartStatistics<0, 59>(input.child_stats);
return PropagateSimpleDatePartStatistics<0, 59, T>(input.child_stats);
}
};

Expand All @@ -477,7 +491,7 @@ struct DatePart {

template <class T>
static unique_ptr<BaseStatistics> PropagateStatistics(ClientContext &context, FunctionStatisticsInput &input) {
return PropagateSimpleDatePartStatistics<0, 59>(input.child_stats);
return PropagateSimpleDatePartStatistics<0, 59, T>(input.child_stats);
}
};

Expand All @@ -489,7 +503,7 @@ struct DatePart {

template <class T>
static unique_ptr<BaseStatistics> PropagateStatistics(ClientContext &context, FunctionStatisticsInput &input) {
return PropagateSimpleDatePartStatistics<0, 24>(input.child_stats);
return PropagateSimpleDatePartStatistics<0, 24, T>(input.child_stats);
}
};

Expand Down Expand Up @@ -518,7 +532,7 @@ struct DatePart {

template <class T>
static unique_ptr<BaseStatistics> PropagateStatistics(ClientContext &context, FunctionStatisticsInput &input) {
return PropagateSimpleDatePartStatistics<0, 1>(input.child_stats);
return PropagateSimpleDatePartStatistics<0, 1, T>(input.child_stats);
}
};

Expand Down Expand Up @@ -550,7 +564,7 @@ struct DatePart {

template <class T>
static unique_ptr<BaseStatistics> PropagateStatistics(ClientContext &context, FunctionStatisticsInput &input) {
return PropagateSimpleDatePartStatistics<0, 0>(input.child_stats);
return PropagateSimpleDatePartStatistics<0, 0, T>(input.child_stats);
}
};

Expand All @@ -563,7 +577,7 @@ struct DatePart {

template <class T>
static unique_ptr<BaseStatistics> PropagateStatistics(ClientContext &context, FunctionStatisticsInput &input) {
return PropagateSimpleDatePartStatistics<0, 0>(input.child_stats);
return PropagateSimpleDatePartStatistics<0, 0, T>(input.child_stats);
}
};

Expand All @@ -576,7 +590,7 @@ struct DatePart {

template <class T>
static unique_ptr<BaseStatistics> PropagateStatistics(ClientContext &context, FunctionStatisticsInput &input) {
return PropagateSimpleDatePartStatistics<0, 0>(input.child_stats);
return PropagateSimpleDatePartStatistics<0, 0, T>(input.child_stats);
}
};

Expand Down
5 changes: 3 additions & 2 deletions src/duckdb/extension/json/json_extension.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@ static const DefaultMacro JSON_MACROS[] = {
{DEFAULT_SCHEMA, "json_group_array",
"(x) AS CAST('[' || string_agg(CASE WHEN x IS NULL THEN 'null'::JSON ELSE to_json(x) END, ',') || ']' AS JSON)"},
{DEFAULT_SCHEMA, "json_group_object",
"(n, v) AS CAST('{' || string_agg(to_json(n::VARCHAR) || ':' || CASE WHEN v IS NULL THEN 'null'::JSON ELSE "
"to_json(v) END, ',') || '}' AS JSON)"},
"(n, v) AS CAST('{' || string_agg(CASE WHEN n IS NULL THEN error('json_group_object key cannot be NULL') ELSE "
"to_json(n::VARCHAR) END || ':' || CASE WHEN v IS NULL THEN 'null'::JSON ELSE to_json(v) END, ',') || '}' AS "
"JSON)"},
{DEFAULT_SCHEMA, "json_group_structure", "(x) AS json_structure(json_group_array(x))->0"},
{DEFAULT_SCHEMA, "json", "(x) AS json_extract(x, '$')"},
{DEFAULT_SCHEMA, "json_copy_strftime_if_date", "(x, format) AS x, (x DATE, format) AS strftime(x, format);"},
Expand Down
6 changes: 3 additions & 3 deletions src/duckdb/extension/json/json_functions/json_create.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,8 +186,8 @@ static unique_ptr<FunctionData> ArrayToJSONBind(BindScalarFunctionInput &input)
if (arguments[0]->HasParameter()) {
throw ParameterNotResolvedException();
}
if (arg_id != LogicalTypeId::LIST && arg_id != LogicalTypeId::SQLNULL) {
throw BinderException("array_to_json() argument type must be LIST");
if (arg_id != LogicalTypeId::LIST && arg_id != LogicalTypeId::ARRAY && arg_id != LogicalTypeId::SQLNULL) {
throw BinderException("array_to_json() argument type must be LIST or ARRAY");
}
return JSONCreateBindParams(bound_function, arguments, false);
}
Expand Down Expand Up @@ -288,7 +288,7 @@ static void AddKeyValuePairs(yyjson_mut_doc *doc, yyjson_mut_val *objs[], const
for (idx_t i = 0; i < count; i++) {
auto key_entry = keys[i];
if (!key_entry.IsValid()) {
continue;
throw InvalidInputException("JSON key cannot be NULL");
}
auto key = CreateJSONValue<string_t, string_t>::Operation(doc, key_entry.GetValue());
yyjson_mut_obj_add(objs[i], key, vals[i]);
Expand Down
42 changes: 39 additions & 3 deletions src/duckdb/extension/json/json_functions/json_table_in_out.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "json_common.hpp"
#include "json_functions.hpp"
#include "duckdb/common/string_util.hpp"
#include "duckdb/function/table_function.hpp"

namespace duckdb {
Expand Down Expand Up @@ -100,6 +101,40 @@ static unique_ptr<GlobalTableFunctionState> JSONTableInOutInitGlobal(ClientConte
return std::move(result);
}

//! Whether an object key can appear unquoted in a JSON path (as in "$.key"). Mirrors SQLite's json_each/json_tree:
//! only keys consisting of an ASCII letter followed by ASCII alphanumerics/underscores go unquoted
static bool JSONPathKeyNeedsQuoting(const char *data, const idx_t len) {
if (len == 0 || !StringUtil::CharacterIsAlpha(data[0])) {
return true;
}
for (idx_t i = 1; i < len; i++) {
if (!StringUtil::CharacterIsAlphaNumeric(data[i]) && data[i] != '_') {
return true;
}
}
return false;
}

//! Appends the object key held by "vkey" to "path" as a ".key" path element.
//! Keys for which JSONPathKeyNeedsQuoting returns true are wrapped in double quotes (as in "$.\"a.b\""), with any
//! embedded double quote or backslash prefixed by a backslash, so that the resulting path round-trips through
//! JSON path extraction (issue #23148)
static void AppendObjectPathElement(yyjson_val *vkey, string &path) {
	const auto key_data = unsafe_yyjson_get_str(vkey);
	const auto key_len = unsafe_yyjson_get_len(vkey);
	path += '.';
	if (JSONPathKeyNeedsQuoting(key_data, key_len)) {
		// quoted form: escape the characters that would otherwise terminate or corrupt the quoted key
		path += '"';
		for (idx_t pos = 0; pos < key_len; pos++) {
			const char c = key_data[pos];
			const bool needs_escape = c == '"' || c == '\\';
			if (needs_escape) {
				path += '\\';
			}
			path += c;
		}
		path += '"';
	} else {
		// plain form: the key can be copied verbatim
		path.append(key_data, key_len);
	}
}

struct JSONTableInOutRecursionNode {
JSONTableInOutRecursionNode(string path_p, yyjson_val *parent_val_p)
: path(std::move(path_p)), parent_val(parent_val_p), child_index(0) {
Expand Down Expand Up @@ -127,7 +162,7 @@ struct JSONTableInOutLocalState : LocalTableFunctionState {
void AddRecursionNode(yyjson_val *val, optional_ptr<yyjson_val> vkey, const optional_idx arr_index) {
string str;
if (vkey) {
str = "." + string(unsafe_yyjson_get_str(vkey.get()), unsafe_yyjson_get_len(vkey.get()));
AppendObjectPathElement(vkey.get(), str);
} else if (arr_index.IsValid()) {
str = "[" + to_string(arr_index.GetIndex()) + "]";
}
Expand Down Expand Up @@ -213,8 +248,9 @@ struct JSONTableInOutResult {
const auto path_str = lstate.GetPath();
if (fullkey.enabled) {
if (vkey) { // Object field
const auto vkey_str = string(unsafe_yyjson_get_str(vkey.get()), unsafe_yyjson_get_len(vkey.get()));
fullkey.data[count] = StringVector::AddString(fullkey.vector, path_str + "." + vkey_str);
auto fullkey_str = path_str;
AppendObjectPathElement(vkey.get(), fullkey_str);
fullkey.data[count] = StringVector::AddString(fullkey.vector, fullkey_str);
} else if (arr_el) { // Array element
const auto arr_path = "[" + to_string(recursion_nodes.back().child_index) + "]";
fullkey.data[count] = StringVector::AddString(fullkey.vector, path_str + arr_path);
Expand Down
Loading
Loading